diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2023-11-01 18:35:30 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2023-11-01 19:02:08 +0300 |
commit | dd876eb3c079859377abf5b952b766b7cd99037d (patch) | |
tree | 33c1e2659001168d59c59a1340385c5486e1f8cb | |
parent | 0a532f08a13bcf0b6c8406d508dd219167c5ab96 (diff) | |
download | ydb-dd876eb3c079859377abf5b952b766b7cd99037d.tar.gz |
Update contrib/libs/re2 to 2023-11-01
-rw-r--r-- | contrib/libs/re2/re2/parse.cc | 9 | ||||
-rw-r--r-- | contrib/libs/re2/re2/re2.cc | 11 | ||||
-rw-r--r-- | contrib/libs/re2/re2/re2.h | 17 | ||||
-rw-r--r-- | contrib/libs/re2/re2/unicode_casefold.cc | 16 | ||||
-rw-r--r-- | contrib/libs/re2/re2/unicode_groups.cc | 25 | ||||
-rw-r--r-- | contrib/libs/re2/ya.make | 4 |
6 files changed, 53 insertions, 29 deletions
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc index 442c31c07f..90b913eb48 100644 --- a/contrib/libs/re2/re2/parse.cc +++ b/contrib/libs/re2/re2/parse.cc @@ -26,6 +26,7 @@ #include <vector> #include "absl/base/macros.h" +#include "absl/strings/ascii.h" #include "util/logging.h" #include "util/utf.h" #include "re2/pod_array.h" @@ -1322,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { // Parses a decimal integer, storing it in *np. // Sets *s to span the remainder of the string. static bool ParseInteger(absl::string_view* s, int* np) { - if (s->empty() || !isdigit((*s)[0] & 0xFF)) + if (s->empty() || !absl::ascii_isdigit((*s)[0] & 0xFF)) return false; // Disallow leading zeros. - if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF)) + if (s->size() >= 2 && (*s)[0] == '0' && absl::ascii_isdigit((*s)[1] & 0xFF)) return false; int n = 0; int c; - while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) { + while (!s->empty() && absl::ascii_isdigit(c = (*s)[0] & 0xFF)) { // Avoid overflow. if (n >= 100000000) return false; @@ -1468,7 +1469,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp, int code; switch (c) { default: - if (c < Runeself && !isalpha(c) && !isdigit(c)) { + if (c < Runeself && !absl::ascii_isalnum(c)) { // Escaped non-word characters are always themselves. // PCRE is not quite so rigorous: it accepts things like // \q, but we don't. We once rejected \_, but too many diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc index 3138b3b555..d592fc3ac7 100644 --- a/contrib/libs/re2/re2/re2.cc +++ b/contrib/libs/re2/re2/re2.cc @@ -27,6 +27,7 @@ #include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/strings/ascii.h" #include "absl/strings/str_format.h" #include "util/logging.h" #include "util/strutil.h" @@ -975,7 +976,7 @@ bool RE2::CheckRewriteString(absl::string_view rewrite, if (c == '\\') { continue; } - if (!isdigit(c)) { + if (!absl::ascii_isdigit(c)) { *error = "Rewrite schema error: " "'\\' must be followed by a digit or '\\'."; return false; @@ -1005,7 +1006,7 @@ int RE2::MaxSubmatch(absl::string_view rewrite) { if (*s == '\\') { s++; int c = (s < end) ? *s : -1; - if (isdigit(c)) { + if (absl::ascii_isdigit(c)) { int n = (c - '0'); if (n > max) max = n; @@ -1029,7 +1030,7 @@ bool RE2::Rewrite(std::string* out, } s++; int c = (s < end) ? *s : -1; - if (isdigit(c)) { + if (absl::ascii_isdigit(c)) { int n = (c - '0'); if (n >= veclen) { if (options_.log_errors()) { @@ -1117,13 +1118,13 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, size_t* np, bool accept_spaces) { size_t n = *np; if (n == 0) return ""; - if (n > 0 && isspace(*str)) { + if (n > 0 && absl::ascii_isspace(*str)) { // We are less forgiving than the strtoxxx() routines and do not // allow leading spaces. We do allow leading spaces for floats. if (!accept_spaces) { return ""; } - while (n > 0 && isspace(*str)) { + while (n > 0 && absl::ascii_isspace(*str)) { n--; str++; } diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h index 6081c5f8b3..cc76f382d8 100644 --- a/contrib/libs/re2/re2/re2.h +++ b/contrib/libs/re2/re2/re2.h @@ -85,6 +85,11 @@ // std::string s; // CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); // +// Example: extracts "ruby" into "s" and no value into "i" +// absl::optional<int> i; +// std::string s; +// CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i)); +// // Example: fails because string cannot be stored in integer // CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); // @@ -381,6 +386,7 @@ class RE2 { // type, or one of: // std::string (matched piece is copied to string) // absl::string_view (string_view is mutated to point to matched piece) + // absl::optional<T> (T is a supported numeric or string type as above) // T ("bool T::ParseFrom(const char*, size_t)" must exist) // (void*)NULL (the corresponding matched sub-pattern is not copied) // @@ -394,11 +400,14 @@ class RE2 { // ignored. // // CAVEAT: An optional sub-pattern that does not exist in the - // matched string is assigned the empty string. Therefore, the - // following will return false (because the empty string is not a - // valid number): + // matched string is assigned the null string. Therefore, the + // following returns false because the null string - absence of + // a string (not even the empty string) - is not a valid number: + // // int number; // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); + // + // Use absl::optional<int> instead to handle this case correctly. template <typename... A> static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) { return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...); @@ -564,7 +573,7 @@ class RE2 { ANCHOR_BOTH // Anchor at start and end }; - // Return the number of capturing subpatterns, or -1 if the + // Return the number of capturing sub-patterns, or -1 if the // regexp wasn't valid on construction. The overall match ($0) // does not count: if the regexp is "(a)(b)", returns 2. int NumberOfCapturingGroups() const { return num_captures_; } diff --git a/contrib/libs/re2/re2/unicode_casefold.cc b/contrib/libs/re2/re2/unicode_casefold.cc index d9de2821d5..297d0c8a41 100644 --- a/contrib/libs/re2/re2/unicode_casefold.cc +++ b/contrib/libs/re2/re2/unicode_casefold.cc @@ -7,7 +7,7 @@ namespace re2 { -// 1424 groups, 2878 pairs, 367 ranges +// 1427 groups, 2884 pairs, 372 ranges const CaseFold unicode_casefold[] = { { 65, 90, 32 }, { 97, 106, -32 }, @@ -141,11 +141,13 @@ const CaseFold unicode_casefold[] = { { 904, 906, 37 }, { 908, 908, 64 }, { 910, 911, 63 }, + { 912, 912, 7235 }, { 913, 929, 32 }, { 931, 931, 31 }, { 932, 939, 32 }, { 940, 940, -38 }, { 941, 943, -37 }, + { 944, 944, 7219 }, { 945, 945, -32 }, { 946, 946, 30 }, { 947, 948, -32 }, @@ -278,9 +280,11 @@ const CaseFold unicode_casefold[] = { { 8136, 8139, -86 }, { 8140, 8140, -9 }, { 8144, 8145, 8 }, + { 8147, 8147, -7235 }, { 8152, 8153, -8 }, { 8154, 8155, -100 }, { 8160, 8161, 8 }, + { 8163, 8163, -7219 }, { 8165, 8165, 7 }, { 8168, 8169, -8 }, { 8170, 8171, -112 }, @@ -354,6 +358,7 @@ const CaseFold unicode_casefold[] = { { 42997, 42998, OddEven }, { 43859, 43859, -928 }, { 43888, 43967, -38864 }, + { 64261, 64262, OddEven }, { 65313, 65338, 32 }, { 65345, 65370, -32 }, { 66560, 66599, 40 }, @@ -377,9 +382,9 @@ const CaseFold unicode_casefold[] = { { 125184, 125217, 34 }, { 125218, 125251, -34 }, }; -const int num_unicode_casefold = 367; +const int num_unicode_casefold = 372; -// 1424 groups, 1454 pairs, 205 ranges +// 1427 groups, 1457 pairs, 208 ranges const CaseFold unicode_tolower[] = { { 65, 90, 32 }, { 181, 181, 775 }, @@ -515,8 +520,10 @@ const CaseFold unicode_tolower[] = { { 8126, 8126, -7173 }, { 8136, 8139, -86 }, { 8140, 8140, -9 }, + { 8147, 8147, -7235 }, { 8152, 8153, -8 }, { 8154, 8155, -100 }, + { 8163, 8163, -7219 }, { 8168, 8169, -8 }, { 8170, 8171, -112 }, { 8172, 8172, -7 }, @@ -575,6 +582,7 @@ const CaseFold unicode_tolower[] = { { 42966, 42968, EvenOddSkip }, { 42997, 42997, OddEven }, { 43888, 43967, -38864 }, + { 64261, 64261, OddEven }, { 65313, 65338, 32 }, { 66560, 66599, 40 }, { 66736, 66771, 40 }, @@ -587,7 +595,7 @@ const CaseFold unicode_tolower[] = { { 93760, 93791, 32 }, { 125184, 125217, 34 }, }; -const int num_unicode_tolower = 205; +const int num_unicode_tolower = 208; diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc index 3b58be4cb8..b2a7ba666e 100644 --- a/contrib/libs/re2/re2/unicode_groups.cc +++ b/contrib/libs/re2/re2/unicode_groups.cc @@ -735,6 +735,7 @@ static const URange32 L_range32[] = { { 177984, 178205 }, { 178208, 183969 }, { 183984, 191456 }, + { 191472, 192093 }, { 194560, 195101 }, { 196608, 201546 }, { 201552, 205743 }, @@ -1986,6 +1987,7 @@ static const URange32 Lo_range32[] = { { 177984, 178205 }, { 178208, 183969 }, { 183984, 191456 }, + { 191472, 192093 }, { 194560, 195101 }, { 196608, 201546 }, { 201552, 205743 }, @@ -4519,7 +4521,7 @@ static const URange16 S_range16[] = { { 11904, 11929 }, { 11931, 12019 }, { 12032, 12245 }, - { 12272, 12283 }, + { 12272, 12287 }, { 12292, 12292 }, { 12306, 12307 }, { 12320, 12320 }, @@ -4529,6 +4531,7 @@ static const URange16 S_range16[] = { { 12688, 12689 }, { 12694, 12703 }, { 12736, 12771 }, + { 12783, 12783 }, { 12800, 12830 }, { 12842, 12871 }, { 12880, 12880 }, @@ -4862,7 +4865,7 @@ static const URange16 So_range16[] = { { 11904, 11929 }, { 11931, 12019 }, { 12032, 12245 }, - { 12272, 12283 }, + { 12272, 12287 }, { 12292, 12292 }, { 12306, 12307 }, { 12320, 12320 }, @@ -4871,6 +4874,7 @@ static const URange16 So_range16[] = { { 12688, 12689 }, { 12694, 12703 }, { 12736, 12771 }, + { 12783, 12783 }, { 12800, 12830 }, { 12842, 12871 }, { 12880, 12880 }, @@ -5220,8 +5224,7 @@ static const URange16 Common_range16[] = { { 11126, 11157 }, { 11159, 11263 }, { 11776, 11869 }, - { 12272, 12283 }, - { 12288, 12292 }, + { 12272, 12292 }, { 12294, 12294 }, { 12296, 12320 }, { 12336, 12343 }, @@ -5231,6 +5234,7 @@ static const URange16 Common_range16[] = { { 12539, 12540 }, { 12688, 12703 }, { 12736, 12771 }, + { 12783, 12783 }, { 12832, 12895 }, { 12927, 13007 }, { 13055, 13055 }, @@ -5604,6 +5608,7 @@ static const URange32 Han_range32[] = { { 177984, 178205 }, { 178208, 183969 }, { 183984, 191456 }, + { 191472, 192093 }, { 194560, 195101 }, { 196608, 201546 }, { 201552, 205743 }, @@ -6302,7 +6307,7 @@ static const URange16 Yi_range16[] = { static const URange32 Zanabazar_Square_range32[] = { { 72192, 72263 }, }; -// 4040 16-bit ranges, 1775 32-bit ranges +// 4042 16-bit ranges, 1778 32-bit ranges const UGroup unicode_groups[] = { { "Adlam", +1, 0, 0, Adlam_range32, 3 }, { "Ahom", +1, 0, 0, Ahom_range32, 3 }, @@ -6356,7 +6361,7 @@ const UGroup unicode_groups[] = { { "Gujarati", +1, Gujarati_range16, 14, 0, 0 }, { "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 }, { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 }, - { "Han", +1, Han_range16, 11, Han_range32, 10 }, + { "Han", +1, Han_range16, 11, Han_range32, 11 }, { "Hangul", +1, Hangul_range16, 14, 0, 0 }, { "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 }, { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 }, @@ -6378,7 +6383,7 @@ const UGroup unicode_groups[] = { { "Khmer", +1, Khmer_range16, 4, 0, 0 }, { "Khojki", +1, 0, 0, Khojki_range32, 2 }, { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 }, - { "L", +1, L_range16, 380, L_range32, 279 }, + { "L", +1, L_range16, 380, L_range32, 280 }, { "Lao", +1, Lao_range16, 11, 0, 0 }, { "Latin", +1, Latin_range16, 34, Latin_range32, 5 }, { "Lepcha", +1, Lepcha_range16, 3, 0, 0 }, @@ -6388,7 +6393,7 @@ const UGroup unicode_groups[] = { { "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 }, { "Ll", +1, Ll_range16, 617, Ll_range32, 41 }, { "Lm", +1, Lm_range16, 57, Lm_range32, 14 }, - { "Lo", +1, Lo_range16, 290, Lo_range32, 220 }, + { "Lo", +1, Lo_range16, 290, Lo_range32, 221 }, { "Lt", +1, Lt_range16, 10, 0, 0 }, { "Lu", +1, Lu_range16, 605, Lu_range32, 41 }, { "Lycian", +1, 0, 0, Lycian_range32, 1 }, @@ -6457,7 +6462,7 @@ const UGroup unicode_groups[] = { { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 }, { "Rejang", +1, Rejang_range16, 2, 0, 0 }, { "Runic", +1, Runic_range16, 2, 0, 0 }, - { "S", +1, S_range16, 151, S_range32, 81 }, + { "S", +1, S_range16, 152, S_range32, 81 }, { "Samaritan", +1, Samaritan_range16, 2, 0, 0 }, { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 }, { "Sc", +1, Sc_range16, 18, Sc_range32, 3 }, @@ -6468,7 +6473,7 @@ const UGroup unicode_groups[] = { { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 }, { "Sk", +1, Sk_range16, 30, Sk_range32, 1 }, { "Sm", +1, Sm_range16, 53, Sm_range32, 11 }, - { "So", +1, So_range16, 114, So_range32, 70 }, + { "So", +1, So_range16, 115, So_range32, 70 }, { "Sogdian", +1, 0, 0, Sogdian_range32, 1 }, { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 }, { "Soyombo", +1, 0, 0, Soyombo_range32, 1 }, diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make index 81a886eed4..2e58923e52 100644 --- a/contrib/libs/re2/ya.make +++ b/contrib/libs/re2/ya.make @@ -9,9 +9,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(2023-09-01) +VERSION(2023-11-01) -ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-09-01.tar.gz) +ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-11-01.tar.gz) PEERDIR( contrib/restricted/abseil-cpp/absl/base |