Update contrib/libs/re2 to 2023-11-01

author: robot-contrib <robot-contrib@yandex-team.com> 2023-11-01 18:35:30 +0300
committer: robot-contrib <robot-contrib@yandex-team.com> 2023-11-01 19:02:08 +0300
commit: dd876eb3c079859377abf5b952b766b7cd99037d (patch)
tree: 33c1e2659001168d59c59a1340385c5486e1f8cb
parent: 0a532f08a13bcf0b6c8406d508dd219167c5ab96 (diff)
download: ydb-dd876eb3c079859377abf5b952b766b7cd99037d.tar.gz
6 files changed, 53 insertions, 29 deletions
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index 442c31c07f..90b913eb48 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "absl/base/macros.h"
+#include "absl/strings/ascii.h"
 #include "util/logging.h"
 #include "util/utf.h"
 #include "re2/pod_array.h"
@@ -1322,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
 // Parses a decimal integer, storing it in *np.
 // Sets *s to span the remainder of the string.
 static bool ParseInteger(absl::string_view* s, int* np) {
-  if (s->empty() || !isdigit((*s)[0] & 0xFF))
+  if (s->empty() || !absl::ascii_isdigit((*s)[0] & 0xFF))
     return false;
   // Disallow leading zeros.
-  if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
+  if (s->size() >= 2 && (*s)[0] == '0' && absl::ascii_isdigit((*s)[1] & 0xFF))
     return false;
   int n = 0;
   int c;
-  while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
+  while (!s->empty() && absl::ascii_isdigit(c = (*s)[0] & 0xFF)) {
     // Avoid overflow.
     if (n >= 100000000)
       return false;
@@ -1468,7 +1469,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp,
   int code;
   switch (c) {
     default:
-      if (c < Runeself && !isalpha(c) && !isdigit(c)) {
+      if (c < Runeself && !absl::ascii_isalnum(c)) {
         // Escaped non-word characters are always themselves.
         // PCRE is not quite so rigorous: it accepts things like
         // \q, but we don't.  We once rejected \_, but too many
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index 3138b3b555..d592fc3ac7 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -27,6 +27,7 @@
 
 #include "absl/base/macros.h"
 #include "absl/container/fixed_array.h"
+#include "absl/strings/ascii.h"
 #include "absl/strings/str_format.h"
 #include "util/logging.h"
 #include "util/strutil.h"
@@ -975,7 +976,7 @@ bool RE2::CheckRewriteString(absl::string_view rewrite,
     if (c == '\\') {
       continue;
     }
-    if (!isdigit(c)) {
+    if (!absl::ascii_isdigit(c)) {
       *error = "Rewrite schema error: "
                "'\\' must be followed by a digit or '\\'.";
       return false;
@@ -1005,7 +1006,7 @@ int RE2::MaxSubmatch(absl::string_view rewrite) {
     if (*s == '\\') {
       s++;
       int c = (s < end) ? *s : -1;
-      if (isdigit(c)) {
+      if (absl::ascii_isdigit(c)) {
         int n = (c - '0');
         if (n > max)
           max = n;
@@ -1029,7 +1030,7 @@ bool RE2::Rewrite(std::string* out,
     }
     s++;
     int c = (s < end) ? *s : -1;
-    if (isdigit(c)) {
+    if (absl::ascii_isdigit(c)) {
       int n = (c - '0');
       if (n >= veclen) {
         if (options_.log_errors()) {
@@ -1117,13 +1118,13 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
                                    size_t* np, bool accept_spaces) {
   size_t n = *np;
   if (n == 0) return "";
-  if (n > 0 && isspace(*str)) {
+  if (n > 0 && absl::ascii_isspace(*str)) {
     // We are less forgiving than the strtoxxx() routines and do not
     // allow leading spaces. We do allow leading spaces for floats.
     if (!accept_spaces) {
       return "";
     }
-    while (n > 0 && isspace(*str)) {
+    while (n > 0 && absl::ascii_isspace(*str)) {
       n--;
       str++;
     }
diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h
index 6081c5f8b3..cc76f382d8 100644
--- a/contrib/libs/re2/re2/re2.h
+++ b/contrib/libs/re2/re2/re2.h
@@ -85,6 +85,11 @@
 //    std::string s;
 //    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
 //
+// Example: extracts "ruby" into "s" and no value into "i"
+//    absl::optional<int> i;
+//    std::string s;
+//    CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
+//
 // Example: fails because string cannot be stored in integer
 //    CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
 //
@@ -381,6 +386,7 @@ class RE2 {
   // type, or one of:
   //    std::string        (matched piece is copied to string)
   //    absl::string_view  (string_view is mutated to point to matched piece)
+  //    absl::optional<T>  (T is a supported numeric or string type as above)
   //    T                  ("bool T::ParseFrom(const char*, size_t)" must exist)
   //    (void*)NULL        (the corresponding matched sub-pattern is not copied)
   //
@@ -394,11 +400,14 @@ class RE2 {
   //      ignored.
   //
   // CAVEAT: An optional sub-pattern that does not exist in the
-  // matched string is assigned the empty string.  Therefore, the
-  // following will return false (because the empty string is not a
-  // valid number):
+  // matched string is assigned the null string.  Therefore, the
+  // following returns false because the null string - absence of
+  // a string (not even the empty string) - is not a valid number:
+  //
   //    int number;
   //    RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+  //
+  // Use absl::optional<int> instead to handle this case correctly.
   template <typename... A>
   static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) {
     return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
@@ -564,7 +573,7 @@ class RE2 {
     ANCHOR_BOTH         // Anchor at start and end
   };
 
-  // Return the number of capturing subpatterns, or -1 if the
+  // Return the number of capturing sub-patterns, or -1 if the
   // regexp wasn't valid on construction.  The overall match ($0)
   // does not count: if the regexp is "(a)(b)", returns 2.
   int NumberOfCapturingGroups() const { return num_captures_; }
diff --git a/contrib/libs/re2/re2/unicode_casefold.cc b/contrib/libs/re2/re2/unicode_casefold.cc
index d9de2821d5..297d0c8a41 100644
--- a/contrib/libs/re2/re2/unicode_casefold.cc
+++ b/contrib/libs/re2/re2/unicode_casefold.cc
@@ -7,7 +7,7 @@
 namespace re2 {
 
 
-// 1424 groups, 2878 pairs, 367 ranges
+// 1427 groups, 2884 pairs, 372 ranges
 const CaseFold unicode_casefold[] = {
 	{ 65, 90, 32 },
 	{ 97, 106, -32 },
@@ -141,11 +141,13 @@ const CaseFold unicode_casefold[] = {
 	{ 904, 906, 37 },
 	{ 908, 908, 64 },
 	{ 910, 911, 63 },
+	{ 912, 912, 7235 },
 	{ 913, 929, 32 },
 	{ 931, 931, 31 },
 	{ 932, 939, 32 },
 	{ 940, 940, -38 },
 	{ 941, 943, -37 },
+	{ 944, 944, 7219 },
 	{ 945, 945, -32 },
 	{ 946, 946, 30 },
 	{ 947, 948, -32 },
@@ -278,9 +280,11 @@ const CaseFold unicode_casefold[] = {
 	{ 8136, 8139, -86 },
 	{ 8140, 8140, -9 },
 	{ 8144, 8145, 8 },
+	{ 8147, 8147, -7235 },
 	{ 8152, 8153, -8 },
 	{ 8154, 8155, -100 },
 	{ 8160, 8161, 8 },
+	{ 8163, 8163, -7219 },
 	{ 8165, 8165, 7 },
 	{ 8168, 8169, -8 },
 	{ 8170, 8171, -112 },
@@ -354,6 +358,7 @@ const CaseFold unicode_casefold[] = {
 	{ 42997, 42998, OddEven },
 	{ 43859, 43859, -928 },
 	{ 43888, 43967, -38864 },
+	{ 64261, 64262, OddEven },
 	{ 65313, 65338, 32 },
 	{ 65345, 65370, -32 },
 	{ 66560, 66599, 40 },
@@ -377,9 +382,9 @@ const CaseFold unicode_casefold[] = {
 	{ 125184, 125217, 34 },
 	{ 125218, 125251, -34 },
 };
-const int num_unicode_casefold = 367;
+const int num_unicode_casefold = 372;
 
-// 1424 groups, 1454 pairs, 205 ranges
+// 1427 groups, 1457 pairs, 208 ranges
 const CaseFold unicode_tolower[] = {
 	{ 65, 90, 32 },
 	{ 181, 181, 775 },
@@ -515,8 +520,10 @@ const CaseFold unicode_tolower[] = {
 	{ 8126, 8126, -7173 },
 	{ 8136, 8139, -86 },
 	{ 8140, 8140, -9 },
+	{ 8147, 8147, -7235 },
 	{ 8152, 8153, -8 },
 	{ 8154, 8155, -100 },
+	{ 8163, 8163, -7219 },
 	{ 8168, 8169, -8 },
 	{ 8170, 8171, -112 },
 	{ 8172, 8172, -7 },
@@ -575,6 +582,7 @@ const CaseFold unicode_tolower[] = {
 	{ 42966, 42968, EvenOddSkip },
 	{ 42997, 42997, OddEven },
 	{ 43888, 43967, -38864 },
+	{ 64261, 64261, OddEven },
 	{ 65313, 65338, 32 },
 	{ 66560, 66599, 40 },
 	{ 66736, 66771, 40 },
@@ -587,7 +595,7 @@ const CaseFold unicode_tolower[] = {
 	{ 93760, 93791, 32 },
 	{ 125184, 125217, 34 },
 };
-const int num_unicode_tolower = 205;
+const int num_unicode_tolower = 208;
 
 
 
diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc
index 3b58be4cb8..b2a7ba666e 100644
--- a/contrib/libs/re2/re2/unicode_groups.cc
+++ b/contrib/libs/re2/re2/unicode_groups.cc
@@ -735,6 +735,7 @@ static const URange32 L_range32[] = {
 	{ 177984, 178205 },
 	{ 178208, 183969 },
 	{ 183984, 191456 },
+	{ 191472, 192093 },
 	{ 194560, 195101 },
 	{ 196608, 201546 },
 	{ 201552, 205743 },
@@ -1986,6 +1987,7 @@ static const URange32 Lo_range32[] = {
 	{ 177984, 178205 },
 	{ 178208, 183969 },
 	{ 183984, 191456 },
+	{ 191472, 192093 },
 	{ 194560, 195101 },
 	{ 196608, 201546 },
 	{ 201552, 205743 },
@@ -4519,7 +4521,7 @@ static const URange16 S_range16[] = {
 	{ 11904, 11929 },
 	{ 11931, 12019 },
 	{ 12032, 12245 },
-	{ 12272, 12283 },
+	{ 12272, 12287 },
 	{ 12292, 12292 },
 	{ 12306, 12307 },
 	{ 12320, 12320 },
@@ -4529,6 +4531,7 @@ static const URange16 S_range16[] = {
 	{ 12688, 12689 },
 	{ 12694, 12703 },
 	{ 12736, 12771 },
+	{ 12783, 12783 },
 	{ 12800, 12830 },
 	{ 12842, 12871 },
 	{ 12880, 12880 },
@@ -4862,7 +4865,7 @@ static const URange16 So_range16[] = {
 	{ 11904, 11929 },
 	{ 11931, 12019 },
 	{ 12032, 12245 },
-	{ 12272, 12283 },
+	{ 12272, 12287 },
 	{ 12292, 12292 },
 	{ 12306, 12307 },
 	{ 12320, 12320 },
@@ -4871,6 +4874,7 @@ static const URange16 So_range16[] = {
 	{ 12688, 12689 },
 	{ 12694, 12703 },
 	{ 12736, 12771 },
+	{ 12783, 12783 },
 	{ 12800, 12830 },
 	{ 12842, 12871 },
 	{ 12880, 12880 },
@@ -5220,8 +5224,7 @@ static const URange16 Common_range16[] = {
 	{ 11126, 11157 },
 	{ 11159, 11263 },
 	{ 11776, 11869 },
-	{ 12272, 12283 },
-	{ 12288, 12292 },
+	{ 12272, 12292 },
 	{ 12294, 12294 },
 	{ 12296, 12320 },
 	{ 12336, 12343 },
@@ -5231,6 +5234,7 @@ static const URange16 Common_range16[] = {
 	{ 12539, 12540 },
 	{ 12688, 12703 },
 	{ 12736, 12771 },
+	{ 12783, 12783 },
 	{ 12832, 12895 },
 	{ 12927, 13007 },
 	{ 13055, 13055 },
@@ -5604,6 +5608,7 @@ static const URange32 Han_range32[] = {
 	{ 177984, 178205 },
 	{ 178208, 183969 },
 	{ 183984, 191456 },
+	{ 191472, 192093 },
 	{ 194560, 195101 },
 	{ 196608, 201546 },
 	{ 201552, 205743 },
@@ -6302,7 +6307,7 @@ static const URange16 Yi_range16[] = {
 static const URange32 Zanabazar_Square_range32[] = {
 	{ 72192, 72263 },
 };
-// 4040 16-bit ranges, 1775 32-bit ranges
+// 4042 16-bit ranges, 1778 32-bit ranges
 const UGroup unicode_groups[] = {
 	{ "Adlam", +1, 0, 0, Adlam_range32, 3 },
 	{ "Ahom", +1, 0, 0, Ahom_range32, 3 },
@@ -6356,7 +6361,7 @@ const UGroup unicode_groups[] = {
 	{ "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
 	{ "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
 	{ "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
-	{ "Han", +1, Han_range16, 11, Han_range32, 10 },
+	{ "Han", +1, Han_range16, 11, Han_range32, 11 },
 	{ "Hangul", +1, Hangul_range16, 14, 0, 0 },
 	{ "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
 	{ "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
@@ -6378,7 +6383,7 @@ const UGroup unicode_groups[] = {
 	{ "Khmer", +1, Khmer_range16, 4, 0, 0 },
 	{ "Khojki", +1, 0, 0, Khojki_range32, 2 },
 	{ "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
-	{ "L", +1, L_range16, 380, L_range32, 279 },
+	{ "L", +1, L_range16, 380, L_range32, 280 },
 	{ "Lao", +1, Lao_range16, 11, 0, 0 },
 	{ "Latin", +1, Latin_range16, 34, Latin_range32, 5 },
 	{ "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
@@ -6388,7 +6393,7 @@ const UGroup unicode_groups[] = {
 	{ "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
 	{ "Ll", +1, Ll_range16, 617, Ll_range32, 41 },
 	{ "Lm", +1, Lm_range16, 57, Lm_range32, 14 },
-	{ "Lo", +1, Lo_range16, 290, Lo_range32, 220 },
+	{ "Lo", +1, Lo_range16, 290, Lo_range32, 221 },
 	{ "Lt", +1, Lt_range16, 10, 0, 0 },
 	{ "Lu", +1, Lu_range16, 605, Lu_range32, 41 },
 	{ "Lycian", +1, 0, 0, Lycian_range32, 1 },
@@ -6457,7 +6462,7 @@ const UGroup unicode_groups[] = {
 	{ "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
 	{ "Rejang", +1, Rejang_range16, 2, 0, 0 },
 	{ "Runic", +1, Runic_range16, 2, 0, 0 },
-	{ "S", +1, S_range16, 151, S_range32, 81 },
+	{ "S", +1, S_range16, 152, S_range32, 81 },
 	{ "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
 	{ "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
 	{ "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
@@ -6468,7 +6473,7 @@ const UGroup unicode_groups[] = {
 	{ "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
 	{ "Sk", +1, Sk_range16, 30, Sk_range32, 1 },
 	{ "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
-	{ "So", +1, So_range16, 114, So_range32, 70 },
+	{ "So", +1, So_range16, 115, So_range32, 70 },
 	{ "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
 	{ "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
 	{ "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make
index 81a886eed4..2e58923e52 100644
--- a/contrib/libs/re2/ya.make
+++ b/contrib/libs/re2/ya.make
@@ -9,9 +9,9 @@ LICENSE(
 
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
-VERSION(2023-09-01)
+VERSION(2023-11-01)
 
-ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-09-01.tar.gz)
+ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-11-01.tar.gz)
 
 PEERDIR(
     contrib/restricted/abseil-cpp/absl/base
author	robot-contrib <robot-contrib@yandex-team.com>	2023-11-01 18:35:30 +0300
committer	robot-contrib <robot-contrib@yandex-team.com>	2023-11-01 19:02:08 +0300
commit	dd876eb3c079859377abf5b952b766b7cd99037d (patch)
tree	33c1e2659001168d59c59a1340385c5486e1f8cb
parent	0a532f08a13bcf0b6c8406d508dd219167c5ab96 (diff)
download	ydb-dd876eb3c079859377abf5b952b766b7cd99037d.tar.gz