about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: robot-contrib <robot-contrib@yandex-team.com> 2023-11-01 18:35:30 +0300
committer: robot-contrib <robot-contrib@yandex-team.com> 2023-11-01 19:02:08 +0300
commit: dd876eb3c079859377abf5b952b766b7cd99037d (patch)
tree: 33c1e2659001168d59c59a1340385c5486e1f8cb
parent: 0a532f08a13bcf0b6c8406d508dd219167c5ab96 (diff)
download: ydb-dd876eb3c079859377abf5b952b766b7cd99037d.tar.gz
Update contrib/libs/re2 to 2023-11-01
-rw-r--r-- contrib/libs/re2/re2/parse.cc 9
-rw-r--r-- contrib/libs/re2/re2/re2.cc 11
-rw-r--r-- contrib/libs/re2/re2/re2.h 17
-rw-r--r-- contrib/libs/re2/re2/unicode_casefold.cc 16
-rw-r--r-- contrib/libs/re2/re2/unicode_groups.cc 25
-rw-r--r-- contrib/libs/re2/ya.make 4
6 files changed, 53 insertions, 29 deletions
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index 442c31c07f..90b913eb48 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -26,6 +26,7 @@
#include <vector>
#include "absl/base/macros.h"
+#include "absl/strings/ascii.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/pod_array.h"
@@ -1322,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
// Parses a decimal integer, storing it in *np.
// Sets *s to span the remainder of the string.
static bool ParseInteger(absl::string_view* s, int* np) {
- if (s->empty() || !isdigit((*s)[0] & 0xFF))
+ if (s->empty() || !absl::ascii_isdigit((*s)[0] & 0xFF))
return false;
// Disallow leading zeros.
- if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
+ if (s->size() >= 2 && (*s)[0] == '0' && absl::ascii_isdigit((*s)[1] & 0xFF))
return false;
int n = 0;
int c;
- while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
+ while (!s->empty() && absl::ascii_isdigit(c = (*s)[0] & 0xFF)) {
// Avoid overflow.
if (n >= 100000000)
return false;
@@ -1468,7 +1469,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp,
int code;
switch (c) {
default:
- if (c < Runeself && !isalpha(c) && !isdigit(c)) {
+ if (c < Runeself && !absl::ascii_isalnum(c)) {
// Escaped non-word characters are always themselves.
// PCRE is not quite so rigorous: it accepts things like
// \q, but we don't. We once rejected \_, but too many
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index 3138b3b555..d592fc3ac7 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -27,6 +27,7 @@
#include "absl/base/macros.h"
#include "absl/container/fixed_array.h"
+#include "absl/strings/ascii.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/strutil.h"
@@ -975,7 +976,7 @@ bool RE2::CheckRewriteString(absl::string_view rewrite,
if (c == '\\') {
continue;
}
- if (!isdigit(c)) {
+ if (!absl::ascii_isdigit(c)) {
*error = "Rewrite schema error: "
"'\\' must be followed by a digit or '\\'.";
return false;
@@ -1005,7 +1006,7 @@ int RE2::MaxSubmatch(absl::string_view rewrite) {
if (*s == '\\') {
s++;
int c = (s < end) ? *s : -1;
- if (isdigit(c)) {
+ if (absl::ascii_isdigit(c)) {
int n = (c - '0');
if (n > max)
max = n;
@@ -1029,7 +1030,7 @@ bool RE2::Rewrite(std::string* out,
}
s++;
int c = (s < end) ? *s : -1;
- if (isdigit(c)) {
+ if (absl::ascii_isdigit(c)) {
int n = (c - '0');
if (n >= veclen) {
if (options_.log_errors()) {
@@ -1117,13 +1118,13 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
size_t* np, bool accept_spaces) {
size_t n = *np;
if (n == 0) return "";
- if (n > 0 && isspace(*str)) {
+ if (n > 0 && absl::ascii_isspace(*str)) {
// We are less forgiving than the strtoxxx() routines and do not
// allow leading spaces. We do allow leading spaces for floats.
if (!accept_spaces) {
return "";
}
- while (n > 0 && isspace(*str)) {
+ while (n > 0 && absl::ascii_isspace(*str)) {
n--;
str++;
}
diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h
index 6081c5f8b3..cc76f382d8 100644
--- a/contrib/libs/re2/re2/re2.h
+++ b/contrib/libs/re2/re2/re2.h
@@ -85,6 +85,11 @@
// std::string s;
// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
//
+// Example: extracts "ruby" into "s" and no value into "i"
+// absl::optional<int> i;
+// std::string s;
+// CHECK(RE2::FullMatch("ruby", "(\\w+)(?::(\\d+))?", &s, &i));
+//
// Example: fails because string cannot be stored in integer
// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
//
@@ -381,6 +386,7 @@ class RE2 {
// type, or one of:
// std::string (matched piece is copied to string)
// absl::string_view (string_view is mutated to point to matched piece)
+ // absl::optional<T> (T is a supported numeric or string type as above)
// T ("bool T::ParseFrom(const char*, size_t)" must exist)
// (void*)NULL (the corresponding matched sub-pattern is not copied)
//
@@ -394,11 +400,14 @@ class RE2 {
// ignored.
//
// CAVEAT: An optional sub-pattern that does not exist in the
- // matched string is assigned the empty string. Therefore, the
- // following will return false (because the empty string is not a
- // valid number):
+ // matched string is assigned the null string. Therefore, the
+ // following returns false because the null string - absence of
+ // a string (not even the empty string) - is not a valid number:
+ //
// int number;
// RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+ //
+ // Use absl::optional<int> instead to handle this case correctly.
template <typename... A>
static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) {
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
@@ -564,7 +573,7 @@ class RE2 {
ANCHOR_BOTH // Anchor at start and end
};
- // Return the number of capturing subpatterns, or -1 if the
+ // Return the number of capturing sub-patterns, or -1 if the
// regexp wasn't valid on construction. The overall match ($0)
// does not count: if the regexp is "(a)(b)", returns 2.
int NumberOfCapturingGroups() const { return num_captures_; }
diff --git a/contrib/libs/re2/re2/unicode_casefold.cc b/contrib/libs/re2/re2/unicode_casefold.cc
index d9de2821d5..297d0c8a41 100644
--- a/contrib/libs/re2/re2/unicode_casefold.cc
+++ b/contrib/libs/re2/re2/unicode_casefold.cc
@@ -7,7 +7,7 @@
namespace re2 {
-// 1424 groups, 2878 pairs, 367 ranges
+// 1427 groups, 2884 pairs, 372 ranges
const CaseFold unicode_casefold[] = {
{ 65, 90, 32 },
{ 97, 106, -32 },
@@ -141,11 +141,13 @@ const CaseFold unicode_casefold[] = {
{ 904, 906, 37 },
{ 908, 908, 64 },
{ 910, 911, 63 },
+ { 912, 912, 7235 },
{ 913, 929, 32 },
{ 931, 931, 31 },
{ 932, 939, 32 },
{ 940, 940, -38 },
{ 941, 943, -37 },
+ { 944, 944, 7219 },
{ 945, 945, -32 },
{ 946, 946, 30 },
{ 947, 948, -32 },
@@ -278,9 +280,11 @@ const CaseFold unicode_casefold[] = {
{ 8136, 8139, -86 },
{ 8140, 8140, -9 },
{ 8144, 8145, 8 },
+ { 8147, 8147, -7235 },
{ 8152, 8153, -8 },
{ 8154, 8155, -100 },
{ 8160, 8161, 8 },
+ { 8163, 8163, -7219 },
{ 8165, 8165, 7 },
{ 8168, 8169, -8 },
{ 8170, 8171, -112 },
@@ -354,6 +358,7 @@ const CaseFold unicode_casefold[] = {
{ 42997, 42998, OddEven },
{ 43859, 43859, -928 },
{ 43888, 43967, -38864 },
+ { 64261, 64262, OddEven },
{ 65313, 65338, 32 },
{ 65345, 65370, -32 },
{ 66560, 66599, 40 },
@@ -377,9 +382,9 @@ const CaseFold unicode_casefold[] = {
{ 125184, 125217, 34 },
{ 125218, 125251, -34 },
};
-const int num_unicode_casefold = 367;
+const int num_unicode_casefold = 372;
-// 1424 groups, 1454 pairs, 205 ranges
+// 1427 groups, 1457 pairs, 208 ranges
const CaseFold unicode_tolower[] = {
{ 65, 90, 32 },
{ 181, 181, 775 },
@@ -515,8 +520,10 @@ const CaseFold unicode_tolower[] = {
{ 8126, 8126, -7173 },
{ 8136, 8139, -86 },
{ 8140, 8140, -9 },
+ { 8147, 8147, -7235 },
{ 8152, 8153, -8 },
{ 8154, 8155, -100 },
+ { 8163, 8163, -7219 },
{ 8168, 8169, -8 },
{ 8170, 8171, -112 },
{ 8172, 8172, -7 },
@@ -575,6 +582,7 @@ const CaseFold unicode_tolower[] = {
{ 42966, 42968, EvenOddSkip },
{ 42997, 42997, OddEven },
{ 43888, 43967, -38864 },
+ { 64261, 64261, OddEven },
{ 65313, 65338, 32 },
{ 66560, 66599, 40 },
{ 66736, 66771, 40 },
@@ -587,7 +595,7 @@ const CaseFold unicode_tolower[] = {
{ 93760, 93791, 32 },
{ 125184, 125217, 34 },
};
-const int num_unicode_tolower = 205;
+const int num_unicode_tolower = 208;
diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc
index 3b58be4cb8..b2a7ba666e 100644
--- a/contrib/libs/re2/re2/unicode_groups.cc
+++ b/contrib/libs/re2/re2/unicode_groups.cc
@@ -735,6 +735,7 @@ static const URange32 L_range32[] = {
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
+ { 191472, 192093 },
{ 194560, 195101 },
{ 196608, 201546 },
{ 201552, 205743 },
@@ -1986,6 +1987,7 @@ static const URange32 Lo_range32[] = {
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
+ { 191472, 192093 },
{ 194560, 195101 },
{ 196608, 201546 },
{ 201552, 205743 },
@@ -4519,7 +4521,7 @@ static const URange16 S_range16[] = {
{ 11904, 11929 },
{ 11931, 12019 },
{ 12032, 12245 },
- { 12272, 12283 },
+ { 12272, 12287 },
{ 12292, 12292 },
{ 12306, 12307 },
{ 12320, 12320 },
@@ -4529,6 +4531,7 @@ static const URange16 S_range16[] = {
{ 12688, 12689 },
{ 12694, 12703 },
{ 12736, 12771 },
+ { 12783, 12783 },
{ 12800, 12830 },
{ 12842, 12871 },
{ 12880, 12880 },
@@ -4862,7 +4865,7 @@ static const URange16 So_range16[] = {
{ 11904, 11929 },
{ 11931, 12019 },
{ 12032, 12245 },
- { 12272, 12283 },
+ { 12272, 12287 },
{ 12292, 12292 },
{ 12306, 12307 },
{ 12320, 12320 },
@@ -4871,6 +4874,7 @@ static const URange16 So_range16[] = {
{ 12688, 12689 },
{ 12694, 12703 },
{ 12736, 12771 },
+ { 12783, 12783 },
{ 12800, 12830 },
{ 12842, 12871 },
{ 12880, 12880 },
@@ -5220,8 +5224,7 @@ static const URange16 Common_range16[] = {
{ 11126, 11157 },
{ 11159, 11263 },
{ 11776, 11869 },
- { 12272, 12283 },
- { 12288, 12292 },
+ { 12272, 12292 },
{ 12294, 12294 },
{ 12296, 12320 },
{ 12336, 12343 },
@@ -5231,6 +5234,7 @@ static const URange16 Common_range16[] = {
{ 12539, 12540 },
{ 12688, 12703 },
{ 12736, 12771 },
+ { 12783, 12783 },
{ 12832, 12895 },
{ 12927, 13007 },
{ 13055, 13055 },
@@ -5604,6 +5608,7 @@ static const URange32 Han_range32[] = {
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
+ { 191472, 192093 },
{ 194560, 195101 },
{ 196608, 201546 },
{ 201552, 205743 },
@@ -6302,7 +6307,7 @@ static const URange16 Yi_range16[] = {
static const URange32 Zanabazar_Square_range32[] = {
{ 72192, 72263 },
};
-// 4040 16-bit ranges, 1775 32-bit ranges
+// 4042 16-bit ranges, 1778 32-bit ranges
const UGroup unicode_groups[] = {
{ "Adlam", +1, 0, 0, Adlam_range32, 3 },
{ "Ahom", +1, 0, 0, Ahom_range32, 3 },
@@ -6356,7 +6361,7 @@ const UGroup unicode_groups[] = {
{ "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
{ "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
{ "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
- { "Han", +1, Han_range16, 11, Han_range32, 10 },
+ { "Han", +1, Han_range16, 11, Han_range32, 11 },
{ "Hangul", +1, Hangul_range16, 14, 0, 0 },
{ "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
{ "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
@@ -6378,7 +6383,7 @@ const UGroup unicode_groups[] = {
{ "Khmer", +1, Khmer_range16, 4, 0, 0 },
{ "Khojki", +1, 0, 0, Khojki_range32, 2 },
{ "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
- { "L", +1, L_range16, 380, L_range32, 279 },
+ { "L", +1, L_range16, 380, L_range32, 280 },
{ "Lao", +1, Lao_range16, 11, 0, 0 },
{ "Latin", +1, Latin_range16, 34, Latin_range32, 5 },
{ "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
@@ -6388,7 +6393,7 @@ const UGroup unicode_groups[] = {
{ "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
{ "Ll", +1, Ll_range16, 617, Ll_range32, 41 },
{ "Lm", +1, Lm_range16, 57, Lm_range32, 14 },
- { "Lo", +1, Lo_range16, 290, Lo_range32, 220 },
+ { "Lo", +1, Lo_range16, 290, Lo_range32, 221 },
{ "Lt", +1, Lt_range16, 10, 0, 0 },
{ "Lu", +1, Lu_range16, 605, Lu_range32, 41 },
{ "Lycian", +1, 0, 0, Lycian_range32, 1 },
@@ -6457,7 +6462,7 @@ const UGroup unicode_groups[] = {
{ "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
{ "Rejang", +1, Rejang_range16, 2, 0, 0 },
{ "Runic", +1, Runic_range16, 2, 0, 0 },
- { "S", +1, S_range16, 151, S_range32, 81 },
+ { "S", +1, S_range16, 152, S_range32, 81 },
{ "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
{ "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
{ "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
@@ -6468,7 +6473,7 @@ const UGroup unicode_groups[] = {
{ "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
{ "Sk", +1, Sk_range16, 30, Sk_range32, 1 },
{ "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
- { "So", +1, So_range16, 114, So_range32, 70 },
+ { "So", +1, So_range16, 115, So_range32, 70 },
{ "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
{ "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
{ "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make
index 81a886eed4..2e58923e52 100644
--- a/contrib/libs/re2/ya.make
+++ b/contrib/libs/re2/ya.make
@@ -9,9 +9,9 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(2023-09-01)
+VERSION(2023-11-01)
-ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-09-01.tar.gz)
+ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-11-01.tar.gz)
PEERDIR(
contrib/restricted/abseil-cpp/absl/base