diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2023-09-02 00:08:04 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2023-09-02 00:22:37 +0300 |
commit | 9281a5398d6bd424c83adf2672da3780d22e97d1 (patch) | |
tree | 144ccc737a02435e9dbc0f4ee32ff1b323f7b14c /contrib | |
parent | 41755088940129b1cbdda02718c50d190b2f87b4 (diff) | |
download | ydb-9281a5398d6bd424c83adf2672da3780d22e97d1.tar.gz |
Update contrib/libs/re2 to 2023-09-01
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/libs/re2/re2/dfa.cc | 7 | ||||
-rw-r--r-- | contrib/libs/re2/re2/parse.cc | 26 | ||||
-rw-r--r-- | contrib/libs/re2/re2/re2.h | 2 | ||||
-rw-r--r-- | contrib/libs/re2/re2/regexp.cc | 8 | ||||
-rw-r--r-- | contrib/libs/re2/re2/testing/parse_test.cc | 18 | ||||
-rw-r--r-- | contrib/libs/re2/util/pcre.cc | 2 | ||||
-rw-r--r-- | contrib/libs/re2/ya.make | 4 |
7 files changed, 42 insertions, 25 deletions
diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc index a177596b257..41fc61dd7b9 100644 --- a/contrib/libs/re2/re2/dfa.cc +++ b/contrib/libs/re2/re2/dfa.cc @@ -134,15 +134,8 @@ class DFA { // into this state, along with kFlagMatch if this // is a matching state. -// Work around the bug affecting flexible array members in GCC 6.x (for x >= 1). -// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932) -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1 - std::atomic<State*> next_[0]; // Outgoing arrows from State, - // one per input byte class -#else std::atomic<State*> next_[]; // Outgoing arrows from State, // one per input byte class -#endif }; enum { diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc index 0a5e4aa1ed7..442c31c07fd 100644 --- a/contrib/libs/re2/re2/parse.cc +++ b/contrib/libs/re2/re2/parse.cc @@ -2059,8 +2059,6 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) { return false; } - t.remove_prefix(2); // "(?" - // Check for named captures, first introduced in Python's regexp library. // As usual, there are three slightly different syntaxes: // @@ -2074,22 +2072,23 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) { // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the - // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. - if (t.size() > 2 && t[0] == 'P' && t[1] == '<') { + // Google source tree, (?P<name>expr) and (?<name>expr) are the + // dominant forms of named captures and both are supported. + if ((t.size() > 4 && t[2] == 'P' && t[3] == '<') || + (t.size() > 3 && t[2] == '<')) { // Pull out name. - size_t end = t.find('>', 2); + size_t begin = t[2] == 'P' ? 4 : 3; + size_t end = t.find('>', begin); if (end == absl::string_view::npos) { - if (!IsValidUTF8(*s, status_)) + if (!IsValidUTF8(t, status_)) return false; status_->set_code(kRegexpBadNamedCapture); - status_->set_error_arg(*s); + status_->set_error_arg(t); return false; } - // t is "P<name>...", t[end] == '>' - absl::string_view capture(t.data()-2, end+3); // "(?P<name>" - absl::string_view name(t.data()+2, end-2); // "name" + absl::string_view capture(t.data(), end+1); + absl::string_view name(t.data()+begin, end-begin); if (!IsValidUTF8(name, status_)) return false; if (!IsValidCaptureName(name)) { @@ -2103,11 +2102,12 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) { return false; } - s->remove_prefix( - static_cast<size_t>(capture.data() + capture.size() - s->data())); + s->remove_prefix(capture.size()); return true; } + t.remove_prefix(2); // "(?" + bool negated = false; bool sawflags = false; int nflags = flags_; diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h index f318f6deb6f..6081c5f8b30 100644 --- a/contrib/libs/re2/re2/re2.h +++ b/contrib/libs/re2/re2/re2.h @@ -1009,7 +1009,7 @@ inline RE2::Arg RE2::Octal(T* ptr) { } // Silence warnings about missing initializers for members of LazyRE2. -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6 +#if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif diff --git a/contrib/libs/re2/re2/regexp.cc b/contrib/libs/re2/re2/regexp.cc index 1614bb0fed3..4ea81cfcdc0 100644 --- a/contrib/libs/re2/re2/regexp.cc +++ b/contrib/libs/re2/re2/regexp.cc @@ -400,7 +400,13 @@ static bool TopEqual(Regexp* a, Regexp* b) { a->max() == b->max(); case kRegexpCapture: - return a->cap() == b->cap() && a->name() == b->name(); + if (a->name() == NULL || b->name() == NULL) { + // One pointer is null, so the other pointer should also be null. + return a->cap() == b->cap() && a->name() == b->name(); + } else { + // Neither pointer is null, so compare the pointees for equality. + return a->cap() == b->cap() && *a->name() == *b->name(); + } case kRegexpHaveMatch: return a->match_id() == b->match_id(); diff --git a/contrib/libs/re2/re2/testing/parse_test.cc b/contrib/libs/re2/re2/testing/parse_test.cc index 9d3954424a1..0ee5561e994 100644 --- a/contrib/libs/re2/re2/testing/parse_test.cc +++ b/contrib/libs/re2/re2/testing/parse_test.cc @@ -166,6 +166,8 @@ static Test tests[] = { // Test named captures { "(?P<name>a)", "cap{name:lit{a}}" }, { "(?P<中文>a)", "cap{中文:lit{a}}" }, + { "(?<name>a)", "cap{name:lit{a}}" }, + { "(?<中文>a)", "cap{中文:lit{a}}" }, // Case-folded literals { "[Aa]", "litfold{a}" }, @@ -396,6 +398,11 @@ const char* badtests[] = { "(?P<name", "(?P<x y>a)", "(?P<>a)", + "(?<name>a", + "(?<name>", + "(?<name", + "(?<x y>a)", + "(?<>a)", "[a-Z]", "(?i)[a-Z]", "a{100000}", @@ -416,6 +423,7 @@ const char* only_perl[] = { "\\Q\\\\\\\\\\E", "(?:a)", "(?P<name>a)", + "(?<name>a)", }; // Valid in POSIX, bad in Perl. @@ -505,6 +513,16 @@ TEST(NamedCaptures, ErrorArgs) { EXPECT_TRUE(re == NULL); EXPECT_EQ(status.code(), kRegexpBadNamedCapture); EXPECT_EQ(status.error_arg(), "(?P<space bar>"); + + re = Regexp::Parse("test(?<name", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadNamedCapture); + EXPECT_EQ(status.error_arg(), "(?<name"); + + re = Regexp::Parse("test(?<space bar>z)", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadNamedCapture); + EXPECT_EQ(status.error_arg(), "(?<space bar>"); } } // namespace re2 diff --git a/contrib/libs/re2/util/pcre.cc b/contrib/libs/re2/util/pcre.cc index 82b4f597457..f54cb28f839 100644 --- a/contrib/libs/re2/util/pcre.cc +++ b/contrib/libs/re2/util/pcre.cc @@ -21,7 +21,7 @@ #include "util/pcre.h" // Silence warnings about the wacky formatting in the operator() functions. -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6 +#if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic ignored "-Wmisleading-indentation" #endif diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make index e01531f4d51..81a886eed49 100644 --- a/contrib/libs/re2/ya.make +++ b/contrib/libs/re2/ya.make @@ -9,9 +9,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(2023-08-01) +VERSION(2023-09-01) -ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-08-01.tar.gz) +ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-09-01.tar.gz) PEERDIR( contrib/restricted/abseil-cpp/absl/base |