diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2024-02-03 13:52:37 +0300 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-02-09 19:17:32 +0300 |
commit | 2ed9104e38ed23f276e507b97d25c12cca398899 (patch) | |
tree | 050b888b478624c307d64362f4a26cbde473122e | |
parent | 742514d7c9a9664bf01c7181f5ae05a791e35551 (diff) | |
download | ydb-2ed9104e38ed23f276e507b97d25c12cca398899.tar.gz |
Update contrib/libs/re2 to 2024-02-01
-rw-r--r-- | contrib/libs/re2/AUTHORS | 13 | ||||
-rw-r--r-- | contrib/libs/re2/re2/dfa.cc | 8 | ||||
-rw-r--r-- | contrib/libs/re2/re2/parse.cc | 23 | ||||
-rw-r--r-- | contrib/libs/re2/re2/re2.cc | 5 | ||||
-rw-r--r-- | contrib/libs/re2/re2/testing/parse_test.cc | 33 | ||||
-rw-r--r-- | contrib/libs/re2/ya.make | 4 |
6 files changed, 64 insertions, 22 deletions
diff --git a/contrib/libs/re2/AUTHORS b/contrib/libs/re2/AUTHORS deleted file mode 100644 index 0754006fec..0000000000 --- a/contrib/libs/re2/AUTHORS +++ /dev/null @@ -1,13 +0,0 @@ -# This is the official list of RE2 authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. - -# Names should be added to this file as -# Name or Organization <email address> -# The email address is not required for organizations. - -# Please keep the list sorted. - -Google Inc. -Samsung Electronics -Stefano Rivera <stefano.rivera@gmail.com> diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc index 41fc61dd7b..e35fcb2819 100644 --- a/contrib/libs/re2/re2/dfa.cc +++ b/contrib/libs/re2/re2/dfa.cc @@ -1367,7 +1367,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { lastmatch = p; if (ExtraDebug) absl::FPrintF(stderr, "match @stx! [%s]\n", DumpState(s)); - if (params->matches != NULL && kind_ == Prog::kManyMatch) { + if (params->matches != NULL) { for (int i = s->ninst_ - 1; i >= 0; i--) { int id = s->inst_[i]; if (id == MatchSep) @@ -1484,7 +1484,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { lastmatch = p + 1; if (ExtraDebug) absl::FPrintF(stderr, "match @%d! [%s]\n", lastmatch - bp, DumpState(s)); - if (params->matches != NULL && kind_ == Prog::kManyMatch) { + if (params->matches != NULL) { for (int i = s->ninst_ - 1; i >= 0; i--) { int id = s->inst_[i]; if (id == MatchSep) @@ -1551,7 +1551,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { lastmatch = p; if (ExtraDebug) absl::FPrintF(stderr, "match @etx! 
[%s]\n", DumpState(s)); - if (params->matches != NULL && kind_ == Prog::kManyMatch) { + if (params->matches != NULL) { for (int i = s->ninst_ - 1; i >= 0; i--) { int id = s->inst_[i]; if (id == MatchSep) @@ -1767,6 +1767,8 @@ bool DFA::Search(absl::string_view text, absl::string_view context, params.anchored = anchored; params.want_earliest_match = want_earliest_match; params.run_forward = run_forward; + // matches should be null except when using RE2::Set. + DCHECK(matches == NULL || kind_ == Prog::kManyMatch); params.matches = matches; if (!AnalyzeSearch(&params)) { diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc index 90b913eb48..c22f272553 100644 --- a/contrib/libs/re2/re2/parse.cc +++ b/contrib/libs/re2/re2/parse.cc @@ -1177,7 +1177,17 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub, for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it) ccb.AddRange(it->lo, it->hi); } else if (re->op() == kRegexpLiteral) { - ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + if (re->parse_flags() & Regexp::FoldCase) { + // AddFoldedRange() can terminate prematurely if the character class + // already contains the rune. For example, if it contains 'a' and we + // want to add folded 'a', it sees 'a' and stops without adding 'A'. + // To avoid that, we use an empty character class and then merge it. + CharClassBuilder tmp; + tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + ccb.AddCharClass(&tmp); + } else { + ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags()); + } } else { LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " " << re->ToString(); @@ -2060,6 +2070,17 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) { return false; } + // Check for look-around assertions. This is NOT because we support them! ;) + // As per https://github.com/google/re2/issues/468, we really want to report + // kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions. 
+ // Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!". + if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) || + (t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) { + status_->set_code(kRegexpBadPerlOp); + status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3)); + return false; + } + // Check for named captures, first introduced in Python's regexp library. // As usual, there are three slightly different syntaxes: // diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc index d592fc3ac7..83edfedea1 100644 --- a/contrib/libs/re2/re2/re2.cc +++ b/contrib/libs/re2/re2/re2.cc @@ -9,7 +9,6 @@ #include "re2/re2.h" -#include <assert.h> #include <ctype.h> #include <errno.h> #ifdef _MSC_VER @@ -454,8 +453,8 @@ bool RE2::Replace(std::string* str, if (!re.Rewrite(&s, rewrite, vec, nvec)) return false; - assert(vec[0].data() >= str->data()); - assert(vec[0].data() + vec[0].size() <= str->data() + str->size()); + DCHECK_GE(vec[0].data(), str->data()); + DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size()); str->replace(vec[0].data() - str->data(), vec[0].size(), s); return true; } diff --git a/contrib/libs/re2/re2/testing/parse_test.cc b/contrib/libs/re2/re2/testing/parse_test.cc index 0ee5561e99..7684b62a49 100644 --- a/contrib/libs/re2/re2/testing/parse_test.cc +++ b/contrib/libs/re2/re2/testing/parse_test.cc @@ -356,6 +356,13 @@ Test prefix_tests[] = { "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}" "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}" "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" }, + // As per https://github.com/google/re2/issues/467, + // these should factor identically, but they didn't + // because AddFoldedRange() terminated prematurely. 
+ { "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" }, + { "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" }, + { "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" }, + { "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" }, }; // Test that prefix factoring works. @@ -525,4 +532,30 @@ TEST(NamedCaptures, ErrorArgs) { EXPECT_EQ(status.error_arg(), "(?<space bar>"); } +// Test that look-around error args are correct. +TEST(LookAround, ErrorArgs) { + RegexpStatus status; + Regexp* re; + + re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?="); + + re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?!"); + + re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?<="); + + re = Regexp::Parse("(?<!foo).*", Regexp::LikePerl, &status); + EXPECT_TRUE(re == NULL); + EXPECT_EQ(status.code(), kRegexpBadPerlOp); + EXPECT_EQ(status.error_arg(), "(?<!"); +} + } // namespace re2 diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make index 2e58923e52..7f6fae30da 100644 --- a/contrib/libs/re2/ya.make +++ b/contrib/libs/re2/ya.make @@ -9,9 +9,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(2023-11-01) +VERSION(2024-02-01) -ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-11-01.tar.gz) +ORIGINAL_SOURCE(https://github.com/google/re2/archive/2024-02-01.tar.gz) PEERDIR( contrib/restricted/abseil-cpp/absl/base |