about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author robot-contrib <robot-contrib@yandex-team.com> 2024-02-03 13:52:37 +0300
committer Alexander Smirnov <alex@ydb.tech> 2024-02-09 19:17:32 +0300
commit 2ed9104e38ed23f276e507b97d25c12cca398899 (patch)
tree 050b888b478624c307d64362f4a26cbde473122e
parent 742514d7c9a9664bf01c7181f5ae05a791e35551 (diff)
download ydb-2ed9104e38ed23f276e507b97d25c12cca398899.tar.gz
Update contrib/libs/re2 to 2024-02-01
-rw-r--r-- contrib/libs/re2/AUTHORS 13
-rw-r--r-- contrib/libs/re2/re2/dfa.cc 8
-rw-r--r-- contrib/libs/re2/re2/parse.cc 23
-rw-r--r-- contrib/libs/re2/re2/re2.cc 5
-rw-r--r-- contrib/libs/re2/re2/testing/parse_test.cc 33
-rw-r--r-- contrib/libs/re2/ya.make 4
6 files changed, 64 insertions, 22 deletions
diff --git a/contrib/libs/re2/AUTHORS b/contrib/libs/re2/AUTHORS
deleted file mode 100644
index 0754006fec..0000000000
--- a/contrib/libs/re2/AUTHORS
+++ /dev/null
@@ -1,13 +0,0 @@
-# This is the official list of RE2 authors for copyright purposes.
-# This file is distinct from the CONTRIBUTORS files.
-# See the latter for an explanation.
-
-# Names should be added to this file as
-# Name or Organization <email address>
-# The email address is not required for organizations.
-
-# Please keep the list sorted.
-
-Google Inc.
-Samsung Electronics
-Stefano Rivera <stefano.rivera@gmail.com>
diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc
index 41fc61dd7b..e35fcb2819 100644
--- a/contrib/libs/re2/re2/dfa.cc
+++ b/contrib/libs/re2/re2/dfa.cc
@@ -1367,7 +1367,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p;
if (ExtraDebug)
absl::FPrintF(stderr, "match @stx! [%s]\n", DumpState(s));
- if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+ if (params->matches != NULL) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@@ -1484,7 +1484,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p + 1;
if (ExtraDebug)
absl::FPrintF(stderr, "match @%d! [%s]\n", lastmatch - bp, DumpState(s));
- if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+ if (params->matches != NULL) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@@ -1551,7 +1551,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p;
if (ExtraDebug)
absl::FPrintF(stderr, "match @etx! [%s]\n", DumpState(s));
- if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+ if (params->matches != NULL) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
if (id == MatchSep)
@@ -1767,6 +1767,8 @@ bool DFA::Search(absl::string_view text, absl::string_view context,
params.anchored = anchored;
params.want_earliest_match = want_earliest_match;
params.run_forward = run_forward;
+ // matches should be null except when using RE2::Set.
+ DCHECK(matches == NULL || kind_ == Prog::kManyMatch);
params.matches = matches;
if (!AnalyzeSearch(&params)) {
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index 90b913eb48..c22f272553 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -1177,7 +1177,17 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
ccb.AddRange(it->lo, it->hi);
} else if (re->op() == kRegexpLiteral) {
- ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+ if (re->parse_flags() & Regexp::FoldCase) {
+ // AddFoldedRange() can terminate prematurely if the character class
+ // already contains the rune. For example, if it contains 'a' and we
+ // want to add folded 'a', it sees 'a' and stops without adding 'A'.
+ // To avoid that, we use an empty character class and then merge it.
+ CharClassBuilder tmp;
+ tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+ ccb.AddCharClass(&tmp);
+ } else {
+ ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+ }
} else {
LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
<< re->ToString();
@@ -2060,6 +2070,17 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) {
return false;
}
+ // Check for look-around assertions. This is NOT because we support them! ;)
+ // As per https://github.com/google/re2/issues/468, we really want to report
+ // kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions.
+ // Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!".
+ if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) ||
+ (t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) {
+ status_->set_code(kRegexpBadPerlOp);
+ status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3));
+ return false;
+ }
+
// Check for named captures, first introduced in Python's regexp library.
// As usual, there are three slightly different syntaxes:
//
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index d592fc3ac7..83edfedea1 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -9,7 +9,6 @@
#include "re2/re2.h"
-#include <assert.h>
#include <ctype.h>
#include <errno.h>
#ifdef _MSC_VER
@@ -454,8 +453,8 @@ bool RE2::Replace(std::string* str,
if (!re.Rewrite(&s, rewrite, vec, nvec))
return false;
- assert(vec[0].data() >= str->data());
- assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
+ DCHECK_GE(vec[0].data(), str->data());
+ DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size());
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
return true;
}
diff --git a/contrib/libs/re2/re2/testing/parse_test.cc b/contrib/libs/re2/re2/testing/parse_test.cc
index 0ee5561e99..7684b62a49 100644
--- a/contrib/libs/re2/re2/testing/parse_test.cc
+++ b/contrib/libs/re2/re2/testing/parse_test.cc
@@ -356,6 +356,13 @@ Test prefix_tests[] = {
"cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
"cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
"str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
+ // As per https://github.com/google/re2/issues/467,
+ // these should factor identically, but they didn't
+ // because AddFoldedRange() terminated prematurely.
+ { "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
+ { "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
+ { "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" },
+ { "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" },
};
// Test that prefix factoring works.
@@ -525,4 +532,30 @@ TEST(NamedCaptures, ErrorArgs) {
EXPECT_EQ(status.error_arg(), "(?<space bar>");
}
+// Test that look-around error args are correct.
+TEST(LookAround, ErrorArgs) {
+ RegexpStatus status;
+ Regexp* re;
+
+ re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?=");
+
+ re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?!");
+
+ re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?<=");
+
+ re = Regexp::Parse("(?<!foo).*", Regexp::LikePerl, &status);
+ EXPECT_TRUE(re == NULL);
+ EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+ EXPECT_EQ(status.error_arg(), "(?<!");
+}
+
} // namespace re2
diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make
index 2e58923e52..7f6fae30da 100644
--- a/contrib/libs/re2/ya.make
+++ b/contrib/libs/re2/ya.make
@@ -9,9 +9,9 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(2023-11-01)
+VERSION(2024-02-01)
-ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-11-01.tar.gz)
+ORIGINAL_SOURCE(https://github.com/google/re2/archive/2024-02-01.tar.gz)
PEERDIR(
contrib/restricted/abseil-cpp/absl/base