aboutsummaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
author robot-contrib <robot-contrib@yandex-team.com> 2022-12-16 12:29:02 +0300
committer robot-contrib <robot-contrib@yandex-team.com> 2022-12-16 12:29:02 +0300
commit a8bebdbb76e6a6e629f5ca6c84425f7f51649892 (patch)
tree a6a70f6bfe80ea55889e1731678a1e7550399e4c /contrib
parent cb6356ca0d52f1d0437996c4f5d662c5201f682b (diff)
download ydb-a8bebdbb76e6a6e629f5ca6c84425f7f51649892.tar.gz
Update contrib/libs/re2 to 2022-12-01
Diffstat (limited to 'contrib')
-rw-r--r-- contrib/libs/re2/re2/compile.cc 6
-rw-r--r-- contrib/libs/re2/re2/dfa.cc 2
-rw-r--r-- contrib/libs/re2/re2/parse.cc 4
-rw-r--r-- contrib/libs/re2/re2/prefilter.cc 51
-rw-r--r-- contrib/libs/re2/re2/prefilter.h 24
-rw-r--r-- contrib/libs/re2/re2/prog.cc 2
-rw-r--r-- contrib/libs/re2/re2/re2.cc 124
-rw-r--r-- contrib/libs/re2/re2/re2.h 82
-rw-r--r-- contrib/libs/re2/re2/regexp.cc 38
-rw-r--r-- contrib/libs/re2/re2/set.cc 4
-rw-r--r-- contrib/libs/re2/re2/simplify.cc 4
-rw-r--r-- contrib/libs/re2/re2/testing/filtered_re2_test.cc 3
-rw-r--r-- contrib/libs/re2/re2/unicode_groups.cc 225
-rw-r--r-- contrib/libs/re2/util/rune.cc 8
14 files changed, 363 insertions, 214 deletions
diff --git a/contrib/libs/re2/re2/compile.cc b/contrib/libs/re2/re2/compile.cc
index 61d801a630..03bffab421 100644
--- a/contrib/libs/re2/re2/compile.cc
+++ b/contrib/libs/re2/re2/compile.cc
@@ -789,8 +789,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
// Should not be called.
Frag Compiler::Copy(Frag arg) {
// We're using WalkExponential; there should be no copying.
- LOG(DFATAL) << "Compiler::Copy called!";
failed_ = true;
+ LOG(DFATAL) << "Compiler::Copy called!";
return NoMatch();
}
@@ -916,8 +916,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
CharClass* cc = re->cc();
if (cc->empty()) {
// This can't happen.
- LOG(DFATAL) << "No ranges in char class";
failed_ = true;
+ LOG(DFATAL) << "No ranges in char class";
return NoMatch();
}
@@ -974,8 +974,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
case kRegexpNoWordBoundary:
return EmptyWidth(kEmptyNonWordBoundary);
}
- LOG(DFATAL) << "Missing case in Compiler: " << re->op();
failed_ = true;
+ LOG(DFATAL) << "Missing case in Compiler: " << re->op();
return NoMatch();
}
diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc
index d47c7d50a7..55def2b1be 100644
--- a/contrib/libs/re2/re2/dfa.cc
+++ b/contrib/libs/re2/re2/dfa.cc
@@ -1675,8 +1675,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
if (!AnalyzeSearchHelper(params, info, flags)) {
ResetCache(params->cache_lock);
if (!AnalyzeSearchHelper(params, info, flags)) {
- LOG(DFATAL) << "Failed to analyze start state.";
params->failed = true;
+ LOG(DFATAL) << "Failed to analyze start state.";
return false;
}
}
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index 85f16f060b..d7a9fe5084 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -1589,8 +1589,6 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// return true;
}
- LOG(DFATAL) << "Not reached in ParseEscape.";
-
BadEscape:
// Unrecognized escape sequence.
status->set_code(kRegexpBadEscape);
@@ -2059,8 +2057,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
// Caller is supposed to check this.
if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
- LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
status_->set_code(kRegexpInternalError);
+ LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
return false;
}
diff --git a/contrib/libs/re2/re2/prefilter.cc b/contrib/libs/re2/re2/prefilter.cc
index a47b3120fb..37b0cf8a73 100644
--- a/contrib/libs/re2/re2/prefilter.cc
+++ b/contrib/libs/re2/re2/prefilter.cc
@@ -7,6 +7,7 @@
#include <stddef.h>
#include <stdint.h>
#include <string>
+#include <utility>
#include <vector>
#include "util/util.h"
@@ -21,9 +22,6 @@ namespace re2 {
static const bool ExtraDebug = false;
-typedef std::set<std::string>::iterator SSIter;
-typedef std::set<std::string>::const_iterator ConstSSIter;
-
// Initializes a Prefilter, allocating subs_ as necessary.
Prefilter::Prefilter(Op op) {
op_ = op;
@@ -140,7 +138,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
return AndOr(OR, a, b);
}
-static void SimplifyStringSet(std::set<std::string>* ss) {
+void Prefilter::SimplifyStringSet(SSet* ss) {
// Now make sure that the strings aren't redundant. For example, if
// we know "ab" is a required string, then it doesn't help at all to
// know that "abc" is also a required string, so delete "abc". This
@@ -149,13 +147,19 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
// candidate for match, so further matching "abc" is redundant.
// Note that we must ignore "" because find() would find it at the
// start of everything and thus we would end up erasing everything.
- for (SSIter i = ss->begin(); i != ss->end(); ++i) {
- if (i->empty())
- continue;
+ //
+ // The SSet sorts strings by length, then lexicographically. Note that
+ // smaller strings appear first and all strings must be unique. These
+ // observations let us skip string comparisons when possible.
+ SSIter i = ss->begin();
+ if (i != ss->end() && i->empty()) {
+ ++i;
+ }
+ for (; i != ss->end(); ++i) {
SSIter j = i;
++j;
while (j != ss->end()) {
- if (j->find(*i) != std::string::npos) {
+ if (j->size() > i->size() && j->find(*i) != std::string::npos) {
j = ss->erase(j);
continue;
}
@@ -164,7 +168,7 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
}
}
-Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
+Prefilter* Prefilter::OrStrings(SSet* ss) {
Prefilter* or_prefilter = new Prefilter(NONE);
SimplifyStringSet(ss);
for (SSIter i = ss->begin(); i != ss->end(); ++i)
@@ -226,14 +230,14 @@ class Prefilter::Info {
// Caller takes ownership of the Prefilter.
Prefilter* TakeMatch();
- std::set<std::string>& exact() { return exact_; }
+ SSet& exact() { return exact_; }
bool is_exact() const { return is_exact_; }
class Walker;
private:
- std::set<std::string> exact_;
+ SSet exact_;
// When is_exact_ is true, the strings that match
// are placed in exact_. When it is no longer an exact
@@ -286,18 +290,7 @@ std::string Prefilter::Info::ToString() {
return "";
}
-// Add the strings from src to dst.
-static void CopyIn(const std::set<std::string>& src,
- std::set<std::string>* dst) {
- for (ConstSSIter i = src.begin(); i != src.end(); ++i)
- dst->insert(*i);
-}
-
-// Add the cross-product of a and b to dst.
-// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const std::set<std::string>& a,
- const std::set<std::string>& b,
- std::set<std::string>* dst) {
+void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
for (ConstSSIter i = a.begin(); i != a.end(); ++i)
for (ConstSSIter j = b.begin(); j != b.end(); ++j)
dst->insert(*i + *j);
@@ -343,8 +336,14 @@ Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
Info *ab = new Info();
if (a->is_exact_ && b->is_exact_) {
- CopyIn(a->exact_, &ab->exact_);
- CopyIn(b->exact_, &ab->exact_);
+ // Avoid string copies by moving the larger exact_ set into
+ // ab directly, then merge in the smaller set.
+ if (a->exact_.size() < b->exact_.size()) {
+ using std::swap;
+ swap(a, b);
+ }
+ ab->exact_ = std::move(a->exact_);
+ ab->exact_.insert(b->exact_.begin(), b->exact_.end());
ab->is_exact_ = true;
} else {
// Either a or b has is_exact_ = false. If the other
@@ -532,8 +531,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
switch (re->op()) {
default:
case kRegexpRepeat:
- LOG(DFATAL) << "Bad regexp op " << re->op();
info = EmptyString();
+ LOG(DFATAL) << "Bad regexp op " << re->op();
break;
case kRegexpNoMatch:
diff --git a/contrib/libs/re2/re2/prefilter.h b/contrib/libs/re2/re2/prefilter.h
index 4fedeb4a7c..e149e59a86 100644
--- a/contrib/libs/re2/re2/prefilter.h
+++ b/contrib/libs/re2/re2/prefilter.h
@@ -60,8 +60,21 @@ class Prefilter {
std::string DebugString() const;
private:
+ // A comparator used to store exact strings. We compare by length,
+ // then lexicographically. This ordering makes it easier to reduce the
+ // set of strings in SimplifyStringSet.
+ struct LengthThenLex {
+ bool operator()(const std::string& a, const std::string& b) const {
+ return (a.size() < b.size()) || (a.size() == b.size() && a < b);
+ }
+ };
+
class Info;
+ using SSet = std::set<std::string, LengthThenLex>;
+ using SSIter = SSet::iterator;
+ using ConstSSIter = SSet::const_iterator;
+
// Combines two prefilters together to create an AND. The passed
// Prefilters will be part of the returned Prefilter or deleted.
static Prefilter* And(Prefilter* a, Prefilter* b);
@@ -77,12 +90,21 @@ class Prefilter {
static Prefilter* FromString(const std::string& str);
- static Prefilter* OrStrings(std::set<std::string>* ss);
+ static Prefilter* OrStrings(SSet* ss);
static Info* BuildInfo(Regexp* re);
Prefilter* Simplify();
+ // Removes redundant strings from the set. A string is redundant if
+ // any of the other strings appear as a substring. The empty string
+ // is a special case, which is ignored.
+ static void SimplifyStringSet(SSet* ss);
+
+ // Adds the cross-product of a and b to dst.
+ // (For each string i in a and j in b, add i+j.)
+ static void CrossProduct(const SSet& a, const SSet& b, SSet* dst);
+
// Kind of Prefilter.
Op op_;
diff --git a/contrib/libs/re2/re2/prog.cc b/contrib/libs/re2/re2/prog.cc
index a700d35de3..3b9596acab 100644
--- a/contrib/libs/re2/re2/prog.cc
+++ b/contrib/libs/re2/re2/prog.cc
@@ -511,7 +511,7 @@ void Prog::ComputeByteMap() {
builder.Build(bytemap_, &bytemap_range_);
- if (0) { // For debugging, use trivial bytemap.
+ if ((0)) { // For debugging, use trivial bytemap.
LOG(ERROR) << "Using trivial bytemap.";
for (int i = 0; i < 256; i++)
bytemap_[i] = static_cast<uint8_t>(i);
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index ad126d00bd..1c2645bf07 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -36,6 +36,13 @@
namespace re2 {
+// Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
+static int maximum_global_replace_count = -1;
+
+void RE2::FUZZING_ONLY_set_maximum_global_replace_count(int i) {
+ maximum_global_replace_count = i;
+}
+
// Maximum number of args we can set
static const int kMaxArgs = 16;
static const int kVecSize = 1+kMaxArgs;
@@ -43,11 +50,11 @@ static const int kVecSize = 1+kMaxArgs;
const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
RE2::Options::Options(RE2::CannedOptions opt)
- : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
+ : max_mem_(kDefaultMaxMem),
+ encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
posix_syntax_(opt == RE2::POSIX),
longest_match_(opt == RE2::POSIX),
log_errors_(opt != RE2::Quiet),
- max_mem_(kDefaultMaxMem),
literal_(false),
never_nl_(false),
dot_nl_(false),
@@ -58,11 +65,30 @@ RE2::Options::Options(RE2::CannedOptions opt)
one_line_(false) {
}
-// static empty objects for use as const references.
-// To avoid global constructors, allocated in RE2::Init().
-static const std::string* empty_string;
-static const std::map<std::string, int>* empty_named_groups;
-static const std::map<int, std::string>* empty_group_names;
+// Empty objects for use as const references.
+// Statically allocating the storage and then
+// lazily constructing the objects (in a once
+// in RE2::Init()) avoids global constructors
+// and the false positives (thanks, Valgrind)
+// about memory leaks at program termination.
+struct EmptyStorage {
+ std::string empty_string;
+ std::map<std::string, int> empty_named_groups;
+ std::map<int, std::string> empty_group_names;
+};
+alignas(EmptyStorage) static char empty_storage[sizeof(EmptyStorage)];
+
+static inline std::string* empty_string() {
+ return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_string;
+}
+
+static inline std::map<std::string, int>* empty_named_groups() {
+ return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_named_groups;
+}
+
+static inline std::map<int, std::string>* empty_group_names() {
+ return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_group_names;
+}
// Converts from Regexp error code to RE2 error code.
// Maybe some day they will diverge. In any event, this
@@ -173,23 +199,23 @@ int RE2::Options::ParseFlags() const {
void RE2::Init(const StringPiece& pattern, const Options& options) {
static std::once_flag empty_once;
std::call_once(empty_once, []() {
- empty_string = new std::string;
- empty_named_groups = new std::map<std::string, int>;
- empty_group_names = new std::map<int, std::string>;
+ (void) new (empty_storage) EmptyStorage;
});
- pattern_.assign(pattern.data(), pattern.size());
+ pattern_ = new std::string(pattern);
options_.Copy(options);
entire_regexp_ = NULL;
- error_ = empty_string;
- error_code_ = NoError;
- error_arg_.clear();
- prefix_.clear();
- prefix_foldcase_ = false;
suffix_regexp_ = NULL;
- prog_ = NULL;
+ error_ = empty_string();
+ error_arg_ = empty_string();
+
num_captures_ = -1;
+ error_code_ = NoError;
+ longest_match_ = options_.longest_match();
is_one_pass_ = false;
+ prefix_foldcase_ = false;
+ prefix_.clear();
+ prog_ = NULL;
rprog_ = NULL;
named_groups_ = NULL;
@@ -197,25 +223,29 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
RegexpStatus status;
entire_regexp_ = Regexp::Parse(
- pattern_,
+ *pattern_,
static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
&status);
if (entire_regexp_ == NULL) {
if (options_.log_errors()) {
- LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
+ LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
<< status.Text();
}
error_ = new std::string(status.Text());
error_code_ = RegexpErrorToRE2(status.code());
- error_arg_ = std::string(status.error_arg());
+ error_arg_ = new std::string(status.error_arg());
return;
}
+ bool foldcase;
re2::Regexp* suffix;
- if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
+ if (entire_regexp_->RequiredPrefix(&prefix_, &foldcase, &suffix)) {
+ prefix_foldcase_ = foldcase;
suffix_regexp_ = suffix;
- else
+ }
+ else {
suffix_regexp_ = entire_regexp_->Incref();
+ }
// Two thirds of the memory goes to the forward Prog,
// one third to the reverse prog, because the forward
@@ -223,7 +253,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
if (prog_ == NULL) {
if (options_.log_errors())
- LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
+ LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
error_ = new std::string("pattern too large - compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return;
@@ -249,7 +279,8 @@ re2::Prog* RE2::ReverseProg() const {
re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
if (re->rprog_ == NULL) {
if (re->options_.log_errors())
- LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
+ LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
+ << "'";
// We no longer touch error_ and error_code_ because failing to compile
// the reverse Prog is not a showstopper: falling back to NFA execution
// is fine. More importantly, an RE2 object is supposed to be logically
@@ -261,18 +292,21 @@ re2::Prog* RE2::ReverseProg() const {
}
RE2::~RE2() {
+ if (group_names_ != empty_group_names())
+ delete group_names_;
+ if (named_groups_ != empty_named_groups())
+ delete named_groups_;
+ delete rprog_;
+ delete prog_;
+ if (error_arg_ != empty_string())
+ delete error_arg_;
+ if (error_ != empty_string())
+ delete error_;
if (suffix_regexp_)
suffix_regexp_->Decref();
if (entire_regexp_)
entire_regexp_->Decref();
- delete prog_;
- delete rprog_;
- if (error_ != empty_string)
- delete error_;
- if (named_groups_ != NULL && named_groups_ != empty_named_groups)
- delete named_groups_;
- if (group_names_ != NULL && group_names_ != empty_group_names)
- delete group_names_;
+ delete pattern_;
}
int RE2::ProgramSize() const {
@@ -352,7 +386,7 @@ const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
if (re->suffix_regexp_ != NULL)
re->named_groups_ = re->suffix_regexp_->NamedCaptures();
if (re->named_groups_ == NULL)
- re->named_groups_ = empty_named_groups;
+ re->named_groups_ = empty_named_groups();
}, this);
return *named_groups_;
}
@@ -363,7 +397,7 @@ const std::map<int, std::string>& RE2::CapturingGroupNames() const {
if (re->suffix_regexp_ != NULL)
re->group_names_ = re->suffix_regexp_->CaptureNames();
if (re->group_names_ == NULL)
- re->group_names_ = empty_group_names;
+ re->group_names_ = empty_group_names();
}, this);
return *group_names_;
}
@@ -439,13 +473,10 @@ int RE2::GlobalReplace(std::string* str,
const char* lastend = NULL;
std::string out;
int count = 0;
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- // Iterate just once when fuzzing. Otherwise, we easily get bogged down
- // and coverage is unlikely to improve despite significant expense.
- while (p == str->data()) {
-#else
while (p <= ep) {
-#endif
+ if (maximum_global_replace_count != -1 &&
+ count >= maximum_global_replace_count)
+ break;
if (!re.Match(*str, static_cast<size_t>(p - str->data()),
str->size(), UNANCHORED, vec, nvec))
break;
@@ -686,9 +717,8 @@ bool RE2::Match(const StringPiece& text,
}
Prog::Anchor anchor = Prog::kUnanchored;
- Prog::MatchKind kind = Prog::kFirstMatch;
- if (options_.longest_match())
- kind = Prog::kLongestMatch;
+ Prog::MatchKind kind =
+ longest_match_ ? Prog::kLongestMatch : Prog::kFirstMatch;
bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
bool can_bit_state = prog_->CanBitState();
@@ -720,7 +750,7 @@ bool RE2::Match(const StringPiece& text,
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
+ << "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
@@ -740,7 +770,7 @@ bool RE2::Match(const StringPiece& text,
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
+ << "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
@@ -766,7 +796,7 @@ bool RE2::Match(const StringPiece& text,
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
+ << "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
@@ -809,7 +839,7 @@ bool RE2::Match(const StringPiece& text,
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
- << "pattern length " << pattern_.size() << ", "
+ << "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h
index c9694d35ac..09eb287136 100644
--- a/contrib/libs/re2/re2/re2.h
+++ b/contrib/libs/re2/re2/re2.h
@@ -66,17 +66,17 @@
// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
//
// -----------------------------------------------------------------------
-// MATCHING WITH SUBSTRING EXTRACTION:
+// SUBMATCH EXTRACTION:
//
-// You can supply extra pointer arguments to extract matched substrings.
+// You can supply extra pointer arguments to extract submatches.
// On match failure, none of the pointees will have been modified.
-// On match success, the substrings will be converted (as necessary) and
+// On match success, the submatches will be converted (as necessary) and
// their values will be assigned to their pointees until all conversions
// have succeeded or one conversion has failed.
// On conversion failure, the pointees will be in an indeterminate state
// because the caller has no way of knowing which conversion failed.
// However, conversion cannot fail for types like string and StringPiece
-// that do not inspect the substring contents. Hence, in the common case
+// that do not inspect the submatch contents. Hence, in the common case
// where all of the pointees are of such types, failure is always due to
// match failure and thus none of the pointees will have been modified.
//
@@ -100,10 +100,10 @@
// Example: integer overflow causes failure
// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
//
-// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
+// NOTE(rsc): Asking for submatches slows successful matches quite a bit.
// This may get a little faster in the future, but right now is slower
// than PCRE. On the other hand, failed matches run *very* fast (faster
-// than PCRE), as do matches without substring extraction.
+// than PCRE), as do matches without submatch extraction.
//
// -----------------------------------------------------------------------
// PARTIAL MATCHES
@@ -275,23 +275,35 @@ class RE2 {
// Need to have the const char* and const std::string& forms for implicit
// conversions when passing string literals to FullMatch and PartialMatch.
// Otherwise the StringPiece form would be sufficient.
-#ifndef SWIG
RE2(const char* pattern);
RE2(const std::string& pattern);
-#endif
RE2(const StringPiece& pattern);
RE2(const StringPiece& pattern, const Options& options);
// ambiguity resolution.
RE2(const TString& pattern) : RE2(StringPiece(pattern)) {}
~RE2();
+ // Not copyable.
+ // RE2 objects are expensive. You should probably use std::shared_ptr<RE2>
+ // instead. If you really must copy, RE2(first.pattern(), first.options())
+ // effectively does so: it produces a second object that mimics the first.
+ RE2(const RE2&) = delete;
+ RE2& operator=(const RE2&) = delete;
+ // Not movable.
+ // RE2 objects are thread-safe and logically immutable. You should probably
+ // use std::unique_ptr<RE2> instead. Otherwise, consider std::deque<RE2> if
+ // direct emplacement into a container is desired. If you really must move,
+ // be prepared to submit a design document along with your feature request.
+ RE2(RE2&&) = delete;
+ RE2& operator=(RE2&&) = delete;
+
// Returns whether RE2 was created properly.
bool ok() const { return error_code() == NoError; }
// The string specification for this RE2. E.g.
// RE2 re("ab*c?d+");
// re.pattern(); // "ab*c?d+"
- const std::string& pattern() const { return pattern_; }
+ const std::string& pattern() const { return *pattern_; }
// If RE2 could not be created properly, returns an error string.
// Else returns the empty string.
@@ -303,7 +315,7 @@ class RE2 {
// If RE2 could not be created properly, returns the offending
// portion of the regexp.
- const std::string& error_arg() const { return error_arg_; }
+ const std::string& error_arg() const { return *error_arg_; }
// Returns the program size, a very approximate measure of a regexp's "cost".
// Larger numbers are more expensive than smaller numbers.
@@ -336,7 +348,6 @@ class RE2 {
static bool FindAndConsumeN(StringPiece* input, const RE2& re,
const Arg* const args[], int n);
-#ifndef SWIG
private:
template <typename F, typename SP>
static inline bool Apply(F f, SP sp, const RE2& re) {
@@ -442,7 +453,6 @@ class RE2 {
static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
}
-#endif
// Replace the first match of "re" in "str" with "rewrite".
// Within "rewrite", backslash-escaped digits (\1 to \9) can be
@@ -698,11 +708,11 @@ class RE2 {
};
Options() :
+ max_mem_(kDefaultMaxMem),
encoding_(EncodingUTF8),
posix_syntax_(false),
longest_match_(false),
log_errors_(true),
- max_mem_(kDefaultMaxMem),
literal_(false),
never_nl_(false),
dot_nl_(false),
@@ -715,6 +725,9 @@ class RE2 {
/*implicit*/ Options(CannedOptions);
+ int64_t max_mem() const { return max_mem_; }
+ void set_max_mem(int64_t m) { max_mem_ = m; }
+
Encoding encoding() const { return encoding_; }
void set_encoding(Encoding encoding) { encoding_ = encoding; }
@@ -727,9 +740,6 @@ class RE2 {
bool log_errors() const { return log_errors_; }
void set_log_errors(bool b) { log_errors_ = b; }
- int64_t max_mem() const { return max_mem_; }
- void set_max_mem(int64_t m) { max_mem_ = m; }
-
bool literal() const { return literal_; }
void set_literal(bool b) { literal_ = b; }
@@ -761,11 +771,11 @@ class RE2 {
int ParseFlags() const;
private:
+ int64_t max_mem_;
Encoding encoding_;
bool posix_syntax_;
bool longest_match_;
bool log_errors_;
- int64_t max_mem_;
bool literal_;
bool never_nl_;
bool dot_nl_;
@@ -787,6 +797,10 @@ class RE2 {
template <typename T>
static Arg Octal(T* ptr);
+ // Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
+ // FOR FUZZING ONLY.
+ static void FUZZING_ONLY_set_maximum_global_replace_count(int i);
+
private:
void Init(const StringPiece& pattern, const Options& options);
@@ -798,18 +812,23 @@ class RE2 {
re2::Prog* ReverseProg() const;
- std::string pattern_; // string regular expression
- Options options_; // option flags
- re2::Regexp* entire_regexp_; // parsed regular expression
- const std::string* error_; // error indicator (or points to empty string)
- ErrorCode error_code_; // error code
- std::string error_arg_; // fragment of regexp showing error
- std::string prefix_; // required prefix (before suffix_regexp_)
- bool prefix_foldcase_; // prefix_ is ASCII case-insensitive
- re2::Regexp* suffix_regexp_; // parsed regular expression, prefix_ removed
- re2::Prog* prog_; // compiled program for regexp
- int num_captures_; // number of capturing groups
- bool is_one_pass_; // can use prog_->SearchOnePass?
+ // First cache line is relatively cold fields.
+ const std::string* pattern_; // string regular expression
+ Options options_; // option flags
+ re2::Regexp* entire_regexp_; // parsed regular expression
+ re2::Regexp* suffix_regexp_; // parsed regular expression, prefix_ removed
+ const std::string* error_; // error indicator (or points to empty string)
+ const std::string* error_arg_; // fragment of regexp showing error (or ditto)
+
+ // Second cache line is relatively hot fields.
+ // These are ordered oddly to pack everything.
+ int num_captures_; // number of capturing groups
+ ErrorCode error_code_ : 29; // error code (29 bits is more than enough)
+ bool longest_match_ : 1; // cached copy of options_.longest_match()
+ bool is_one_pass_ : 1; // can use prog_->SearchOnePass?
+ bool prefix_foldcase_ : 1; // prefix_ is ASCII case-insensitive
+ std::string prefix_; // required prefix (before suffix_regexp_)
+ re2::Prog* prog_; // compiled program for regexp
// Reverse Prog for DFA execution only
mutable re2::Prog* rprog_;
@@ -821,9 +840,6 @@ class RE2 {
mutable std::once_flag rprog_once_;
mutable std::once_flag named_groups_once_;
mutable std::once_flag group_names_once_;
-
- RE2(const RE2&) = delete;
- RE2& operator=(const RE2&) = delete;
};
/***** Implementation details *****/
@@ -954,7 +970,6 @@ inline RE2::Arg RE2::Octal(T* ptr) {
});
}
-#ifndef SWIG
// Silence warnings about missing initializers for members of LazyRE2.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -1005,7 +1020,6 @@ class LazyRE2 {
void operator=(const LazyRE2&); // disallowed
};
-#endif
namespace hooks {
diff --git a/contrib/libs/re2/re2/regexp.cc b/contrib/libs/re2/re2/regexp.cc
index ca1318b43d..74ecb31969 100644
--- a/contrib/libs/re2/re2/regexp.cc
+++ b/contrib/libs/re2/re2/regexp.cc
@@ -74,16 +74,27 @@ bool Regexp::QuickDestroy() {
return false;
}
-// Lazily allocated.
-static Mutex* ref_mutex;
-static std::map<Regexp*, int>* ref_map;
+// Similar to EmptyStorage in re2.cc.
+struct RefStorage {
+ Mutex ref_mutex;
+ std::map<Regexp*, int> ref_map;
+};
+alignas(RefStorage) static char ref_storage[sizeof(RefStorage)];
+
+static inline Mutex* ref_mutex() {
+ return &reinterpret_cast<RefStorage*>(ref_storage)->ref_mutex;
+}
+
+static inline std::map<Regexp*, int>* ref_map() {
+ return &reinterpret_cast<RefStorage*>(ref_storage)->ref_map;
+}
int Regexp::Ref() {
if (ref_ < kMaxRef)
return ref_;
- MutexLock l(ref_mutex);
- return (*ref_map)[this];
+ MutexLock l(ref_mutex());
+ return (*ref_map())[this];
}
// Increments reference count, returns object as convenience.
@@ -91,18 +102,17 @@ Regexp* Regexp::Incref() {
if (ref_ >= kMaxRef-1) {
static std::once_flag ref_once;
std::call_once(ref_once, []() {
- ref_mutex = new Mutex;
- ref_map = new std::map<Regexp*, int>;
+ (void) new (ref_storage) RefStorage;
});
// Store ref count in overflow map.
- MutexLock l(ref_mutex);
+ MutexLock l(ref_mutex());
if (ref_ == kMaxRef) {
// already overflowed
- (*ref_map)[this]++;
+ (*ref_map())[this]++;
} else {
// overflowing now
- (*ref_map)[this] = kMaxRef;
+ (*ref_map())[this] = kMaxRef;
ref_ = kMaxRef;
}
return this;
@@ -116,13 +126,13 @@ Regexp* Regexp::Incref() {
void Regexp::Decref() {
if (ref_ == kMaxRef) {
// Ref count is stored in overflow map.
- MutexLock l(ref_mutex);
- int r = (*ref_map)[this] - 1;
+ MutexLock l(ref_mutex());
+ int r = (*ref_map())[this] - 1;
if (r < kMaxRef) {
ref_ = static_cast<uint16_t>(r);
- ref_map->erase(this);
+ ref_map()->erase(this);
} else {
- (*ref_map)[this] = r;
+ (*ref_map())[this] = r;
}
return;
}
diff --git a/contrib/libs/re2/re2/set.cc b/contrib/libs/re2/re2/set.cc
index 18705663a5..fe0ea055fe 100644
--- a/contrib/libs/re2/re2/set.cc
+++ b/contrib/libs/re2/re2/set.cc
@@ -128,9 +128,9 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
ErrorInfo* error_info) const {
if (!compiled_) {
- LOG(DFATAL) << "RE2::Set::Match() called before compiling";
if (error_info != NULL)
error_info->kind = kNotCompiled;
+ LOG(DFATAL) << "RE2::Set::Match() called before compiling";
return false;
}
#ifdef RE2_HAVE_THREAD_LOCAL
@@ -161,9 +161,9 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
}
if (v != NULL) {
if (matches->empty()) {
- LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
if (error_info != NULL)
error_info->kind = kInconsistent;
+ LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
return false;
}
v->assign(matches->begin(), matches->end());
diff --git a/contrib/libs/re2/re2/simplify.cc b/contrib/libs/re2/re2/simplify.cc
index 663d5fcd45..0df9051587 100644
--- a/contrib/libs/re2/re2/simplify.cc
+++ b/contrib/libs/re2/re2/simplify.cc
@@ -371,8 +371,8 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
break;
default:
- LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
nre->Decref();
+ LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
return;
}
@@ -432,8 +432,8 @@ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
}
default:
- LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
nre->Decref();
+ LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
return;
}
diff --git a/contrib/libs/re2/re2/testing/filtered_re2_test.cc b/contrib/libs/re2/re2/testing/filtered_re2_test.cc
index 073a70a745..79fd874078 100644
--- a/contrib/libs/re2/re2/testing/filtered_re2_test.cc
+++ b/contrib/libs/re2/re2/testing/filtered_re2_test.cc
@@ -106,12 +106,13 @@ AtomTest atom_tests[] = {
// substring in an OR are removed; that is, only the shortest
// substring is kept.
"SubstrAtomRemovesSuperStrInOr", {
- "(abc123|abc|ghi789|abc1234).*[x-z]+",
+ "(abc123|abc|defxyz|ghi789|abc1234|xyz).*[x-z]+",
"abcd..yyy..yyyzzz",
"mnmnpp[a-z]+PPP"
}, {
"abc",
"ghi789",
+ "xyz",
"abcd",
"yyy",
"yyyzzz",
diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc
index 2a8d7dae1f..3b58be4cb8 100644
--- a/contrib/libs/re2/re2/unicode_groups.cc
+++ b/contrib/libs/re2/re2/unicode_groups.cc
@@ -29,7 +29,7 @@ static const URange16 C_range16[] = {
static const URange32 C_range32[] = {
{ 69821, 69821 },
{ 69837, 69837 },
- { 78896, 78904 },
+ { 78896, 78911 },
{ 113824, 113827 },
{ 119155, 119162 },
{ 917505, 917505 },
@@ -60,7 +60,7 @@ static const URange16 Cf_range16[] = {
static const URange32 Cf_range32[] = {
{ 69821, 69821 },
{ 69837, 69837 },
- { 78896, 78904 },
+ { 78896, 78911 },
{ 113824, 113827 },
{ 119155, 119162 },
{ 917505, 917505 },
@@ -548,6 +548,7 @@ static const URange32 L_range32[] = {
{ 70108, 70108 },
{ 70144, 70161 },
{ 70163, 70187 },
+ { 70207, 70208 },
{ 70272, 70278 },
{ 70280, 70280 },
{ 70282, 70285 },
@@ -610,11 +611,15 @@ static const URange32 L_range32[] = {
{ 73066, 73097 },
{ 73112, 73112 },
{ 73440, 73458 },
+ { 73474, 73474 },
+ { 73476, 73488 },
+ { 73490, 73523 },
{ 73648, 73648 },
{ 73728, 74649 },
{ 74880, 75075 },
{ 77712, 77808 },
- { 77824, 78894 },
+ { 77824, 78895 },
+ { 78913, 78918 },
{ 82944, 83526 },
{ 92160, 92728 },
{ 92736, 92766 },
@@ -637,7 +642,9 @@ static const URange32 L_range32[] = {
{ 110581, 110587 },
{ 110589, 110590 },
{ 110592, 110882 },
+ { 110898, 110898 },
{ 110928, 110930 },
+ { 110933, 110933 },
{ 110948, 110951 },
{ 110960, 111355 },
{ 113664, 113770 },
@@ -675,11 +682,14 @@ static const URange32 L_range32[] = {
{ 120746, 120770 },
{ 120772, 120779 },
{ 122624, 122654 },
+ { 122661, 122666 },
+ { 122928, 122989 },
{ 123136, 123180 },
{ 123191, 123197 },
{ 123214, 123214 },
{ 123536, 123565 },
{ 123584, 123627 },
+ { 124112, 124139 },
{ 124896, 124902 },
{ 124904, 124907 },
{ 124909, 124910 },
@@ -721,12 +731,13 @@ static const URange32 L_range32[] = {
{ 126629, 126633 },
{ 126635, 126651 },
{ 131072, 173791 },
- { 173824, 177976 },
+ { 173824, 177977 },
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
+ { 201552, 205743 },
};
static const URange16 Ll_range16[] = {
{ 97, 122 },
@@ -1387,6 +1398,7 @@ static const URange32 Ll_range32[] = {
{ 120779, 120779 },
{ 122624, 122633 },
{ 122635, 122654 },
+ { 122661, 122666 },
{ 125218, 125251 },
};
static const URange16 Lm_range16[] = {
@@ -1459,7 +1471,9 @@ static const URange32 Lm_range32[] = {
{ 110576, 110579 },
{ 110581, 110587 },
{ 110589, 110590 },
+ { 122928, 122989 },
{ 123191, 123197 },
+ { 124139, 124139 },
{ 125259, 125259 },
};
static const URange16 Lo_range16[] = {
@@ -1829,6 +1843,7 @@ static const URange32 Lo_range32[] = {
{ 70108, 70108 },
{ 70144, 70161 },
{ 70163, 70187 },
+ { 70207, 70208 },
{ 70272, 70278 },
{ 70280, 70280 },
{ 70282, 70285 },
@@ -1890,11 +1905,15 @@ static const URange32 Lo_range32[] = {
{ 73066, 73097 },
{ 73112, 73112 },
{ 73440, 73458 },
+ { 73474, 73474 },
+ { 73476, 73488 },
+ { 73490, 73523 },
{ 73648, 73648 },
{ 73728, 74649 },
{ 74880, 75075 },
{ 77712, 77808 },
- { 77824, 78894 },
+ { 77824, 78895 },
+ { 78913, 78918 },
{ 82944, 83526 },
{ 92160, 92728 },
{ 92736, 92766 },
@@ -1909,7 +1928,9 @@ static const URange32 Lo_range32[] = {
{ 100352, 101589 },
{ 101632, 101640 },
{ 110592, 110882 },
+ { 110898, 110898 },
{ 110928, 110930 },
+ { 110933, 110933 },
{ 110948, 110951 },
{ 110960, 111355 },
{ 113664, 113770 },
@@ -1921,6 +1942,7 @@ static const URange32 Lo_range32[] = {
{ 123214, 123214 },
{ 123536, 123565 },
{ 123584, 123627 },
+ { 124112, 124138 },
{ 124896, 124902 },
{ 124904, 124907 },
{ 124909, 124910 },
@@ -1960,12 +1982,13 @@ static const URange32 Lo_range32[] = {
{ 126629, 126633 },
{ 126635, 126651 },
{ 131072, 173791 },
- { 173824, 177976 },
+ { 173824, 177977 },
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
+ { 201552, 205743 },
};
static const URange16 Lt_range16[] = {
{ 453, 453 },
@@ -2710,6 +2733,7 @@ static const URange16 M_range16[] = {
{ 3274, 3277 },
{ 3285, 3286 },
{ 3298, 3299 },
+ { 3315, 3315 },
{ 3328, 3331 },
{ 3387, 3388 },
{ 3390, 3396 },
@@ -2728,7 +2752,7 @@ static const URange16 M_range16[] = {
{ 3655, 3662 },
{ 3761, 3761 },
{ 3764, 3772 },
- { 3784, 3789 },
+ { 3784, 3790 },
{ 3864, 3865 },
{ 3893, 3893 },
{ 3895, 3895 },
@@ -2832,6 +2856,7 @@ static const URange32 M_range32[] = {
{ 68325, 68326 },
{ 68900, 68903 },
{ 69291, 69292 },
+ { 69373, 69375 },
{ 69446, 69456 },
{ 69506, 69509 },
{ 69632, 69634 },
@@ -2851,6 +2876,7 @@ static const URange32 M_range32[] = {
{ 70094, 70095 },
{ 70188, 70199 },
{ 70206, 70206 },
+ { 70209, 70209 },
{ 70367, 70378 },
{ 70400, 70403 },
{ 70459, 70460 },
@@ -2898,6 +2924,12 @@ static const URange32 M_range32[] = {
{ 73104, 73105 },
{ 73107, 73111 },
{ 73459, 73462 },
+ { 73472, 73473 },
+ { 73475, 73475 },
+ { 73524, 73530 },
+ { 73534, 73538 },
+ { 78912, 78912 },
+ { 78919, 78933 },
{ 92912, 92916 },
{ 92976, 92982 },
{ 94031, 94031 },
@@ -2925,9 +2957,11 @@ static const URange32 M_range32[] = {
{ 122907, 122913 },
{ 122915, 122916 },
{ 122918, 122922 },
+ { 123023, 123023 },
{ 123184, 123190 },
{ 123566, 123566 },
{ 123628, 123631 },
+ { 124140, 124143 },
{ 125136, 125142 },
{ 125252, 125258 },
{ 917760, 917999 },
@@ -2968,6 +3002,7 @@ static const URange16 Mc_range16[] = {
{ 3271, 3272 },
{ 3274, 3275 },
{ 3285, 3286 },
+ { 3315, 3315 },
{ 3330, 3331 },
{ 3390, 3392 },
{ 3398, 3400 },
@@ -3108,6 +3143,10 @@ static const URange32 Mc_range32[] = {
{ 73107, 73108 },
{ 73110, 73110 },
{ 73461, 73462 },
+ { 73475, 73475 },
+ { 73524, 73525 },
+ { 73534, 73535 },
+ { 73537, 73537 },
{ 94033, 94087 },
{ 94192, 94193 },
{ 119141, 119142 },
@@ -3213,7 +3252,7 @@ static const URange16 Mn_range16[] = {
{ 3655, 3662 },
{ 3761, 3761 },
{ 3764, 3772 },
- { 3784, 3789 },
+ { 3784, 3790 },
{ 3864, 3865 },
{ 3893, 3893 },
{ 3895, 3895 },
@@ -3346,6 +3385,7 @@ static const URange32 Mn_range32[] = {
{ 68325, 68326 },
{ 68900, 68903 },
{ 69291, 69292 },
+ { 69373, 69375 },
{ 69446, 69456 },
{ 69506, 69509 },
{ 69633, 69633 },
@@ -3368,6 +3408,7 @@ static const URange32 Mn_range32[] = {
{ 70196, 70196 },
{ 70198, 70199 },
{ 70206, 70206 },
+ { 70209, 70209 },
{ 70367, 70367 },
{ 70371, 70378 },
{ 70400, 70401 },
@@ -3429,6 +3470,12 @@ static const URange32 Mn_range32[] = {
{ 73109, 73109 },
{ 73111, 73111 },
{ 73459, 73460 },
+ { 73472, 73473 },
+ { 73526, 73530 },
+ { 73536, 73536 },
+ { 73538, 73538 },
+ { 78912, 78912 },
+ { 78919, 78933 },
{ 92912, 92916 },
{ 92976, 92982 },
{ 94031, 94031 },
@@ -3453,9 +3500,11 @@ static const URange32 Mn_range32[] = {
{ 122907, 122913 },
{ 122915, 122916 },
{ 122918, 122922 },
+ { 123023, 123023 },
{ 123184, 123190 },
{ 123566, 123566 },
{ 123628, 123631 },
+ { 124140, 124143 },
{ 125136, 125142 },
{ 125252, 125258 },
{ 917760, 917999 },
@@ -3576,6 +3625,7 @@ static const URange32 N_range32[] = {
{ 72784, 72812 },
{ 73040, 73049 },
{ 73120, 73129 },
+ { 73552, 73561 },
{ 73664, 73684 },
{ 74752, 74862 },
{ 92768, 92777 },
@@ -3583,11 +3633,13 @@ static const URange32 N_range32[] = {
{ 93008, 93017 },
{ 93019, 93025 },
{ 93824, 93846 },
+ { 119488, 119507 },
{ 119520, 119539 },
{ 119648, 119672 },
{ 120782, 120831 },
{ 123200, 123209 },
{ 123632, 123641 },
+ { 124144, 124153 },
{ 125127, 125135 },
{ 125264, 125273 },
{ 126065, 126123 },
@@ -3655,12 +3707,14 @@ static const URange32 Nd_range32[] = {
{ 72784, 72793 },
{ 73040, 73049 },
{ 73120, 73129 },
+ { 73552, 73561 },
{ 92768, 92777 },
{ 92864, 92873 },
{ 93008, 93017 },
{ 120782, 120831 },
{ 123200, 123209 },
{ 123632, 123641 },
+ { 124144, 124153 },
{ 125264, 125273 },
{ 130032, 130041 },
};
@@ -3745,6 +3799,7 @@ static const URange32 No_range32[] = {
{ 73664, 73684 },
{ 93019, 93025 },
{ 93824, 93846 },
+ { 119488, 119507 },
{ 119520, 119539 },
{ 119648, 119672 },
{ 125127, 125135 },
@@ -3932,9 +3987,11 @@ static const URange32 P_range32[] = {
{ 72255, 72262 },
{ 72346, 72348 },
{ 72350, 72354 },
+ { 72448, 72457 },
{ 72769, 72773 },
{ 72816, 72817 },
{ 73463, 73464 },
+ { 73539, 73551 },
{ 73727, 73727 },
{ 74864, 74868 },
{ 77809, 77810 },
@@ -4255,9 +4312,11 @@ static const URange32 Po_range32[] = {
{ 72255, 72262 },
{ 72346, 72348 },
{ 72350, 72354 },
+ { 72448, 72457 },
{ 72769, 72773 },
{ 72816, 72817 },
{ 73463, 73464 },
+ { 73539, 73551 },
{ 73727, 73727 },
{ 74864, 74868 },
{ 77809, 77810 },
@@ -4564,10 +4623,10 @@ static const URange32 S_range32[] = {
{ 127568, 127569 },
{ 127584, 127589 },
{ 127744, 128727 },
- { 128733, 128748 },
+ { 128732, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
+ { 128768, 128886 },
+ { 128891, 128985 },
{ 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
@@ -4578,15 +4637,13 @@ static const URange32 S_range32[] = {
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
- { 129648, 129652 },
- { 129656, 129660 },
- { 129664, 129670 },
- { 129680, 129708 },
- { 129712, 129722 },
- { 129728, 129733 },
- { 129744, 129753 },
- { 129760, 129767 },
- { 129776, 129782 },
+ { 129648, 129660 },
+ { 129664, 129672 },
+ { 129680, 129725 },
+ { 129727, 129733 },
+ { 129742, 129755 },
+ { 129760, 129768 },
+ { 129776, 129784 },
{ 129792, 129938 },
{ 129940, 129994 },
};
@@ -4882,10 +4939,10 @@ static const URange32 So_range32[] = {
{ 127584, 127589 },
{ 127744, 127994 },
{ 128000, 128727 },
- { 128733, 128748 },
+ { 128732, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
+ { 128768, 128886 },
+ { 128891, 128985 },
{ 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
@@ -4896,15 +4953,13 @@ static const URange32 So_range32[] = {
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
- { 129648, 129652 },
- { 129656, 129660 },
- { 129664, 129670 },
- { 129680, 129708 },
- { 129712, 129722 },
- { 129728, 129733 },
- { 129744, 129753 },
- { 129760, 129767 },
- { 129776, 129782 },
+ { 129648, 129660 },
+ { 129664, 129672 },
+ { 129680, 129725 },
+ { 129727, 129733 },
+ { 129742, 129755 },
+ { 129760, 129768 },
+ { 129776, 129784 },
{ 129792, 129938 },
{ 129940, 129994 },
};
@@ -4972,6 +5027,7 @@ static const URange16 Arabic_range16[] = {
};
static const URange32 Arabic_range32[] = {
{ 69216, 69246 },
+ { 69373, 69375 },
{ 126464, 126467 },
{ 126469, 126495 },
{ 126497, 126498 },
@@ -5218,6 +5274,7 @@ static const URange32 Common_range32[] = {
{ 119171, 119172 },
{ 119180, 119209 },
{ 119214, 119274 },
+ { 119488, 119507 },
{ 119520, 119539 },
{ 119552, 119638 },
{ 119648, 119672 },
@@ -5258,10 +5315,10 @@ static const URange32 Common_range32[] = {
{ 127568, 127569 },
{ 127584, 127589 },
{ 127744, 128727 },
- { 128733, 128748 },
+ { 128732, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
- { 128896, 128984 },
+ { 128768, 128886 },
+ { 128891, 128985 },
{ 128992, 129003 },
{ 129008, 129008 },
{ 129024, 129035 },
@@ -5272,15 +5329,13 @@ static const URange32 Common_range32[] = {
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
- { 129648, 129652 },
- { 129656, 129660 },
- { 129664, 129670 },
- { 129680, 129708 },
- { 129712, 129722 },
- { 129728, 129733 },
- { 129744, 129753 },
- { 129760, 129767 },
- { 129776, 129782 },
+ { 129648, 129660 },
+ { 129664, 129672 },
+ { 129680, 129725 },
+ { 129727, 129733 },
+ { 129742, 129755 },
+ { 129760, 129768 },
+ { 129776, 129784 },
{ 129792, 129938 },
{ 129940, 129994 },
{ 130032, 130041 },
@@ -5319,6 +5374,10 @@ static const URange16 Cyrillic_range16[] = {
{ 42560, 42655 },
{ 65070, 65071 },
};
+static const URange32 Cyrillic_range32[] = {
+ { 122928, 122989 },
+ { 123023, 123023 },
+};
static const URange32 Deseret_range32[] = {
{ 66560, 66639 },
};
@@ -5328,6 +5387,9 @@ static const URange16 Devanagari_range16[] = {
{ 2406, 2431 },
{ 43232, 43263 },
};
+static const URange32 Devanagari_range32[] = {
+ { 72448, 72457 },
+};
static const URange32 Dives_Akuru_range32[] = {
{ 71936, 71942 },
{ 71945, 71945 },
@@ -5349,8 +5411,7 @@ static const URange32 Duployan_range32[] = {
{ 113820, 113823 },
};
static const URange32 Egyptian_Hieroglyphs_range32[] = {
- { 77824, 78894 },
- { 78896, 78904 },
+ { 77824, 78933 },
};
static const URange32 Elbasan_range32[] = {
{ 66816, 66855 },
@@ -5539,12 +5600,13 @@ static const URange32 Han_range32[] = {
{ 94178, 94179 },
{ 94192, 94193 },
{ 131072, 173791 },
- { 173824, 177976 },
+ { 173824, 177977 },
{ 177984, 178205 },
{ 178208, 183969 },
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
+ { 201552, 205743 },
};
static const URange16 Hangul_range16[] = {
{ 4352, 4607 },
@@ -5591,6 +5653,7 @@ static const URange16 Hiragana_range16[] = {
};
static const URange32 Hiragana_range32[] = {
{ 110593, 110879 },
+ { 110898, 110898 },
{ 110928, 110930 },
{ 127488, 127488 },
};
@@ -5661,7 +5724,7 @@ static const URange16 Kannada_range16[] = {
{ 3293, 3294 },
{ 3296, 3299 },
{ 3302, 3311 },
- { 3313, 3314 },
+ { 3313, 3315 },
};
static const URange16 Katakana_range16[] = {
{ 12449, 12538 },
@@ -5678,8 +5741,14 @@ static const URange32 Katakana_range32[] = {
{ 110589, 110590 },
{ 110592, 110592 },
{ 110880, 110882 },
+ { 110933, 110933 },
{ 110948, 110951 },
};
+static const URange32 Kawi_range32[] = {
+ { 73472, 73488 },
+ { 73490, 73530 },
+ { 73534, 73561 },
+};
static const URange16 Kayah_Li_range16[] = {
{ 43264, 43309 },
{ 43311, 43311 },
@@ -5706,7 +5775,7 @@ static const URange16 Khmer_range16[] = {
};
static const URange32 Khojki_range32[] = {
{ 70144, 70161 },
- { 70163, 70206 },
+ { 70163, 70209 },
};
static const URange32 Khudawadi_range32[] = {
{ 70320, 70378 },
@@ -5721,7 +5790,7 @@ static const URange16 Lao_range16[] = {
{ 3751, 3773 },
{ 3776, 3780 },
{ 3782, 3782 },
- { 3784, 3789 },
+ { 3784, 3790 },
{ 3792, 3801 },
{ 3804, 3807 },
};
@@ -5766,6 +5835,7 @@ static const URange32 Latin_range32[] = {
{ 67463, 67504 },
{ 67506, 67514 },
{ 122624, 122654 },
+ { 122661, 122666 },
};
static const URange16 Lepcha_range16[] = {
{ 7168, 7223 },
@@ -5903,6 +5973,9 @@ static const URange32 Nabataean_range32[] = {
{ 67712, 67742 },
{ 67751, 67759 },
};
+static const URange32 Nag_Mundari_range32[] = {
+ { 124112, 124153 },
+};
static const URange32 Nandinagari_range32[] = {
{ 72096, 72103 },
{ 72106, 72151 },
@@ -6229,12 +6302,12 @@ static const URange16 Yi_range16[] = {
static const URange32 Zanabazar_Square_range32[] = {
{ 72192, 72263 },
};
-// 4038 16-bit ranges, 1712 32-bit ranges
+// 4040 16-bit ranges, 1775 32-bit ranges
const UGroup unicode_groups[] = {
{ "Adlam", +1, 0, 0, Adlam_range32, 3 },
{ "Ahom", +1, 0, 0, Ahom_range32, 3 },
{ "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 },
- { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 },
+ { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 36 },
{ "Armenian", +1, Armenian_range16, 4, 0, 0 },
{ "Avestan", +1, 0, 0, Avestan_range32, 2 },
{ "Balinese", +1, Balinese_range16, 2, 0, 0 },
@@ -6259,19 +6332,19 @@ const UGroup unicode_groups[] = {
{ "Cherokee", +1, Cherokee_range16, 3, 0, 0 },
{ "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 },
{ "Co", +1, Co_range16, 1, Co_range32, 2 },
- { "Common", +1, Common_range16, 91, Common_range32, 83 },
+ { "Common", +1, Common_range16, 91, Common_range32, 82 },
{ "Coptic", +1, Coptic_range16, 3, 0, 0 },
{ "Cs", +1, Cs_range16, 1, 0, 0 },
{ "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 },
{ "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
{ "Cypro_Minoan", +1, 0, 0, Cypro_Minoan_range32, 1 },
- { "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 },
+ { "Cyrillic", +1, Cyrillic_range16, 8, Cyrillic_range32, 2 },
{ "Deseret", +1, 0, 0, Deseret_range32, 1 },
- { "Devanagari", +1, Devanagari_range16, 4, 0, 0 },
+ { "Devanagari", +1, Devanagari_range16, 4, Devanagari_range32, 1 },
{ "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 },
{ "Dogra", +1, 0, 0, Dogra_range32, 1 },
{ "Duployan", +1, 0, 0, Duployan_range32, 5 },
- { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 },
+ { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 1 },
{ "Elbasan", +1, 0, 0, Elbasan_range32, 1 },
{ "Elymaic", +1, 0, 0, Elymaic_range32, 1 },
{ "Ethiopic", +1, Ethiopic_range16, 32, Ethiopic_range32, 4 },
@@ -6283,13 +6356,13 @@ const UGroup unicode_groups[] = {
{ "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
{ "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
{ "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
- { "Han", +1, Han_range16, 11, Han_range32, 9 },
+ { "Han", +1, Han_range16, 11, Han_range32, 10 },
{ "Hangul", +1, Hangul_range16, 14, 0, 0 },
{ "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
{ "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
{ "Hatran", +1, 0, 0, Hatran_range32, 3 },
{ "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
- { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 3 },
+ { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 4 },
{ "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
{ "Inherited", +1, Inherited_range16, 19, Inherited_range32, 10 },
{ "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
@@ -6297,29 +6370,30 @@ const UGroup unicode_groups[] = {
{ "Javanese", +1, Javanese_range16, 3, 0, 0 },
{ "Kaithi", +1, 0, 0, Kaithi_range32, 2 },
{ "Kannada", +1, Kannada_range16, 13, 0, 0 },
- { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 6 },
+ { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 7 },
+ { "Kawi", +1, 0, 0, Kawi_range32, 3 },
{ "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 },
{ "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
{ "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 },
{ "Khmer", +1, Khmer_range16, 4, 0, 0 },
{ "Khojki", +1, 0, 0, Khojki_range32, 2 },
{ "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
- { "L", +1, L_range16, 380, L_range32, 268 },
+ { "L", +1, L_range16, 380, L_range32, 279 },
{ "Lao", +1, Lao_range16, 11, 0, 0 },
- { "Latin", +1, Latin_range16, 34, Latin_range32, 4 },
+ { "Latin", +1, Latin_range16, 34, Latin_range32, 5 },
{ "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
{ "Limbu", +1, Limbu_range16, 5, 0, 0 },
{ "Linear_A", +1, 0, 0, Linear_A_range32, 3 },
{ "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
{ "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
- { "Ll", +1, Ll_range16, 617, Ll_range32, 40 },
- { "Lm", +1, Lm_range16, 57, Lm_range32, 12 },
- { "Lo", +1, Lo_range16, 290, Lo_range32, 211 },
+ { "Ll", +1, Ll_range16, 617, Ll_range32, 41 },
+ { "Lm", +1, Lm_range16, 57, Lm_range32, 14 },
+ { "Lo", +1, Lo_range16, 290, Lo_range32, 220 },
{ "Lt", +1, Lt_range16, 10, 0, 0 },
{ "Lu", +1, Lu_range16, 605, Lu_range32, 41 },
{ "Lycian", +1, 0, 0, Lycian_range32, 1 },
{ "Lydian", +1, 0, 0, Lydian_range32, 2 },
- { "M", +1, M_range16, 189, M_range32, 110 },
+ { "M", +1, M_range16, 190, M_range32, 120 },
{ "Mahajani", +1, 0, 0, Mahajani_range32, 1 },
{ "Makasar", +1, 0, 0, Makasar_range32, 1 },
{ "Malayalam", +1, Malayalam_range16, 7, 0, 0 },
@@ -6327,7 +6401,7 @@ const UGroup unicode_groups[] = {
{ "Manichaean", +1, 0, 0, Manichaean_range32, 2 },
{ "Marchen", +1, 0, 0, Marchen_range32, 3 },
{ "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 },
- { "Mc", +1, Mc_range16, 111, Mc_range32, 66 },
+ { "Mc", +1, Mc_range16, 112, Mc_range32, 70 },
{ "Me", +1, Me_range16, 5, 0, 0 },
{ "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 },
{ "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 },
@@ -6335,21 +6409,22 @@ const UGroup unicode_groups[] = {
{ "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 },
{ "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 },
{ "Miao", +1, 0, 0, Miao_range32, 3 },
- { "Mn", +1, Mn_range16, 212, Mn_range32, 124 },
+ { "Mn", +1, Mn_range16, 212, Mn_range32, 134 },
{ "Modi", +1, 0, 0, Modi_range32, 2 },
{ "Mongolian", +1, Mongolian_range16, 5, Mongolian_range32, 1 },
{ "Mro", +1, 0, 0, Mro_range32, 3 },
{ "Multani", +1, 0, 0, Multani_range32, 5 },
{ "Myanmar", +1, Myanmar_range16, 3, 0, 0 },
- { "N", +1, N_range16, 67, N_range32, 67 },
+ { "N", +1, N_range16, 67, N_range32, 70 },
{ "Nabataean", +1, 0, 0, Nabataean_range32, 2 },
+ { "Nag_Mundari", +1, 0, 0, Nag_Mundari_range32, 1 },
{ "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 },
- { "Nd", +1, Nd_range16, 37, Nd_range32, 25 },
+ { "Nd", +1, Nd_range16, 37, Nd_range32, 27 },
{ "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
{ "Newa", +1, 0, 0, Newa_range32, 2 },
{ "Nko", +1, Nko_range16, 2, 0, 0 },
{ "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
- { "No", +1, No_range16, 29, No_range32, 42 },
+ { "No", +1, No_range16, 29, No_range32, 43 },
{ "Nushu", +1, 0, 0, Nushu_range32, 2 },
{ "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 },
{ "Ogham", +1, Ogham_range16, 1, 0, 0 },
@@ -6366,7 +6441,7 @@ const UGroup unicode_groups[] = {
{ "Oriya", +1, Oriya_range16, 14, 0, 0 },
{ "Osage", +1, 0, 0, Osage_range32, 2 },
{ "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
- { "P", +1, P_range16, 133, P_range32, 56 },
+ { "P", +1, P_range16, 133, P_range32, 58 },
{ "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 },
{ "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 },
{ "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 },
@@ -6377,12 +6452,12 @@ const UGroup unicode_groups[] = {
{ "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 },
{ "Phoenician", +1, 0, 0, Phoenician_range32, 2 },
{ "Pi", +1, Pi_range16, 11, 0, 0 },
- { "Po", +1, Po_range16, 130, Po_range32, 55 },
+ { "Po", +1, Po_range16, 130, Po_range32, 57 },
{ "Ps", +1, Ps_range16, 79, 0, 0 },
{ "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
{ "Rejang", +1, Rejang_range16, 2, 0, 0 },
{ "Runic", +1, Runic_range16, 2, 0, 0 },
- { "S", +1, S_range16, 151, S_range32, 83 },
+ { "S", +1, S_range16, 151, S_range32, 81 },
{ "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
{ "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
{ "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
@@ -6393,7 +6468,7 @@ const UGroup unicode_groups[] = {
{ "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
{ "Sk", +1, Sk_range16, 30, Sk_range32, 1 },
{ "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
- { "So", +1, So_range16, 114, So_range32, 72 },
+ { "So", +1, So_range16, 114, So_range32, 70 },
{ "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
{ "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
{ "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
@@ -6429,7 +6504,7 @@ const UGroup unicode_groups[] = {
{ "Zp", +1, Zp_range16, 1, 0, 0 },
{ "Zs", +1, Zs_range16, 7, 0, 0 },
};
-const int num_unicode_groups = 197;
+const int num_unicode_groups = 199;
} // namespace re2
diff --git a/contrib/libs/re2/util/rune.cc b/contrib/libs/re2/util/rune.cc
index 4f625ea380..a40e756c4e 100644
--- a/contrib/libs/re2/util/rune.cc
+++ b/contrib/libs/re2/util/rune.cc
@@ -51,7 +51,7 @@ int
chartorune(Rune *rune, const char *str)
{
int c, c1, c2, c3;
- long l;
+ Rune l;
/*
* one character sequence
@@ -127,7 +127,7 @@ int
runetochar(char *str, const Rune *rune)
{
/* Runes are signed, so convert to unsigned for range check. */
- unsigned long c;
+ unsigned int c;
/*
* one character sequence
@@ -212,7 +212,7 @@ int
utflen(const char *s)
{
int c;
- long n;
+ int n;
Rune rune;
n = 0;
@@ -232,7 +232,7 @@ utflen(const char *s)
char*
utfrune(const char *s, Rune c)
{
- long c1;
+ int c1;
Rune r;
int n;