aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/re2
diff options
context:
space:
mode:
author Andrey Khalyavin <halyavin@gmail.com> 2022-02-10 16:46:29 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:29 +0300
commit f773626848a7c7456803654292e716b83d69cc12 (patch)
tree db052dfcf9134f492bdbb962cb6c16cea58e1ed3 /contrib/libs/re2
parent f43ab775d197d300eb67bd4497632b909cd7c2a5 (diff)
download ydb-f773626848a7c7456803654292e716b83d69cc12.tar.gz
Restoring authorship annotation for Andrey Khalyavin <halyavin@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/re2')
-rw-r--r-- contrib/libs/re2/re2/bitmap256.h 218
-rw-r--r-- contrib/libs/re2/re2/bitstate.cc 60
-rw-r--r-- contrib/libs/re2/re2/compile.cc 606
-rw-r--r-- contrib/libs/re2/re2/dfa.cc 528
-rw-r--r-- contrib/libs/re2/re2/filtered_re2.cc 84
-rw-r--r-- contrib/libs/re2/re2/filtered_re2.h 46
-rw-r--r-- contrib/libs/re2/re2/mimics_pcre.cc 8
-rw-r--r-- contrib/libs/re2/re2/nfa.cc 380
-rw-r--r-- contrib/libs/re2/re2/onepass.cc 286
-rw-r--r-- contrib/libs/re2/re2/parse.cc 510
-rw-r--r-- contrib/libs/re2/re2/prefilter.cc 212
-rw-r--r-- contrib/libs/re2/re2/prefilter.h 28
-rw-r--r-- contrib/libs/re2/re2/prefilter_tree.cc 252
-rw-r--r-- contrib/libs/re2/re2/prefilter_tree.h 54
-rw-r--r-- contrib/libs/re2/re2/prog.cc 1064
-rw-r--r-- contrib/libs/re2/re2/prog.h 184
-rw-r--r-- contrib/libs/re2/re2/re2.cc 476
-rw-r--r-- contrib/libs/re2/re2/re2.h 1430
-rw-r--r-- contrib/libs/re2/re2/regexp.cc 206
-rw-r--r-- contrib/libs/re2/re2/regexp.h 206
-rw-r--r-- contrib/libs/re2/re2/set.cc 36
-rw-r--r-- contrib/libs/re2/re2/set.h 84
-rw-r--r-- contrib/libs/re2/re2/simplify.cc 590
-rw-r--r-- contrib/libs/re2/re2/sparse_array.h 194
-rw-r--r-- contrib/libs/re2/re2/sparse_set.h 274
-rw-r--r-- contrib/libs/re2/re2/stringpiece.cc 130
-rw-r--r-- contrib/libs/re2/re2/stringpiece.h 382
-rw-r--r-- contrib/libs/re2/re2/tostring.cc 40
-rw-r--r-- contrib/libs/re2/re2/unicode_casefold.cc 1012
-rw-r--r-- contrib/libs/re2/re2/unicode_casefold.h 42
-rw-r--r-- contrib/libs/re2/re2/unicode_groups.cc 6284
-rw-r--r-- contrib/libs/re2/re2/unicode_groups.h 22
-rw-r--r-- contrib/libs/re2/re2/walker-inl.h 18
-rw-r--r-- contrib/libs/re2/util/logging.h 214
-rw-r--r-- contrib/libs/re2/util/mix.h 82
-rw-r--r-- contrib/libs/re2/util/mutex.h 256
-rw-r--r-- contrib/libs/re2/util/rune.cc 14
-rw-r--r-- contrib/libs/re2/util/strutil.cc 150
-rw-r--r-- contrib/libs/re2/util/strutil.h 34
-rw-r--r-- contrib/libs/re2/util/utf.h 8
-rw-r--r-- contrib/libs/re2/util/util.h 34
-rw-r--r-- contrib/libs/re2/ya.make 34
42 files changed, 8386 insertions, 8386 deletions
diff --git a/contrib/libs/re2/re2/bitmap256.h b/contrib/libs/re2/re2/bitmap256.h
index 4899379e4d..2a4e47e090 100644
--- a/contrib/libs/re2/re2/bitmap256.h
+++ b/contrib/libs/re2/re2/bitmap256.h
@@ -1,117 +1,117 @@
-// Copyright 2016 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_BITMAP256_H_
-#define RE2_BITMAP256_H_
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-#include <stdint.h>
-#include <string.h>
-
-#include "util/util.h"
-#include "util/logging.h"
-
-namespace re2 {
-
-class Bitmap256 {
- public:
- Bitmap256() {
+// Copyright 2016 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_BITMAP256_H_
+#define RE2_BITMAP256_H_
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#include <stdint.h>
+#include <string.h>
+
+#include "util/util.h"
+#include "util/logging.h"
+
+namespace re2 {
+
+class Bitmap256 {
+ public:
+ Bitmap256() {
Clear();
}
// Clears all of the bits.
void Clear() {
- memset(words_, 0, sizeof words_);
- }
-
- // Tests the bit with index c.
- bool Test(int c) const {
- DCHECK_GE(c, 0);
- DCHECK_LE(c, 255);
-
+ memset(words_, 0, sizeof words_);
+ }
+
+ // Tests the bit with index c.
+ bool Test(int c) const {
+ DCHECK_GE(c, 0);
+ DCHECK_LE(c, 255);
+
return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0;
- }
-
- // Sets the bit with index c.
- void Set(int c) {
- DCHECK_GE(c, 0);
- DCHECK_LE(c, 255);
-
+ }
+
+ // Sets the bit with index c.
+ void Set(int c) {
+ DCHECK_GE(c, 0);
+ DCHECK_LE(c, 255);
+
words_[c / 64] |= (uint64_t{1} << (c % 64));
- }
-
- // Finds the next non-zero bit with index >= c.
- // Returns -1 if no such bit exists.
- int FindNextSetBit(int c) const;
-
- private:
- // Finds the least significant non-zero bit in n.
- static int FindLSBSet(uint64_t n) {
- DCHECK_NE(n, 0);
-#if defined(__GNUC__)
- return __builtin_ctzll(n);
-#elif defined(_MSC_VER) && defined(_M_X64)
- unsigned long c;
- _BitScanForward64(&c, n);
- return static_cast<int>(c);
-#elif defined(_MSC_VER) && defined(_M_IX86)
- unsigned long c;
- if (static_cast<uint32_t>(n) != 0) {
- _BitScanForward(&c, static_cast<uint32_t>(n));
- return static_cast<int>(c);
- } else {
- _BitScanForward(&c, static_cast<uint32_t>(n >> 32));
- return static_cast<int>(c) + 32;
- }
-#else
- int c = 63;
- for (int shift = 1 << 5; shift != 0; shift >>= 1) {
- uint64_t word = n << shift;
- if (word != 0) {
- n = word;
- c -= shift;
- }
- }
- return c;
-#endif
- }
-
- uint64_t words_[4];
-};
-
-int Bitmap256::FindNextSetBit(int c) const {
- DCHECK_GE(c, 0);
- DCHECK_LE(c, 255);
-
- // Check the word that contains the bit. Mask out any lower bits.
- int i = c / 64;
+ }
+
+ // Finds the next non-zero bit with index >= c.
+ // Returns -1 if no such bit exists.
+ int FindNextSetBit(int c) const;
+
+ private:
+ // Finds the least significant non-zero bit in n.
+ static int FindLSBSet(uint64_t n) {
+ DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+ return __builtin_ctzll(n);
+#elif defined(_MSC_VER) && defined(_M_X64)
+ unsigned long c;
+ _BitScanForward64(&c, n);
+ return static_cast<int>(c);
+#elif defined(_MSC_VER) && defined(_M_IX86)
+ unsigned long c;
+ if (static_cast<uint32_t>(n) != 0) {
+ _BitScanForward(&c, static_cast<uint32_t>(n));
+ return static_cast<int>(c);
+ } else {
+ _BitScanForward(&c, static_cast<uint32_t>(n >> 32));
+ return static_cast<int>(c) + 32;
+ }
+#else
+ int c = 63;
+ for (int shift = 1 << 5; shift != 0; shift >>= 1) {
+ uint64_t word = n << shift;
+ if (word != 0) {
+ n = word;
+ c -= shift;
+ }
+ }
+ return c;
+#endif
+ }
+
+ uint64_t words_[4];
+};
+
+int Bitmap256::FindNextSetBit(int c) const {
+ DCHECK_GE(c, 0);
+ DCHECK_LE(c, 255);
+
+ // Check the word that contains the bit. Mask out any lower bits.
+ int i = c / 64;
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
- if (word != 0)
- return (i * 64) + FindLSBSet(word);
-
- // Check any following words.
- i++;
- switch (i) {
- case 1:
- if (words_[1] != 0)
- return (1 * 64) + FindLSBSet(words_[1]);
- FALLTHROUGH_INTENDED;
- case 2:
- if (words_[2] != 0)
- return (2 * 64) + FindLSBSet(words_[2]);
- FALLTHROUGH_INTENDED;
- case 3:
- if (words_[3] != 0)
- return (3 * 64) + FindLSBSet(words_[3]);
- FALLTHROUGH_INTENDED;
- default:
- return -1;
- }
-}
-
-} // namespace re2
-
-#endif // RE2_BITMAP256_H_
+ if (word != 0)
+ return (i * 64) + FindLSBSet(word);
+
+ // Check any following words.
+ i++;
+ switch (i) {
+ case 1:
+ if (words_[1] != 0)
+ return (1 * 64) + FindLSBSet(words_[1]);
+ FALLTHROUGH_INTENDED;
+ case 2:
+ if (words_[2] != 0)
+ return (2 * 64) + FindLSBSet(words_[2]);
+ FALLTHROUGH_INTENDED;
+ case 3:
+ if (words_[3] != 0)
+ return (3 * 64) + FindLSBSet(words_[3]);
+ FALLTHROUGH_INTENDED;
+ default:
+ return -1;
+ }
+}
+
+} // namespace re2
+
+#endif // RE2_BITMAP256_H_
diff --git a/contrib/libs/re2/re2/bitstate.cc b/contrib/libs/re2/re2/bitstate.cc
index 877e548234..f1020e635d 100644
--- a/contrib/libs/re2/re2/bitstate.cc
+++ b/contrib/libs/re2/re2/bitstate.cc
@@ -17,13 +17,13 @@
// SearchBitState is a fast replacement for the NFA code on small
// regexps and texts when SearchOnePass cannot be used.
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
#include <limits>
#include <utility>
-
-#include "util/logging.h"
+
+#include "util/logging.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -155,7 +155,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
cap_[prog_->inst(-id)->cap()] = p;
continue;
}
-
+
if (rle > 0) {
p += rle;
// Revivify job on stack.
@@ -171,10 +171,10 @@ bool BitState::TrySearch(int id0, const char* p0) {
LOG(DFATAL) << "Unexpected opcode: " << ip->opcode();
return false;
- case kInstFail:
+ case kInstFail:
break;
-
- case kInstAltMatch:
+
+ case kInstAltMatch:
if (ip->greedy(prog_)) {
// out1 is the Match instruction.
id = ip->out1();
@@ -194,40 +194,40 @@ bool BitState::TrySearch(int id0, const char* p0) {
int c = -1;
if (p < end)
c = *p & 0xFF;
- if (!ip->Matches(c))
- goto Next;
-
+ if (!ip->Matches(c))
+ goto Next;
+
if (ip->hint() != 0)
Push(id+ip->hint(), p); // try the next when we're done
- id = ip->out();
- p++;
- goto CheckAndLoop;
+ id = ip->out();
+ p++;
+ goto CheckAndLoop;
}
case kInstCapture:
if (!ip->last())
Push(id+1, p); // try the next when we're done
-
+
if (0 <= ip->cap() && ip->cap() < cap_.size()) {
// Capture p to register, but save old value first.
Push(-id, cap_[ip->cap()]); // undo when we're done
cap_[ip->cap()] = p;
}
-
+
id = ip->out();
goto CheckAndLoop;
-
+
case kInstEmptyWidth:
if (ip->empty() & ~Prog::EmptyFlags(context_, p))
- goto Next;
-
- if (!ip->last())
+ goto Next;
+
+ if (!ip->last())
Push(id+1, p); // try the next when we're done
id = ip->out();
goto CheckAndLoop;
case kInstNop:
- if (!ip->last())
+ if (!ip->last())
Push(id+1, p); // try the next when we're done
id = ip->out();
@@ -241,7 +241,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
case kInstMatch: {
if (endmatch_ && p != end)
- goto Next;
+ goto Next;
// We found a match. If the caller doesn't care
// where the match is, no point going further.
@@ -256,9 +256,9 @@ bool BitState::TrySearch(int id0, const char* p0) {
if (submatch_[0].data() == NULL ||
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] =
- StringPiece(cap_[2 * i],
- static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
+ submatch_[i] =
+ StringPiece(cap_[2 * i],
+ static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
}
// If going for first match, we're done.
@@ -303,7 +303,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
submatch_ = submatch;
nsubmatch_ = nsubmatch;
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] = StringPiece();
+ submatch_[i] = StringPiece();
// Allocate scratch space.
int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
@@ -337,10 +337,10 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
// Try to use prefix accel (e.g. memchr) to skip ahead.
if (p < etext && prog_->can_prefix_accel()) {
p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
- if (p == NULL)
+ if (p == NULL)
p = etext;
- }
-
+ }
+
cap_[0] = p;
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
return true;
diff --git a/contrib/libs/re2/re2/compile.cc b/contrib/libs/re2/re2/compile.cc
index 61d801a630..0baa344430 100644
--- a/contrib/libs/re2/re2/compile.cc
+++ b/contrib/libs/re2/re2/compile.cc
@@ -8,16 +8,16 @@
// This file's external interface is just Regexp::CompileToProg.
// The Compiler class defined in this file is private.
-#include <stdint.h>
-#include <string.h>
-#include <unordered_map>
-#include <utility>
-
-#include "util/logging.h"
-#include "util/utf.h"
+#include <stdint.h>
+#include <string.h>
+#include <unordered_map>
+#include <utility>
+
+#include "util/logging.h"
+#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
-#include "re2/re2.h"
+#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@@ -77,7 +77,7 @@ static const PatchList kNullPatchList = {0, 0};
// Compiled program fragment.
struct Frag {
- uint32_t begin;
+ uint32_t begin;
PatchList end;
bool nullable;
@@ -89,7 +89,7 @@ struct Frag {
// Input encodings.
enum Encoding {
kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
- kEncodingLatin1, // Latin-1 (0-FF)
+ kEncodingLatin1, // Latin-1 (0-FF)
};
class Compiler : public Regexp::Walker<Frag> {
@@ -101,7 +101,7 @@ class Compiler : public Regexp::Walker<Frag> {
// Caller is responsible for deleting Prog when finished with it.
// If reversed is true, compiles for walking over the input
// string backward (reverses all concatenations).
- static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem);
+ static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem);
// Compiles alternation of all the re to a new Prog.
// Each re has a match with an id equal to its index in the vector.
@@ -137,7 +137,7 @@ class Compiler : public Regexp::Walker<Frag> {
Frag NoMatch();
// Returns a fragment that matches the empty string.
- Frag Match(int32_t id);
+ Frag Match(int32_t id);
// Returns a no-op fragment.
Frag Nop();
@@ -165,28 +165,28 @@ class Compiler : public Regexp::Walker<Frag> {
void Add_80_10ffff();
// New suffix that matches the byte range lo-hi, then goes to next.
- int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
- int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
-
- // Returns true iff the suffix is cached.
- bool IsCachedRuneByteSuffix(int id);
+ int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
+ int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
+ // Returns true iff the suffix is cached.
+ bool IsCachedRuneByteSuffix(int id);
+
// Adds a suffix to alternation.
void AddSuffix(int id);
- // Adds a suffix to the trie starting from the given root node.
- // Returns zero iff allocating an instruction fails. Otherwise, returns
- // the current root node, which might be different from what was given.
- int AddSuffixRecursive(int root, int id);
-
- // Finds the trie node for the given suffix. Returns a Frag in order to
+ // Adds a suffix to the trie starting from the given root node.
+ // Returns zero iff allocating an instruction fails. Otherwise, returns
+ // the current root node, which might be different from what was given.
+ int AddSuffixRecursive(int root, int id);
+
+ // Finds the trie node for the given suffix. Returns a Frag in order to
// distinguish between pointing at the root node directly (end.head == 0)
// and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
- Frag FindByteRange(int root, int id);
-
- // Compares two ByteRanges and returns true iff they are equal.
- bool ByteRangeEqual(int id1, int id2);
-
+ Frag FindByteRange(int root, int id);
+
+ // Compares two ByteRanges and returns true iff they are equal.
+ bool ByteRangeEqual(int id1, int id2);
+
// Returns the alternation of all the added suffixes.
Frag EndRange();
@@ -209,15 +209,15 @@ class Compiler : public Regexp::Walker<Frag> {
int ninst_; // Number of instructions used.
int max_ninst_; // Maximum number of instructions.
- int64_t max_mem_; // Total memory budget.
+ int64_t max_mem_; // Total memory budget.
- std::unordered_map<uint64_t, int> rune_cache_;
+ std::unordered_map<uint64_t, int> rune_cache_;
Frag rune_range_;
RE2::Anchor anchor_; // anchor mode for RE2::Set
- Compiler(const Compiler&) = delete;
- Compiler& operator=(const Compiler&) = delete;
+ Compiler(const Compiler&) = delete;
+ Compiler& operator=(const Compiler&) = delete;
};
Compiler::Compiler() {
@@ -365,8 +365,8 @@ Frag Compiler::Star(Frag a, bool nongreedy) {
// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
Frag Compiler::Quest(Frag a, bool nongreedy) {
- if (IsNoMatch(a))
- return Nop();
+ if (IsNoMatch(a))
+ return Nop();
int id = AllocInst(1);
if (id < 0)
return NoMatch();
@@ -400,7 +400,7 @@ Frag Compiler::Nop() {
}
// Returns a fragment that signals a match.
-Frag Compiler::Match(int32_t match_id) {
+Frag Compiler::Match(int32_t match_id) {
int id = AllocInst(1);
if (id < 0)
return NoMatch();
@@ -419,8 +419,8 @@ Frag Compiler::EmptyWidth(EmptyOp empty) {
// Given a fragment a, returns a fragment with capturing parens around a.
Frag Compiler::Capture(Frag a, int n) {
- if (IsNoMatch(a))
- return NoMatch();
+ if (IsNoMatch(a))
+ return NoMatch();
int id = AllocInst(2);
if (id < 0)
return NoMatch();
@@ -434,7 +434,7 @@ Frag Compiler::Capture(Frag a, int n) {
// A Rune is a name for a Unicode code point.
// Returns maximum rune encoded by UTF-8 sequence of length len.
static int MaxRune(int len) {
- int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax)
+ int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax)
if (len == 1)
b = 7;
else
@@ -456,7 +456,7 @@ void Compiler::BeginRange() {
rune_range_.end = kNullPatchList;
}
-int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
+int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
int next) {
Frag f = ByteRange(lo, hi, foldcase);
if (next != 0) {
@@ -467,18 +467,18 @@ int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
return f.begin;
}
-static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase,
- int next) {
- return (uint64_t)next << 17 |
- (uint64_t)lo << 9 |
- (uint64_t)hi << 1 |
- (uint64_t)foldcase;
-}
-
-int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
- int next) {
- uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
- std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key);
+static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase,
+ int next) {
+ return (uint64_t)next << 17 |
+ (uint64_t)lo << 9 |
+ (uint64_t)hi << 1 |
+ (uint64_t)foldcase;
+}
+
+int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
+ int next) {
+ uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
+ std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key);
if (it != rune_cache_.end())
return it->second;
int id = UncachedRuneByteSuffix(lo, hi, foldcase, next);
@@ -486,31 +486,31 @@ int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
return id;
}
-bool Compiler::IsCachedRuneByteSuffix(int id) {
- uint8_t lo = inst_[id].lo_;
- uint8_t hi = inst_[id].hi_;
- bool foldcase = inst_[id].foldcase() != 0;
- int next = inst_[id].out();
-
- uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
- return rune_cache_.find(key) != rune_cache_.end();
-}
-
+bool Compiler::IsCachedRuneByteSuffix(int id) {
+ uint8_t lo = inst_[id].lo_;
+ uint8_t hi = inst_[id].hi_;
+ bool foldcase = inst_[id].foldcase() != 0;
+ int next = inst_[id].out();
+
+ uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
+ return rune_cache_.find(key) != rune_cache_.end();
+}
+
void Compiler::AddSuffix(int id) {
- if (failed_)
- return;
-
+ if (failed_)
+ return;
+
if (rune_range_.begin == 0) {
rune_range_.begin = id;
return;
}
- if (encoding_ == kEncodingUTF8) {
- // Build a trie in order to reduce fanout.
- rune_range_.begin = AddSuffixRecursive(rune_range_.begin, id);
- return;
- }
-
+ if (encoding_ == kEncodingUTF8) {
+ // Build a trie in order to reduce fanout.
+ rune_range_.begin = AddSuffixRecursive(rune_range_.begin, id);
+ return;
+ }
+
int alt = AllocInst(1);
if (alt < 0) {
rune_range_.begin = 0;
@@ -520,102 +520,102 @@ void Compiler::AddSuffix(int id) {
rune_range_.begin = alt;
}
-int Compiler::AddSuffixRecursive(int root, int id) {
- DCHECK(inst_[root].opcode() == kInstAlt ||
- inst_[root].opcode() == kInstByteRange);
-
- Frag f = FindByteRange(root, id);
- if (IsNoMatch(f)) {
- int alt = AllocInst(1);
- if (alt < 0)
- return 0;
- inst_[alt].InitAlt(root, id);
- return alt;
- }
-
- int br;
+int Compiler::AddSuffixRecursive(int root, int id) {
+ DCHECK(inst_[root].opcode() == kInstAlt ||
+ inst_[root].opcode() == kInstByteRange);
+
+ Frag f = FindByteRange(root, id);
+ if (IsNoMatch(f)) {
+ int alt = AllocInst(1);
+ if (alt < 0)
+ return 0;
+ inst_[alt].InitAlt(root, id);
+ return alt;
+ }
+
+ int br;
if (f.end.head == 0)
- br = root;
+ br = root;
else if (f.end.head&1)
- br = inst_[f.begin].out1();
- else
- br = inst_[f.begin].out();
-
- if (IsCachedRuneByteSuffix(br)) {
- // We can't fiddle with cached suffixes, so make a clone of the head.
- int byterange = AllocInst(1);
- if (byterange < 0)
- return 0;
- inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(),
- inst_[br].foldcase(), inst_[br].out());
-
- // Ensure that the parent points to the clone, not to the original.
- // Note that this could leave the head unreachable except via the cache.
- br = byterange;
+ br = inst_[f.begin].out1();
+ else
+ br = inst_[f.begin].out();
+
+ if (IsCachedRuneByteSuffix(br)) {
+ // We can't fiddle with cached suffixes, so make a clone of the head.
+ int byterange = AllocInst(1);
+ if (byterange < 0)
+ return 0;
+ inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(),
+ inst_[br].foldcase(), inst_[br].out());
+
+ // Ensure that the parent points to the clone, not to the original.
+ // Note that this could leave the head unreachable except via the cache.
+ br = byterange;
if (f.end.head == 0)
- root = br;
+ root = br;
else if (f.end.head&1)
- inst_[f.begin].out1_ = br;
- else
- inst_[f.begin].set_out(br);
- }
-
- int out = inst_[id].out();
- if (!IsCachedRuneByteSuffix(id)) {
- // The head should be the instruction most recently allocated, so free it
- // instead of leaving it unreachable.
+ inst_[f.begin].out1_ = br;
+ else
+ inst_[f.begin].set_out(br);
+ }
+
+ int out = inst_[id].out();
+ if (!IsCachedRuneByteSuffix(id)) {
+ // The head should be the instruction most recently allocated, so free it
+ // instead of leaving it unreachable.
DCHECK_EQ(id, ninst_-1);
- inst_[id].out_opcode_ = 0;
- inst_[id].out1_ = 0;
+ inst_[id].out_opcode_ = 0;
+ inst_[id].out1_ = 0;
ninst_--;
- }
-
- out = AddSuffixRecursive(inst_[br].out(), out);
- if (out == 0)
- return 0;
-
- inst_[br].set_out(out);
- return root;
-}
-
-bool Compiler::ByteRangeEqual(int id1, int id2) {
- return inst_[id1].lo() == inst_[id2].lo() &&
- inst_[id1].hi() == inst_[id2].hi() &&
- inst_[id1].foldcase() == inst_[id2].foldcase();
-}
-
-Frag Compiler::FindByteRange(int root, int id) {
- if (inst_[root].opcode() == kInstByteRange) {
- if (ByteRangeEqual(root, id))
+ }
+
+ out = AddSuffixRecursive(inst_[br].out(), out);
+ if (out == 0)
+ return 0;
+
+ inst_[br].set_out(out);
+ return root;
+}
+
+bool Compiler::ByteRangeEqual(int id1, int id2) {
+ return inst_[id1].lo() == inst_[id2].lo() &&
+ inst_[id1].hi() == inst_[id2].hi() &&
+ inst_[id1].foldcase() == inst_[id2].foldcase();
+}
+
+Frag Compiler::FindByteRange(int root, int id) {
+ if (inst_[root].opcode() == kInstByteRange) {
+ if (ByteRangeEqual(root, id))
return Frag(root, kNullPatchList, false);
- else
- return NoMatch();
- }
-
- while (inst_[root].opcode() == kInstAlt) {
- int out1 = inst_[root].out1();
- if (ByteRangeEqual(out1, id))
+ else
+ return NoMatch();
+ }
+
+ while (inst_[root].opcode() == kInstAlt) {
+ int out1 = inst_[root].out1();
+ if (ByteRangeEqual(out1, id))
return Frag(root, PatchList::Mk((root << 1) | 1), false);
-
- // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
- // what we're looking for, then we can stop immediately. Unfortunately, we
- // can't short-circuit the search in reverse mode.
- if (!reversed_)
- return NoMatch();
-
- int out = inst_[root].out();
- if (inst_[out].opcode() == kInstAlt)
- root = out;
- else if (ByteRangeEqual(out, id))
+
+ // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
+ // what we're looking for, then we can stop immediately. Unfortunately, we
+ // can't short-circuit the search in reverse mode.
+ if (!reversed_)
+ return NoMatch();
+
+ int out = inst_[root].out();
+ if (inst_[out].opcode() == kInstAlt)
+ root = out;
+ else if (ByteRangeEqual(out, id))
return Frag(root, PatchList::Mk(root << 1), false);
- else
- return NoMatch();
- }
-
- LOG(DFATAL) << "should never happen";
- return NoMatch();
-}
-
+ else
+ return NoMatch();
+ }
+
+ LOG(DFATAL) << "should never happen";
+ return NoMatch();
+}
+
Frag Compiler::EndRange() {
return rune_range_;
}
@@ -639,13 +639,13 @@ void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) {
}
void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
- // Latin-1 is easy: runes *are* bytes.
+ // Latin-1 is easy: runes *are* bytes.
if (lo > hi || lo > 0xFF)
return;
if (hi > 0xFF)
hi = 0xFF;
- AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
- static_cast<uint8_t>(hi), foldcase, 0));
+ AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
+ static_cast<uint8_t>(hi), foldcase, 0));
}
void Compiler::Add_80_10ffff() {
@@ -710,14 +710,14 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
// ASCII range is always a special case.
if (hi < Runeself) {
- AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
- static_cast<uint8_t>(hi), foldcase, 0));
+ AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
+ static_cast<uint8_t>(hi), foldcase, 0));
return;
}
// Split range into sections that agree on leading bytes.
for (int i = 1; i < UTFmax; i++) {
- uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence
+ uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence
if ((lo & ~m) != (hi & ~m)) {
if ((lo & m) != 0) {
AddRuneRangeUTF8(lo, lo|m, foldcase);
@@ -733,55 +733,55 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
}
// Finally. Generate byte matching equivalent for lo-hi.
- uint8_t ulo[UTFmax], uhi[UTFmax];
+ uint8_t ulo[UTFmax], uhi[UTFmax];
int n = runetochar(reinterpret_cast<char*>(ulo), &lo);
int m = runetochar(reinterpret_cast<char*>(uhi), &hi);
(void)m; // USED(m)
DCHECK_EQ(n, m);
- // The logic below encodes this thinking:
- //
- // 1. When we have built the whole suffix, we know that it cannot
- // possibly be a suffix of anything longer: in forward mode, nothing
- // else can occur before the leading byte; in reverse mode, nothing
- // else can occur after the last continuation byte or else the leading
- // byte would have to change. Thus, there is no benefit to caching
- // the first byte of the suffix whereas there is a cost involved in
- // cloning it if it begins a common prefix, which is fairly likely.
- //
- // 2. Conversely, the last byte of the suffix cannot possibly be a
- // prefix of anything because next == 0, so we will never want to
- // clone it, but it is fairly likely to be a common suffix. Perhaps
- // more so in reverse mode than in forward mode because the former is
- // "converging" towards lower entropy, but caching is still worthwhile
- // for the latter in cases such as 80-BF.
- //
- // 3. Handling the bytes between the first and the last is less
- // straightforward and, again, the approach depends on whether we are
- // "converging" towards lower entropy: in forward mode, a single byte
- // is unlikely to be part of a common suffix whereas a byte range
- // is more likely so; in reverse mode, a byte range is unlikely to
- // be part of a common suffix whereas a single byte is more likely
- // so. The same benefit versus cost argument applies here.
+ // The logic below encodes this thinking:
+ //
+ // 1. When we have built the whole suffix, we know that it cannot
+ // possibly be a suffix of anything longer: in forward mode, nothing
+ // else can occur before the leading byte; in reverse mode, nothing
+ // else can occur after the last continuation byte or else the leading
+ // byte would have to change. Thus, there is no benefit to caching
+ // the first byte of the suffix whereas there is a cost involved in
+ // cloning it if it begins a common prefix, which is fairly likely.
+ //
+ // 2. Conversely, the last byte of the suffix cannot possibly be a
+ // prefix of anything because next == 0, so we will never want to
+ // clone it, but it is fairly likely to be a common suffix. Perhaps
+ // more so in reverse mode than in forward mode because the former is
+ // "converging" towards lower entropy, but caching is still worthwhile
+ // for the latter in cases such as 80-BF.
+ //
+ // 3. Handling the bytes between the first and the last is less
+ // straightforward and, again, the approach depends on whether we are
+ // "converging" towards lower entropy: in forward mode, a single byte
+ // is unlikely to be part of a common suffix whereas a byte range
+ // is more likely so; in reverse mode, a byte range is unlikely to
+ // be part of a common suffix whereas a single byte is more likely
+ // so. The same benefit versus cost argument applies here.
int id = 0;
if (reversed_) {
- for (int i = 0; i < n; i++) {
- // In reverse UTF-8 mode: cache the leading byte; don't cache the last
- // continuation byte; cache anything else iff it's a single byte (XX-XX).
- if (i == 0 || (ulo[i] == uhi[i] && i != n-1))
- id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
- else
- id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
- }
+ for (int i = 0; i < n; i++) {
+ // In reverse UTF-8 mode: cache the leading byte; don't cache the last
+ // continuation byte; cache anything else iff it's a single byte (XX-XX).
+ if (i == 0 || (ulo[i] == uhi[i] && i != n-1))
+ id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ else
+ id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ }
} else {
- for (int i = n-1; i >= 0; i--) {
- // In forward UTF-8 mode: don't cache the leading byte; cache the last
- // continuation byte; cache anything else iff it's a byte range (XX-YY).
- if (i == n-1 || (ulo[i] < uhi[i] && i != 0))
- id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
- else
- id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
- }
+ for (int i = n-1; i >= 0; i--) {
+ // In forward UTF-8 mode: don't cache the leading byte; cache the last
+ // continuation byte; cache anything else iff it's a byte range (XX-YY).
+ if (i == n-1 || (ulo[i] < uhi[i] && i != 0))
+ id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ else
+ id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
+ }
}
AddSuffix(id);
}
@@ -807,13 +807,13 @@ Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) {
if (failed_)
*stop = true;
- return Frag(); // not used by caller
+ return Frag(); // not used by caller
}
Frag Compiler::Literal(Rune r, bool foldcase) {
switch (encoding_) {
default:
- return Frag();
+ return Frag();
case kEncodingLatin1:
return ByteRange(r, r, foldcase);
@@ -821,11 +821,11 @@ Frag Compiler::Literal(Rune r, bool foldcase) {
case kEncodingUTF8: {
if (r < Runeself) // Make common case fast.
return ByteRange(r, r, foldcase);
- uint8_t buf[UTFmax];
+ uint8_t buf[UTFmax];
int n = runetochar(reinterpret_cast<char*>(buf), &r);
- Frag f = ByteRange((uint8_t)buf[0], buf[0], false);
+ Frag f = ByteRange((uint8_t)buf[0], buf[0], false);
for (int i = 1; i < n; i++)
- f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false));
+ f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false));
return f;
}
}
@@ -877,16 +877,16 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
}
case kRegexpStar:
- return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
+ return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
case kRegexpPlus:
- return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
+ return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
case kRegexpQuest:
- return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
+ return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
case kRegexpLiteral:
- return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0);
+ return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0);
case kRegexpLiteralString: {
// Concatenation of literals.
@@ -894,8 +894,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
return Nop();
Frag f;
for (int i = 0; i < re->nrunes(); i++) {
- Frag f1 = Literal(re->runes()[i],
- (re->parse_flags()&Regexp::FoldCase) != 0);
+ Frag f1 = Literal(re->runes()[i],
+ (re->parse_flags()&Regexp::FoldCase) != 0);
if (i == 0)
f = f1;
else
@@ -940,8 +940,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
// If this range contains all of A-Za-z or none of it,
// the fold flag is unnecessary; don't bother.
bool fold = foldascii;
- if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo ||
- ('Z' < i->lo && i->hi < 'a'))
+ if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo ||
+ ('Z' < i->lo && i->hi < 'a'))
fold = false;
AddRuneRange(i->lo, i->hi, fold);
@@ -982,109 +982,109 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
// Is this regexp required to start at the beginning of the text?
// Only approximate; can return false for complicated regexps like (\Aa|\Ab),
// but handles (\A(a|b)). Could use the Walker to write a more exact one.
-static bool IsAnchorStart(Regexp** pre, int depth) {
- Regexp* re = *pre;
- Regexp* sub;
- // The depth limit makes sure that we don't overflow
- // the stack on a deeply nested regexp. As the comment
- // above says, IsAnchorStart is conservative, so returning
- // a false negative is okay. The exact limit is somewhat arbitrary.
- if (re == NULL || depth >= 4)
- return false;
- switch (re->op()) {
- default:
- break;
- case kRegexpConcat:
- if (re->nsub() > 0) {
- sub = re->sub()[0]->Incref();
- if (IsAnchorStart(&sub, depth+1)) {
+static bool IsAnchorStart(Regexp** pre, int depth) {
+ Regexp* re = *pre;
+ Regexp* sub;
+ // The depth limit makes sure that we don't overflow
+ // the stack on a deeply nested regexp. As the comment
+ // above says, IsAnchorStart is conservative, so returning
+ // a false negative is okay. The exact limit is somewhat arbitrary.
+ if (re == NULL || depth >= 4)
+ return false;
+ switch (re->op()) {
+ default:
+ break;
+ case kRegexpConcat:
+ if (re->nsub() > 0) {
+ sub = re->sub()[0]->Incref();
+ if (IsAnchorStart(&sub, depth+1)) {
PODArray<Regexp*> subcopy(re->nsub());
- subcopy[0] = sub; // already have reference
- for (int i = 1; i < re->nsub(); i++)
- subcopy[i] = re->sub()[i]->Incref();
+ subcopy[0] = sub; // already have reference
+ for (int i = 1; i < re->nsub(); i++)
+ subcopy[i] = re->sub()[i]->Incref();
*pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
- re->Decref();
- return true;
+ re->Decref();
+ return true;
}
- sub->Decref();
- }
- break;
- case kRegexpCapture:
- sub = re->sub()[0]->Incref();
- if (IsAnchorStart(&sub, depth+1)) {
- *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
+ sub->Decref();
+ }
+ break;
+ case kRegexpCapture:
+ sub = re->sub()[0]->Incref();
+ if (IsAnchorStart(&sub, depth+1)) {
+ *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
re->Decref();
return true;
- }
- sub->Decref();
- break;
- case kRegexpBeginText:
- *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
- re->Decref();
- return true;
+ }
+ sub->Decref();
+ break;
+ case kRegexpBeginText:
+ *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
+ re->Decref();
+ return true;
}
- return false;
+ return false;
}
// Is this regexp required to start at the end of the text?
// Only approximate; can return false for complicated regexps like (a\z|b\z),
// but handles ((a|b)\z). Could use the Walker to write a more exact one.
-static bool IsAnchorEnd(Regexp** pre, int depth) {
- Regexp* re = *pre;
- Regexp* sub;
- // The depth limit makes sure that we don't overflow
- // the stack on a deeply nested regexp. As the comment
- // above says, IsAnchorEnd is conservative, so returning
- // a false negative is okay. The exact limit is somewhat arbitrary.
- if (re == NULL || depth >= 4)
- return false;
- switch (re->op()) {
- default:
- break;
- case kRegexpConcat:
- if (re->nsub() > 0) {
- sub = re->sub()[re->nsub() - 1]->Incref();
- if (IsAnchorEnd(&sub, depth+1)) {
+static bool IsAnchorEnd(Regexp** pre, int depth) {
+ Regexp* re = *pre;
+ Regexp* sub;
+ // The depth limit makes sure that we don't overflow
+ // the stack on a deeply nested regexp. As the comment
+ // above says, IsAnchorEnd is conservative, so returning
+ // a false negative is okay. The exact limit is somewhat arbitrary.
+ if (re == NULL || depth >= 4)
+ return false;
+ switch (re->op()) {
+ default:
+ break;
+ case kRegexpConcat:
+ if (re->nsub() > 0) {
+ sub = re->sub()[re->nsub() - 1]->Incref();
+ if (IsAnchorEnd(&sub, depth+1)) {
PODArray<Regexp*> subcopy(re->nsub());
- subcopy[re->nsub() - 1] = sub; // already have reference
- for (int i = 0; i < re->nsub() - 1; i++)
- subcopy[i] = re->sub()[i]->Incref();
+ subcopy[re->nsub() - 1] = sub; // already have reference
+ for (int i = 0; i < re->nsub() - 1; i++)
+ subcopy[i] = re->sub()[i]->Incref();
*pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
- re->Decref();
- return true;
+ re->Decref();
+ return true;
}
- sub->Decref();
- }
- break;
- case kRegexpCapture:
- sub = re->sub()[0]->Incref();
- if (IsAnchorEnd(&sub, depth+1)) {
- *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
+ sub->Decref();
+ }
+ break;
+ case kRegexpCapture:
+ sub = re->sub()[0]->Incref();
+ if (IsAnchorEnd(&sub, depth+1)) {
+ *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
re->Decref();
return true;
- }
- sub->Decref();
- break;
- case kRegexpEndText:
- *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
- re->Decref();
- return true;
+ }
+ sub->Decref();
+ break;
+ case kRegexpEndText:
+ *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
+ re->Decref();
+ return true;
}
- return false;
+ return false;
}
-void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
+void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
RE2::Anchor anchor) {
if (flags & Regexp::Latin1)
encoding_ = kEncodingLatin1;
max_mem_ = max_mem;
if (max_mem <= 0) {
max_ninst_ = 100000; // more than enough
- } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
+ } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
// No room for anything.
max_ninst_ = 0;
} else {
- int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
+ int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
// Limit instruction count so that inst->id() fits nicely in an int.
// SparseArray also assumes that the indices (inst->id()) are ints.
// The call to WalkExponential uses 2*max_ninst_ below,
@@ -1108,7 +1108,7 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem,
// If reversed is true, compiles a program that expects
// to run over the input string backward (reverses all concatenations).
// The reversed flag is also recorded in the returned program.
-Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
+Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
Compiler c;
c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */);
c.reversed_ = reversed;
@@ -1121,8 +1121,8 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
// Record whether prog is anchored, removing the anchors.
// (They get in the way of other optimizations.)
- bool is_anchor_start = IsAnchorStart(&sre, 0);
- bool is_anchor_end = IsAnchorEnd(&sre, 0);
+ bool is_anchor_start = IsAnchorStart(&sre, 0);
+ bool is_anchor_end = IsAnchorEnd(&sre, 0);
// Generate fragment for entire regexp.
Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
@@ -1165,12 +1165,12 @@ Prog* Compiler::Finish(Regexp* re) {
ninst_ = 1;
}
- // Hand off the array to Prog.
+ // Hand off the array to Prog.
prog_->inst_ = std::move(inst_);
prog_->size_ = ninst_;
- prog_->Optimize();
- prog_->Flatten();
+ prog_->Optimize();
+ prog_->Flatten();
prog_->ComputeByteMap();
if (!prog_->reversed()) {
@@ -1199,11 +1199,11 @@ Prog* Compiler::Finish(Regexp* re) {
}
// Converts Regexp to Prog.
-Prog* Regexp::CompileToProg(int64_t max_mem) {
+Prog* Regexp::CompileToProg(int64_t max_mem) {
return Compiler::Compile(this, false, max_mem);
}
-Prog* Regexp::CompileToReverseProg(int64_t max_mem) {
+Prog* Regexp::CompileToReverseProg(int64_t max_mem) {
return Compiler::Compile(this, true, max_mem);
}
@@ -1242,11 +1242,11 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
// Make sure DFA has enough memory to operate,
// since we're not going to fall back to the NFA.
- bool dfa_failed = false;
+ bool dfa_failed = false;
StringPiece sp = "hello, world";
prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch,
- NULL, &dfa_failed, NULL);
- if (dfa_failed) {
+ NULL, &dfa_failed, NULL);
+ if (dfa_failed) {
delete prog;
return NULL;
}
diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc
index d47c7d50a7..f36855a273 100644
--- a/contrib/libs/re2/re2/dfa.cc
+++ b/contrib/libs/re2/re2/dfa.cc
@@ -21,48 +21,48 @@
//
// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-#include <algorithm>
-#include <atomic>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <algorithm>
+#include <atomic>
#include <deque>
-#include <mutex>
-#include <new>
-#include <string>
+#include <mutex>
+#include <new>
+#include <string>
#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "util/logging.h"
-#include "util/mix.h"
-#include "util/mutex.h"
-#include "util/strutil.h"
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "util/logging.h"
+#include "util/mix.h"
+#include "util/mutex.h"
+#include "util/strutil.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/sparse_set.h"
-#include "re2/stringpiece.h"
+#include "re2/stringpiece.h"
-// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
-#ifdef _MSC_VER
-#pragma warning(disable: 4200)
-#endif
+// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
+#ifdef _MSC_VER
+#pragma warning(disable: 4200)
+#endif
namespace re2 {
-// Controls whether the DFA should bail out early if the NFA would be faster.
-static bool dfa_should_bail_when_slow = true;
-
+// Controls whether the DFA should bail out early if the NFA would be faster.
+static bool dfa_should_bail_when_slow = true;
+
void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) {
dfa_should_bail_when_slow = b;
}
// Changing this to true compiles in prints that trace execution of the DFA.
// Generates a lot of output -- only useful for debugging.
-static const bool ExtraDebug = false;
+static const bool ExtraDebug = false;
// A DFA implementation of a regular expression program.
// Since this is entirely a forward declaration mandated by C++,
@@ -70,7 +70,7 @@ static const bool ExtraDebug = false;
// the comments in the sections that follow the DFA definition.
class DFA {
public:
- DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem);
+ DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem);
~DFA();
bool ok() const { return !init_failed_; }
Prog::MatchKind kind() { return kind_; }
@@ -106,28 +106,28 @@ class DFA {
// difficult to mark them as such.
class RWLocker;
class StateSaver;
- class Workq;
+ class Workq;
// A single DFA state. The DFA is represented as a graph of these
// States, linked by the next_ pointers. If in state s and reading
// byte c, the next state should be s->next_[c].
struct State {
- inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
+ inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
int* inst_; // Instruction pointers in the state.
int ninst_; // # of inst_ pointers.
- uint32_t flag_; // Empty string bitfield flags in effect on the way
+ uint32_t flag_; // Empty string bitfield flags in effect on the way
// into this state, along with kFlagMatch if this
// is a matching state.
-
+
// Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
-// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
- std::atomic<State*> next_[0]; // Outgoing arrows from State,
-#else
- std::atomic<State*> next_[]; // Outgoing arrows from State,
-#endif
-
+// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
+ std::atomic<State*> next_[0]; // Outgoing arrows from State,
+#else
+ std::atomic<State*> next_[]; // Outgoing arrows from State,
+#endif
+
// one per input byte class
};
@@ -140,35 +140,35 @@ class DFA {
kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left
};
- struct StateHash {
- size_t operator()(const State* a) const {
- DCHECK(a != NULL);
- HashMix mix(a->flag_);
- for (int i = 0; i < a->ninst_; i++)
- mix.Mix(a->inst_[i]);
- mix.Mix(0);
- return mix.get();
- }
- };
-
+ struct StateHash {
+ size_t operator()(const State* a) const {
+ DCHECK(a != NULL);
+ HashMix mix(a->flag_);
+ for (int i = 0; i < a->ninst_; i++)
+ mix.Mix(a->inst_[i]);
+ mix.Mix(0);
+ return mix.get();
+ }
+ };
+
struct StateEqual {
bool operator()(const State* a, const State* b) const {
- DCHECK(a != NULL);
- DCHECK(b != NULL);
+ DCHECK(a != NULL);
+ DCHECK(b != NULL);
if (a == b)
return true;
- if (a->flag_ != b->flag_)
+ if (a->flag_ != b->flag_)
return false;
if (a->ninst_ != b->ninst_)
return false;
for (int i = 0; i < a->ninst_; i++)
if (a->inst_[i] != b->inst_[i])
return false;
- return true;
+ return true;
}
};
- typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
+ typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
private:
// Make it easier to swap in a scalable reader-writer mutex.
@@ -200,7 +200,7 @@ class DFA {
// Looks up and returns a State matching the inst, ninst, and flag.
// L >= mutex_
- State* CachedState(int* inst, int ninst, uint32_t flag);
+ State* CachedState(int* inst, int ninst, uint32_t flag);
// Clear the cache entirely.
// Must hold cache_mutex_.w or be in destructor.
@@ -208,7 +208,7 @@ class DFA {
// Converts a State into a Workq: the opposite of WorkqToCachedState.
// L >= mutex_
- void StateToWorkq(State* s, Workq* q);
+ void StateToWorkq(State* s, Workq* q);
// Runs a State on a given byte, returning the next state.
State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_
@@ -223,12 +223,12 @@ class DFA {
// Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
// L >= mutex_
- void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint32_t flag);
+ void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint32_t flag);
// Adds the instruction id to the Workq, following empty arrows
// according to flag.
// L >= mutex_
- void AddToQueue(Workq* q, int id, uint32_t flag);
+ void AddToQueue(Workq* q, int id, uint32_t flag);
// For debugging, returns a text representation of State.
static std::string DumpState(State* state);
@@ -265,8 +265,8 @@ class DFA {
SparseSet* matches;
private:
- SearchParams(const SearchParams&) = delete;
- SearchParams& operator=(const SearchParams&) = delete;
+ SearchParams(const SearchParams&) = delete;
+ SearchParams& operator=(const SearchParams&) = delete;
};
// Before each search, the parameters to Search are analyzed by
@@ -281,8 +281,8 @@ class DFA {
// false on failure.
// cache_mutex_.r <= L < mutex_
bool AnalyzeSearch(SearchParams* params);
- bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
- uint32_t flags);
+ bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
+ uint32_t flags);
// The generic search loop, inlined to create specialized versions.
// cache_mutex_.r <= L < mutex_
@@ -339,8 +339,8 @@ class DFA {
// readers. Any State* pointers are only valid while cache_mutex_
// is held.
CacheMutex cache_mutex_;
- int64_t mem_budget_; // Total memory budget for all States.
- int64_t state_budget_; // Amount of memory remaining for new States.
+ int64_t mem_budget_; // Total memory budget for all States.
+ int64_t state_budget_; // Amount of memory remaining for new States.
StateSet state_cache_; // All States computed so far.
StartInfo start_[kMaxStart];
@@ -348,9 +348,9 @@ class DFA {
DFA& operator=(const DFA&) = delete;
};
-// Shorthand for casting to uint8_t*.
-static inline const uint8_t* BytePtr(const void* v) {
- return reinterpret_cast<const uint8_t*>(v);
+// Shorthand for casting to uint8_t*.
+static inline const uint8_t* BytePtr(const void* v) {
+ return reinterpret_cast<const uint8_t*>(v);
}
// Work queues
@@ -415,24 +415,24 @@ class DFA::Workq : public SparseSet {
int maxmark_; // maximum number of marks
int nextmark_; // id of next mark
bool last_was_mark_; // last inserted was mark
-
- Workq(const Workq&) = delete;
- Workq& operator=(const Workq&) = delete;
+
+ Workq(const Workq&) = delete;
+ Workq& operator=(const Workq&) = delete;
};
-DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
+DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
: prog_(prog),
kind_(kind),
init_failed_(false),
q0_(NULL),
q1_(NULL),
- mem_budget_(max_mem) {
- if (ExtraDebug)
+ mem_budget_(max_mem) {
+ if (ExtraDebug)
fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
int nmark = 0;
- if (kind_ == Prog::kLongestMatch)
- nmark = prog_->size();
- // See DFA::AddToQueue() for why this is so.
+ if (kind_ == Prog::kLongestMatch)
+ nmark = prog_->size();
+ // See DFA::AddToQueue() for why this is so.
int nstack = prog_->inst_count(kInstCapture) +
prog_->inst_count(kInstEmptyWidth) +
prog_->inst_count(kInstNop) +
@@ -454,18 +454,18 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
// At minimum, the search requires room for two states in order
// to limp along, restarting frequently. We'll get better performance
// if there is room for a larger number of states, say 20.
- // Note that a state stores list heads only, so we use the program
- // list count for the upper bound, not the program size.
- int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
- (prog_->list_count()+nmark)*sizeof(int);
+ // Note that a state stores list heads only, so we use the program
+ // list count for the upper bound, not the program size.
+ int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
+ int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
+ (prog_->list_count()+nmark)*sizeof(int);
if (state_budget_ < 20*one_state) {
init_failed_ = true;
return;
}
- q0_ = new Workq(prog_->size(), nmark);
- q1_ = new Workq(prog_->size(), nmark);
+ q0_ = new Workq(prog_->size(), nmark);
+ q1_ = new Workq(prog_->size(), nmark);
stack_ = PODArray<int>(nstack);
}
@@ -593,7 +593,7 @@ std::string DFA::DumpState(State* state) {
// If mq is not null, MatchSep and the match IDs in mq will be appended
// to the State.
DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
- //mutex_.AssertHeld();
+ //mutex_.AssertHeld();
// Construct array of instruction ids for the new state.
// Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
@@ -601,10 +601,10 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// RunWorkqOnEmptyString or RunWorkqOnByte.
PODArray<int> inst(q->size());
int n = 0;
- uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
- bool sawmatch = false; // whether queue contains guaranteed kInstMatch
- bool sawmark = false; // whether queue contains a Mark
- if (ExtraDebug)
+ uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
+ bool sawmatch = false; // whether queue contains guaranteed kInstMatch
+ bool sawmark = false; // whether queue contains a Mark
+ if (ExtraDebug)
fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
int id = *it;
@@ -627,18 +627,18 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
if (kind_ != Prog::kManyMatch &&
(kind_ != Prog::kFirstMatch ||
(it == q->begin() && ip->greedy(prog_))) &&
- (kind_ != Prog::kLongestMatch || !sawmark) &&
- (flag & kFlagMatch)) {
- if (ExtraDebug)
+ (kind_ != Prog::kLongestMatch || !sawmark) &&
+ (flag & kFlagMatch)) {
+ if (ExtraDebug)
fprintf(stderr, " -> FullMatchState\n");
return FullMatchState;
}
- FALLTHROUGH_INTENDED;
- default:
- // Record iff id is the head of its list, which must
- // be the case if id-1 is the last of *its* list. :)
- if (prog_->inst(id-1)->last())
- inst[n++] = *it;
+ FALLTHROUGH_INTENDED;
+ default:
+ // Record iff id is the head of its list, which must
+ // be the case if id-1 is the last of *its* list. :)
+ if (prog_->inst(id-1)->last())
+ inst[n++] = *it;
if (ip->opcode() == kInstEmptyWidth)
needflags |= ip->empty();
if (ip->opcode() == kInstMatch && !prog_->anchor_end())
@@ -675,7 +675,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// the execution loop can stop early. This is only okay
// if the state is *not* a matching state.
if (n == 0 && flag == 0) {
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, " -> DeadState\n");
return DeadState;
}
@@ -690,7 +690,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
int* markp = ip;
while (markp < ep && *markp != Mark)
markp++;
- std::sort(ip, markp);
+ std::sort(ip, markp);
if (markp < ep)
markp++;
ip = markp;
@@ -727,50 +727,50 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// Looks in the State cache for a State matching inst, ninst, flag.
// If one is found, returns it. If one is not found, allocates one,
// inserts it in the cache, and returns it.
-DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
- //mutex_.AssertHeld();
+DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
+ //mutex_.AssertHeld();
// Look in the cache for a pre-existing state.
- // We have to initialise the struct like this because otherwise
- // MSVC will complain about the flexible array member. :(
- State state;
- state.inst_ = inst;
- state.ninst_ = ninst;
- state.flag_ = flag;
+ // We have to initialise the struct like this because otherwise
+ // MSVC will complain about the flexible array member. :(
+ State state;
+ state.inst_ = inst;
+ state.ninst_ = ninst;
+ state.flag_ = flag;
StateSet::iterator it = state_cache_.find(&state);
if (it != state_cache_.end()) {
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
return *it;
}
// Must have enough memory for new state.
// In addition to what we're going to allocate,
- // the state cache hash table seems to incur about 40 bytes per
+ // the state cache hash table seems to incur about 40 bytes per
// State*, empirically.
- const int kStateCacheOverhead = 40;
+ const int kStateCacheOverhead = 40;
int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
- ninst*sizeof(int);
+ int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
+ ninst*sizeof(int);
if (mem_budget_ < mem + kStateCacheOverhead) {
mem_budget_ = -1;
return NULL;
}
mem_budget_ -= mem + kStateCacheOverhead;
- // Allocate new state along with room for next_ and inst_.
+ // Allocate new state along with room for next_ and inst_.
char* space = std::allocator<char>().allocate(mem);
- State* s = new (space) State;
- (void) new (s->next_) std::atomic<State*>[nnext];
- // Work around a unfortunate bug in older versions of libstdc++.
- // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
- for (int i = 0; i < nnext; i++)
- (void) new (s->next_ + i) std::atomic<State*>(NULL);
- s->inst_ = new (s->next_ + nnext) int[ninst];
+ State* s = new (space) State;
+ (void) new (s->next_) std::atomic<State*>[nnext];
+ // Work around a unfortunate bug in older versions of libstdc++.
+ // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
+ for (int i = 0; i < nnext; i++)
+ (void) new (s->next_ + i) std::atomic<State*>(NULL);
+ s->inst_ = new (s->next_ + nnext) int[ninst];
memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
s->ninst_ = ninst;
s->flag_ = flag;
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, " -> %s\n", DumpState(s).c_str());
// Put state in cache and return it.
@@ -780,19 +780,19 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
// Clear the cache. Must hold cache_mutex_.w or be in destructor.
void DFA::ClearCache() {
- StateSet::iterator begin = state_cache_.begin();
- StateSet::iterator end = state_cache_.end();
- while (begin != end) {
- StateSet::iterator tmp = begin;
- ++begin;
- // Deallocate the blob of memory that we allocated in DFA::CachedState().
+ StateSet::iterator begin = state_cache_.begin();
+ StateSet::iterator end = state_cache_.end();
+ while (begin != end) {
+ StateSet::iterator tmp = begin;
+ ++begin;
+ // Deallocate the blob of memory that we allocated in DFA::CachedState().
// We recompute mem in order to benefit from sized delete where possible.
int ninst = (*tmp)->ninst_;
int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
ninst*sizeof(int);
std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem);
- }
+ }
state_cache_.clear();
}
@@ -806,24 +806,24 @@ void DFA::StateToWorkq(State* s, Workq* q) {
// Nothing after this is an instruction!
break;
} else {
- // Explore from the head of the list.
- AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask);
+ // Explore from the head of the list.
+ AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask);
}
}
}
-// Adds ip to the work queue, following empty arrows according to flag.
-void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
+// Adds ip to the work queue, following empty arrows according to flag.
+void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
// Use stack_ to hold our stack of instructions yet to process.
- // It was preallocated as follows:
- // one entry per Capture;
- // one entry per EmptyWidth; and
- // one entry per Nop.
- // This reflects the maximum number of stack pushes that each can
- // perform. (Each instruction can be processed at most once.)
- // When using marks, we also added nmark == prog_->size().
- // (Otherwise, nmark == 0.)
+ // It was preallocated as follows:
+ // one entry per Capture;
+ // one entry per EmptyWidth; and
+ // one entry per Nop.
+ // This reflects the maximum number of stack pushes that each can
+ // perform. (Each instruction can be processed at most once.)
+ // When using marks, we also added nmark == prog_->size().
+ // (Otherwise, nmark == 0.)
int* stk = stack_.data();
int nstk = 0;
@@ -832,7 +832,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
DCHECK_LE(nstk, stack_.size());
id = stk[--nstk];
- Loop:
+ Loop:
if (id == Mark) {
q->mark();
continue;
@@ -842,8 +842,8 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
continue;
// If ip is already on the queue, nothing to do.
- // Otherwise add it. We don't actually keep all the
- // ones that get added, but adding all of them here
+ // Otherwise add it. We don't actually keep all the
+ // ones that get added, but adding all of them here
// increases the likelihood of q->contains(id),
// reducing the amount of duplicated work.
if (q->contains(id))
@@ -853,46 +853,46 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
// Process instruction.
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+ default:
+ LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstByteRange: // just save these on the queue
case kInstMatch:
- if (ip->last())
- break;
- id = id+1;
- goto Loop;
+ if (ip->last())
+ break;
+ id = id+1;
+ goto Loop;
case kInstCapture: // DFA treats captures as no-ops.
case kInstNop:
- if (!ip->last())
- stk[nstk++] = id+1;
-
- // If this instruction is the [00-FF]* loop at the beginning of
- // a leftmost-longest unanchored search, separate with a Mark so
- // that future threads (which will start farther to the right in
- // the input string) are lower priority than current threads.
- if (ip->opcode() == kInstNop && q->maxmark() > 0 &&
+ if (!ip->last())
+ stk[nstk++] = id+1;
+
+ // If this instruction is the [00-FF]* loop at the beginning of
+ // a leftmost-longest unanchored search, separate with a Mark so
+ // that future threads (which will start farther to the right in
+ // the input string) are lower priority than current threads.
+ if (ip->opcode() == kInstNop && q->maxmark() > 0 &&
id == prog_->start_unanchored() && id != prog_->start())
stk[nstk++] = Mark;
- id = ip->out();
- goto Loop;
-
- case kInstAltMatch:
- DCHECK(!ip->last());
- id = id+1;
- goto Loop;
-
+ id = ip->out();
+ goto Loop;
+
+ case kInstAltMatch:
+ DCHECK(!ip->last());
+ id = id+1;
+ goto Loop;
+
case kInstEmptyWidth:
- if (!ip->last())
- stk[nstk++] = id+1;
-
- // Continue on if we have all the right flag bits.
- if (ip->empty() & ~flag)
- break;
- id = ip->out();
- goto Loop;
+ if (!ip->last())
+ stk[nstk++] = id+1;
+
+ // Continue on if we have all the right flag bits.
+ if (ip->empty() & ~flag)
+ break;
+ id = ip->out();
+ goto Loop;
}
}
}
@@ -913,7 +913,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
// and then processing only $. Doing the two-step sequence won't match
// ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior
// exhibited by existing implementations).
-void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) {
+void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) {
newq->clear();
for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
if (oldq->is_mark(*i))
@@ -929,7 +929,7 @@ void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) {
// regular expression program has been reached (the regexp has matched).
void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
int c, uint32_t flag, bool* ismatch) {
- //mutex_.AssertHeld();
+ //mutex_.AssertHeld();
newq->clear();
for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
@@ -942,10 +942,10 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
int id = *i;
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
- break;
-
+ default:
+ LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+ break;
+
case kInstFail: // never succeeds
case kInstCapture: // already followed
case kInstNop: // already followed
@@ -984,7 +984,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
}
}
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
}
@@ -1000,8 +1000,8 @@ DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) {
// Processes input byte c in state, returning new state.
DFA::State* DFA::RunStateOnByte(State* state, int c) {
- //mutex_.AssertHeld();
-
+ //mutex_.AssertHeld();
+
if (state <= SpecialStateMax) {
if (state == FullMatchState) {
// It is convenient for routines like PossibleMatchRange
@@ -1023,9 +1023,9 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
}
// If someone else already computed this, return it.
- State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed);
- if (ns != NULL)
- return ns;
+ State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed);
+ if (ns != NULL)
+ return ns;
// Convert state into Workq.
StateToWorkq(state, q0_);
@@ -1034,10 +1034,10 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
// around this byte. Before the byte we have the flags recorded
// in the State structure itself. After the byte we have
// nothing yet (but that will change: read on).
- uint32_t needflag = state->flag_ >> kFlagNeedShift;
- uint32_t beforeflag = state->flag_ & kFlagEmptyMask;
- uint32_t oldbeforeflag = beforeflag;
- uint32_t afterflag = 0;
+ uint32_t needflag = state->flag_ >> kFlagNeedShift;
+ uint32_t beforeflag = state->flag_ & kFlagEmptyMask;
+ uint32_t oldbeforeflag = beforeflag;
+ uint32_t afterflag = 0;
if (c == '\n') {
// Insert implicit $ and ^ around \n
@@ -1053,8 +1053,8 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
// The state flag kFlagLastWord says whether the last
// byte processed was a word character. Use that info to
// insert empty-width (non-)word boundaries.
- bool islastword = (state->flag_ & kFlagLastWord) != 0;
- bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c));
+ bool islastword = (state->flag_ & kFlagLastWord) != 0;
+ bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c));
if (isword == islastword)
beforeflag |= kEmptyNonWordBoundary;
else
@@ -1064,7 +1064,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
// Only useful to rerun on empty string if there are new, useful flags.
if (beforeflag & ~oldbeforeflag & needflag) {
RunWorkqOnEmptyString(q0_, q1_, beforeflag);
- using std::swap;
+ using std::swap;
swap(q0_, q1_);
}
bool ismatch = false;
@@ -1073,7 +1073,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
swap(q0_, q1_);
// Save afterflag along with ismatch and isword in new state.
- uint32_t flag = afterflag;
+ uint32_t flag = afterflag;
if (ismatch)
flag |= kFlagMatch;
if (isword)
@@ -1084,11 +1084,11 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
else
ns = WorkqToCachedState(q0_, NULL, flag);
- // Flush ns before linking to it.
+ // Flush ns before linking to it.
// Write barrier before updating state->next_ so that the
// main search loop can proceed without any locking, for speed.
// (Otherwise it would need one mutex operation per input byte.)
- state->next_[ByteMap(c)].store(ns, std::memory_order_release);
+ state->next_[ByteMap(c)].store(ns, std::memory_order_release);
return ns;
}
@@ -1126,8 +1126,8 @@ class DFA::RWLocker {
CacheMutex* mu_;
bool writing_;
- RWLocker(const RWLocker&) = delete;
- RWLocker& operator=(const RWLocker&) = delete;
+ RWLocker(const RWLocker&) = delete;
+ RWLocker& operator=(const RWLocker&) = delete;
};
DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
@@ -1139,15 +1139,15 @@ DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
if (!writing_) {
mu_->ReaderUnlock();
- mu_->WriterLock();
+ mu_->WriterLock();
writing_ = true;
}
}
DFA::RWLocker::~RWLocker() {
- if (!writing_)
- mu_->ReaderUnlock();
- else
+ if (!writing_)
+ mu_->ReaderUnlock();
+ else
mu_->WriterUnlock();
}
@@ -1212,12 +1212,12 @@ class DFA::StateSaver {
DFA* dfa_; // the DFA to use
int* inst_; // saved info from State
int ninst_;
- uint32_t flag_;
+ uint32_t flag_;
bool is_special_; // whether original state was special
State* special_; // if is_special_, the original state
- StateSaver(const StateSaver&) = delete;
- StateSaver& operator=(const StateSaver&) = delete;
+ StateSaver(const StateSaver&) = delete;
+ StateSaver& operator=(const StateSaver&) = delete;
};
DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
@@ -1331,13 +1331,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
const uint8_t* ep = BytePtr(params->text.data() +
params->text.size()); // end of text
const uint8_t* resetp = NULL; // p at last cache reset
- if (!run_forward) {
- using std::swap;
+ if (!run_forward) {
+ using std::swap;
swap(p, ep);
- }
+ }
- const uint8_t* bytemap = prog_->bytemap();
- const uint8_t* lastmatch = NULL; // most recent matching position in text
+ const uint8_t* bytemap = prog_->bytemap();
+ const uint8_t* lastmatch = NULL; // most recent matching position in text
bool matched = false;
State* s = start;
@@ -1364,7 +1364,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
}
while (p != ep) {
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
if (can_prefix_accel && s == start) {
@@ -1402,7 +1402,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
// Okay to use bytemap[] not ByteMap() here, because
// c is known to be an actual byte and not kByteEndText.
- State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
+ State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
if (ns == NULL) {
ns = RunStateOnByteUnlocked(s, c);
if (ns == NULL) {
@@ -1415,7 +1415,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
// of 10 bytes per state computation, fail so that RE2 can
// fall back to the NFA. However, RE2::Set cannot fall back,
// so we just have to keep on keeping on in that case.
- if (dfa_should_bail_when_slow && resetp != NULL &&
+ if (dfa_should_bail_when_slow && resetp != NULL &&
static_cast<size_t>(p - resetp) < 10*state_cache_.size() &&
kind_ != Prog::kManyMatch) {
params->failed = true;
@@ -1464,7 +1464,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastmatch = p - 1;
else
lastmatch = p + 1;
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
@@ -1499,7 +1499,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
lastbyte = BeginPtr(params->text)[-1] & 0xFF;
}
- State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
+ State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
if (ns == NULL) {
ns = RunStateOnByteUnlocked(s, lastbyte);
if (ns == NULL) {
@@ -1539,8 +1539,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
if (id == MatchSep)
break;
params->matches->insert(id);
- }
- }
+ }
+ }
}
params->ep = reinterpret_cast<const char*>(lastmatch);
@@ -1628,14 +1628,14 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
// Sanity check: make sure that text lies within context.
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
- LOG(DFATAL) << "context does not contain text";
+ LOG(DFATAL) << "context does not contain text";
params->start = DeadState;
return true;
}
// Determine correct search type.
int start;
- uint32_t flags;
+ uint32_t flags;
if (params->run_forward) {
if (BeginPtr(text) == BeginPtr(context)) {
start = kStartBeginText;
@@ -1693,7 +1693,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
params->start->flag_ >> kFlagNeedShift == 0)
params->can_prefix_accel = true;
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
params->anchored, params->run_forward, flags,
DumpState(params->start).c_str(), params->can_prefix_accel);
@@ -1703,8 +1703,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
// Fills in info if needed. Returns true on success, false on failure.
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
- uint32_t flags) {
- // Quick check.
+ uint32_t flags) {
+ // Quick check.
State* start = info->start.load(std::memory_order_acquire);
if (start != NULL)
return true;
@@ -1722,7 +1722,7 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
if (start == NULL)
return false;
- // Synchronize with "quick check" above.
+ // Synchronize with "quick check" above.
info->start.store(start, std::memory_order_release);
return true;
}
@@ -1743,7 +1743,7 @@ bool DFA::Search(const StringPiece& text,
}
*failed = false;
- if (ExtraDebug) {
+ if (ExtraDebug) {
fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
@@ -1761,7 +1761,7 @@ bool DFA::Search(const StringPiece& text,
return false;
}
if (params.start == DeadState)
- return false;
+ return false;
if (params.start == FullMatchState) {
if (run_forward == want_earliest_match)
*epp = text.data();
@@ -1769,7 +1769,7 @@ bool DFA::Search(const StringPiece& text,
*epp = text.data() + text.size();
return true;
}
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "start %s\n", DumpState(params.start).c_str());
bool ret = FastSearchLoop(&params);
if (params.failed) {
@@ -1782,35 +1782,35 @@ bool DFA::Search(const StringPiece& text,
DFA* Prog::GetDFA(MatchKind kind) {
// For a forward DFA, half the memory goes to each DFA.
- // However, if it is a "many match" DFA, then there is
- // no counterpart with which the memory must be shared.
- //
+ // However, if it is a "many match" DFA, then there is
+ // no counterpart with which the memory must be shared.
+ //
// For a reverse DFA, all the memory goes to the
// "longest match" DFA, because RE2 never does reverse
// "first match" searches.
- if (kind == kFirstMatch) {
- std::call_once(dfa_first_once_, [](Prog* prog) {
- prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2);
- }, this);
- return dfa_first_;
- } else if (kind == kManyMatch) {
- std::call_once(dfa_first_once_, [](Prog* prog) {
- prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_);
- }, this);
- return dfa_first_;
- } else {
- std::call_once(dfa_longest_once_, [](Prog* prog) {
- if (!prog->reversed_)
- prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2);
- else
- prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_);
- }, this);
- return dfa_longest_;
+ if (kind == kFirstMatch) {
+ std::call_once(dfa_first_once_, [](Prog* prog) {
+ prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2);
+ }, this);
+ return dfa_first_;
+ } else if (kind == kManyMatch) {
+ std::call_once(dfa_first_once_, [](Prog* prog) {
+ prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_);
+ }, this);
+ return dfa_first_;
+ } else {
+ std::call_once(dfa_longest_once_, [](Prog* prog) {
+ if (!prog->reversed_)
+ prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2);
+ else
+ prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_);
+ }, this);
+ return dfa_longest_;
}
-}
+}
-void Prog::DeleteDFA(DFA* dfa) {
- delete dfa;
+void Prog::DeleteDFA(DFA* dfa) {
+ delete dfa;
}
// Executes the regexp program to search in text,
@@ -1824,7 +1824,7 @@ void Prog::DeleteDFA(DFA* dfa) {
// This is the only external interface (class DFA only exists in this file).
//
bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
- Anchor anchor, MatchKind kind, StringPiece* match0,
+ Anchor anchor, MatchKind kind, StringPiece* match0,
bool* failed, SparseSet* matches) {
*failed = false;
@@ -1891,7 +1891,7 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
*match0 =
StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
else
- *match0 =
+ *match0 =
StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
}
return true;
@@ -1905,7 +1905,7 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) {
// Pick out start state for unanchored search
// at beginning of text.
RWLocker l(&cache_mutex_);
- SearchParams params(StringPiece(), StringPiece(), &l);
+ SearchParams params(StringPiece(), StringPiece(), &l);
params.anchored = false;
if (!AnalyzeSearch(&params) ||
params.start == NULL ||
@@ -1993,7 +1993,7 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Pick out start state for anchored search at beginning of text.
RWLocker l(&cache_mutex_);
- SearchParams params(StringPiece(), StringPiece(), &l);
+ SearchParams params(StringPiece(), StringPiece(), &l);
params.anchored = true;
if (!AnalyzeSearch(&params))
return false;
@@ -2033,14 +2033,14 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Build minimum prefix.
State* s = params.start;
min->clear();
- MutexLock lock(&mutex_);
+ MutexLock lock(&mutex_);
for (int i = 0; i < maxlen; i++) {
- if (previously_visited_states[s] > kMaxEltRepetitions)
+ if (previously_visited_states[s] > kMaxEltRepetitions)
break;
previously_visited_states[s]++;
// Stop if min is a match.
- State* ns = RunStateOnByte(s, kByteEndText);
+ State* ns = RunStateOnByte(s, kByteEndText);
if (ns == NULL) // DFA out of memory
return false;
if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
@@ -2049,13 +2049,13 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Try to extend the string with low bytes.
bool extended = false;
for (int j = 0; j < 256; j++) {
- ns = RunStateOnByte(s, j);
+ ns = RunStateOnByte(s, j);
if (ns == NULL) // DFA out of memory
return false;
if (ns == FullMatchState ||
(ns > SpecialStateMax && ns->ninst_ > 0)) {
extended = true;
- min->append(1, static_cast<char>(j));
+ min->append(1, static_cast<char>(j));
s = ns;
break;
}
@@ -2069,20 +2069,20 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
s = params.start;
max->clear();
for (int i = 0; i < maxlen; i++) {
- if (previously_visited_states[s] > kMaxEltRepetitions)
+ if (previously_visited_states[s] > kMaxEltRepetitions)
break;
previously_visited_states[s] += 1;
// Try to extend the string with high bytes.
bool extended = false;
for (int j = 255; j >= 0; j--) {
- State* ns = RunStateOnByte(s, j);
+ State* ns = RunStateOnByte(s, j);
if (ns == NULL)
return false;
if (ns == FullMatchState ||
(ns > SpecialStateMax && ns->ninst_ > 0)) {
extended = true;
- max->append(1, static_cast<char>(j));
+ max->append(1, static_cast<char>(j));
s = ns;
break;
}
@@ -2110,9 +2110,9 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// PossibleMatchRange for a Prog.
bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
- // Have to use dfa_longest_ to get all strings for full matches.
- // For example, (a|aa) never matches aa in first-match mode.
- return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
+ // Have to use dfa_longest_ to get all strings for full matches.
+ // For example, (a|aa) never matches aa in first-match mode.
+ return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
}
} // namespace re2
diff --git a/contrib/libs/re2/re2/filtered_re2.cc b/contrib/libs/re2/re2/filtered_re2.cc
index 5df97456e2..4a4a190889 100644
--- a/contrib/libs/re2/re2/filtered_re2.cc
+++ b/contrib/libs/re2/re2/filtered_re2.cc
@@ -3,13 +3,13 @@
// license that can be found in the LICENSE file.
#include "re2/filtered_re2.h"
-
-#include <stddef.h>
-#include <string>
+
+#include <stddef.h>
+#include <string>
#include <utility>
-
-#include "util/util.h"
-#include "util/logging.h"
+
+#include "util/util.h"
+#include "util/logging.h"
#include "re2/prefilter.h"
#include "re2/prefilter_tree.h"
@@ -20,13 +20,13 @@ FilteredRE2::FilteredRE2()
prefilter_tree_(new PrefilterTree()) {
}
-FilteredRE2::FilteredRE2(int min_atom_len)
- : compiled_(false),
- prefilter_tree_(new PrefilterTree(min_atom_len)) {
-}
-
+FilteredRE2::FilteredRE2(int min_atom_len)
+ : compiled_(false),
+ prefilter_tree_(new PrefilterTree(min_atom_len)) {
+}
+
FilteredRE2::~FilteredRE2() {
- for (size_t i = 0; i < re2_vec_.size(); i++)
+ for (size_t i = 0; i < re2_vec_.size(); i++)
delete re2_vec_[i];
}
@@ -52,13 +52,13 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
RE2::ErrorCode code = re->error_code();
if (!re->ok()) {
- if (options.log_errors()) {
- LOG(ERROR) << "Couldn't compile regular expression, skipping: "
+ if (options.log_errors()) {
+ LOG(ERROR) << "Couldn't compile regular expression, skipping: "
<< pattern << " due to error " << re->error();
- }
+ }
delete re;
} else {
- *id = static_cast<int>(re2_vec_.size());
+ *id = static_cast<int>(re2_vec_.size());
re2_vec_.push_back(re);
}
@@ -66,17 +66,17 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
}
void FilteredRE2::Compile(std::vector<std::string>* atoms) {
- if (compiled_) {
- LOG(ERROR) << "Compile called already.";
+ if (compiled_) {
+ LOG(ERROR) << "Compile called already.";
return;
}
- if (re2_vec_.empty()) {
- LOG(ERROR) << "Compile called before Add.";
- return;
- }
-
- for (size_t i = 0; i < re2_vec_.size(); i++) {
+ if (re2_vec_.empty()) {
+ LOG(ERROR) << "Compile called before Add.";
+ return;
+ }
+
+ for (size_t i = 0; i < re2_vec_.size(); i++) {
Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]);
prefilter_tree_->Add(prefilter);
}
@@ -86,21 +86,21 @@ void FilteredRE2::Compile(std::vector<std::string>* atoms) {
}
int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
- for (size_t i = 0; i < re2_vec_.size(); i++)
+ for (size_t i = 0; i < re2_vec_.size(); i++)
if (RE2::PartialMatch(text, *re2_vec_[i]))
- return static_cast<int>(i);
+ return static_cast<int>(i);
return -1;
}
int FilteredRE2::FirstMatch(const StringPiece& text,
- const std::vector<int>& atoms) const {
+ const std::vector<int>& atoms) const {
if (!compiled_) {
- LOG(DFATAL) << "FirstMatch called before Compile.";
+ LOG(DFATAL) << "FirstMatch called before Compile.";
return -1;
}
- std::vector<int> regexps;
+ std::vector<int> regexps;
prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
- for (size_t i = 0; i < regexps.size(); i++)
+ for (size_t i = 0; i < regexps.size(); i++)
if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
return regexps[i];
return -1;
@@ -108,25 +108,25 @@ int FilteredRE2::FirstMatch(const StringPiece& text,
bool FilteredRE2::AllMatches(
const StringPiece& text,
- const std::vector<int>& atoms,
- std::vector<int>* matching_regexps) const {
+ const std::vector<int>& atoms,
+ std::vector<int>* matching_regexps) const {
matching_regexps->clear();
- std::vector<int> regexps;
+ std::vector<int> regexps;
prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
- for (size_t i = 0; i < regexps.size(); i++)
+ for (size_t i = 0; i < regexps.size(); i++)
if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
matching_regexps->push_back(regexps[i]);
return !matching_regexps->empty();
}
-void FilteredRE2::AllPotentials(
- const std::vector<int>& atoms,
- std::vector<int>* potential_regexps) const {
- prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps);
-}
-
-void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms,
- std::vector<int>* passed_regexps) {
+void FilteredRE2::AllPotentials(
+ const std::vector<int>& atoms,
+ std::vector<int>* potential_regexps) const {
+ prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps);
+}
+
+void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+ std::vector<int>* passed_regexps) {
prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
}
diff --git a/contrib/libs/re2/re2/filtered_re2.h b/contrib/libs/re2/re2/filtered_re2.h
index dd618c70e8..d07822dad3 100644
--- a/contrib/libs/re2/re2/filtered_re2.h
+++ b/contrib/libs/re2/re2/filtered_re2.h
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_FILTERED_RE2_H_
-#define RE2_FILTERED_RE2_H_
-
+#ifndef RE2_FILTERED_RE2_H_
+#define RE2_FILTERED_RE2_H_
+
// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
// It provides a prefilter mechanism that helps in cutting down the
// number of regexps that need to be actually searched.
@@ -22,10 +22,10 @@
// in the text to get the actual regexp matches.
#include <memory>
-#include <string>
-#include <vector>
+#include <string>
+#include <vector>
-#include "re2/re2.h"
+#include "re2/re2.h"
namespace re2 {
@@ -34,7 +34,7 @@ class PrefilterTree;
class FilteredRE2 {
public:
FilteredRE2();
- explicit FilteredRE2(int min_atom_len);
+ explicit FilteredRE2(int min_atom_len);
~FilteredRE2();
// Not copyable.
@@ -69,24 +69,24 @@ class FilteredRE2 {
// Returns -1 on no match. Compile has to be called before
// calling this.
int FirstMatch(const StringPiece& text,
- const std::vector<int>& atoms) const;
+ const std::vector<int>& atoms) const;
// Returns the indices of all matching regexps, after first clearing
// matched_regexps.
bool AllMatches(const StringPiece& text,
- const std::vector<int>& atoms,
- std::vector<int>* matching_regexps) const;
-
- // Returns the indices of all potentially matching regexps after first
- // clearing potential_regexps.
- // A regexp is potentially matching if it passes the filter.
- // If a regexp passes the filter it may still not match.
- // A regexp that does not pass the filter is guaranteed to not match.
- void AllPotentials(const std::vector<int>& atoms,
- std::vector<int>* potential_regexps) const;
-
+ const std::vector<int>& atoms,
+ std::vector<int>* matching_regexps) const;
+
+ // Returns the indices of all potentially matching regexps after first
+ // clearing potential_regexps.
+ // A regexp is potentially matching if it passes the filter.
+ // If a regexp passes the filter it may still not match.
+ // A regexp that does not pass the filter is guaranteed to not match.
+ void AllPotentials(const std::vector<int>& atoms,
+ std::vector<int>* potential_regexps) const;
+
// The number of regexps added.
- int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
+ int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
// Get the individual RE2 objects.
const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
@@ -96,11 +96,11 @@ class FilteredRE2 {
void PrintPrefilter(int regexpid);
// Useful for testing and debugging.
- void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
- std::vector<int>* passed_regexps);
+ void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+ std::vector<int>* passed_regexps);
// All the regexps in the FilteredRE2.
- std::vector<RE2*> re2_vec_;
+ std::vector<RE2*> re2_vec_;
// Has the FilteredRE2 been compiled using Compile()
bool compiled_;
diff --git a/contrib/libs/re2/re2/mimics_pcre.cc b/contrib/libs/re2/re2/mimics_pcre.cc
index b1d6a51228..23e6b43f37 100644
--- a/contrib/libs/re2/re2/mimics_pcre.cc
+++ b/contrib/libs/re2/re2/mimics_pcre.cc
@@ -22,8 +22,8 @@
//
// Regexp::MimicsPCRE checks for any of these conditions.
-#include "util/util.h"
-#include "util/logging.h"
+#include "util/util.h"
+#include "util/logging.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@@ -135,8 +135,8 @@ class EmptyStringWalker : public Regexp::Walker<bool> {
}
private:
- EmptyStringWalker(const EmptyStringWalker&) = delete;
- EmptyStringWalker& operator=(const EmptyStringWalker&) = delete;
+ EmptyStringWalker(const EmptyStringWalker&) = delete;
+ EmptyStringWalker& operator=(const EmptyStringWalker&) = delete;
};
// Called after visiting re's children. child_args contains the return
diff --git a/contrib/libs/re2/re2/nfa.cc b/contrib/libs/re2/re2/nfa.cc
index c7339f8ffd..9767d45d65 100644
--- a/contrib/libs/re2/re2/nfa.cc
+++ b/contrib/libs/re2/re2/nfa.cc
@@ -24,14 +24,14 @@
// Like Thompson's original machine and like the DFA implementation, this
// implementation notices a match only once it is one byte past it.
-#include <stdio.h>
-#include <string.h>
-#include <algorithm>
+#include <stdio.h>
+#include <string.h>
+#include <algorithm>
#include <deque>
-#include <string>
-#include <utility>
-#include <vector>
-
+#include <string>
+#include <utility>
+#include <vector>
+
#include "util/logging.h"
#include "util/strutil.h"
#include "re2/pod_array.h"
@@ -42,8 +42,8 @@
namespace re2 {
-static const bool ExtraDebug = false;
-
+static const bool ExtraDebug = false;
+
class NFA {
public:
NFA(Prog* prog);
@@ -67,7 +67,7 @@ class NFA {
private:
struct Thread {
union {
- int ref;
+ int ref;
Thread* next; // when on free list
};
const char** capture;
@@ -75,8 +75,8 @@ class NFA {
// State for explicit stack in AddToThreadq.
struct AddState {
- int id; // Inst to process
- Thread* t; // if not null, set t0 = t before processing id
+ int id; // Inst to process
+ Thread* t; // if not null, set t0 = t before processing id
};
// Threadq is a list of threads. The list is sorted by the order
@@ -85,23 +85,23 @@ class NFA {
typedef SparseArray<Thread*> Threadq;
inline Thread* AllocThread();
- inline Thread* Incref(Thread* t);
- inline void Decref(Thread* t);
+ inline Thread* Incref(Thread* t);
+ inline void Decref(Thread* t);
- // Follows all empty arrows from id0 and enqueues all the states reached.
- // Enqueues only the ByteRange instructions that match byte c.
+ // Follows all empty arrows from id0 and enqueues all the states reached.
+ // Enqueues only the ByteRange instructions that match byte c.
// context is used (with p) for evaluating empty-width specials.
- // p is the current input position, and t0 is the current thread.
+ // p is the current input position, and t0 is the current thread.
void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
- const char* p, Thread* t0);
+ const char* p, Thread* t0);
// Run runq on byte c, appending new states to nextq.
// Updates matched_ and match_ as new, better matches are found.
// context is used (with p) for evaluating empty-width specials.
// p is the position of byte c in the input string for AddToThreadq;
// p-1 will be used when processing Match instructions.
- // Frees all the threads on runq.
- // If there is a shortcut to the end, returns that shortcut.
+ // Frees all the threads on runq.
+ // If there is a shortcut to the end, returns that shortcut.
int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
const char* p);
@@ -126,13 +126,13 @@ class NFA {
const char** match_; // best match so far
bool matched_; // any match so far?
- NFA(const NFA&) = delete;
- NFA& operator=(const NFA&) = delete;
+ NFA(const NFA&) = delete;
+ NFA& operator=(const NFA&) = delete;
};
NFA::NFA(Prog* prog) {
prog_ = prog;
- start_ = prog_->start();
+ start_ = prog_->start();
ncapture_ = 0;
longest_ = false;
endmatch_ = false;
@@ -140,7 +140,7 @@ NFA::NFA(Prog* prog) {
etext_ = NULL;
q0_.resize(prog_->size());
q1_.resize(prog_->size());
- // See NFA::AddToThreadq() for why this is so.
+ // See NFA::AddToThreadq() for why this is so.
int nstack = 2*prog_->inst_count(kInstCapture) +
prog_->inst_count(kInstEmptyWidth) +
prog_->inst_count(kInstNop) + 1; // + 1 for start inst
@@ -160,78 +160,78 @@ NFA::Thread* NFA::AllocThread() {
Thread* t = freelist_;
if (t != NULL) {
freelist_ = t->next;
- t->ref = 1;
+ t->ref = 1;
// We don't need to touch t->capture because
// the caller will immediately overwrite it.
return t;
}
arena_.emplace_back();
t = &arena_.back();
- t->ref = 1;
+ t->ref = 1;
t->capture = new const char*[ncapture_];
return t;
}
-NFA::Thread* NFA::Incref(Thread* t) {
+NFA::Thread* NFA::Incref(Thread* t) {
+ DCHECK(t != NULL);
+ t->ref++;
+ return t;
+}
+
+void NFA::Decref(Thread* t) {
DCHECK(t != NULL);
- t->ref++;
- return t;
-}
-
-void NFA::Decref(Thread* t) {
- DCHECK(t != NULL);
- t->ref--;
- if (t->ref > 0)
- return;
- DCHECK_EQ(t->ref, 0);
+ t->ref--;
+ if (t->ref > 0)
+ return;
+ DCHECK_EQ(t->ref, 0);
t->next = freelist_;
freelist_ = t;
-}
-
-// Follows all empty arrows from id0 and enqueues all the states reached.
-// Enqueues only the ByteRange instructions that match byte c.
+}
+
+// Follows all empty arrows from id0 and enqueues all the states reached.
+// Enqueues only the ByteRange instructions that match byte c.
// context is used (with p) for evaluating empty-width specials.
-// p is the current input position, and t0 is the current thread.
+// p is the current input position, and t0 is the current thread.
void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
- const char* p, Thread* t0) {
+ const char* p, Thread* t0) {
if (id0 == 0)
return;
// Use stack_ to hold our stack of instructions yet to process.
- // It was preallocated as follows:
- // two entries per Capture;
- // one entry per EmptyWidth; and
- // one entry per Nop.
- // This reflects the maximum number of stack pushes that each can
- // perform. (Each instruction can be processed at most once.)
+ // It was preallocated as follows:
+ // two entries per Capture;
+ // one entry per EmptyWidth; and
+ // one entry per Nop.
+ // This reflects the maximum number of stack pushes that each can
+ // perform. (Each instruction can be processed at most once.)
AddState* stk = stack_.data();
- int nstk = 0;
+ int nstk = 0;
stk[nstk++] = {id0, NULL};
while (nstk > 0) {
DCHECK_LE(nstk, stack_.size());
- AddState a = stk[--nstk];
-
- Loop:
- if (a.t != NULL) {
- // t0 was a thread that we allocated and copied in order to
- // record the capture, so we must now decref it.
- Decref(t0);
- t0 = a.t;
- }
-
+ AddState a = stk[--nstk];
+
+ Loop:
+ if (a.t != NULL) {
+ // t0 was a thread that we allocated and copied in order to
+ // record the capture, so we must now decref it.
+ Decref(t0);
+ t0 = a.t;
+ }
+
int id = a.id;
if (id == 0)
continue;
if (q->has_index(id)) {
- if (ExtraDebug)
- fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str());
+ if (ExtraDebug)
+ fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str());
continue;
}
// Create entry in q no matter what. We might fill it in below,
// or we might not. Even if not, it is necessary to have it,
- // so that we don't revisit id0 during the recursion.
+ // so that we don't revisit id0 during the recursion.
q->set_new(id, NULL);
Thread** tp = &q->get_existing(id);
int j;
@@ -247,48 +247,48 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
case kInstAltMatch:
// Save state; will pick up at next byte.
- t = Incref(t0);
+ t = Incref(t0);
*tp = t;
- DCHECK(!ip->last());
+ DCHECK(!ip->last());
a = {id+1, NULL};
- goto Loop;
+ goto Loop;
case kInstNop:
- if (!ip->last())
+ if (!ip->last())
stk[nstk++] = {id+1, NULL};
-
+
// Continue on.
a = {ip->out(), NULL};
- goto Loop;
+ goto Loop;
case kInstCapture:
- if (!ip->last())
+ if (!ip->last())
stk[nstk++] = {id+1, NULL};
-
+
if ((j=ip->cap()) < ncapture_) {
- // Push a dummy whose only job is to restore t0
+ // Push a dummy whose only job is to restore t0
// once we finish exploring this possibility.
stk[nstk++] = {0, t0};
// Record capture.
- t = AllocThread();
- CopyCapture(t->capture, t0->capture);
- t->capture[j] = p;
- t0 = t;
+ t = AllocThread();
+ CopyCapture(t->capture, t0->capture);
+ t->capture[j] = p;
+ t0 = t;
}
a = {ip->out(), NULL};
- goto Loop;
-
- case kInstByteRange:
- if (!ip->Matches(c))
- goto Next;
+ goto Loop;
+ case kInstByteRange:
+ if (!ip->Matches(c))
+ goto Next;
+
// Save state; will pick up at next byte.
- t = Incref(t0);
+ t = Incref(t0);
*tp = t;
- if (ExtraDebug)
- fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
+ if (ExtraDebug)
+ fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
if (ip->hint() == 0)
break;
@@ -302,27 +302,27 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
if (ExtraDebug)
fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str());
- Next:
- if (ip->last())
- break;
+ Next:
+ if (ip->last())
+ break;
a = {id+1, NULL};
- goto Loop;
-
+ goto Loop;
+
case kInstEmptyWidth:
- if (!ip->last())
+ if (!ip->last())
stk[nstk++] = {id+1, NULL};
-
+
// Continue on if we have all the right flag bits.
if (ip->empty() & ~Prog::EmptyFlags(context, p))
break;
a = {ip->out(), NULL};
- goto Loop;
+ goto Loop;
}
}
}
// Run runq on byte c, appending new states to nextq.
-// Updates matched_ and match_ as new, better matches are found.
+// Updates matched_ and match_ as new, better matches are found.
// context is used (with p) for evaluating empty-width specials.
// p is the position of byte c in the input string for AddToThreadq;
// p-1 will be used when processing Match instructions.
@@ -340,12 +340,12 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
if (longest_) {
// Can skip any threads started after our current best match.
if (matched_ && match_[0] < t->capture[0]) {
- Decref(t);
+ Decref(t);
continue;
}
}
- int id = i->index();
+ int id = i->index();
Prog::Inst* ip = prog_->inst(id);
switch (ip->opcode()) {
@@ -363,10 +363,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
break;
// The match is ours if we want it.
if (ip->greedy(prog_) || longest_) {
- CopyCapture(match_, t->capture);
- matched_ = true;
-
- Decref(t);
+ CopyCapture(match_, t->capture);
+ matched_ = true;
+
+ Decref(t);
for (++i; i != runq->end(); ++i) {
if (i->value() != NULL)
Decref(i->value());
@@ -398,21 +398,21 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
// point but longer than an existing match.
if (!matched_ || t->capture[0] < match_[0] ||
(t->capture[0] == match_[0] && p-1 > match_[1])) {
- CopyCapture(match_, t->capture);
+ CopyCapture(match_, t->capture);
match_[1] = p-1;
- matched_ = true;
- }
+ matched_ = true;
+ }
} else {
// Leftmost-biased mode: this match is by definition
// better than what we've already found (see next line).
- CopyCapture(match_, t->capture);
+ CopyCapture(match_, t->capture);
match_[1] = p-1;
- matched_ = true;
+ matched_ = true;
// Cut off the threads that can only find matches
// worse than the one we just found: don't run the
// rest of the current Threadq.
- Decref(t);
+ Decref(t);
for (++i; i != runq->end(); ++i) {
if (i->value() != NULL)
Decref(i->value());
@@ -423,7 +423,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
break;
}
}
- Decref(t);
+ Decref(t);
}
runq->clear();
return 0;
@@ -455,9 +455,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
if (context.data() == NULL)
context = text;
- // Sanity check: make sure that text lies within context.
+ // Sanity check: make sure that text lies within context.
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
- LOG(DFATAL) << "context does not contain text";
+ LOG(DFATAL) << "context does not contain text";
return false;
}
@@ -496,7 +496,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// For convenience.
etext_ = text.data() + text.size();
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
@@ -508,29 +508,29 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// Loop over the text, stepping the machine.
for (const char* p = text.data();; p++) {
- if (ExtraDebug) {
- int c = 0;
+ if (ExtraDebug) {
+ int c = 0;
if (p == btext_)
- c = '^';
+ c = '^';
else if (p > etext_)
- c = '$';
+ c = '$';
else if (p < etext_)
- c = p[0] & 0xFF;
-
+ c = p[0] & 0xFF;
+
fprintf(stderr, "%c:", c);
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
Thread* t = i->value();
if (t == NULL)
continue;
- fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
+ fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
}
fprintf(stderr, "\n");
}
- // This is a no-op the first time around the loop because runq is empty.
+ // This is a no-op the first time around the loop because runq is empty.
int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
DCHECK_EQ(runq->size(), 0);
- using std::swap;
+ using std::swap;
swap(nextq, runq);
nextq->clear();
if (id != 0) {
@@ -544,8 +544,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
break;
case kInstCapture:
- if (ip->cap() < ncapture_)
- match_[ip->cap()] = p;
+ if (ip->cap() < ncapture_)
+ match_[ip->cap()] = p;
id = ip->out();
continue;
@@ -574,24 +574,24 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// Try to use prefix accel (e.g. memchr) to skip ahead.
// The search must be unanchored and there must be zero
// possible matches already.
- if (!anchored && runq->size() == 0 &&
+ if (!anchored && runq->size() == 0 &&
p < etext_ && prog_->can_prefix_accel()) {
p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
if (p == NULL)
p = etext_;
}
- Thread* t = AllocThread();
- CopyCapture(t->capture, match_);
- t->capture[0] = p;
+ Thread* t = AllocThread();
+ CopyCapture(t->capture, match_);
+ t->capture[0] = p;
AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
t);
- Decref(t);
+ Decref(t);
}
// If all the threads have died, stop early.
if (runq->size() == 0) {
- if (ExtraDebug)
+ if (ExtraDebug)
fprintf(stderr, "dead\n");
break;
}
@@ -616,11 +616,11 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
if (matched_) {
for (int i = 0; i < nsubmatch; i++)
- submatch[i] =
- StringPiece(match_[2 * i],
- static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
- if (ExtraDebug)
- fprintf(stderr, "match (%td,%td)\n",
+ submatch[i] =
+ StringPiece(match_[2 * i],
+ static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
+ if (ExtraDebug)
+ fprintf(stderr, "match (%td,%td)\n",
match_[0] - btext_,
match_[1] - btext_);
return true;
@@ -632,7 +632,7 @@ bool
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind,
StringPiece* match, int nmatch) {
- if (ExtraDebug)
+ if (ExtraDebug)
Dump();
NFA nfa(this);
@@ -651,63 +651,63 @@ Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
return true;
}
-// For each instruction i in the program reachable from the start, compute the
-// number of instructions reachable from i by following only empty transitions
-// and record that count as fanout[i].
-//
-// fanout holds the results and is also the work queue for the outer iteration.
-// reachable holds the reached nodes for the inner iteration.
-void Prog::Fanout(SparseArray<int>* fanout) {
- DCHECK_EQ(fanout->max_size(), size());
- SparseSet reachable(size());
- fanout->clear();
- fanout->set_new(start(), 0);
- for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
+// For each instruction i in the program reachable from the start, compute the
+// number of instructions reachable from i by following only empty transitions
+// and record that count as fanout[i].
+//
+// fanout holds the results and is also the work queue for the outer iteration.
+// reachable holds the reached nodes for the inner iteration.
+void Prog::Fanout(SparseArray<int>* fanout) {
+ DCHECK_EQ(fanout->max_size(), size());
+ SparseSet reachable(size());
+ fanout->clear();
+ fanout->set_new(start(), 0);
+ for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
int* count = &i->value();
- reachable.clear();
- reachable.insert(i->index());
- for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
- int id = *j;
- Prog::Inst* ip = inst(id);
- switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
- break;
-
- case kInstByteRange:
- if (!ip->last())
- reachable.insert(id+1);
-
- (*count)++;
- if (!fanout->has_index(ip->out())) {
- fanout->set_new(ip->out(), 0);
- }
- break;
-
- case kInstAltMatch:
- DCHECK(!ip->last());
- reachable.insert(id+1);
- break;
-
- case kInstCapture:
- case kInstEmptyWidth:
- case kInstNop:
- if (!ip->last())
- reachable.insert(id+1);
-
- reachable.insert(ip->out());
- break;
-
- case kInstMatch:
- if (!ip->last())
- reachable.insert(id+1);
- break;
-
- case kInstFail:
- break;
- }
- }
- }
-}
-
+ reachable.clear();
+ reachable.insert(i->index());
+ for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
+ int id = *j;
+ Prog::Inst* ip = inst(id);
+ switch (ip->opcode()) {
+ default:
+ LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
+ break;
+
+ case kInstByteRange:
+ if (!ip->last())
+ reachable.insert(id+1);
+
+ (*count)++;
+ if (!fanout->has_index(ip->out())) {
+ fanout->set_new(ip->out(), 0);
+ }
+ break;
+
+ case kInstAltMatch:
+ DCHECK(!ip->last());
+ reachable.insert(id+1);
+ break;
+
+ case kInstCapture:
+ case kInstEmptyWidth:
+ case kInstNop:
+ if (!ip->last())
+ reachable.insert(id+1);
+
+ reachable.insert(ip->out());
+ break;
+
+ case kInstMatch:
+ if (!ip->last())
+ reachable.insert(id+1);
+ break;
+
+ case kInstFail:
+ break;
+ }
+ }
+ }
+}
+
} // namespace re2
diff --git a/contrib/libs/re2/re2/onepass.cc b/contrib/libs/re2/re2/onepass.cc
index 263974654d..2789dbb206 100644
--- a/contrib/libs/re2/re2/onepass.cc
+++ b/contrib/libs/re2/re2/onepass.cc
@@ -50,30 +50,30 @@
// See also Anne Brüggemann-Klein and Derick Wood,
// "One-unambiguous regular languages", Information and Computation 142(2).
-#include <stdint.h>
+#include <stdint.h>
#include <string.h>
-#include <algorithm>
+#include <algorithm>
#include <map>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/sparse_set.h"
-#include "re2/stringpiece.h"
-
-// Silence "zero-sized array in struct/union" warning for OneState::action.
-#ifdef _MSC_VER
-#pragma warning(disable: 4200)
-#endif
+#include "re2/stringpiece.h"
+// Silence "zero-sized array in struct/union" warning for OneState::action.
+#ifdef _MSC_VER
+#pragma warning(disable: 4200)
+#endif
+
namespace re2 {
-static const bool ExtraDebug = false;
+static const bool ExtraDebug = false;
// The key insight behind this implementation is that the
// non-determinism in an NFA for a one-pass regular expression
@@ -144,11 +144,11 @@ static const bool ExtraDebug = false;
// maps next input bytes into equivalence classes, to reduce
// the memory footprint.)
struct OneState {
- uint32_t matchcond; // conditions to match right now.
- uint32_t action[];
+ uint32_t matchcond; // conditions to match right now.
+ uint32_t action[];
};
-// The uint32_t conditions in the action are a combination of
+// The uint32_t conditions in the action are a combination of
// condition and capture bits and the next state. The bottom 16 bits
// are the condition and capture bits, and the top 16 are the index of
// the next state.
@@ -165,8 +165,8 @@ struct OneState {
// and kEmptyNonWordBoundary, so we can use that as a sentinel
// instead of needing an extra bit.
-static const int kIndexShift = 16; // number of bits below index
-static const int kEmptyShift = 6; // number of empty flags in prog.h
+static const int kIndexShift = 16; // number of bits below index
+static const int kEmptyShift = 6; // number of empty flags in prog.h
static const int kRealCapShift = kEmptyShift + 1;
static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2;
@@ -174,23 +174,23 @@ static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2;
static const int kCapShift = kRealCapShift - 2;
static const int kMaxCap = kRealMaxCap + 2;
-static const uint32_t kMatchWins = 1 << kEmptyShift;
-static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift;
+static const uint32_t kMatchWins = 1 << kEmptyShift;
+static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift;
-static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary;
+static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary;
// Check, at compile time, that prog.h agrees with math above.
// This function is never called.
void OnePass_Checks() {
- static_assert((1<<kEmptyShift)-1 == kEmptyAllFlags,
- "kEmptyShift disagrees with kEmptyAllFlags");
+ static_assert((1<<kEmptyShift)-1 == kEmptyAllFlags,
+ "kEmptyShift disagrees with kEmptyAllFlags");
// kMaxCap counts pointers, kMaxOnePassCapture counts pairs.
- static_assert(kMaxCap == Prog::kMaxOnePassCapture*2,
- "kMaxCap disagrees with kMaxOnePassCapture");
+ static_assert(kMaxCap == Prog::kMaxOnePassCapture*2,
+ "kMaxCap disagrees with kMaxOnePassCapture");
}
-static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) {
- uint32_t satisfied = Prog::EmptyFlags(context, p);
+static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) {
+ uint32_t satisfied = Prog::EmptyFlags(context, p);
if (cond & kEmptyAllFlags & ~satisfied)
return false;
return true;
@@ -198,17 +198,17 @@ static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) {
// Apply the capture bits in cond, saving p to the appropriate
// locations in cap[].
-static void ApplyCaptures(uint32_t cond, const char* p,
+static void ApplyCaptures(uint32_t cond, const char* p,
const char** cap, int ncap) {
for (int i = 2; i < ncap; i++)
if (cond & (1 << kCapShift << i))
cap[i] = p;
}
-// Computes the OneState* for the given nodeindex.
-static inline OneState* IndexToNode(uint8_t* nodes, int statesize,
+// Computes the OneState* for the given nodeindex.
+static inline OneState* IndexToNode(uint8_t* nodes, int statesize,
int nodeindex) {
- return reinterpret_cast<OneState*>(nodes + statesize*nodeindex);
+ return reinterpret_cast<OneState*>(nodes + statesize*nodeindex);
}
bool Prog::SearchOnePass(const StringPiece& text,
@@ -245,26 +245,26 @@ bool Prog::SearchOnePass(const StringPiece& text,
kind = kFullMatch;
uint8_t* nodes = onepass_nodes_.data();
- int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
- // start() is always mapped to the zeroth OneState.
- OneState* state = IndexToNode(nodes, statesize, 0);
- uint8_t* bytemap = bytemap_;
+ int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
+ // start() is always mapped to the zeroth OneState.
+ OneState* state = IndexToNode(nodes, statesize, 0);
+ uint8_t* bytemap = bytemap_;
const char* bp = text.data();
const char* ep = text.data() + text.size();
const char* p;
bool matched = false;
matchcap[0] = bp;
cap[0] = bp;
- uint32_t nextmatchcond = state->matchcond;
+ uint32_t nextmatchcond = state->matchcond;
for (p = bp; p < ep; p++) {
int c = bytemap[*p & 0xFF];
- uint32_t matchcond = nextmatchcond;
- uint32_t cond = state->action[c];
+ uint32_t matchcond = nextmatchcond;
+ uint32_t cond = state->action[c];
// Determine whether we can reach act->next.
// If so, advance state and nextmatchcond.
if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
- uint32_t nextindex = cond >> kIndexShift;
+ uint32_t nextindex = cond >> kIndexShift;
state = IndexToNode(nodes, statesize, nextindex);
nextmatchcond = state->matchcond;
} else {
@@ -323,7 +323,7 @@ bool Prog::SearchOnePass(const StringPiece& text,
// Look for match at end of input.
{
- uint32_t matchcond = state->matchcond;
+ uint32_t matchcond = state->matchcond;
if (matchcond != kImpossible &&
((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
if (nmatch > 1 && (matchcond & kCapMask))
@@ -339,9 +339,9 @@ done:
if (!matched)
return false;
for (int i = 0; i < nmatch; i++)
- match[i] =
- StringPiece(matchcap[2 * i],
- static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i]));
+ match[i] =
+ StringPiece(matchcap[2 * i],
+ static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i]));
return true;
}
@@ -363,7 +363,7 @@ static bool AddQ(Instq *q, int id) {
struct InstCond {
int id;
- uint32_t cond;
+ uint32_t cond;
};
// Returns whether this is a one-pass program; that is,
@@ -393,37 +393,37 @@ bool Prog::IsOnePass() {
// Willing to use at most 1/4 of the DFA budget (heuristic).
// Limit max node count to 65000 as a conservative estimate to
// avoid overflowing 16-bit node index in encoding.
- int maxnodes = 2 + inst_count(kInstByteRange);
- int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
+ int maxnodes = 2 + inst_count(kInstByteRange);
+ int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t);
if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
return false;
// Flood the graph starting at the start state, and check
// that in each reachable state, each possible byte leads
// to a unique next state.
- int stacksize = inst_count(kInstCapture) +
- inst_count(kInstEmptyWidth) +
- inst_count(kInstNop) + 1; // + 1 for start inst
+ int stacksize = inst_count(kInstCapture) +
+ inst_count(kInstEmptyWidth) +
+ inst_count(kInstNop) + 1; // + 1 for start inst
PODArray<InstCond> stack(stacksize);
-
+
int size = this->size();
PODArray<int> nodebyid(size); // indexed by ip
memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]);
- // Originally, nodes was a uint8_t[maxnodes*statesize], but that was
- // unnecessarily optimistic: why allocate a large amount of memory
- // upfront for a large program when it is unlikely to be one-pass?
- std::vector<uint8_t> nodes;
+ // Originally, nodes was a uint8_t[maxnodes*statesize], but that was
+ // unnecessarily optimistic: why allocate a large amount of memory
+ // upfront for a large program when it is unlikely to be one-pass?
+ std::vector<uint8_t> nodes;
Instq tovisit(size), workq(size);
AddQ(&tovisit, start());
nodebyid[start()] = 0;
int nalloc = 1;
- nodes.insert(nodes.end(), statesize, 0);
+ nodes.insert(nodes.end(), statesize, 0);
for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
int id = *it;
int nodeindex = nodebyid[id];
- OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
+ OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
// Flood graph using manual stack, filling in actions as found.
// Default is none.
@@ -438,107 +438,107 @@ bool Prog::IsOnePass() {
stack[nstack++].cond = 0;
while (nstack > 0) {
int id = stack[--nstack].id;
- uint32_t cond = stack[nstack].cond;
-
- Loop:
+ uint32_t cond = stack[nstack].cond;
+
+ Loop:
Prog::Inst* ip = inst(id);
switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
- break;
-
+ default:
+ LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+ break;
+
case kInstAltMatch:
// TODO(rsc): Ignoring kInstAltMatch optimization.
// Should implement it in this engine, but it's subtle.
- DCHECK(!ip->last());
+ DCHECK(!ip->last());
// If already on work queue, (1) is violated: bail out.
- if (!AddQ(&workq, id+1))
+ if (!AddQ(&workq, id+1))
goto fail;
- id = id+1;
- goto Loop;
+ id = id+1;
+ goto Loop;
case kInstByteRange: {
int nextindex = nodebyid[ip->out()];
if (nextindex == -1) {
if (nalloc >= maxnodes) {
- if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
- "Not OnePass: hit node limit %d >= %d", nalloc, maxnodes);
+ if (ExtraDebug)
+ LOG(ERROR) << StringPrintf(
+ "Not OnePass: hit node limit %d >= %d", nalloc, maxnodes);
goto fail;
}
nextindex = nalloc;
- AddQ(&tovisit, ip->out());
- nodebyid[ip->out()] = nalloc;
+ AddQ(&tovisit, ip->out());
+ nodebyid[ip->out()] = nalloc;
nalloc++;
- nodes.insert(nodes.end(), statesize, 0);
- // Update node because it might have been invalidated.
- node = IndexToNode(nodes.data(), statesize, nodeindex);
+ nodes.insert(nodes.end(), statesize, 0);
+ // Update node because it might have been invalidated.
+ node = IndexToNode(nodes.data(), statesize, nodeindex);
}
for (int c = ip->lo(); c <= ip->hi(); c++) {
int b = bytemap_[c];
- // Skip any bytes immediately after c that are also in b.
- while (c < 256-1 && bytemap_[c+1] == b)
- c++;
- uint32_t act = node->action[b];
- uint32_t newact = (nextindex << kIndexShift) | cond;
- if (matched)
- newact |= kMatchWins;
+ // Skip any bytes immediately after c that are also in b.
+ while (c < 256-1 && bytemap_[c+1] == b)
+ c++;
+ uint32_t act = node->action[b];
+ uint32_t newact = (nextindex << kIndexShift) | cond;
+ if (matched)
+ newact |= kMatchWins;
if ((act & kImpossible) == kImpossible) {
node->action[b] = newact;
} else if (act != newact) {
- if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
- "Not OnePass: conflict on byte %#x at state %d", c, *it);
+ if (ExtraDebug)
+ LOG(ERROR) << StringPrintf(
+ "Not OnePass: conflict on byte %#x at state %d", c, *it);
goto fail;
}
}
if (ip->foldcase()) {
- Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a';
- Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a';
+ Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a';
+ Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a';
for (int c = lo; c <= hi; c++) {
int b = bytemap_[c];
- // Skip any bytes immediately after c that are also in b.
- while (c < 256-1 && bytemap_[c+1] == b)
- c++;
- uint32_t act = node->action[b];
- uint32_t newact = (nextindex << kIndexShift) | cond;
- if (matched)
- newact |= kMatchWins;
+ // Skip any bytes immediately after c that are also in b.
+ while (c < 256-1 && bytemap_[c+1] == b)
+ c++;
+ uint32_t act = node->action[b];
+ uint32_t newact = (nextindex << kIndexShift) | cond;
+ if (matched)
+ newact |= kMatchWins;
if ((act & kImpossible) == kImpossible) {
node->action[b] = newact;
} else if (act != newact) {
- if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
- "Not OnePass: conflict on byte %#x at state %d", c, *it);
+ if (ExtraDebug)
+ LOG(ERROR) << StringPrintf(
+ "Not OnePass: conflict on byte %#x at state %d", c, *it);
goto fail;
}
}
}
-
- if (ip->last())
- break;
- // If already on work queue, (1) is violated: bail out.
- if (!AddQ(&workq, id+1))
- goto fail;
- id = id+1;
- goto Loop;
+
+ if (ip->last())
+ break;
+ // If already on work queue, (1) is violated: bail out.
+ if (!AddQ(&workq, id+1))
+ goto fail;
+ id = id+1;
+ goto Loop;
}
case kInstCapture:
- case kInstEmptyWidth:
- case kInstNop:
- if (!ip->last()) {
- // If already on work queue, (1) is violated: bail out.
- if (!AddQ(&workq, id+1))
- goto fail;
- stack[nstack].id = id+1;
- stack[nstack++].cond = cond;
- }
-
- if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap)
+ case kInstEmptyWidth:
+ case kInstNop:
+ if (!ip->last()) {
+ // If already on work queue, (1) is violated: bail out.
+ if (!AddQ(&workq, id+1))
+ goto fail;
+ stack[nstack].id = id+1;
+ stack[nstack++].cond = cond;
+ }
+
+ if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap)
cond |= (1 << kCapShift) << ip->cap();
- if (ip->opcode() == kInstEmptyWidth)
- cond |= ip->empty();
+ if (ip->opcode() == kInstEmptyWidth)
+ cond |= ip->empty();
// kInstCapture and kInstNop always proceed to ip->out().
// kInstEmptyWidth only sometimes proceeds to ip->out(),
@@ -548,44 +548,44 @@ bool Prog::IsOnePass() {
// If already on work queue, (1) is violated: bail out.
if (!AddQ(&workq, ip->out())) {
- if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ if (ExtraDebug)
+ LOG(ERROR) << StringPrintf(
"Not OnePass: multiple paths %d -> %d", *it, ip->out());
goto fail;
}
- id = ip->out();
- goto Loop;
+ id = ip->out();
+ goto Loop;
case kInstMatch:
if (matched) {
// (3) is violated
- if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ if (ExtraDebug)
+ LOG(ERROR) << StringPrintf(
"Not OnePass: multiple matches from %d", *it);
goto fail;
}
matched = true;
node->matchcond = cond;
- if (ip->last())
- break;
- // If already on work queue, (1) is violated: bail out.
- if (!AddQ(&workq, id+1))
- goto fail;
- id = id+1;
- goto Loop;
-
+ if (ip->last())
+ break;
+ // If already on work queue, (1) is violated: bail out.
+ if (!AddQ(&workq, id+1))
+ goto fail;
+ id = id+1;
+ goto Loop;
+
case kInstFail:
break;
}
}
}
- if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR).
- LOG(ERROR) << "bytemap:\n" << DumpByteMap();
- LOG(ERROR) << "prog:\n" << Dump();
-
- std::map<int, int> idmap;
+ if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR).
+ LOG(ERROR) << "bytemap:\n" << DumpByteMap();
+ LOG(ERROR) << "prog:\n" << Dump();
+
+ std::map<int, int> idmap;
for (int i = 0; i < size; i++)
if (nodebyid[i] != -1)
idmap[nodebyid[i]] = i;
@@ -595,8 +595,8 @@ bool Prog::IsOnePass() {
int id = *it;
int nodeindex = nodebyid[id];
if (nodeindex == -1)
- continue;
- OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
+ continue;
+ OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
dump += StringPrintf("node %d id=%d: matchcond=%#x\n",
nodeindex, id, node->matchcond);
for (int i = 0; i < bytemap_range_; i++) {
@@ -608,7 +608,7 @@ bool Prog::IsOnePass() {
idmap[node->action[i] >> kIndexShift]);
}
}
- LOG(ERROR) << "nodes:\n" << dump;
+ LOG(ERROR) << "nodes:\n" << dump;
}
dfa_mem_ -= nalloc*statesize;
diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc
index 85f16f060b..632d69ae27 100644
--- a/contrib/libs/re2/re2/parse.cc
+++ b/contrib/libs/re2/re2/parse.cc
@@ -16,37 +16,37 @@
// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W.
// See regexp.h for rationale.
-#include <ctype.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <algorithm>
-#include <map>
-#include <string>
+#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <algorithm>
+#include <map>
+#include <string>
#include <vector>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/regexp.h"
-#include "re2/stringpiece.h"
+#include "re2/stringpiece.h"
#include "re2/unicode_casefold.h"
#include "re2/unicode_groups.h"
-#include "re2/walker-inl.h"
-
-#if defined(RE2_USE_ICU)
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-#include "unicode/utypes.h"
-#endif
-
+#include "re2/walker-inl.h"
+
+#if defined(RE2_USE_ICU)
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+#endif
+
namespace re2 {
// Controls the maximum repeat count permitted by the parser.
static int maximum_repeat_count = 1000;
-
+
void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
maximum_repeat_count = i;
}
@@ -183,8 +183,8 @@ private:
int ncap_; // number of capturing parens seen
int rune_max_; // maximum char value for this encoding
- ParseState(const ParseState&) = delete;
- ParseState& operator=(const ParseState&) = delete;
+ ParseState(const ParseState&) = delete;
+ ParseState& operator=(const ParseState&) = delete;
};
// Pseudo-operators - only on parse stack.
@@ -242,8 +242,8 @@ bool Regexp::ParseState::PushRegexp(Regexp* re) {
// single characters (e.g., [.] instead of \.), and some
// analysis does better with fewer character classes.
// Similarly, [Aa] can be rewritten as a literal A with ASCII case folding.
- if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) {
- re->ccb_->RemoveAbove(rune_max_);
+ if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) {
+ re->ccb_->RemoveAbove(rune_max_);
if (re->ccb_->size() == 1) {
Rune r = re->ccb_->begin()->lo;
re->Decref();
@@ -269,12 +269,12 @@ bool Regexp::ParseState::PushRegexp(Regexp* re) {
// Searches the case folding tables and returns the CaseFold* that contains r.
// If there isn't one, returns the CaseFold* with smallest f->lo bigger than r.
// If there isn't one, returns NULL.
-const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) {
- const CaseFold* ef = f + n;
+const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) {
+ const CaseFold* ef = f + n;
// Binary search for entry containing r.
while (n > 0) {
- int m = n/2;
+ int m = n/2;
if (f[m].lo <= r && r <= f[m].hi)
return &f[m];
if (r < f[m].lo) {
@@ -286,10 +286,10 @@ const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) {
}
// There is no entry that contains r, but f points
- // where it would have been. Unless f points at
+ // where it would have been. Unless f points at
// the end of the array, it points at the next entry
// after r.
- if (f < ef)
+ if (f < ef)
return f;
// No entry contains r; no entry contains runes > r.
@@ -297,24 +297,24 @@ const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) {
}
// Returns the result of applying the fold f to the rune r.
-Rune ApplyFold(const CaseFold *f, Rune r) {
+Rune ApplyFold(const CaseFold *f, Rune r) {
switch (f->delta) {
default:
return r + f->delta;
- case EvenOddSkip: // even <-> odd but only applies to every other
- if ((r - f->lo) % 2)
- return r;
- FALLTHROUGH_INTENDED;
+ case EvenOddSkip: // even <-> odd but only applies to every other
+ if ((r - f->lo) % 2)
+ return r;
+ FALLTHROUGH_INTENDED;
case EvenOdd: // even <-> odd
if (r%2 == 0)
return r + 1;
return r - 1;
- case OddEvenSkip: // odd <-> even but only applies to every other
- if ((r - f->lo) % 2)
- return r;
- FALLTHROUGH_INTENDED;
+ case OddEvenSkip: // odd <-> even but only applies to every other
+ if ((r - f->lo) % 2)
+ return r;
+ FALLTHROUGH_INTENDED;
case OddEven: // odd <-> even
if (r%2 == 1)
return r + 1;
@@ -333,7 +333,7 @@ Rune ApplyFold(const CaseFold *f, Rune r) {
//
// CycleFoldRune('?') = '?'
Rune CycleFoldRune(Rune r) {
- const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r);
+ const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r);
if (f == NULL || r < f->lo)
return r;
return ApplyFold(f, r);
@@ -356,7 +356,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
return;
while (lo <= hi) {
- const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo);
+ const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo);
if (f == NULL) // lo has no fold, nor does anything above lo
break;
if (lo < f->lo) { // lo has no fold; next rune with a fold is f->lo
@@ -367,7 +367,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {
// Add in the result of folding the range lo - f->hi
// and that range's fold, recursively.
Rune lo1 = lo;
- Rune hi1 = std::min<Rune>(hi, f->hi);
+ Rune hi1 = std::min<Rune>(hi, f->hi);
switch (f->delta) {
default:
lo1 += f->delta;
@@ -482,23 +482,23 @@ bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,
Regexp::ParseFlags fl = flags_;
if (nongreedy)
fl = fl ^ NonGreedy;
-
- // Squash **, ++ and ??. Regexp::Star() et al. handle this too, but
- // they're mostly for use during simplification, not during parsing.
- if (op == stacktop_->op() && fl == stacktop_->parse_flags())
- return true;
-
- // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
- // op is a repeat, we just have to check that stacktop_->op() is too,
- // then adjust stacktop_.
- if ((stacktop_->op() == kRegexpStar ||
- stacktop_->op() == kRegexpPlus ||
- stacktop_->op() == kRegexpQuest) &&
- fl == stacktop_->parse_flags()) {
- stacktop_->op_ = kRegexpStar;
- return true;
- }
-
+
+ // Squash **, ++ and ??. Regexp::Star() et al. handle this too, but
+ // they're mostly for use during simplification, not during parsing.
+ if (op == stacktop_->op() && fl == stacktop_->parse_flags())
+ return true;
+
+ // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
+ // op is a repeat, we just have to check that stacktop_->op() is too,
+ // then adjust stacktop_.
+ if ((stacktop_->op() == kRegexpStar ||
+ stacktop_->op() == kRegexpPlus ||
+ stacktop_->op() == kRegexpQuest) &&
+ fl == stacktop_->parse_flags()) {
+ stacktop_->op_ = kRegexpStar;
+ return true;
+ }
+
Regexp* re = new Regexp(op, fl);
re->AllocSub(1);
re->down_ = stacktop_->down_;
@@ -508,61 +508,61 @@ bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,
return true;
}
-// RepetitionWalker reports whether the repetition regexp is valid.
-// Valid means that the combination of the top-level repetition
-// and any inner repetitions does not exceed n copies of the
-// innermost thing.
-// This rewalks the regexp tree and is called for every repetition,
-// so we have to worry about inducing quadratic behavior in the parser.
-// We avoid this by only using RepetitionWalker when min or max >= 2.
-// In that case the depth of any >= 2 nesting can only get to 9 without
-// triggering a parse error, so each subtree can only be rewalked 9 times.
-class RepetitionWalker : public Regexp::Walker<int> {
- public:
- RepetitionWalker() {}
- virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
- virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
- int* child_args, int nchild_args);
- virtual int ShortVisit(Regexp* re, int parent_arg);
-
- private:
- RepetitionWalker(const RepetitionWalker&) = delete;
- RepetitionWalker& operator=(const RepetitionWalker&) = delete;
-};
-
-int RepetitionWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
- int arg = parent_arg;
- if (re->op() == kRegexpRepeat) {
- int m = re->max();
- if (m < 0) {
- m = re->min();
- }
- if (m > 0) {
- arg /= m;
- }
- }
- return arg;
-}
-
-int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
- int* child_args, int nchild_args) {
- int arg = pre_arg;
- for (int i = 0; i < nchild_args; i++) {
- if (child_args[i] < arg) {
- arg = child_args[i];
- }
- }
- return arg;
-}
-
-int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
+// RepetitionWalker reports whether the repetition regexp is valid.
+// Valid means that the combination of the top-level repetition
+// and any inner repetitions does not exceed n copies of the
+// innermost thing.
+// This rewalks the regexp tree and is called for every repetition,
+// so we have to worry about inducing quadratic behavior in the parser.
+// We avoid this by only using RepetitionWalker when min or max >= 2.
+// In that case the depth of any >= 2 nesting can only get to 9 without
+// triggering a parse error, so each subtree can only be rewalked 9 times.
+class RepetitionWalker : public Regexp::Walker<int> {
+ public:
+ RepetitionWalker() {}
+ virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
+ virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
+ int* child_args, int nchild_args);
+ virtual int ShortVisit(Regexp* re, int parent_arg);
+
+ private:
+ RepetitionWalker(const RepetitionWalker&) = delete;
+ RepetitionWalker& operator=(const RepetitionWalker&) = delete;
+};
+
+int RepetitionWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
+ int arg = parent_arg;
+ if (re->op() == kRegexpRepeat) {
+ int m = re->max();
+ if (m < 0) {
+ m = re->min();
+ }
+ if (m > 0) {
+ arg /= m;
+ }
+ }
+ return arg;
+}
+
+int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
+ int* child_args, int nchild_args) {
+ int arg = pre_arg;
+ for (int i = 0; i < nchild_args; i++) {
+ if (child_args[i] < arg) {
+ arg = child_args[i];
+ }
+ }
+ return arg;
+}
+
+int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
+ LOG(DFATAL) << "RepetitionWalker::ShortVisit called";
#endif
- return 0;
-}
-
+ return 0;
+}
+
// Pushes a repetition regexp onto the stack.
// A valid argument for the operator must already be on the stack.
bool Regexp::ParseState::PushRepetition(int min, int max,
@@ -591,14 +591,14 @@ bool Regexp::ParseState::PushRepetition(int min, int max,
re->sub()[0] = FinishRegexp(stacktop_);
re->simple_ = re->ComputeSimple();
stacktop_ = re;
- if (min >= 2 || max >= 2) {
- RepetitionWalker w;
+ if (min >= 2 || max >= 2) {
+ RepetitionWalker w;
if (w.Walk(stacktop_, maximum_repeat_count) == 0) {
- status_->set_code(kRegexpRepeatSize);
- status_->set_error_arg(s);
- return false;
- }
- }
+ status_->set_code(kRegexpRepeatSize);
+ status_->set_error_arg(s);
+ return false;
+ }
+ }
return true;
}
@@ -637,33 +637,33 @@ bool Regexp::ParseState::DoVerticalBar() {
Regexp* r1;
Regexp* r2;
if ((r1 = stacktop_) != NULL &&
- (r2 = r1->down_) != NULL &&
+ (r2 = r1->down_) != NULL &&
r2->op() == kVerticalBar) {
Regexp* r3;
- if ((r3 = r2->down_) != NULL &&
- (r1->op() == kRegexpAnyChar || r3->op() == kRegexpAnyChar)) {
- // AnyChar is above or below the vertical bar. Let it subsume
- // the other when the other is Literal, CharClass or AnyChar.
- if (r3->op() == kRegexpAnyChar &&
- (r1->op() == kRegexpLiteral ||
- r1->op() == kRegexpCharClass ||
- r1->op() == kRegexpAnyChar)) {
- // Discard r1.
- stacktop_ = r2;
- r1->Decref();
- return true;
- }
- if (r1->op() == kRegexpAnyChar &&
- (r3->op() == kRegexpLiteral ||
- r3->op() == kRegexpCharClass ||
- r3->op() == kRegexpAnyChar)) {
- // Rearrange the stack and discard r3.
- r1->down_ = r3->down_;
- r2->down_ = r1;
- stacktop_ = r2;
- r3->Decref();
- return true;
+ if ((r3 = r2->down_) != NULL &&
+ (r1->op() == kRegexpAnyChar || r3->op() == kRegexpAnyChar)) {
+ // AnyChar is above or below the vertical bar. Let it subsume
+ // the other when the other is Literal, CharClass or AnyChar.
+ if (r3->op() == kRegexpAnyChar &&
+ (r1->op() == kRegexpLiteral ||
+ r1->op() == kRegexpCharClass ||
+ r1->op() == kRegexpAnyChar)) {
+ // Discard r1.
+ stacktop_ = r2;
+ r1->Decref();
+ return true;
}
+ if (r1->op() == kRegexpAnyChar &&
+ (r3->op() == kRegexpLiteral ||
+ r3->op() == kRegexpCharClass ||
+ r3->op() == kRegexpAnyChar)) {
+ // Rearrange the stack and discard r3.
+ r1->down_ = r3->down_;
+ r2->down_ = r1;
+ stacktop_ = r2;
+ r3->Decref();
+ return true;
+ }
}
// Swap r1 below vertical bar (r2).
r1->down_ = r2->down_;
@@ -1083,14 +1083,14 @@ void FactorAlternationImpl::Round1(Regexp** sub, int nsub,
void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
Regexp::ParseFlags flags,
std::vector<Splice>* splices) {
- // Round 2: Factor out common simple prefixes,
- // just the first piece of each concatenation.
- // This will be good enough a lot of the time.
- //
- // Complex subexpressions (e.g. involving quantifiers)
- // are not safe to factor because that collapses their
- // distinct paths through the automaton, which affects
- // correctness in some cases.
+ // Round 2: Factor out common simple prefixes,
+ // just the first piece of each concatenation.
+ // This will be good enough a lot of the time.
+ //
+ // Complex subexpressions (e.g. involving quantifiers)
+ // are not safe to factor because that collapses their
+ // distinct paths through the automaton, which affects
+ // correctness in some cases.
int start = 0;
Regexp* first = NULL;
for (int i = 0; i <= nsub; i++) {
@@ -1100,23 +1100,23 @@ void FactorAlternationImpl::Round2(Regexp** sub, int nsub,
if (i < nsub) {
first_i = Regexp::LeadingRegexp(sub[i]);
if (first != NULL &&
- // first must be an empty-width op
- // OR a char class, any char or any byte
- // OR a fixed repeat of a literal, char class, any char or any byte.
- (first->op() == kRegexpBeginLine ||
- first->op() == kRegexpEndLine ||
- first->op() == kRegexpWordBoundary ||
- first->op() == kRegexpNoWordBoundary ||
- first->op() == kRegexpBeginText ||
- first->op() == kRegexpEndText ||
- first->op() == kRegexpCharClass ||
- first->op() == kRegexpAnyChar ||
- first->op() == kRegexpAnyByte ||
- (first->op() == kRegexpRepeat &&
- first->min() == first->max() &&
- (first->sub()[0]->op() == kRegexpLiteral ||
- first->sub()[0]->op() == kRegexpCharClass ||
- first->sub()[0]->op() == kRegexpAnyChar ||
+ // first must be an empty-width op
+ // OR a char class, any char or any byte
+ // OR a fixed repeat of a literal, char class, any char or any byte.
+ (first->op() == kRegexpBeginLine ||
+ first->op() == kRegexpEndLine ||
+ first->op() == kRegexpWordBoundary ||
+ first->op() == kRegexpNoWordBoundary ||
+ first->op() == kRegexpBeginText ||
+ first->op() == kRegexpEndText ||
+ first->op() == kRegexpCharClass ||
+ first->op() == kRegexpAnyChar ||
+ first->op() == kRegexpAnyByte ||
+ (first->op() == kRegexpRepeat &&
+ first->min() == first->max() &&
+ (first->sub()[0]->op() == kRegexpLiteral ||
+ first->sub()[0]->op() == kRegexpCharClass ||
+ first->sub()[0]->op() == kRegexpAnyChar ||
first->sub()[0]->op() == kRegexpAnyByte))) &&
Regexp::Equal(first, first_i))
continue;
@@ -1312,7 +1312,7 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
if (r >= 0) {
re1->op_ = kRegexpLiteral;
re1->rune_ = r;
- re1->parse_flags_ = static_cast<uint16_t>(flags);
+ re1->parse_flags_ = static_cast<uint16_t>(flags);
return true;
}
@@ -1391,18 +1391,18 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
// Argument order is backwards from usual Google style
// but consistent with chartorune.
static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
- // fullrune() takes int, not size_t. However, it just looks
- // at the leading byte and treats any length >= 4 the same.
+ // fullrune() takes int, not size_t. However, it just looks
+ // at the leading byte and treats any length >= 4 the same.
if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) {
- int n = chartorune(r, sp->data());
- // Some copies of chartorune have a bug that accepts
- // encodings of values in (10FFFF, 1FFFFF] as valid.
- // Those values break the character class algorithm,
- // which assumes Runemax is the largest rune.
- if (*r > Runemax) {
- n = 1;
- *r = Runeerror;
- }
+ int n = chartorune(r, sp->data());
+ // Some copies of chartorune have a bug that accepts
+ // encodings of values in (10FFFF, 1FFFFF] as valid.
+ // Those values break the character class algorithm,
+ // which assumes Runemax is the largest rune.
+ if (*r > Runemax) {
+ n = 1;
+ *r = Runeerror;
+ }
if (!(n == 1 && *r == Runeerror)) { // no decoding error
sp->remove_prefix(n);
return n;
@@ -1456,12 +1456,12 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
if (s->empty() || (*s)[0] != '\\') {
// Should not happen - caller always checks.
status->set_code(kRegexpInternalError);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(StringPiece());
return false;
}
if (s->size() == 1) {
status->set_code(kRegexpTrailingBackslash);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(StringPiece());
return false;
}
Rune c, c1;
@@ -1492,7 +1492,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// Single non-zero octal digit is a backreference; not supported.
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
goto BadEscape;
- FALLTHROUGH_INTENDED;
+ FALLTHROUGH_INTENDED;
case '0':
// consume up to three octal digits; already have one.
code = c - '0';
@@ -1507,8 +1507,8 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
}
}
}
- if (code > rune_max)
- goto BadEscape;
+ if (code > rune_max)
+ goto BadEscape;
*rp = code;
return true;
@@ -1582,7 +1582,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// in Perl, \b means word-boundary but [\b]
// means backspace. We don't support that:
// if you want a backspace embed a literal
- // backspace character or use \x08.
+ // backspace character or use \x08.
//
// case 'b':
// *rp = '\b';
@@ -1594,7 +1594,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
BadEscape:
// Unrecognized escape sequence.
status->set_code(kRegexpBadEscape);
- status->set_error_arg(
+ status->set_error_arg(
StringPiece(begin, static_cast<size_t>(s->data() - begin)));
return false;
}
@@ -1623,8 +1623,8 @@ void CharClassBuilder::AddRangeFlags(
}
// Look for a group with the given name.
-static const UGroup* LookupGroup(const StringPiece& name,
- const UGroup *groups, int ngroups) {
+static const UGroup* LookupGroup(const StringPiece& name,
+ const UGroup *groups, int ngroups) {
// Simple name lookup.
for (int i = 0; i < ngroups; i++)
if (StringPiece(groups[i].name) == name)
@@ -1641,12 +1641,12 @@ static const UGroup* LookupPerlGroup(const StringPiece& name) {
return LookupGroup(name, perl_groups, num_perl_groups);
}
-#if !defined(RE2_USE_ICU)
-// Fake UGroup containing all Runes
-static URange16 any16[] = { { 0, 65535 } };
-static URange32 any32[] = { { 65536, Runemax } };
-static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
-
+#if !defined(RE2_USE_ICU)
+// Fake UGroup containing all Runes
+static URange16 any16[] = { { 0, 65535 } };
+static URange32 any32[] = { { 65536, Runemax } };
+static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
+
// Look for a Unicode group with the given name (e.g., "Han")
static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
// Special case: "Any" means any.
@@ -1654,11 +1654,11 @@ static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
return &anygroup;
return LookupGroup(name, unicode_groups, num_unicode_groups);
}
-#endif
+#endif
// Add a UGroup or its negation to the character class.
-static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
- Regexp::ParseFlags parse_flags) {
+static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
+ Regexp::ParseFlags parse_flags) {
if (sign == +1) {
for (int i = 0; i < g->nr16; i++) {
cc->AddRangeFlags(g->r16[i].lo, g->r16[i].hi, parse_flags);
@@ -1675,13 +1675,13 @@ static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
// to what's already missing. Too hard, so do in two steps.
CharClassBuilder ccb1;
AddUGroup(&ccb1, g, +1, parse_flags);
- // If the flags say to take out \n, put it in, so that negating will take it out.
- // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags.
- bool cutnl = !(parse_flags & Regexp::ClassNL) ||
- (parse_flags & Regexp::NeverNL);
- if (cutnl) {
- ccb1.AddRange('\n', '\n');
- }
+ // If the flags say to take out \n, put it in, so that negating will take it out.
+ // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags.
+ bool cutnl = !(parse_flags & Regexp::ClassNL) ||
+ (parse_flags & Regexp::NeverNL);
+ if (cutnl) {
+ ccb1.AddRange('\n', '\n');
+ }
ccb1.Negate();
cc->AddCharClass(&ccb1);
return;
@@ -1746,7 +1746,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Committed to parse. Results:
int sign = +1; // -1 = negated char class
if (c == 'P')
- sign = -sign;
+ sign = -sign;
StringPiece seq = *s; // \p{Han} or \pL
StringPiece name; // Han or L
s->remove_prefix(2); // '\\', 'p'
@@ -1759,8 +1759,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
name = StringPiece(p, static_cast<size_t>(s->data() - p));
} else {
// Name is in braces. Look for closing }
- size_t end = s->find('}', 0);
- if (end == StringPiece::npos) {
+ size_t end = s->find('}', 0);
+ if (end == StringPiece::npos) {
if (!IsValidUTF8(seq, status))
return kParseError;
status->set_code(kRegexpBadCharRange);
@@ -1780,9 +1780,9 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
sign = -sign;
name.remove_prefix(1); // '^'
}
-
-#if !defined(RE2_USE_ICU)
- // Look up the group in the RE2 Unicode data.
+
+#if !defined(RE2_USE_ICU)
+ // Look up the group in the RE2 Unicode data.
const UGroup *g = LookupUnicodeGroup(name);
if (g == NULL) {
status->set_code(kRegexpBadCharRange);
@@ -1791,30 +1791,30 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
}
AddUGroup(cc, g, sign, parse_flags);
-#else
- // Look up the group in the ICU Unicode data. Because ICU provides full
- // Unicode properties support, this could be more than a lookup by name.
- ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
+#else
+ // Look up the group in the ICU Unicode data. Because ICU provides full
+ // Unicode properties support, this could be more than a lookup by name.
+ ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
std::string("\\p{") + std::string(name) + std::string("}"));
- UErrorCode uerr = U_ZERO_ERROR;
- ::icu::UnicodeSet uset(ustr, uerr);
- if (U_FAILURE(uerr)) {
- status->set_code(kRegexpBadCharRange);
- status->set_error_arg(seq);
- return kParseError;
- }
-
- // Convert the UnicodeSet to a URange32 and UGroup that we can add.
- int nr = uset.getRangeCount();
+ UErrorCode uerr = U_ZERO_ERROR;
+ ::icu::UnicodeSet uset(ustr, uerr);
+ if (U_FAILURE(uerr)) {
+ status->set_code(kRegexpBadCharRange);
+ status->set_error_arg(seq);
+ return kParseError;
+ }
+
+ // Convert the UnicodeSet to a URange32 and UGroup that we can add.
+ int nr = uset.getRangeCount();
PODArray<URange32> r(nr);
- for (int i = 0; i < nr; i++) {
- r[i].lo = uset.getRangeStart(i);
- r[i].hi = uset.getRangeEnd(i);
- }
+ for (int i = 0; i < nr; i++) {
+ r[i].lo = uset.getRangeStart(i);
+ r[i].hi = uset.getRangeEnd(i);
+ }
UGroup g = {"", +1, 0, 0, r.data(), nr};
- AddUGroup(cc, &g, sign, parse_flags);
-#endif
-
+ AddUGroup(cc, &g, sign, parse_flags);
+#endif
+
return kParseOk;
}
@@ -1841,7 +1841,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
// Got it. Check that it's valid.
q += 2;
- StringPiece name(p, static_cast<size_t>(q - p));
+ StringPiece name(p, static_cast<size_t>(q - p));
const UGroup *g = LookupPosixGroup(name);
if (g == NULL) {
@@ -1895,8 +1895,8 @@ bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
return false;
if (rr->hi < rr->lo) {
status->set_code(kRegexpBadCharRange);
- status->set_error_arg(
- StringPiece(os.data(), static_cast<size_t>(s->data() - os.data())));
+ status->set_error_arg(
+ StringPiece(os.data(), static_cast<size_t>(s->data() - os.data())));
return false;
}
} else {
@@ -1915,7 +1915,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
if (s->empty() || (*s)[0] != '[') {
// Caller checked this.
status->set_code(kRegexpInternalError);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(StringPiece());
return false;
}
bool negated = false;
@@ -2083,8 +2083,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
// so that's the one we implement. One is enough.
if (t.size() > 2 && t[0] == 'P' && t[1] == '<') {
// Pull out name.
- size_t end = t.find('>', 2);
- if (end == StringPiece::npos) {
+ size_t end = t.find('>', 2);
+ if (end == StringPiece::npos) {
if (!IsValidUTF8(*s, status_))
return false;
status_->set_code(kRegexpBadNamedCapture);
@@ -2192,7 +2192,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
BadPerlOp:
status_->set_code(kRegexpBadPerlOp);
- status_->set_error_arg(
+ status_->set_error_arg(
StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
return false;
}
@@ -2205,7 +2205,7 @@ void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) {
char buf[UTFmax];
utf->clear();
- for (size_t i = 0; i < latin1.size(); i++) {
+ for (size_t i = 0; i < latin1.size(); i++) {
Rune r = latin1[i] & 0xFF;
int n = runetochar(buf, &r);
utf->append(buf, n);
@@ -2246,9 +2246,9 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
return ps.DoFinish();
}
- StringPiece lastunary = StringPiece();
+ StringPiece lastunary = StringPiece();
while (!t.empty()) {
- StringPiece isunary = StringPiece();
+ StringPiece isunary = StringPiece();
switch (t[0]) {
default: {
Rune r;
@@ -2267,13 +2267,13 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
return NULL;
break;
}
- if (ps.flags() & NeverCapture) {
- if (!ps.DoLeftParenNoCapture())
- return NULL;
- } else {
- if (!ps.DoLeftParen(StringPiece()))
- return NULL;
- }
+ if (ps.flags() & NeverCapture) {
+ if (!ps.DoLeftParenNoCapture())
+ return NULL;
+ } else {
+ if (!ps.DoLeftParen(StringPiece()))
+ return NULL;
+ }
t.remove_prefix(1); // '('
break;
@@ -2340,14 +2340,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
// a** is a syntax error, not a double-star.
// (and a++ means something else entirely, which we don't support!)
status->set_code(kRegexpRepeatOp);
- status->set_error_arg(StringPiece(
+ status->set_error_arg(StringPiece(
lastunary.data(),
static_cast<size_t>(t.data() - lastunary.data())));
return NULL;
}
}
- opstr = StringPiece(opstr.data(),
- static_cast<size_t>(t.data() - opstr.data()));
+ opstr = StringPiece(opstr.data(),
+ static_cast<size_t>(t.data() - opstr.data()));
if (!ps.PushRepeatOp(op, opstr, nongreedy))
return NULL;
isunary = opstr;
@@ -2373,14 +2373,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (!lastunary.empty()) {
// Not allowed to stack repetition operators.
status->set_code(kRegexpRepeatOp);
- status->set_error_arg(StringPiece(
+ status->set_error_arg(StringPiece(
lastunary.data(),
static_cast<size_t>(t.data() - lastunary.data())));
return NULL;
}
}
- opstr = StringPiece(opstr.data(),
- static_cast<size_t>(t.data() - opstr.data()));
+ opstr = StringPiece(opstr.data(),
+ static_cast<size_t>(t.data() - opstr.data()));
if (!ps.PushRepetition(lo, hi, opstr, nongreedy))
return NULL;
isunary = opstr;
diff --git a/contrib/libs/re2/re2/prefilter.cc b/contrib/libs/re2/re2/prefilter.cc
index a47b3120fb..0a13823200 100644
--- a/contrib/libs/re2/re2/prefilter.cc
+++ b/contrib/libs/re2/re2/prefilter.cc
@@ -3,23 +3,23 @@
// license that can be found in the LICENSE file.
#include "re2/prefilter.h"
-
-#include <stddef.h>
-#include <stdint.h>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
-#include "re2/re2.h"
-#include "re2/unicode_casefold.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/re2.h"
+#include "re2/unicode_casefold.h"
#include "re2/walker-inl.h"
namespace re2 {
-static const bool ExtraDebug = false;
+static const bool ExtraDebug = false;
typedef std::set<std::string>::iterator SSIter;
typedef std::set<std::string>::const_iterator ConstSSIter;
@@ -29,13 +29,13 @@ Prefilter::Prefilter(Op op) {
op_ = op;
subs_ = NULL;
if (op_ == AND || op_ == OR)
- subs_ = new std::vector<Prefilter*>;
+ subs_ = new std::vector<Prefilter*>;
}
// Destroys a Prefilter.
Prefilter::~Prefilter() {
if (subs_) {
- for (size_t i = 0; i < subs_->size(); i++)
+ for (size_t i = 0; i < subs_->size(); i++)
delete (*subs_)[i];
delete subs_;
subs_ = NULL;
@@ -49,7 +49,7 @@ Prefilter* Prefilter::Simplify() {
}
// Nothing left in the AND/OR.
- if (subs_->empty()) {
+ if (subs_->empty()) {
if (op_ == AND)
op_ = ALL; // AND of nothing is true
else
@@ -104,7 +104,7 @@ Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
// If a and b match op, merge their contents.
if (a->op() == op && b->op() == op) {
- for (size_t i = 0; i < b->subs()->size(); i++) {
+ for (size_t i = 0; i < b->subs()->size(); i++) {
Prefilter* bb = (*b->subs())[i];
a->subs()->push_back(bb);
}
@@ -172,28 +172,28 @@ Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
return or_prefilter;
}
-static Rune ToLowerRune(Rune r) {
- if (r < Runeself) {
- if ('A' <= r && r <= 'Z')
- r += 'a' - 'A';
- return r;
- }
-
- const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
- if (f == NULL || r < f->lo)
- return r;
- return ApplyFold(f, r);
-}
-
-static Rune ToLowerRuneLatin1(Rune r) {
- if ('A' <= r && r <= 'Z')
- r += 'a' - 'A';
- return r;
-}
-
+static Rune ToLowerRune(Rune r) {
+ if (r < Runeself) {
+ if ('A' <= r && r <= 'Z')
+ r += 'a' - 'A';
+ return r;
+ }
+
+ const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
+ if (f == NULL || r < f->lo)
+ return r;
+ return ApplyFold(f, r);
+}
+
+static Rune ToLowerRuneLatin1(Rune r) {
+ if ('A' <= r && r <= 'Z')
+ r += 'a' - 'A';
+ return r;
+}
+
Prefilter* Prefilter::FromString(const std::string& str) {
Prefilter* m = new Prefilter(Prefilter::ATOM);
- m->atom_ = str;
+ m->atom_ = str;
return m;
}
@@ -215,9 +215,9 @@ class Prefilter::Info {
static Info* EmptyString();
static Info* NoMatch();
static Info* AnyCharOrAnyByte();
- static Info* CClass(CharClass* cc, bool latin1);
+ static Info* CClass(CharClass* cc, bool latin1);
static Info* Literal(Rune r);
- static Info* LiteralLatin1(Rune r);
+ static Info* LiteralLatin1(Rune r);
static Info* AnyMatch();
// Format Info as a string.
@@ -279,7 +279,7 @@ std::string Prefilter::Info::ToString() {
}
return s;
}
-
+
if (match_)
return match_->DebugString();
@@ -395,26 +395,26 @@ static std::string RuneToString(Rune r) {
}
static std::string RuneToStringLatin1(Rune r) {
- char c = r & 0xff;
+ char c = r & 0xff;
return std::string(&c, 1);
-}
-
+}
+
// Constructs Info for literal rune.
Prefilter::Info* Prefilter::Info::Literal(Rune r) {
Info* info = new Info();
- info->exact_.insert(RuneToString(ToLowerRune(r)));
- info->is_exact_ = true;
- return info;
-}
-
-// Constructs Info for literal rune for Latin1 encoded string.
-Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
- Info* info = new Info();
- info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
+ info->exact_.insert(RuneToString(ToLowerRune(r)));
info->is_exact_ = true;
return info;
}
+// Constructs Info for literal rune for Latin1 encoded string.
+Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
+ Info* info = new Info();
+ info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
+ info->is_exact_ = true;
+ return info;
+}
+
// Constructs Info for dot (any character) or \C (any byte).
Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
Prefilter::Info* info = new Prefilter::Info();
@@ -449,12 +449,12 @@ Prefilter::Info* Prefilter::Info::EmptyString() {
// Constructs Prefilter::Info for a character class.
typedef CharClass::iterator CCIter;
-Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
- bool latin1) {
- if (ExtraDebug) {
- LOG(ERROR) << "CharClassInfo:";
+Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
+ bool latin1) {
+ if (ExtraDebug) {
+ LOG(ERROR) << "CharClassInfo:";
for (CCIter i = cc->begin(); i != cc->end(); ++i)
- LOG(ERROR) << " " << i->lo << "-" << i->hi;
+ LOG(ERROR) << " " << i->lo << "-" << i->hi;
}
// If the class is too large, it's okay to overestimate.
@@ -463,26 +463,26 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
Prefilter::Info *a = new Prefilter::Info();
for (CCIter i = cc->begin(); i != cc->end(); ++i)
- for (Rune r = i->lo; r <= i->hi; r++) {
- if (latin1) {
- a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
- } else {
- a->exact_.insert(RuneToString(ToLowerRune(r)));
- }
- }
-
-
+ for (Rune r = i->lo; r <= i->hi; r++) {
+ if (latin1) {
+ a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
+ } else {
+ a->exact_.insert(RuneToString(ToLowerRune(r)));
+ }
+ }
+
+
a->is_exact_ = true;
- if (ExtraDebug)
- LOG(ERROR) << " = " << a->ToString();
+ if (ExtraDebug)
+ LOG(ERROR) << " = " << a->ToString();
return a;
}
class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
public:
- Walker(bool latin1) : latin1_(latin1) {}
+ Walker(bool latin1) : latin1_(latin1) {}
virtual Info* PostVisit(
Regexp* re, Info* parent_arg,
@@ -493,20 +493,20 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
Regexp* re,
Info* parent_arg);
- bool latin1() { return latin1_; }
+ bool latin1() { return latin1_; }
private:
- bool latin1_;
-
- Walker(const Walker&) = delete;
- Walker& operator=(const Walker&) = delete;
+ bool latin1_;
+
+ Walker(const Walker&) = delete;
+ Walker& operator=(const Walker&) = delete;
};
Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
- if (ExtraDebug)
- LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
-
- bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
- Prefilter::Info::Walker w(latin1);
+ if (ExtraDebug)
+ LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
+
+ bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
+ Prefilter::Info::Walker w(latin1);
Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
if (w.stopped_early()) {
@@ -552,12 +552,12 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
case kRegexpLiteral:
- if (latin1()) {
- info = LiteralLatin1(re->rune());
- }
- else {
- info = Literal(re->rune());
- }
+ if (latin1()) {
+ info = LiteralLatin1(re->rune());
+ }
+ else {
+ info = Literal(re->rune());
+ }
break;
case kRegexpLiteralString:
@@ -565,17 +565,17 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
info = NoMatch();
break;
}
- if (latin1()) {
- info = LiteralLatin1(re->runes()[0]);
- for (int i = 1; i < re->nrunes(); i++) {
- info = Concat(info, LiteralLatin1(re->runes()[i]));
- }
- } else {
- info = Literal(re->runes()[0]);
- for (int i = 1; i < re->nrunes(); i++) {
- info = Concat(info, Literal(re->runes()[i]));
- }
- }
+ if (latin1()) {
+ info = LiteralLatin1(re->runes()[0]);
+ for (int i = 1; i < re->nrunes(); i++) {
+ info = Concat(info, LiteralLatin1(re->runes()[i]));
+ }
+ } else {
+ info = Literal(re->runes()[0]);
+ for (int i = 1; i < re->nrunes(); i++) {
+ info = Concat(info, Literal(re->runes()[i]));
+ }
+ }
break;
case kRegexpConcat: {
@@ -626,7 +626,7 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
case kRegexpCharClass:
- info = CClass(re->cc(), latin1());
+ info = CClass(re->cc(), latin1());
break;
case kRegexpCapture:
@@ -635,9 +635,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
break;
}
- if (ExtraDebug)
- LOG(ERROR) << "BuildInfo " << re->ToString()
- << ": " << (info ? info->ToString() : "");
+ if (ExtraDebug)
+ LOG(ERROR) << "BuildInfo " << re->ToString()
+ << ": " << (info ? info->ToString() : "");
return info;
}
@@ -674,21 +674,21 @@ std::string Prefilter::DebugString() const {
return "";
case AND: {
std::string s = "";
- for (size_t i = 0; i < subs_->size(); i++) {
+ for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += " ";
- Prefilter* sub = (*subs_)[i];
- s += sub ? sub->DebugString() : "<nil>";
+ Prefilter* sub = (*subs_)[i];
+ s += sub ? sub->DebugString() : "<nil>";
}
return s;
}
case OR: {
std::string s = "(";
- for (size_t i = 0; i < subs_->size(); i++) {
+ for (size_t i = 0; i < subs_->size(); i++) {
if (i > 0)
s += "|";
- Prefilter* sub = (*subs_)[i];
- s += sub ? sub->DebugString() : "<nil>";
+ Prefilter* sub = (*subs_)[i];
+ s += sub ? sub->DebugString() : "<nil>";
}
s += ")";
return s;
diff --git a/contrib/libs/re2/re2/prefilter.h b/contrib/libs/re2/re2/prefilter.h
index 4fedeb4a7c..8390aa8892 100644
--- a/contrib/libs/re2/re2/prefilter.h
+++ b/contrib/libs/re2/re2/prefilter.h
@@ -2,19 +2,19 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_PREFILTER_H_
-#define RE2_PREFILTER_H_
-
+#ifndef RE2_PREFILTER_H_
+#define RE2_PREFILTER_H_
+
// Prefilter is the class used to extract string guards from regexps.
// Rather than using Prefilter class directly, use FilteredRE2.
// See filtered_re2.h
-#include <set>
-#include <string>
-#include <vector>
+#include <set>
+#include <string>
+#include <vector>
-#include "util/util.h"
-#include "util/logging.h"
+#include "util/util.h"
+#include "util/logging.h"
namespace re2 {
@@ -42,14 +42,14 @@ class Prefilter {
int unique_id() const { return unique_id_; }
// The children of the Prefilter node.
- std::vector<Prefilter*>* subs() {
- DCHECK(op_ == AND || op_ == OR);
+ std::vector<Prefilter*>* subs() {
+ DCHECK(op_ == AND || op_ == OR);
return subs_;
}
// Set the children vector. Prefilter takes ownership of subs and
// subs_ will be deleted when Prefilter is deleted.
- void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
+ void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
// Given a RE2, return a Prefilter. The caller takes ownership of
// the Prefilter and should deallocate it. Returns NULL if Prefilter
@@ -87,7 +87,7 @@ class Prefilter {
Op op_;
// Sub-matches for AND or OR Prefilter.
- std::vector<Prefilter*>* subs_;
+ std::vector<Prefilter*>* subs_;
// Actual string to match in leaf node.
std::string atom_;
@@ -99,8 +99,8 @@ class Prefilter {
// and -1 for duplicate nodes.
int unique_id_;
- Prefilter(const Prefilter&) = delete;
- Prefilter& operator=(const Prefilter&) = delete;
+ Prefilter(const Prefilter&) = delete;
+ Prefilter& operator=(const Prefilter&) = delete;
};
} // namespace re2
diff --git a/contrib/libs/re2/re2/prefilter_tree.cc b/contrib/libs/re2/re2/prefilter_tree.cc
index fdf4e083c9..688b2751aa 100644
--- a/contrib/libs/re2/re2/prefilter_tree.cc
+++ b/contrib/libs/re2/re2/prefilter_tree.cc
@@ -2,61 +2,61 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "re2/prefilter_tree.h"
-
-#include <stddef.h>
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "util/util.h"
-#include "util/logging.h"
+#include "re2/prefilter_tree.h"
+
+#include <stddef.h>
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
#include "util/strutil.h"
#include "re2/prefilter.h"
-#include "re2/re2.h"
+#include "re2/re2.h"
namespace re2 {
-static const bool ExtraDebug = false;
-
+static const bool ExtraDebug = false;
+
PrefilterTree::PrefilterTree()
- : compiled_(false),
- min_atom_len_(3) {
-}
-
-PrefilterTree::PrefilterTree(int min_atom_len)
- : compiled_(false),
- min_atom_len_(min_atom_len) {
+ : compiled_(false),
+ min_atom_len_(3) {
}
+PrefilterTree::PrefilterTree(int min_atom_len)
+ : compiled_(false),
+ min_atom_len_(min_atom_len) {
+}
+
PrefilterTree::~PrefilterTree() {
- for (size_t i = 0; i < prefilter_vec_.size(); i++)
+ for (size_t i = 0; i < prefilter_vec_.size(); i++)
delete prefilter_vec_[i];
- for (size_t i = 0; i < entries_.size(); i++)
+ for (size_t i = 0; i < entries_.size(); i++)
delete entries_[i].parents;
}
-void PrefilterTree::Add(Prefilter* prefilter) {
+void PrefilterTree::Add(Prefilter* prefilter) {
if (compiled_) {
- LOG(DFATAL) << "Add called after Compile.";
+ LOG(DFATAL) << "Add called after Compile.";
return;
}
- if (prefilter != NULL && !KeepNode(prefilter)) {
- delete prefilter;
- prefilter = NULL;
+ if (prefilter != NULL && !KeepNode(prefilter)) {
+ delete prefilter;
+ prefilter = NULL;
}
- prefilter_vec_.push_back(prefilter);
+ prefilter_vec_.push_back(prefilter);
}
void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
if (compiled_) {
- LOG(DFATAL) << "Compile called already.";
+ LOG(DFATAL) << "Compile called already.";
return;
}
@@ -77,31 +77,31 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
// no longer necessary for their parent to trigger; that is, we do
// not miss out on any regexps triggering by getting rid of a
// prefilter node.
- for (size_t i = 0; i < entries_.size(); i++) {
- StdIntMap* parents = entries_[i].parents;
+ for (size_t i = 0; i < entries_.size(); i++) {
+ StdIntMap* parents = entries_[i].parents;
if (parents->size() > 8) {
// This one triggers too many things. If all the parents are AND
// nodes and have other things guarding them, then get rid of
// this trigger. TODO(vsri): Adjust the threshold appropriately,
// make it a function of total number of nodes?
bool have_other_guard = true;
- for (StdIntMap::iterator it = parents->begin();
- it != parents->end(); ++it) {
+ for (StdIntMap::iterator it = parents->begin();
+ it != parents->end(); ++it) {
have_other_guard = have_other_guard &&
- (entries_[it->first].propagate_up_at_count > 1);
- }
+ (entries_[it->first].propagate_up_at_count > 1);
+ }
if (have_other_guard) {
- for (StdIntMap::iterator it = parents->begin();
+ for (StdIntMap::iterator it = parents->begin();
it != parents->end(); ++it)
- entries_[it->first].propagate_up_at_count -= 1;
+ entries_[it->first].propagate_up_at_count -= 1;
parents->clear(); // Forget the parents
}
}
}
- if (ExtraDebug)
+ if (ExtraDebug)
PrintDebugInfo(&nodes);
}
@@ -119,7 +119,7 @@ std::string PrefilterTree::NodeString(Prefilter* node) const {
if (node->op() == Prefilter::ATOM) {
s += node->atom();
} else {
- for (size_t i = 0; i < node->subs()->size(); i++) {
+ for (size_t i = 0; i < node->subs()->size(); i++) {
if (i > 0)
s += ',';
s += StringPrintf("%d", (*node->subs())[i]->unique_id());
@@ -128,56 +128,56 @@ std::string PrefilterTree::NodeString(Prefilter* node) const {
return s;
}
-bool PrefilterTree::KeepNode(Prefilter* node) const {
- if (node == NULL)
- return false;
-
- switch (node->op()) {
- default:
- LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
- return false;
-
- case Prefilter::ALL:
+bool PrefilterTree::KeepNode(Prefilter* node) const {
+ if (node == NULL)
+ return false;
+
+ switch (node->op()) {
+ default:
+ LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op();
+ return false;
+
+ case Prefilter::ALL:
case Prefilter::NONE:
- return false;
-
- case Prefilter::ATOM:
- return node->atom().size() >= static_cast<size_t>(min_atom_len_);
-
- case Prefilter::AND: {
- int j = 0;
- std::vector<Prefilter*>* subs = node->subs();
- for (size_t i = 0; i < subs->size(); i++)
- if (KeepNode((*subs)[i]))
- (*subs)[j++] = (*subs)[i];
- else
- delete (*subs)[i];
-
- subs->resize(j);
- return j > 0;
- }
-
- case Prefilter::OR:
- for (size_t i = 0; i < node->subs()->size(); i++)
- if (!KeepNode((*node->subs())[i]))
- return false;
- return true;
- }
-}
-
+ return false;
+
+ case Prefilter::ATOM:
+ return node->atom().size() >= static_cast<size_t>(min_atom_len_);
+
+ case Prefilter::AND: {
+ int j = 0;
+ std::vector<Prefilter*>* subs = node->subs();
+ for (size_t i = 0; i < subs->size(); i++)
+ if (KeepNode((*subs)[i]))
+ (*subs)[j++] = (*subs)[i];
+ else
+ delete (*subs)[i];
+
+ subs->resize(j);
+ return j > 0;
+ }
+
+ case Prefilter::OR:
+ for (size_t i = 0; i < node->subs()->size(); i++)
+ if (!KeepNode((*node->subs())[i]))
+ return false;
+ return true;
+ }
+}
+
void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
std::vector<std::string>* atom_vec) {
atom_vec->clear();
// Build vector of all filter nodes, sorted topologically
// from top to bottom in v.
- std::vector<Prefilter*> v;
+ std::vector<Prefilter*> v;
// Add the top level nodes of each regexp prefilter.
- for (size_t i = 0; i < prefilter_vec_.size(); i++) {
+ for (size_t i = 0; i < prefilter_vec_.size(); i++) {
Prefilter* f = prefilter_vec_[i];
if (f == NULL)
- unfiltered_.push_back(static_cast<int>(i));
+ unfiltered_.push_back(static_cast<int>(i));
// We push NULL also on to v, so that we maintain the
// mapping of index==regexpid for level=0 prefilter nodes.
@@ -185,20 +185,20 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
}
// Now add all the descendant nodes.
- for (size_t i = 0; i < v.size(); i++) {
+ for (size_t i = 0; i < v.size(); i++) {
Prefilter* f = v[i];
if (f == NULL)
continue;
if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) {
- const std::vector<Prefilter*>& subs = *f->subs();
- for (size_t j = 0; j < subs.size(); j++)
+ const std::vector<Prefilter*>& subs = *f->subs();
+ for (size_t j = 0; j < subs.size(); j++)
v.push_back(subs[j]);
}
}
// Identify unique nodes.
int unique_id = 0;
- for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
+ for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter *node = v[i];
if (node == NULL)
continue;
@@ -219,8 +219,8 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
}
entries_.resize(nodes->size());
- // Create parent StdIntMap for the entries.
- for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
+ // Create parent StdIntMap for the entries.
+ for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter* prefilter = v[i];
if (prefilter == NULL)
continue;
@@ -229,11 +229,11 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
continue;
Entry* entry = &entries_[prefilter->unique_id()];
- entry->parents = new StdIntMap();
+ entry->parents = new StdIntMap();
}
// Fill the entries.
- for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
+ for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter* prefilter = v[i];
if (prefilter == NULL)
continue;
@@ -255,8 +255,8 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
case Prefilter::OR:
case Prefilter::AND: {
- std::set<int> uniq_child;
- for (size_t j = 0; j < prefilter->subs()->size(); j++) {
+ std::set<int> uniq_child;
+ for (size_t j = 0; j < prefilter->subs()->size(); j++) {
Prefilter* child = (*prefilter->subs())[j];
Prefilter* canonical = CanonicalNode(nodes, child);
if (canonical == NULL) {
@@ -264,17 +264,17 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
return;
}
int child_id = canonical->unique_id();
- uniq_child.insert(child_id);
+ uniq_child.insert(child_id);
// To the child, we want to add to parent indices.
Entry* child_entry = &entries_[child_id];
- if (child_entry->parents->find(prefilter->unique_id()) ==
- child_entry->parents->end()) {
- (*child_entry->parents)[prefilter->unique_id()] = 1;
- }
+ if (child_entry->parents->find(prefilter->unique_id()) ==
+ child_entry->parents->end()) {
+ (*child_entry->parents)[prefilter->unique_id()] = 1;
+ }
}
- entry->propagate_up_at_count = prefilter->op() == Prefilter::AND
- ? static_cast<int>(uniq_child.size())
- : 1;
+ entry->propagate_up_at_count = prefilter->op() == Prefilter::AND
+ ? static_cast<int>(uniq_child.size())
+ : 1;
break;
}
@@ -282,20 +282,20 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
}
// For top level nodes, populate regexp id.
- for (size_t i = 0; i < prefilter_vec_.size(); i++) {
+ for (size_t i = 0; i < prefilter_vec_.size(); i++) {
if (prefilter_vec_[i] == NULL)
continue;
int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id();
DCHECK_LE(0, id);
Entry* entry = &entries_[id];
- entry->regexps.push_back(static_cast<int>(i));
+ entry->regexps.push_back(static_cast<int>(i));
}
}
// Functions for triggering during search.
void PrefilterTree::RegexpsGivenStrings(
- const std::vector<int>& matched_atoms,
- std::vector<int>* regexps) const {
+ const std::vector<int>& matched_atoms,
+ std::vector<int>* regexps) const {
regexps->clear();
if (!compiled_) {
// Some legacy users of PrefilterTree call Compile() before
@@ -304,9 +304,9 @@ void PrefilterTree::RegexpsGivenStrings(
if (prefilter_vec_.empty())
return;
- LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
+ LOG(ERROR) << "RegexpsGivenStrings called before Compile.";
for (size_t i = 0; i < prefilter_vec_.size(); i++)
- regexps->push_back(static_cast<int>(i));
+ regexps->push_back(static_cast<int>(i));
} else {
IntMap regexps_map(static_cast<int>(prefilter_vec_.size()));
std::vector<int> matched_atom_ids;
@@ -320,26 +320,26 @@ void PrefilterTree::RegexpsGivenStrings(
regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end());
}
- std::sort(regexps->begin(), regexps->end());
+ std::sort(regexps->begin(), regexps->end());
}
-void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
+void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
IntMap* regexps) const {
- IntMap count(static_cast<int>(entries_.size()));
- IntMap work(static_cast<int>(entries_.size()));
- for (size_t i = 0; i < atom_ids.size(); i++)
+ IntMap count(static_cast<int>(entries_.size()));
+ IntMap work(static_cast<int>(entries_.size()));
+ for (size_t i = 0; i < atom_ids.size(); i++)
work.set(atom_ids[i], 1);
for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
const Entry& entry = entries_[it->index()];
// Record regexps triggered.
- for (size_t i = 0; i < entry.regexps.size(); i++)
+ for (size_t i = 0; i < entry.regexps.size(); i++)
regexps->set(entry.regexps[i], 1);
int c;
// Pass trigger up to parents.
- for (StdIntMap::iterator it = entry.parents->begin();
+ for (StdIntMap::iterator it = entry.parents->begin();
it != entry.parents->end();
++it) {
- int j = it->first;
+ int j = it->first;
const Entry& parent = entries_[j];
// Delay until all the children have succeeded.
if (parent.propagate_up_at_count > 1) {
@@ -361,26 +361,26 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
// Debugging help.
void PrefilterTree::PrintPrefilter(int regexpid) {
- LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
+ LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
}
void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
- LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
- LOG(ERROR) << "#Unique Nodes: " << entries_.size();
+ LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
+ LOG(ERROR) << "#Unique Nodes: " << entries_.size();
for (size_t i = 0; i < entries_.size(); i++) {
- StdIntMap* parents = entries_[i].parents;
- const std::vector<int>& regexps = entries_[i].regexps;
- LOG(ERROR) << "EntryId: " << i
- << " N: " << parents->size() << " R: " << regexps.size();
- for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
- LOG(ERROR) << it->first;
+ StdIntMap* parents = entries_[i].parents;
+ const std::vector<int>& regexps = entries_[i].regexps;
+ LOG(ERROR) << "EntryId: " << i
+ << " N: " << parents->size() << " R: " << regexps.size();
+ for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+ LOG(ERROR) << it->first;
}
- LOG(ERROR) << "Map:";
+ LOG(ERROR) << "Map:";
for (NodeMap::const_iterator iter = nodes->begin();
iter != nodes->end(); ++iter)
- LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
- << " Str: " << (*iter).first;
+ LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
+ << " Str: " << (*iter).first;
}
std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
@@ -392,7 +392,7 @@ std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
// Adding the operation disambiguates AND and OR nodes.
node_string += node->op() == Prefilter::AND ? "AND" : "OR";
node_string += "(";
- for (size_t i = 0; i < node->subs()->size(); i++) {
+ for (size_t i = 0; i < node->subs()->size(); i++) {
if (i > 0)
node_string += ',';
node_string += StringPrintf("%d", (*node->subs())[i]->unique_id());
diff --git a/contrib/libs/re2/re2/prefilter_tree.h b/contrib/libs/re2/re2/prefilter_tree.h
index 5d73074d97..780f34a540 100644
--- a/contrib/libs/re2/re2/prefilter_tree.h
+++ b/contrib/libs/re2/re2/prefilter_tree.h
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_PREFILTER_TREE_H_
-#define RE2_PREFILTER_TREE_H_
-
+#ifndef RE2_PREFILTER_TREE_H_
+#define RE2_PREFILTER_TREE_H_
+
// The PrefilterTree class is used to form an AND-OR tree of strings
// that would trigger each regexp. The 'prefilter' of each regexp is
// added to PrefilterTree, and then PrefilterTree is used to find all
@@ -16,11 +16,11 @@
// atoms) that the user of this class should use to do the string
// matching.
-#include <map>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
+#include <map>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
#include "re2/prefilter.h"
#include "re2/sparse_array.h"
@@ -29,7 +29,7 @@ namespace re2 {
class PrefilterTree {
public:
PrefilterTree();
- explicit PrefilterTree(int min_atom_len);
+ explicit PrefilterTree(int min_atom_len);
~PrefilterTree();
// Adds the prefilter for the next regexp. Note that we assume that
@@ -50,8 +50,8 @@ class PrefilterTree {
// contain all the ids of string atoms that were found to match the
// content. The caller can use any string match engine to perform
// this function. This function is thread safe.
- void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
- std::vector<int>* regexps) const;
+ void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+ std::vector<int>* regexps) const;
// Print debug prefilter. Also prints unique ids associated with
// nodes of the prefilter of the regexp.
@@ -77,23 +77,23 @@ class PrefilterTree {
// are two different nodes, but they share the atom 'def'. So when
// 'def' matches, it triggers two parents, corresponding to the two
// different OR nodes.
- StdIntMap* parents;
+ StdIntMap* parents;
// When this node is ready to trigger the parent, what are the
// regexps that are triggered.
- std::vector<int> regexps;
+ std::vector<int> regexps;
};
- // Returns true if the prefilter node should be kept.
- bool KeepNode(Prefilter* node) const;
-
+ // Returns true if the prefilter node should be kept.
+ bool KeepNode(Prefilter* node) const;
+
// This function assigns unique ids to various parts of the
// prefilter, by looking at if these nodes are already in the
// PrefilterTree.
void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec);
// Given the matching atoms, find the regexps to be triggered.
- void PropagateMatch(const std::vector<int>& atom_ids,
+ void PropagateMatch(const std::vector<int>& atom_ids,
IntMap* regexps) const;
// Returns the prefilter node that has the same NodeString as this
@@ -112,28 +112,28 @@ class PrefilterTree {
// These are all the nodes formed by Compile. Essentially, there is
// one node for each unique atom and each unique AND/OR node.
- std::vector<Entry> entries_;
+ std::vector<Entry> entries_;
// indices of regexps that always pass through the filter (since we
// found no required literals in these regexps).
- std::vector<int> unfiltered_;
+ std::vector<int> unfiltered_;
// vector of Prefilter for all regexps.
- std::vector<Prefilter*> prefilter_vec_;
+ std::vector<Prefilter*> prefilter_vec_;
// Atom index in returned strings to entry id mapping.
- std::vector<int> atom_index_to_id_;
+ std::vector<int> atom_index_to_id_;
// Has the prefilter tree been compiled.
bool compiled_;
- // Strings less than this length are not stored as atoms.
- const int min_atom_len_;
-
- PrefilterTree(const PrefilterTree&) = delete;
- PrefilterTree& operator=(const PrefilterTree&) = delete;
+ // Strings less than this length are not stored as atoms.
+ const int min_atom_len_;
+
+ PrefilterTree(const PrefilterTree&) = delete;
+ PrefilterTree& operator=(const PrefilterTree&) = delete;
};
-} // namespace
+} // namespace
#endif // RE2_PREFILTER_TREE_H_
diff --git a/contrib/libs/re2/re2/prog.cc b/contrib/libs/re2/re2/prog.cc
index a700d35de3..0092562c26 100644
--- a/contrib/libs/re2/re2/prog.cc
+++ b/contrib/libs/re2/re2/prog.cc
@@ -13,29 +13,29 @@
#include <intrin.h>
#endif
#endif
-#include <stdint.h>
-#include <string.h>
-#include <algorithm>
-#include <memory>
-#include <utility>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "re2/bitmap256.h"
-#include "re2/stringpiece.h"
-
+#include <stdint.h>
+#include <string.h>
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/bitmap256.h"
+#include "re2/stringpiece.h"
+
namespace re2 {
// Constructors per Inst opcode
-void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
+void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
DCHECK_EQ(out_opcode_, 0);
set_out_opcode(out, kInstAlt);
out1_ = out1;
}
-void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
+void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
set_out_opcode(out, kInstByteRange);
lo_ = lo & 0xFF;
@@ -43,25 +43,25 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
hint_foldcase_ = foldcase&1;
}
-void Prog::Inst::InitCapture(int cap, uint32_t out) {
+void Prog::Inst::InitCapture(int cap, uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
set_out_opcode(out, kInstCapture);
cap_ = cap;
}
-void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
+void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
set_out_opcode(out, kInstEmptyWidth);
empty_ = empty;
}
-void Prog::Inst::InitMatch(int32_t id) {
+void Prog::Inst::InitMatch(int32_t id) {
DCHECK_EQ(out_opcode_, 0);
set_opcode(kInstMatch);
match_id_ = id;
}
-void Prog::Inst::InitNop(uint32_t out) {
+void Prog::Inst::InitNop(uint32_t out) {
DCHECK_EQ(out_opcode_, 0);
set_opcode(kInstNop);
}
@@ -109,7 +109,7 @@ Prog::Prog()
: anchor_start_(false),
anchor_end_(false),
reversed_(false),
- did_flatten_(false),
+ did_flatten_(false),
did_onepass_(false),
start_(0),
start_unanchored_(0),
@@ -117,16 +117,16 @@ Prog::Prog()
bytemap_range_(0),
prefix_foldcase_(false),
prefix_size_(0),
- list_count_(0),
+ list_count_(0),
bit_state_text_max_size_(0),
- dfa_mem_(0),
+ dfa_mem_(0),
dfa_first_(NULL),
- dfa_longest_(NULL) {
+ dfa_longest_(NULL) {
}
Prog::~Prog() {
- DeleteDFA(dfa_longest_);
- DeleteDFA(dfa_first_);
+ DeleteDFA(dfa_longest_);
+ DeleteDFA(dfa_first_);
if (prefix_foldcase_)
delete[] prefix_dfa_;
}
@@ -153,29 +153,29 @@ static std::string ProgToString(Prog* prog, Workq* q) {
static std::string FlattenedProgToString(Prog* prog, int start) {
std::string s;
- for (int id = start; id < prog->size(); id++) {
- Prog::Inst* ip = prog->inst(id);
- if (ip->last())
+ for (int id = start; id < prog->size(); id++) {
+ Prog::Inst* ip = prog->inst(id);
+ if (ip->last())
s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
- else
+ else
s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
}
- return s;
-}
+ return s;
+}
std::string Prog::Dump() {
- if (did_flatten_)
- return FlattenedProgToString(this, start_);
-
+ if (did_flatten_)
+ return FlattenedProgToString(this, start_);
+
Workq q(size_);
AddToQueue(&q, start_);
- return ProgToString(this, &q);
+ return ProgToString(this, &q);
}
std::string Prog::DumpUnanchored() {
- if (did_flatten_)
- return FlattenedProgToString(this, start_unanchored_);
-
+ if (did_flatten_)
+ return FlattenedProgToString(this, start_unanchored_);
+
Workq q(size_);
AddToQueue(&q, start_unanchored_);
return ProgToString(this, &q);
@@ -183,17 +183,17 @@ std::string Prog::DumpUnanchored() {
std::string Prog::DumpByteMap() {
std::string map;
- for (int c = 0; c < 256; c++) {
- int b = bytemap_[c];
- int lo = c;
- while (c < 256-1 && bytemap_[c+1] == b)
- c++;
- int hi = c;
+ for (int c = 0; c < 256; c++) {
+ int b = bytemap_[c];
+ int lo = c;
+ while (c < 256-1 && bytemap_[c+1] == b)
+ c++;
+ int hi = c;
map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
- }
- return map;
-}
-
+ }
+ return map;
+}
+
// Is ip a guaranteed match at end of text, perhaps after some capturing?
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
for (;;) {
@@ -218,8 +218,8 @@ static bool IsMatch(Prog* prog, Prog::Inst* ip) {
return true;
}
}
-}
-
+}
+
// Peep-hole optimizer.
void Prog::Optimize() {
Workq q(size_);
@@ -284,7 +284,7 @@ void Prog::Optimize() {
}
}
-uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
+uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
int flags = 0;
// ^ and \A
@@ -318,320 +318,320 @@ uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
return flags;
}
-// ByteMapBuilder implements a coloring algorithm.
-//
-// The first phase is a series of "mark and merge" batches: we mark one or more
-// [lo-hi] ranges, then merge them into our internal state. Batching is not for
-// performance; rather, it means that the ranges are treated indistinguishably.
-//
-// Internally, the ranges are represented using a bitmap that stores the splits
-// and a vector that stores the colors; both of them are indexed by the ranges'
-// last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
-// hi (if not already split), then recolor each range in between. The color map
-// (i.e. from the old color to the new color) is maintained for the lifetime of
-// the batch and so underpins this somewhat obscure approach to set operations.
-//
-// The second phase builds the bytemap from our internal state: we recolor each
-// range, then store the new color (which is now the byte class) in each of the
-// corresponding array elements. Finally, we output the number of byte classes.
-class ByteMapBuilder {
- public:
- ByteMapBuilder() {
- // Initial state: the [0-255] range has color 256.
- // This will avoid problems during the second phase,
- // in which we assign byte classes numbered from 0.
- splits_.Set(255);
- colors_[255] = 256;
- nextcolor_ = 257;
- }
-
- void Mark(int lo, int hi);
- void Merge();
- void Build(uint8_t* bytemap, int* bytemap_range);
-
- private:
- int Recolor(int oldcolor);
-
- Bitmap256 splits_;
+// ByteMapBuilder implements a coloring algorithm.
+//
+// The first phase is a series of "mark and merge" batches: we mark one or more
+// [lo-hi] ranges, then merge them into our internal state. Batching is not for
+// performance; rather, it means that the ranges are treated indistinguishably.
+//
+// Internally, the ranges are represented using a bitmap that stores the splits
+// and a vector that stores the colors; both of them are indexed by the ranges'
+// last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
+// hi (if not already split), then recolor each range in between. The color map
+// (i.e. from the old color to the new color) is maintained for the lifetime of
+// the batch and so underpins this somewhat obscure approach to set operations.
+//
+// The second phase builds the bytemap from our internal state: we recolor each
+// range, then store the new color (which is now the byte class) in each of the
+// corresponding array elements. Finally, we output the number of byte classes.
+class ByteMapBuilder {
+ public:
+ ByteMapBuilder() {
+ // Initial state: the [0-255] range has color 256.
+ // This will avoid problems during the second phase,
+ // in which we assign byte classes numbered from 0.
+ splits_.Set(255);
+ colors_[255] = 256;
+ nextcolor_ = 257;
+ }
+
+ void Mark(int lo, int hi);
+ void Merge();
+ void Build(uint8_t* bytemap, int* bytemap_range);
+
+ private:
+ int Recolor(int oldcolor);
+
+ Bitmap256 splits_;
int colors_[256];
- int nextcolor_;
- std::vector<std::pair<int, int>> colormap_;
- std::vector<std::pair<int, int>> ranges_;
-
- ByteMapBuilder(const ByteMapBuilder&) = delete;
- ByteMapBuilder& operator=(const ByteMapBuilder&) = delete;
-};
-
-void ByteMapBuilder::Mark(int lo, int hi) {
- DCHECK_GE(lo, 0);
- DCHECK_GE(hi, 0);
- DCHECK_LE(lo, 255);
- DCHECK_LE(hi, 255);
- DCHECK_LE(lo, hi);
-
- // Ignore any [0-255] ranges. They cause us to recolor every range, which
- // has no effect on the eventual result and is therefore a waste of time.
- if (lo == 0 && hi == 255)
- return;
-
- ranges_.emplace_back(lo, hi);
-}
-
-void ByteMapBuilder::Merge() {
- for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin();
- it != ranges_.end();
- ++it) {
- int lo = it->first-1;
- int hi = it->second;
-
- if (0 <= lo && !splits_.Test(lo)) {
- splits_.Set(lo);
- int next = splits_.FindNextSetBit(lo+1);
- colors_[lo] = colors_[next];
- }
- if (!splits_.Test(hi)) {
- splits_.Set(hi);
- int next = splits_.FindNextSetBit(hi+1);
- colors_[hi] = colors_[next];
- }
-
- int c = lo+1;
- while (c < 256) {
- int next = splits_.FindNextSetBit(c);
- colors_[next] = Recolor(colors_[next]);
- if (next == hi)
- break;
- c = next+1;
- }
- }
- colormap_.clear();
- ranges_.clear();
-}
-
-void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) {
- // Assign byte classes numbered from 0.
- nextcolor_ = 0;
-
- int c = 0;
- while (c < 256) {
- int next = splits_.FindNextSetBit(c);
- uint8_t b = static_cast<uint8_t>(Recolor(colors_[next]));
- while (c <= next) {
- bytemap[c] = b;
- c++;
- }
- }
-
- *bytemap_range = nextcolor_;
-}
-
-int ByteMapBuilder::Recolor(int oldcolor) {
- // Yes, this is a linear search. There can be at most 256
- // colors and there will typically be far fewer than that.
- // Also, we need to consider keys *and* values in order to
- // avoid recoloring a given range more than once per batch.
- std::vector<std::pair<int, int>>::const_iterator it =
- std::find_if(colormap_.begin(), colormap_.end(),
- [=](const std::pair<int, int>& kv) -> bool {
- return kv.first == oldcolor || kv.second == oldcolor;
- });
- if (it != colormap_.end())
- return it->second;
- int newcolor = nextcolor_;
- nextcolor_++;
- colormap_.emplace_back(oldcolor, newcolor);
- return newcolor;
+ int nextcolor_;
+ std::vector<std::pair<int, int>> colormap_;
+ std::vector<std::pair<int, int>> ranges_;
+
+ ByteMapBuilder(const ByteMapBuilder&) = delete;
+ ByteMapBuilder& operator=(const ByteMapBuilder&) = delete;
+};
+
+void ByteMapBuilder::Mark(int lo, int hi) {
+ DCHECK_GE(lo, 0);
+ DCHECK_GE(hi, 0);
+ DCHECK_LE(lo, 255);
+ DCHECK_LE(hi, 255);
+ DCHECK_LE(lo, hi);
+
+ // Ignore any [0-255] ranges. They cause us to recolor every range, which
+ // has no effect on the eventual result and is therefore a waste of time.
+ if (lo == 0 && hi == 255)
+ return;
+
+ ranges_.emplace_back(lo, hi);
}
+void ByteMapBuilder::Merge() {
+ for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin();
+ it != ranges_.end();
+ ++it) {
+ int lo = it->first-1;
+ int hi = it->second;
+
+ if (0 <= lo && !splits_.Test(lo)) {
+ splits_.Set(lo);
+ int next = splits_.FindNextSetBit(lo+1);
+ colors_[lo] = colors_[next];
+ }
+ if (!splits_.Test(hi)) {
+ splits_.Set(hi);
+ int next = splits_.FindNextSetBit(hi+1);
+ colors_[hi] = colors_[next];
+ }
+
+ int c = lo+1;
+ while (c < 256) {
+ int next = splits_.FindNextSetBit(c);
+ colors_[next] = Recolor(colors_[next]);
+ if (next == hi)
+ break;
+ c = next+1;
+ }
+ }
+ colormap_.clear();
+ ranges_.clear();
+}
+
+void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) {
+ // Assign byte classes numbered from 0.
+ nextcolor_ = 0;
+
+ int c = 0;
+ while (c < 256) {
+ int next = splits_.FindNextSetBit(c);
+ uint8_t b = static_cast<uint8_t>(Recolor(colors_[next]));
+ while (c <= next) {
+ bytemap[c] = b;
+ c++;
+ }
+ }
+
+ *bytemap_range = nextcolor_;
+}
+
+int ByteMapBuilder::Recolor(int oldcolor) {
+ // Yes, this is a linear search. There can be at most 256
+ // colors and there will typically be far fewer than that.
+ // Also, we need to consider keys *and* values in order to
+ // avoid recoloring a given range more than once per batch.
+ std::vector<std::pair<int, int>>::const_iterator it =
+ std::find_if(colormap_.begin(), colormap_.end(),
+ [=](const std::pair<int, int>& kv) -> bool {
+ return kv.first == oldcolor || kv.second == oldcolor;
+ });
+ if (it != colormap_.end())
+ return it->second;
+ int newcolor = nextcolor_;
+ nextcolor_++;
+ colormap_.emplace_back(oldcolor, newcolor);
+ return newcolor;
+}
+
void Prog::ComputeByteMap() {
- // Fill in bytemap with byte classes for the program.
- // Ranges of bytes that are treated indistinguishably
- // will be mapped to a single byte class.
- ByteMapBuilder builder;
-
- // Don't repeat the work for ^ and $.
- bool marked_line_boundaries = false;
- // Don't repeat the work for \b and \B.
- bool marked_word_boundaries = false;
-
- for (int id = 0; id < size(); id++) {
- Inst* ip = inst(id);
- if (ip->opcode() == kInstByteRange) {
- int lo = ip->lo();
- int hi = ip->hi();
- builder.Mark(lo, hi);
- if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
- int foldlo = lo;
- int foldhi = hi;
- if (foldlo < 'a')
- foldlo = 'a';
- if (foldhi > 'z')
- foldhi = 'z';
+ // Fill in bytemap with byte classes for the program.
+ // Ranges of bytes that are treated indistinguishably
+ // will be mapped to a single byte class.
+ ByteMapBuilder builder;
+
+ // Don't repeat the work for ^ and $.
+ bool marked_line_boundaries = false;
+ // Don't repeat the work for \b and \B.
+ bool marked_word_boundaries = false;
+
+ for (int id = 0; id < size(); id++) {
+ Inst* ip = inst(id);
+ if (ip->opcode() == kInstByteRange) {
+ int lo = ip->lo();
+ int hi = ip->hi();
+ builder.Mark(lo, hi);
+ if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
+ int foldlo = lo;
+ int foldhi = hi;
+ if (foldlo < 'a')
+ foldlo = 'a';
+ if (foldhi > 'z')
+ foldhi = 'z';
if (foldlo <= foldhi) {
foldlo += 'A' - 'a';
foldhi += 'A' - 'a';
builder.Mark(foldlo, foldhi);
}
- }
- // If this Inst is not the last Inst in its list AND the next Inst is
- // also a ByteRange AND the Insts have the same out, defer the merge.
- if (!ip->last() &&
- inst(id+1)->opcode() == kInstByteRange &&
- ip->out() == inst(id+1)->out())
- continue;
- builder.Merge();
- } else if (ip->opcode() == kInstEmptyWidth) {
- if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
- !marked_line_boundaries) {
- builder.Mark('\n', '\n');
- builder.Merge();
- marked_line_boundaries = true;
- }
- if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) &&
- !marked_word_boundaries) {
- // We require two batches here: the first for ranges that are word
- // characters, the second for ranges that are not word characters.
- for (bool isword : {true, false}) {
- int j;
- for (int i = 0; i < 256; i = j) {
- for (j = i + 1; j < 256 &&
- Prog::IsWordChar(static_cast<uint8_t>(i)) ==
- Prog::IsWordChar(static_cast<uint8_t>(j));
- j++)
- ;
- if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword)
- builder.Mark(i, j - 1);
- }
- builder.Merge();
- }
- marked_word_boundaries = true;
- }
- }
+ }
+ // If this Inst is not the last Inst in its list AND the next Inst is
+ // also a ByteRange AND the Insts have the same out, defer the merge.
+ if (!ip->last() &&
+ inst(id+1)->opcode() == kInstByteRange &&
+ ip->out() == inst(id+1)->out())
+ continue;
+ builder.Merge();
+ } else if (ip->opcode() == kInstEmptyWidth) {
+ if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
+ !marked_line_boundaries) {
+ builder.Mark('\n', '\n');
+ builder.Merge();
+ marked_line_boundaries = true;
+ }
+ if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) &&
+ !marked_word_boundaries) {
+ // We require two batches here: the first for ranges that are word
+ // characters, the second for ranges that are not word characters.
+ for (bool isword : {true, false}) {
+ int j;
+ for (int i = 0; i < 256; i = j) {
+ for (j = i + 1; j < 256 &&
+ Prog::IsWordChar(static_cast<uint8_t>(i)) ==
+ Prog::IsWordChar(static_cast<uint8_t>(j));
+ j++)
+ ;
+ if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword)
+ builder.Mark(i, j - 1);
+ }
+ builder.Merge();
+ }
+ marked_word_boundaries = true;
+ }
+ }
}
- builder.Build(bytemap_, &bytemap_range_);
-
- if (0) { // For debugging, use trivial bytemap.
- LOG(ERROR) << "Using trivial bytemap.";
- for (int i = 0; i < 256; i++)
- bytemap_[i] = static_cast<uint8_t>(i);
+ builder.Build(bytemap_, &bytemap_range_);
+
+ if (0) { // For debugging, use trivial bytemap.
+ LOG(ERROR) << "Using trivial bytemap.";
+ for (int i = 0; i < 256; i++)
+ bytemap_[i] = static_cast<uint8_t>(i);
bytemap_range_ = 256;
}
}
-// Prog::Flatten() implements a graph rewriting algorithm.
-//
-// The overall process is similar to epsilon removal, but retains some epsilon
-// transitions: those from Capture and EmptyWidth instructions; and those from
-// nullable subexpressions. (The latter avoids quadratic blowup in transitions
-// in the worst case.) It might be best thought of as Alt instruction elision.
-//
-// In conceptual terms, it divides the Prog into "trees" of instructions, then
-// traverses the "trees" in order to produce "lists" of instructions. A "tree"
-// is one or more instructions that grow from one "root" instruction to one or
-// more "leaf" instructions; if a "tree" has exactly one instruction, then the
-// "root" is also the "leaf". In most cases, a "root" is the successor of some
-// "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
-// and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
-// EmptyWidth or Match instruction. However, this is insufficient for handling
-// nested nullable subexpressions correctly, so in some cases, a "root" is the
-// dominator of the instructions reachable from some "successor root" (i.e. it
-// has an unreachable predecessor) and is considered a "dominator root". Since
-// only Alt instructions can be "dominator roots" (other instructions would be
+// Prog::Flatten() implements a graph rewriting algorithm.
+//
+// The overall process is similar to epsilon removal, but retains some epsilon
+// transitions: those from Capture and EmptyWidth instructions; and those from
+// nullable subexpressions. (The latter avoids quadratic blowup in transitions
+// in the worst case.) It might be best thought of as Alt instruction elision.
+//
+// In conceptual terms, it divides the Prog into "trees" of instructions, then
+// traverses the "trees" in order to produce "lists" of instructions. A "tree"
+// is one or more instructions that grow from one "root" instruction to one or
+// more "leaf" instructions; if a "tree" has exactly one instruction, then the
+// "root" is also the "leaf". In most cases, a "root" is the successor of some
+// "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
+// and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
+// EmptyWidth or Match instruction. However, this is insufficient for handling
+// nested nullable subexpressions correctly, so in some cases, a "root" is the
+// dominator of the instructions reachable from some "successor root" (i.e. it
+// has an unreachable predecessor) and is considered a "dominator root". Since
+// only Alt instructions can be "dominator roots" (other instructions would be
// "leaves"), only Alt instructions are required to be marked as predecessors.
-//
-// Dividing the Prog into "trees" comprises two passes: marking the "successor
-// roots" and the predecessors; and marking the "dominator roots". Sorting the
-// "successor roots" by their bytecode offsets enables iteration in order from
-// greatest to least during the second pass; by working backwards in this case
-// and flooding the graph no further than "leaves" and already marked "roots",
-// it becomes possible to mark "dominator roots" without doing excessive work.
-//
-// Traversing the "trees" is just iterating over the "roots" in order of their
-// marking and flooding the graph no further than "leaves" and "roots". When a
-// "leaf" is reached, the instruction is copied with its successor remapped to
-// its "root" number. When a "root" is reached, a Nop instruction is generated
-// with its successor remapped similarly. As each "list" is produced, its last
-// instruction is marked as such. After all of the "lists" have been produced,
-// a pass over their instructions remaps their successors to bytecode offsets.
-void Prog::Flatten() {
- if (did_flatten_)
- return;
- did_flatten_ = true;
-
- // Scratch structures. It's important that these are reused by functions
- // that we call in loops because they would thrash the heap otherwise.
- SparseSet reachable(size());
- std::vector<int> stk;
- stk.reserve(size());
-
- // First pass: Marks "successor roots" and predecessors.
- // Builds the mapping from inst-ids to root-ids.
- SparseArray<int> rootmap(size());
- SparseArray<int> predmap(size());
- std::vector<std::vector<int>> predvec;
- MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk);
-
- // Second pass: Marks "dominator roots".
- SparseArray<int> sorted(rootmap);
- std::sort(sorted.begin(), sorted.end(), sorted.less);
- for (SparseArray<int>::const_iterator i = sorted.end() - 1;
- i != sorted.begin();
- --i) {
- if (i->index() != start_unanchored() && i->index() != start())
- MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
- }
-
- // Third pass: Emits "lists". Remaps outs to root-ids.
- // Builds the mapping from root-ids to flat-ids.
- std::vector<int> flatmap(rootmap.size());
- std::vector<Inst> flat;
- flat.reserve(size());
- for (SparseArray<int>::const_iterator i = rootmap.begin();
- i != rootmap.end();
- ++i) {
- flatmap[i->value()] = static_cast<int>(flat.size());
- EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
- flat.back().set_last();
+//
+// Dividing the Prog into "trees" comprises two passes: marking the "successor
+// roots" and the predecessors; and marking the "dominator roots". Sorting the
+// "successor roots" by their bytecode offsets enables iteration in order from
+// greatest to least during the second pass; by working backwards in this case
+// and flooding the graph no further than "leaves" and already marked "roots",
+// it becomes possible to mark "dominator roots" without doing excessive work.
+//
+// Traversing the "trees" is just iterating over the "roots" in order of their
+// marking and flooding the graph no further than "leaves" and "roots". When a
+// "leaf" is reached, the instruction is copied with its successor remapped to
+// its "root" number. When a "root" is reached, a Nop instruction is generated
+// with its successor remapped similarly. As each "list" is produced, its last
+// instruction is marked as such. After all of the "lists" have been produced,
+// a pass over their instructions remaps their successors to bytecode offsets.
+void Prog::Flatten() {
+ if (did_flatten_)
+ return;
+ did_flatten_ = true;
+
+ // Scratch structures. It's important that these are reused by functions
+ // that we call in loops because they would thrash the heap otherwise.
+ SparseSet reachable(size());
+ std::vector<int> stk;
+ stk.reserve(size());
+
+ // First pass: Marks "successor roots" and predecessors.
+ // Builds the mapping from inst-ids to root-ids.
+ SparseArray<int> rootmap(size());
+ SparseArray<int> predmap(size());
+ std::vector<std::vector<int>> predvec;
+ MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk);
+
+ // Second pass: Marks "dominator roots".
+ SparseArray<int> sorted(rootmap);
+ std::sort(sorted.begin(), sorted.end(), sorted.less);
+ for (SparseArray<int>::const_iterator i = sorted.end() - 1;
+ i != sorted.begin();
+ --i) {
+ if (i->index() != start_unanchored() && i->index() != start())
+ MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
+ }
+
+ // Third pass: Emits "lists". Remaps outs to root-ids.
+ // Builds the mapping from root-ids to flat-ids.
+ std::vector<int> flatmap(rootmap.size());
+ std::vector<Inst> flat;
+ flat.reserve(size());
+ for (SparseArray<int>::const_iterator i = rootmap.begin();
+ i != rootmap.end();
+ ++i) {
+ flatmap[i->value()] = static_cast<int>(flat.size());
+ EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
+ flat.back().set_last();
// We have the bounds of the "list", so this is the
// most convenient point at which to compute hints.
ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
- }
-
- list_count_ = static_cast<int>(flatmap.size());
- for (int i = 0; i < kNumInst; i++)
- inst_count_[i] = 0;
-
- // Fourth pass: Remaps outs to flat-ids.
- // Counts instructions by opcode.
- for (int id = 0; id < static_cast<int>(flat.size()); id++) {
- Inst* ip = &flat[id];
- if (ip->opcode() != kInstAltMatch) // handled in EmitList()
- ip->set_out(flatmap[ip->out()]);
- inst_count_[ip->opcode()]++;
- }
-
+ }
+
+ list_count_ = static_cast<int>(flatmap.size());
+ for (int i = 0; i < kNumInst; i++)
+ inst_count_[i] = 0;
+
+ // Fourth pass: Remaps outs to flat-ids.
+ // Counts instructions by opcode.
+ for (int id = 0; id < static_cast<int>(flat.size()); id++) {
+ Inst* ip = &flat[id];
+ if (ip->opcode() != kInstAltMatch) // handled in EmitList()
+ ip->set_out(flatmap[ip->out()]);
+ inst_count_[ip->opcode()]++;
+ }
+
#if !defined(NDEBUG)
// Address a `-Wunused-but-set-variable' warning from Clang 13.x.
size_t total = 0;
- for (int i = 0; i < kNumInst; i++)
- total += inst_count_[i];
+ for (int i = 0; i < kNumInst; i++)
+ total += inst_count_[i];
CHECK_EQ(total, flat.size());
#endif
-
- // Remap start_unanchored and start.
- if (start_unanchored() == 0) {
- DCHECK_EQ(start(), 0);
- } else if (start_unanchored() == start()) {
- set_start_unanchored(flatmap[1]);
- set_start(flatmap[1]);
- } else {
- set_start_unanchored(flatmap[1]);
- set_start(flatmap[2]);
- }
-
- // Finally, replace the old instructions with the new instructions.
- size_ = static_cast<int>(flat.size());
+
+ // Remap start_unanchored and start.
+ if (start_unanchored() == 0) {
+ DCHECK_EQ(start(), 0);
+ } else if (start_unanchored() == start()) {
+ set_start_unanchored(flatmap[1]);
+ set_start(flatmap[1]);
+ } else {
+ set_start_unanchored(flatmap[1]);
+ set_start(flatmap[2]);
+ }
+
+ // Finally, replace the old instructions with the new instructions.
+ size_ = static_cast<int>(flat.size());
inst_ = PODArray<Inst>(size_);
memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
@@ -649,198 +649,198 @@ void Prog::Flatten() {
// for tracking pairs of possibilities that it has already explored.
const size_t kBitStateBitmapMaxSize = 256*1024; // max size in bits
bit_state_text_max_size_ = kBitStateBitmapMaxSize / list_count_ - 1;
-}
-
-void Prog::MarkSuccessors(SparseArray<int>* rootmap,
- SparseArray<int>* predmap,
- std::vector<std::vector<int>>* predvec,
- SparseSet* reachable, std::vector<int>* stk) {
- // Mark the kInstFail instruction.
- rootmap->set_new(0, rootmap->size());
-
- // Mark the start_unanchored and start instructions.
- if (!rootmap->has_index(start_unanchored()))
- rootmap->set_new(start_unanchored(), rootmap->size());
- if (!rootmap->has_index(start()))
- rootmap->set_new(start(), rootmap->size());
-
- reachable->clear();
- stk->clear();
- stk->push_back(start_unanchored());
- while (!stk->empty()) {
- int id = stk->back();
- stk->pop_back();
- Loop:
- if (reachable->contains(id))
- continue;
- reachable->insert_new(id);
-
- Inst* ip = inst(id);
- switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
- break;
-
- case kInstAltMatch:
- case kInstAlt:
- // Mark this instruction as a predecessor of each out.
- for (int out : {ip->out(), ip->out1()}) {
- if (!predmap->has_index(out)) {
- predmap->set_new(out, static_cast<int>(predvec->size()));
- predvec->emplace_back();
- }
- (*predvec)[predmap->get_existing(out)].emplace_back(id);
- }
- stk->push_back(ip->out1());
- id = ip->out();
- goto Loop;
-
- case kInstByteRange:
- case kInstCapture:
- case kInstEmptyWidth:
- // Mark the out of this instruction as a "root".
- if (!rootmap->has_index(ip->out()))
- rootmap->set_new(ip->out(), rootmap->size());
- id = ip->out();
- goto Loop;
-
- case kInstNop:
- id = ip->out();
- goto Loop;
-
- case kInstMatch:
- case kInstFail:
- break;
- }
- }
-}
-
-void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
- SparseArray<int>* predmap,
- std::vector<std::vector<int>>* predvec,
- SparseSet* reachable, std::vector<int>* stk) {
- reachable->clear();
- stk->clear();
- stk->push_back(root);
- while (!stk->empty()) {
- int id = stk->back();
- stk->pop_back();
- Loop:
- if (reachable->contains(id))
- continue;
- reachable->insert_new(id);
-
- if (id != root && rootmap->has_index(id)) {
- // We reached another "tree" via epsilon transition.
- continue;
- }
-
- Inst* ip = inst(id);
- switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
- break;
-
- case kInstAltMatch:
- case kInstAlt:
- stk->push_back(ip->out1());
- id = ip->out();
- goto Loop;
-
- case kInstByteRange:
- case kInstCapture:
- case kInstEmptyWidth:
- break;
-
- case kInstNop:
- id = ip->out();
- goto Loop;
-
- case kInstMatch:
- case kInstFail:
- break;
- }
- }
-
- for (SparseSet::const_iterator i = reachable->begin();
- i != reachable->end();
- ++i) {
- int id = *i;
- if (predmap->has_index(id)) {
- for (int pred : (*predvec)[predmap->get_existing(id)]) {
- if (!reachable->contains(pred)) {
- // id has a predecessor that cannot be reached from root!
- // Therefore, id must be a "root" too - mark it as such.
- if (!rootmap->has_index(id))
- rootmap->set_new(id, rootmap->size());
- }
- }
- }
- }
-}
-
-void Prog::EmitList(int root, SparseArray<int>* rootmap,
- std::vector<Inst>* flat,
- SparseSet* reachable, std::vector<int>* stk) {
- reachable->clear();
- stk->clear();
- stk->push_back(root);
- while (!stk->empty()) {
- int id = stk->back();
- stk->pop_back();
- Loop:
- if (reachable->contains(id))
- continue;
- reachable->insert_new(id);
-
- if (id != root && rootmap->has_index(id)) {
- // We reached another "tree" via epsilon transition. Emit a kInstNop
- // instruction so that the Prog does not become quadratically larger.
- flat->emplace_back();
- flat->back().set_opcode(kInstNop);
- flat->back().set_out(rootmap->get_existing(id));
- continue;
- }
-
- Inst* ip = inst(id);
- switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
- break;
-
- case kInstAltMatch:
- flat->emplace_back();
- flat->back().set_opcode(kInstAltMatch);
- flat->back().set_out(static_cast<int>(flat->size()));
- flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
- FALLTHROUGH_INTENDED;
-
- case kInstAlt:
- stk->push_back(ip->out1());
- id = ip->out();
- goto Loop;
-
- case kInstByteRange:
- case kInstCapture:
- case kInstEmptyWidth:
- flat->emplace_back();
- memmove(&flat->back(), ip, sizeof *ip);
- flat->back().set_out(rootmap->get_existing(ip->out()));
- break;
-
- case kInstNop:
- id = ip->out();
- goto Loop;
-
- case kInstMatch:
- case kInstFail:
- flat->emplace_back();
- memmove(&flat->back(), ip, sizeof *ip);
- break;
- }
- }
-}
-
+}
+
+void Prog::MarkSuccessors(SparseArray<int>* rootmap,
+ SparseArray<int>* predmap,
+ std::vector<std::vector<int>>* predvec,
+ SparseSet* reachable, std::vector<int>* stk) {
+ // Mark the kInstFail instruction.
+ rootmap->set_new(0, rootmap->size());
+
+ // Mark the start_unanchored and start instructions.
+ if (!rootmap->has_index(start_unanchored()))
+ rootmap->set_new(start_unanchored(), rootmap->size());
+ if (!rootmap->has_index(start()))
+ rootmap->set_new(start(), rootmap->size());
+
+ reachable->clear();
+ stk->clear();
+ stk->push_back(start_unanchored());
+ while (!stk->empty()) {
+ int id = stk->back();
+ stk->pop_back();
+ Loop:
+ if (reachable->contains(id))
+ continue;
+ reachable->insert_new(id);
+
+ Inst* ip = inst(id);
+ switch (ip->opcode()) {
+ default:
+ LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+ break;
+
+ case kInstAltMatch:
+ case kInstAlt:
+ // Mark this instruction as a predecessor of each out.
+ for (int out : {ip->out(), ip->out1()}) {
+ if (!predmap->has_index(out)) {
+ predmap->set_new(out, static_cast<int>(predvec->size()));
+ predvec->emplace_back();
+ }
+ (*predvec)[predmap->get_existing(out)].emplace_back(id);
+ }
+ stk->push_back(ip->out1());
+ id = ip->out();
+ goto Loop;
+
+ case kInstByteRange:
+ case kInstCapture:
+ case kInstEmptyWidth:
+ // Mark the out of this instruction as a "root".
+ if (!rootmap->has_index(ip->out()))
+ rootmap->set_new(ip->out(), rootmap->size());
+ id = ip->out();
+ goto Loop;
+
+ case kInstNop:
+ id = ip->out();
+ goto Loop;
+
+ case kInstMatch:
+ case kInstFail:
+ break;
+ }
+ }
+}
+
+void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
+ SparseArray<int>* predmap,
+ std::vector<std::vector<int>>* predvec,
+ SparseSet* reachable, std::vector<int>* stk) {
+ reachable->clear();
+ stk->clear();
+ stk->push_back(root);
+ while (!stk->empty()) {
+ int id = stk->back();
+ stk->pop_back();
+ Loop:
+ if (reachable->contains(id))
+ continue;
+ reachable->insert_new(id);
+
+ if (id != root && rootmap->has_index(id)) {
+ // We reached another "tree" via epsilon transition.
+ continue;
+ }
+
+ Inst* ip = inst(id);
+ switch (ip->opcode()) {
+ default:
+ LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+ break;
+
+ case kInstAltMatch:
+ case kInstAlt:
+ stk->push_back(ip->out1());
+ id = ip->out();
+ goto Loop;
+
+ case kInstByteRange:
+ case kInstCapture:
+ case kInstEmptyWidth:
+ break;
+
+ case kInstNop:
+ id = ip->out();
+ goto Loop;
+
+ case kInstMatch:
+ case kInstFail:
+ break;
+ }
+ }
+
+ for (SparseSet::const_iterator i = reachable->begin();
+ i != reachable->end();
+ ++i) {
+ int id = *i;
+ if (predmap->has_index(id)) {
+ for (int pred : (*predvec)[predmap->get_existing(id)]) {
+ if (!reachable->contains(pred)) {
+ // id has a predecessor that cannot be reached from root!
+ // Therefore, id must be a "root" too - mark it as such.
+ if (!rootmap->has_index(id))
+ rootmap->set_new(id, rootmap->size());
+ }
+ }
+ }
+ }
+}
+
+void Prog::EmitList(int root, SparseArray<int>* rootmap,
+ std::vector<Inst>* flat,
+ SparseSet* reachable, std::vector<int>* stk) {
+ reachable->clear();
+ stk->clear();
+ stk->push_back(root);
+ while (!stk->empty()) {
+ int id = stk->back();
+ stk->pop_back();
+ Loop:
+ if (reachable->contains(id))
+ continue;
+ reachable->insert_new(id);
+
+ if (id != root && rootmap->has_index(id)) {
+ // We reached another "tree" via epsilon transition. Emit a kInstNop
+ // instruction so that the Prog does not become quadratically larger.
+ flat->emplace_back();
+ flat->back().set_opcode(kInstNop);
+ flat->back().set_out(rootmap->get_existing(id));
+ continue;
+ }
+
+ Inst* ip = inst(id);
+ switch (ip->opcode()) {
+ default:
+ LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
+ break;
+
+ case kInstAltMatch:
+ flat->emplace_back();
+ flat->back().set_opcode(kInstAltMatch);
+ flat->back().set_out(static_cast<int>(flat->size()));
+ flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
+ FALLTHROUGH_INTENDED;
+
+ case kInstAlt:
+ stk->push_back(ip->out1());
+ id = ip->out();
+ goto Loop;
+
+ case kInstByteRange:
+ case kInstCapture:
+ case kInstEmptyWidth:
+ flat->emplace_back();
+ memmove(&flat->back(), ip, sizeof *ip);
+ flat->back().set_out(rootmap->get_existing(ip->out()));
+ break;
+
+ case kInstNop:
+ id = ip->out();
+ goto Loop;
+
+ case kInstMatch:
+ case kInstFail:
+ flat->emplace_back();
+ memmove(&flat->back(), ip, sizeof *ip);
+ break;
+ }
+ }
+}
+
// For each ByteRange instruction in [begin, end), computes a hint to execution
// engines: the delta to the next instruction (in flat) worth exploring iff the
// current instruction matched.
diff --git a/contrib/libs/re2/re2/prog.h b/contrib/libs/re2/re2/prog.h
index 4af012ab6f..5ac0e67c29 100644
--- a/contrib/libs/re2/re2/prog.h
+++ b/contrib/libs/re2/re2/prog.h
@@ -2,24 +2,24 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_PROG_H_
-#define RE2_PROG_H_
-
+#ifndef RE2_PROG_H_
+#define RE2_PROG_H_
+
// Compiled representation of regular expressions.
// See regexp.h for the Regexp class, which represents a regular
// expression symbolically.
-#include <stdint.h>
+#include <stdint.h>
#include <functional>
-#include <mutex>
-#include <string>
-#include <vector>
+#include <mutex>
+#include <string>
+#include <vector>
#include <type_traits>
-#include "util/util.h"
-#include "util/logging.h"
+#include "util/util.h"
+#include "util/logging.h"
#include "re2/pod_array.h"
-#include "re2/re2.h"
+#include "re2/re2.h"
#include "re2/sparse_array.h"
#include "re2/sparse_set.h"
@@ -35,7 +35,7 @@ enum InstOp {
kInstMatch, // found a match!
kInstNop, // no-op; occasionally unavoidable
kInstFail, // never match; occasionally unavoidable
- kNumInst,
+ kNumInst,
};
// Bit flags for empty-width specials
@@ -49,7 +49,7 @@ enum EmptyOp {
kEmptyAllFlags = (1<<6)-1,
};
-class DFA;
+class DFA;
class Regexp;
// Compiled form of regexp program.
@@ -64,25 +64,25 @@ class Prog {
// See the assertion below for why this is so.
Inst() = default;
- // Copyable.
- Inst(const Inst&) = default;
- Inst& operator=(const Inst&) = default;
-
+ // Copyable.
+ Inst(const Inst&) = default;
+ Inst& operator=(const Inst&) = default;
+
// Constructors per opcode
- void InitAlt(uint32_t out, uint32_t out1);
- void InitByteRange(int lo, int hi, int foldcase, uint32_t out);
- void InitCapture(int cap, uint32_t out);
- void InitEmptyWidth(EmptyOp empty, uint32_t out);
+ void InitAlt(uint32_t out, uint32_t out1);
+ void InitByteRange(int lo, int hi, int foldcase, uint32_t out);
+ void InitCapture(int cap, uint32_t out);
+ void InitEmptyWidth(EmptyOp empty, uint32_t out);
void InitMatch(int id);
- void InitNop(uint32_t out);
+ void InitNop(uint32_t out);
void InitFail();
// Getters
int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
- int last() { return (out_opcode_>>3)&1; }
- int out() { return out_opcode_>>4; }
- int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
+ int last() { return (out_opcode_>>3)&1; }
+ int out() { return out_opcode_>>4; }
+ int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
@@ -90,12 +90,12 @@ class Prog {
int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
-
- bool greedy(Prog* p) {
+
+ bool greedy(Prog* p) {
DCHECK_EQ(opcode(), kInstAltMatch);
- return p->inst(out())->opcode() == kInstByteRange ||
- (p->inst(out())->opcode() == kInstNop &&
- p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
+ return p->inst(out())->opcode() == kInstByteRange ||
+ (p->inst(out())->opcode() == kInstNop &&
+ p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
}
// Does this inst (an kInstByteRange) match c?
@@ -110,24 +110,24 @@ class Prog {
std::string Dump();
// Maximum instruction id.
- // (Must fit in out_opcode_. PatchList/last steal another bit.)
+ // (Must fit in out_opcode_. PatchList/last steal another bit.)
static const int kMaxInst = (1<<28) - 1;
private:
void set_opcode(InstOp opcode) {
- out_opcode_ = (out()<<4) | (last()<<3) | opcode;
- }
-
- void set_last() {
- out_opcode_ = (out()<<4) | (1<<3) | opcode();
+ out_opcode_ = (out()<<4) | (last()<<3) | opcode;
}
+ void set_last() {
+ out_opcode_ = (out()<<4) | (1<<3) | opcode();
+ }
+
void set_out(int out) {
- out_opcode_ = (out<<4) | (last()<<3) | opcode();
+ out_opcode_ = (out<<4) | (last()<<3) | opcode();
}
void set_out_opcode(int out, InstOp opcode) {
- out_opcode_ = (out<<4) | (last()<<3) | opcode;
+ out_opcode_ = (out<<4) | (last()<<3) | opcode;
}
uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
@@ -157,8 +157,8 @@ class Prog {
// foldcase: A-Z -> a-z before checking range.
};
- EmptyOp empty_; // opcode == kInstEmptyWidth
- // empty_ is bitwise OR of kEmpty* flags above.
+ EmptyOp empty_; // opcode == kInstEmptyWidth
+ // empty_ is bitwise OR of kEmpty* flags above.
};
friend class Compiler;
@@ -201,21 +201,21 @@ class Prog {
void set_start(int start) { start_ = start; }
int start_unanchored() { return start_unanchored_; }
void set_start_unanchored(int start) { start_unanchored_ = start; }
- int size() { return size_; }
+ int size() { return size_; }
bool reversed() { return reversed_; }
void set_reversed(bool reversed) { reversed_ = reversed; }
- int list_count() { return list_count_; }
- int inst_count(InstOp op) { return inst_count_[op]; }
+ int list_count() { return list_count_; }
+ int inst_count(InstOp op) { return inst_count_[op]; }
uint16_t* list_heads() { return list_heads_.data(); }
size_t bit_state_text_max_size() { return bit_state_text_max_size_; }
int64_t dfa_mem() { return dfa_mem_; }
- void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
+ void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
bool anchor_start() { return anchor_start_; }
void set_anchor_start(bool b) { anchor_start_ = b; }
bool anchor_end() { return anchor_end_; }
void set_anchor_end(bool b) { anchor_end_ = b; }
int bytemap_range() { return bytemap_range_; }
- const uint8_t* bytemap() { return bytemap_; }
+ const uint8_t* bytemap() { return bytemap_; }
bool can_prefix_accel() { return prefix_size_ != 0; }
// Accelerates to the first likely occurrence of the prefix.
@@ -230,7 +230,7 @@ class Prog {
return memchr(data, prefix_front_, size);
}
}
-
+
// Configures prefix accel using the analysis performed during compilation.
void ConfigurePrefixAccel(const std::string& prefix, bool prefix_foldcase);
@@ -249,7 +249,7 @@ class Prog {
// Returns the set of kEmpty flags that are in effect at
// position p within context.
- static uint32_t EmptyFlags(const StringPiece& context, const char* p);
+ static uint32_t EmptyFlags(const StringPiece& context, const char* p);
// Returns whether byte c is a word character: ASCII only.
// Used by the implementation of \b and \B.
@@ -258,7 +258,7 @@ class Prog {
// (the DFA has only one-byte lookahead).
// - even if the lookahead were possible, the Progs would be huge.
// This crude approximation is the same one PCRE uses.
- static bool IsWordChar(uint8_t c) {
+ static bool IsWordChar(uint8_t c) {
return ('A' <= c && c <= 'Z') ||
('a' <= c && c <= 'z') ||
('0' <= c && c <= '9') ||
@@ -291,7 +291,7 @@ class Prog {
// If matches != NULL and kind == kManyMatch and there is a match,
// SearchDFA fills matches with the match IDs of the final matching state.
bool SearchDFA(const StringPiece& text, const StringPiece& context,
- Anchor anchor, MatchKind kind, StringPiece* match0,
+ Anchor anchor, MatchKind kind, StringPiece* match0,
bool* failed, SparseSet* matches);
// The callback issued after building each DFA state with BuildEntireDFA().
@@ -311,7 +311,7 @@ class Prog {
// FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
- // Compute bytemap.
+ // Compute bytemap.
void ComputeByteMap();
// Run peep-hole optimizer on program.
@@ -361,41 +361,41 @@ class Prog {
// Returns true on success, false on error.
bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
- // EXPERIMENTAL! SUBJECT TO CHANGE!
- // Outputs the program fanout into the given sparse array.
- void Fanout(SparseArray<int>* fanout);
-
+ // EXPERIMENTAL! SUBJECT TO CHANGE!
+ // Outputs the program fanout into the given sparse array.
+ void Fanout(SparseArray<int>* fanout);
+
// Compiles a collection of regexps to Prog. Each regexp will have
// its own Match instruction recording the index in the output vector.
static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
- // Flattens the Prog from "tree" form to "list" form. This is an in-place
- // operation in the sense that the old instructions are lost.
- void Flatten();
-
- // Walks the Prog; the "successor roots" or predecessors of the reachable
- // instructions are marked in rootmap or predmap/predvec, respectively.
- // reachable and stk are preallocated scratch structures.
- void MarkSuccessors(SparseArray<int>* rootmap,
- SparseArray<int>* predmap,
- std::vector<std::vector<int>>* predvec,
- SparseSet* reachable, std::vector<int>* stk);
-
- // Walks the Prog from the given "root" instruction; the "dominator root"
- // of the reachable instructions (if such exists) is marked in rootmap.
- // reachable and stk are preallocated scratch structures.
- void MarkDominator(int root, SparseArray<int>* rootmap,
- SparseArray<int>* predmap,
- std::vector<std::vector<int>>* predvec,
- SparseSet* reachable, std::vector<int>* stk);
-
- // Walks the Prog from the given "root" instruction; the reachable
- // instructions are emitted in "list" form and appended to flat.
- // reachable and stk are preallocated scratch structures.
- void EmitList(int root, SparseArray<int>* rootmap,
- std::vector<Inst>* flat,
- SparseSet* reachable, std::vector<int>* stk);
-
+ // Flattens the Prog from "tree" form to "list" form. This is an in-place
+ // operation in the sense that the old instructions are lost.
+ void Flatten();
+
+ // Walks the Prog; the "successor roots" or predecessors of the reachable
+ // instructions are marked in rootmap or predmap/predvec, respectively.
+ // reachable and stk are preallocated scratch structures.
+ void MarkSuccessors(SparseArray<int>* rootmap,
+ SparseArray<int>* predmap,
+ std::vector<std::vector<int>>* predvec,
+ SparseSet* reachable, std::vector<int>* stk);
+
+ // Walks the Prog from the given "root" instruction; the "dominator root"
+ // of the reachable instructions (if such exists) is marked in rootmap.
+ // reachable and stk are preallocated scratch structures.
+ void MarkDominator(int root, SparseArray<int>* rootmap,
+ SparseArray<int>* predmap,
+ std::vector<std::vector<int>>* predvec,
+ SparseSet* reachable, std::vector<int>* stk);
+
+ // Walks the Prog from the given "root" instruction; the reachable
+ // instructions are emitted in "list" form and appended to flat.
+ // reachable and stk are preallocated scratch structures.
+ void EmitList(int root, SparseArray<int>* rootmap,
+ std::vector<Inst>* flat,
+ SparseSet* reachable, std::vector<int>* stk);
+
// Computes hints for ByteRange instructions in [begin, end).
void ComputeHints(std::vector<Inst>* flat, int begin, int end);
@@ -407,12 +407,12 @@ class Prog {
friend class Compiler;
DFA* GetDFA(MatchKind kind);
- void DeleteDFA(DFA* dfa);
+ void DeleteDFA(DFA* dfa);
bool anchor_start_; // regexp has explicit start anchor
bool anchor_end_; // regexp has explicit end anchor
bool reversed_; // whether program runs backward over input
- bool did_flatten_; // has Flatten been called?
+ bool did_flatten_; // has Flatten been called?
bool did_onepass_; // has IsOnePass been called?
int start_; // entry point for program
@@ -435,21 +435,21 @@ class Prog {
PODArray<uint16_t> list_heads_; // sparse array enumerating list heads
// not populated if size_ is overly large
size_t bit_state_text_max_size_; // upper bound (inclusive) on text.size()
-
+
PODArray<Inst> inst_; // pointer to instruction array
PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
- int64_t dfa_mem_; // Maximum memory for DFAs.
- DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
- DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
+ int64_t dfa_mem_; // Maximum memory for DFAs.
+ DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
+ DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
- uint8_t bytemap_[256]; // map from input bytes to byte classes
+ uint8_t bytemap_[256]; // map from input bytes to byte classes
- std::once_flag dfa_first_once_;
- std::once_flag dfa_longest_once_;
+ std::once_flag dfa_first_once_;
+ std::once_flag dfa_longest_once_;
- Prog(const Prog&) = delete;
- Prog& operator=(const Prog&) = delete;
+ Prog(const Prog&) = delete;
+ Prog& operator=(const Prog&) = delete;
};
// std::string_view in MSVC has iterators that aren't just pointers and
@@ -465,4 +465,4 @@ static inline const char* EndPtr(const StringPiece& s) {
} // namespace re2
-#endif // RE2_PROG_H_
+#endif // RE2_PROG_H_
diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc
index 47fb385e4e..a9679c634e 100644
--- a/contrib/libs/re2/re2/re2.cc
+++ b/contrib/libs/re2/re2/re2.cc
@@ -7,29 +7,29 @@
// Originally the PCRE C++ wrapper, but adapted to use
// the new automata-based regular expression engines.
-#include "re2/re2.h"
+#include "re2/re2.h"
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <algorithm>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
#include <atomic>
-#include <iterator>
-#include <mutex>
+#include <iterator>
+#include <mutex>
#include <string>
-#include <utility>
-#include <vector>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
+#include <utility>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/sparse_array.h"
@@ -40,26 +40,26 @@ namespace re2 {
static const int kMaxArgs = 16;
static const int kVecSize = 1+kMaxArgs;
-const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
-
-RE2::Options::Options(RE2::CannedOptions opt)
- : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
- posix_syntax_(opt == RE2::POSIX),
- longest_match_(opt == RE2::POSIX),
- log_errors_(opt != RE2::Quiet),
- max_mem_(kDefaultMaxMem),
- literal_(false),
- never_nl_(false),
- dot_nl_(false),
- never_capture_(false),
- case_sensitive_(true),
- perl_classes_(false),
- word_boundary_(false),
- one_line_(false) {
-}
-
-// static empty objects for use as const references.
-// To avoid global constructors, allocated in RE2::Init().
+const int RE2::Options::kDefaultMaxMem; // initialized in re2.h
+
+RE2::Options::Options(RE2::CannedOptions opt)
+ : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
+ posix_syntax_(opt == RE2::POSIX),
+ longest_match_(opt == RE2::POSIX),
+ log_errors_(opt != RE2::Quiet),
+ max_mem_(kDefaultMaxMem),
+ literal_(false),
+ never_nl_(false),
+ dot_nl_(false),
+ never_capture_(false),
+ case_sensitive_(true),
+ perl_classes_(false),
+ word_boundary_(false),
+ one_line_(false) {
+}
+
+// static empty objects for use as const references.
+// To avoid global constructors, allocated in RE2::Init().
static const std::string* empty_string;
static const std::map<std::string, int>* empty_named_groups;
static const std::map<int, std::string>* empty_group_names;
@@ -67,37 +67,37 @@ static const std::map<int, std::string>* empty_group_names;
// Converts from Regexp error code to RE2 error code.
// Maybe some day they will diverge. In any event, this
// hides the existence of Regexp from RE2 users.
-static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
+static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
switch (code) {
- case re2::kRegexpSuccess:
+ case re2::kRegexpSuccess:
return RE2::NoError;
- case re2::kRegexpInternalError:
+ case re2::kRegexpInternalError:
return RE2::ErrorInternal;
- case re2::kRegexpBadEscape:
+ case re2::kRegexpBadEscape:
return RE2::ErrorBadEscape;
- case re2::kRegexpBadCharClass:
+ case re2::kRegexpBadCharClass:
return RE2::ErrorBadCharClass;
- case re2::kRegexpBadCharRange:
+ case re2::kRegexpBadCharRange:
return RE2::ErrorBadCharRange;
- case re2::kRegexpMissingBracket:
+ case re2::kRegexpMissingBracket:
return RE2::ErrorMissingBracket;
- case re2::kRegexpMissingParen:
+ case re2::kRegexpMissingParen:
return RE2::ErrorMissingParen;
case re2::kRegexpUnexpectedParen:
return RE2::ErrorUnexpectedParen;
- case re2::kRegexpTrailingBackslash:
+ case re2::kRegexpTrailingBackslash:
return RE2::ErrorTrailingBackslash;
- case re2::kRegexpRepeatArgument:
+ case re2::kRegexpRepeatArgument:
return RE2::ErrorRepeatArgument;
- case re2::kRegexpRepeatSize:
+ case re2::kRegexpRepeatSize:
return RE2::ErrorRepeatSize;
- case re2::kRegexpRepeatOp:
+ case re2::kRegexpRepeatOp:
return RE2::ErrorRepeatOp;
- case re2::kRegexpBadPerlOp:
+ case re2::kRegexpBadPerlOp:
return RE2::ErrorBadPerlOp;
- case re2::kRegexpBadUTF8:
+ case re2::kRegexpBadUTF8:
return RE2::ErrorBadUTF8;
- case re2::kRegexpBadNamedCapture:
+ case re2::kRegexpBadNamedCapture:
return RE2::ErrorBadNamedCapture;
}
return RE2::ErrorInternal;
@@ -130,8 +130,8 @@ int RE2::Options::ParseFlags() const {
int flags = Regexp::ClassNL;
switch (encoding()) {
default:
- if (log_errors())
- LOG(ERROR) << "Unknown encoding " << encoding();
+ if (log_errors())
+ LOG(ERROR) << "Unknown encoding " << encoding();
break;
case RE2::Options::EncodingUTF8:
break;
@@ -149,12 +149,12 @@ int RE2::Options::ParseFlags() const {
if (never_nl())
flags |= Regexp::NeverNL;
- if (dot_nl())
- flags |= Regexp::DotNL;
-
- if (never_capture())
- flags |= Regexp::NeverCapture;
-
+ if (dot_nl())
+ flags |= Regexp::DotNL;
+
+ if (never_capture())
+ flags |= Regexp::NeverCapture;
+
if (!case_sensitive())
flags |= Regexp::FoldCase;
@@ -171,16 +171,16 @@ int RE2::Options::ParseFlags() const {
}
void RE2::Init(const StringPiece& pattern, const Options& options) {
- static std::once_flag empty_once;
- std::call_once(empty_once, []() {
+ static std::once_flag empty_once;
+ std::call_once(empty_once, []() {
empty_string = new std::string;
empty_named_groups = new std::map<std::string, int>;
empty_group_names = new std::map<int, std::string>;
- });
-
+ });
+
pattern_.assign(pattern.data(), pattern.size());
options_.Copy(options);
- entire_regexp_ = NULL;
+ entire_regexp_ = NULL;
error_ = empty_string;
error_code_ = NoError;
error_arg_.clear();
@@ -211,7 +211,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
return;
}
- re2::Regexp* suffix;
+ re2::Regexp* suffix;
if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
suffix_regexp_ = suffix;
else
@@ -243,20 +243,20 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
}
// Returns rprog_, computing it if needed.
-re2::Prog* RE2::ReverseProg() const {
- std::call_once(rprog_once_, [](const RE2* re) {
- re->rprog_ =
- re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
- if (re->rprog_ == NULL) {
- if (re->options_.log_errors())
- LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
+re2::Prog* RE2::ReverseProg() const {
+ std::call_once(rprog_once_, [](const RE2* re) {
+ re->rprog_ =
+ re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
+ if (re->rprog_ == NULL) {
+ if (re->options_.log_errors())
+ LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
// We no longer touch error_ and error_code_ because failing to compile
// the reverse Prog is not a showstopper: falling back to NFA execution
// is fine. More importantly, an RE2 object is supposed to be logically
// immutable: whatever ok() would have returned after Init() completed,
// it should continue to return that no matter what ReverseProg() does.
}
- }, this);
+ }, this);
return rprog_;
}
@@ -267,11 +267,11 @@ RE2::~RE2() {
entire_regexp_->Decref();
delete prog_;
delete rprog_;
- if (error_ != empty_string)
+ if (error_ != empty_string)
delete error_;
- if (named_groups_ != NULL && named_groups_ != empty_named_groups)
+ if (named_groups_ != NULL && named_groups_ != empty_named_groups)
delete named_groups_;
- if (group_names_ != NULL && group_names_ != empty_group_names)
+ if (group_names_ != NULL && group_names_ != empty_group_names)
delete group_names_;
}
@@ -282,8 +282,8 @@ int RE2::ProgramSize() const {
}
int RE2::ReverseProgramSize() const {
- if (prog_ == NULL)
- return -1;
+ if (prog_ == NULL)
+ return -1;
Prog* prog = ReverseProg();
if (prog == NULL)
return -1;
@@ -306,12 +306,12 @@ static int FindMSBSet(uint32_t n) {
if (word != 0) {
n = word;
c += shift;
- }
- }
+ }
+ }
return c;
#endif
-}
-
+}
+
static int Fanout(Prog* prog, std::vector<int>* histogram) {
SparseArray<int> fanout(prog->size());
prog->Fanout(&fanout);
@@ -329,8 +329,8 @@ static int Fanout(Prog* prog, std::vector<int>* histogram) {
if (histogram != NULL)
histogram->assign(data, data+size);
return size-1;
-}
-
+}
+
int RE2::ProgramFanout(std::vector<int>* histogram) const {
if (prog_ == NULL)
return -1;
@@ -348,23 +348,23 @@ int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
// Returns named_groups_, computing it if needed.
const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
- std::call_once(named_groups_once_, [](const RE2* re) {
- if (re->suffix_regexp_ != NULL)
- re->named_groups_ = re->suffix_regexp_->NamedCaptures();
- if (re->named_groups_ == NULL)
- re->named_groups_ = empty_named_groups;
- }, this);
+ std::call_once(named_groups_once_, [](const RE2* re) {
+ if (re->suffix_regexp_ != NULL)
+ re->named_groups_ = re->suffix_regexp_->NamedCaptures();
+ if (re->named_groups_ == NULL)
+ re->named_groups_ = empty_named_groups;
+ }, this);
return *named_groups_;
}
// Returns group_names_, computing it if needed.
const std::map<int, std::string>& RE2::CapturingGroupNames() const {
- std::call_once(group_names_once_, [](const RE2* re) {
- if (re->suffix_regexp_ != NULL)
- re->group_names_ = re->suffix_regexp_->CaptureNames();
- if (re->group_names_ == NULL)
- re->group_names_ = empty_group_names;
- }, this);
+ std::call_once(group_names_once_, [](const RE2* re) {
+ if (re->suffix_regexp_ != NULL)
+ re->group_names_ = re->suffix_regexp_->CaptureNames();
+ if (re->group_names_ == NULL)
+ re->group_names_ = empty_group_names;
+ }, this);
return *group_names_;
}
@@ -382,7 +382,7 @@ bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
bool RE2::ConsumeN(StringPiece* input, const RE2& re,
const Arg* const args[], int n) {
- size_t consumed;
+ size_t consumed;
if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
input->remove_prefix(consumed);
return true;
@@ -393,7 +393,7 @@ bool RE2::ConsumeN(StringPiece* input, const RE2& re,
bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
const Arg* const args[], int n) {
- size_t consumed;
+ size_t consumed;
if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
input->remove_prefix(consumed);
return true;
@@ -411,7 +411,7 @@ bool RE2::Replace(std::string* str,
return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
- if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
+ if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
return false;
std::string s;
@@ -439,43 +439,43 @@ int RE2::GlobalReplace(std::string* str,
const char* lastend = NULL;
std::string out;
int count = 0;
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- // Iterate just once when fuzzing. Otherwise, we easily get bogged down
- // and coverage is unlikely to improve despite significant expense.
- while (p == str->data()) {
-#else
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ // Iterate just once when fuzzing. Otherwise, we easily get bogged down
+ // and coverage is unlikely to improve despite significant expense.
+ while (p == str->data()) {
+#else
while (p <= ep) {
-#endif
- if (!re.Match(*str, static_cast<size_t>(p - str->data()),
- str->size(), UNANCHORED, vec, nvec))
+#endif
+ if (!re.Match(*str, static_cast<size_t>(p - str->data()),
+ str->size(), UNANCHORED, vec, nvec))
break;
if (p < vec[0].data())
out.append(p, vec[0].data() - p);
if (vec[0].data() == lastend && vec[0].empty()) {
// Disallow empty match at end of last match: skip ahead.
- //
+ //
// fullrune() takes int, not ptrdiff_t. However, it just looks
- // at the leading byte and treats any length >= 4 the same.
- if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
+ // at the leading byte and treats any length >= 4 the same.
+ if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
- // re is in UTF-8 mode and there is enough left of str
- // to allow us to advance by up to UTFmax bytes.
- Rune r;
- int n = chartorune(&r, p);
- // Some copies of chartorune have a bug that accepts
- // encodings of values in (10FFFF, 1FFFFF] as valid.
- if (r > Runemax) {
- n = 1;
- r = Runeerror;
- }
- if (!(n == 1 && r == Runeerror)) { // no decoding error
- out.append(p, n);
- p += n;
- continue;
- }
- }
- // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
- // we fell through from above and the GIGO principle applies.
+ // re is in UTF-8 mode and there is enough left of str
+ // to allow us to advance by up to UTFmax bytes.
+ Rune r;
+ int n = chartorune(&r, p);
+ // Some copies of chartorune have a bug that accepts
+ // encodings of values in (10FFFF, 1FFFFF] as valid.
+ if (r > Runemax) {
+ n = 1;
+ r = Runeerror;
+ }
+ if (!(n == 1 && r == Runeerror)) { // no decoding error
+ out.append(p, n);
+ p += n;
+ continue;
+ }
+ }
+ // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
+ // we fell through from above and the GIGO principle applies.
if (p < ep)
out.append(p, 1);
p++;
@@ -492,7 +492,7 @@ int RE2::GlobalReplace(std::string* str,
if (p < ep)
out.append(p, ep - p);
- using std::swap;
+ using std::swap;
swap(out, *str);
return count;
}
@@ -507,7 +507,7 @@ bool RE2::Extract(const StringPiece& text,
return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
- if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
+ if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
return false;
out->clear();
@@ -525,7 +525,7 @@ std::string RE2::QuoteMeta(const StringPiece& unquoted) {
// that. (This also makes it identical to the perl function of the
// same name except for the null-character special case;
// see `perldoc -f quotemeta`.)
- for (size_t ii = 0; ii < unquoted.size(); ++ii) {
+ for (size_t ii = 0; ii < unquoted.size(); ++ii) {
// Note that using 'isalnum' here raises the benchmark time from
// 32ns to 58ns:
if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
@@ -557,7 +557,7 @@ bool RE2::PossibleMatchRange(std::string* min, std::string* max,
if (prog_ == NULL)
return false;
- int n = static_cast<int>(prefix_.size());
+ int n = static_cast<int>(prefix_.size());
if (n > maxlen)
n = maxlen;
@@ -596,12 +596,12 @@ bool RE2::PossibleMatchRange(std::string* min, std::string* max,
// Avoid possible locale nonsense in standard strcasecmp.
// The string a is known to be all lowercase.
-static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
+static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
const char* ae = a + len;
for (; a < ae; a++, b++) {
- uint8_t x = *a;
- uint8_t y = *b;
+ uint8_t x = *a;
+ uint8_t y = *b;
if ('A' <= y && y <= 'Z')
y += 'a' - 'A';
if (x != y)
@@ -614,8 +614,8 @@ static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
/***** Actual matching and rewriting code *****/
bool RE2::Match(const StringPiece& text,
- size_t startpos,
- size_t endpos,
+ size_t startpos,
+ size_t endpos,
Anchor re_anchor,
StringPiece* submatch,
int nsubmatch) const {
@@ -625,18 +625,18 @@ bool RE2::Match(const StringPiece& text,
return false;
}
- if (startpos > endpos || endpos > text.size()) {
- if (options_.log_errors())
- LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
- << "startpos: " << startpos << ", "
- << "endpos: " << endpos << ", "
- << "text size: " << text.size() << "]";
- return false;
- }
-
+ if (startpos > endpos || endpos > text.size()) {
+ if (options_.log_errors())
+ LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
+ << "startpos: " << startpos << ", "
+ << "endpos: " << endpos << ", "
+ << "text size: " << text.size() << "]";
+ return false;
+ }
+
StringPiece subtext = text;
subtext.remove_prefix(startpos);
- subtext.remove_suffix(text.size() - endpos);
+ subtext.remove_suffix(text.size() - endpos);
// Use DFAs to find exact location of match, filter out non-matches.
@@ -651,13 +651,13 @@ bool RE2::Match(const StringPiece& text,
if (ncap > nsubmatch)
ncap = nsubmatch;
- // If the regexp is anchored explicitly, must not be in middle of text.
- if (prog_->anchor_start() && startpos != 0)
- return false;
+ // If the regexp is anchored explicitly, must not be in middle of text.
+ if (prog_->anchor_start() && startpos != 0)
+ return false;
if (prog_->anchor_end() && endpos != text.size())
return false;
-
- // If the regexp is anchored explicitly, update re_anchor
+
+ // If the regexp is anchored explicitly, update re_anchor
// so that we can potentially fall into a faster case below.
if (prog_->anchor_start() && prog_->anchor_end())
re_anchor = ANCHOR_BOTH;
@@ -665,10 +665,10 @@ bool RE2::Match(const StringPiece& text,
re_anchor = ANCHOR_START;
// Check for the required prefix, if any.
- size_t prefixlen = 0;
+ size_t prefixlen = 0;
if (!prefix_.empty()) {
- if (startpos != 0)
- return false;
+ if (startpos != 0)
+ return false;
prefixlen = prefix_.size();
if (prefixlen > subtext.size())
return false;
@@ -738,7 +738,7 @@ bool RE2::Match(const StringPiece& text,
if (!prog_->SearchDFA(subtext, text, anchor, kind,
matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
- if (options_.log_errors())
+ if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_.size() << ", "
<< "program size " << prog_->size() << ", "
@@ -764,7 +764,7 @@ bool RE2::Match(const StringPiece& text,
if (!prog->SearchDFA(match, text, Prog::kAnchored,
Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
- if (options_.log_errors())
+ if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_.size() << ", "
<< "program size " << prog->size() << ", "
@@ -774,8 +774,8 @@ bool RE2::Match(const StringPiece& text,
skipped_test = true;
break;
}
- if (options_.log_errors())
- LOG(ERROR) << "SearchDFA inconsistency";
+ if (options_.log_errors())
+ LOG(ERROR) << "SearchDFA inconsistency";
return false;
}
break;
@@ -807,13 +807,13 @@ bool RE2::Match(const StringPiece& text,
if (!prog_->SearchDFA(subtext, text, anchor, kind,
&match, &dfa_failed, NULL)) {
if (dfa_failed) {
- if (options_.log_errors())
+ if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_.size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
- // Fall back to NFA below.
+ // Fall back to NFA below.
skipped_test = true;
break;
}
@@ -843,20 +843,20 @@ bool RE2::Match(const StringPiece& text,
if (can_one_pass && anchor != Prog::kUnanchored) {
if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
- if (!skipped_test && options_.log_errors())
+ if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchOnePass inconsistency";
return false;
}
} else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
if (!prog_->SearchBitState(subtext1, text, anchor,
kind, submatch, ncap)) {
- if (!skipped_test && options_.log_errors())
+ if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchBitState inconsistency";
return false;
}
} else {
if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
- if (!skipped_test && options_.log_errors())
+ if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchNFA inconsistency";
return false;
}
@@ -865,19 +865,19 @@ bool RE2::Match(const StringPiece& text,
// Adjust overall match for required prefix that we stripped off.
if (prefixlen > 0 && nsubmatch > 0)
- submatch[0] = StringPiece(submatch[0].data() - prefixlen,
+ submatch[0] = StringPiece(submatch[0].data() - prefixlen,
submatch[0].size() + prefixlen);
// Zero submatches that don't exist in the regexp.
for (int i = ncap; i < nsubmatch; i++)
- submatch[i] = StringPiece();
+ submatch[i] = StringPiece();
return true;
}
// Internal matcher - like Match() but takes Args not StringPieces.
bool RE2::DoMatch(const StringPiece& text,
Anchor re_anchor,
- size_t* consumed,
+ size_t* consumed,
const Arg* const* args,
int n) const {
if (!ok()) {
@@ -914,7 +914,7 @@ bool RE2::DoMatch(const StringPiece& text,
return false;
}
- if (consumed != NULL)
+ if (consumed != NULL)
*consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text));
if (n == 0 || args == NULL) {
@@ -1061,11 +1061,11 @@ bool Parse(const char* str, size_t n, TString* dest) {
template <>
bool Parse(const char* str, size_t n, StringPiece* dest) {
- if (dest == NULL) return true;
+ if (dest == NULL) return true;
*dest = StringPiece(str, n);
- return true;
-}
-
+ return true;
+}
+
template <>
bool Parse(const char* str, size_t n, char* dest) {
if (n != 1) return false;
@@ -1079,13 +1079,13 @@ bool Parse(const char* str, size_t n, signed char* dest) {
if (n != 1) return false;
if (dest == NULL) return true;
*dest = str[0];
- return true;
-}
-
+ return true;
+}
+
template <>
bool Parse(const char* str, size_t n, unsigned char* dest) {
- if (n != 1) return false;
- if (dest == NULL) return true;
+ if (n != 1) return false;
+ if (dest == NULL) return true;
*dest = str[0];
return true;
}
@@ -1093,61 +1093,61 @@ bool Parse(const char* str, size_t n, unsigned char* dest) {
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;
-// REQUIRES "buf" must have length at least nbuf.
-// Copies "str" into "buf" and null-terminates.
-// Overwrites *np with the new length.
-static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
- size_t* np, bool accept_spaces) {
- size_t n = *np;
- if (n == 0) return "";
- if (n > 0 && isspace(*str)) {
+// REQUIRES "buf" must have length at least nbuf.
+// Copies "str" into "buf" and null-terminates.
+// Overwrites *np with the new length.
+static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
+ size_t* np, bool accept_spaces) {
+ size_t n = *np;
+ if (n == 0) return "";
+ if (n > 0 && isspace(*str)) {
// We are less forgiving than the strtoxxx() routines and do not
- // allow leading spaces. We do allow leading spaces for floats.
- if (!accept_spaces) {
- return "";
- }
- while (n > 0 && isspace(*str)) {
- n--;
- str++;
- }
- }
-
- // Although buf has a fixed maximum size, we can still handle
- // arbitrarily large integers correctly by omitting leading zeros.
- // (Numbers that are still too long will be out of range.)
- // Before deciding whether str is too long,
- // remove leading zeros with s/000+/00/.
- // Leaving the leading two zeros in place means that
- // we don't change 0000x123 (invalid) into 0x123 (valid).
- // Skip over leading - before replacing.
- bool neg = false;
- if (n >= 1 && str[0] == '-') {
- neg = true;
- n--;
- str++;
- }
-
- if (n >= 3 && str[0] == '0' && str[1] == '0') {
- while (n >= 3 && str[2] == '0') {
- n--;
- str++;
- }
- }
-
- if (neg) { // make room in buf for -
- n++;
- str--;
+ // allow leading spaces. We do allow leading spaces for floats.
+ if (!accept_spaces) {
+ return "";
+ }
+ while (n > 0 && isspace(*str)) {
+ n--;
+ str++;
+ }
}
- if (n > nbuf-1) return "";
-
- memmove(buf, str, n);
- if (neg) {
- buf[0] = '-';
+ // Although buf has a fixed maximum size, we can still handle
+ // arbitrarily large integers correctly by omitting leading zeros.
+ // (Numbers that are still too long will be out of range.)
+ // Before deciding whether str is too long,
+ // remove leading zeros with s/000+/00/.
+ // Leaving the leading two zeros in place means that
+ // we don't change 0000x123 (invalid) into 0x123 (valid).
+ // Skip over leading - before replacing.
+ bool neg = false;
+ if (n >= 1 && str[0] == '-') {
+ neg = true;
+ n--;
+ str++;
}
- buf[n] = '\0';
- *np = n;
- return buf;
+
+ if (n >= 3 && str[0] == '0' && str[1] == '0') {
+ while (n >= 3 && str[2] == '0') {
+ n--;
+ str++;
+ }
+ }
+
+ if (neg) { // make room in buf for -
+ n++;
+ str--;
+ }
+
+ if (n > nbuf-1) return "";
+
+ memmove(buf, str, n);
+ if (neg) {
+ buf[0] = '-';
+ }
+ buf[n] = '\0';
+ *np = n;
+ return buf;
}
template <>
@@ -1186,7 +1186,7 @@ template <>
bool Parse(const char* str, size_t n, long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, sizeof buf, str, &n, false);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
char* end;
errno = 0;
long r = strtol(str, &end, radix);
@@ -1201,11 +1201,11 @@ template <>
bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, sizeof buf, str, &n, false);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
if (str[0] == '-') {
- // strtoul() will silently accept negative numbers and parse
- // them. This module is more strict and treats them as errors.
- return false;
+ // strtoul() will silently accept negative numbers and parse
+ // them. This module is more strict and treats them as errors.
+ return false;
}
char* end;
@@ -1262,10 +1262,10 @@ template <>
bool Parse(const char* str, size_t n, long long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, sizeof buf, str, &n, false);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
char* end;
errno = 0;
- long long r = strtoll(str, &end, radix);
+ long long r = strtoll(str, &end, radix);
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
@@ -1277,7 +1277,7 @@ template <>
bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
if (n == 0) return false;
char buf[kMaxNumberLength+1];
- str = TerminateNumber(buf, sizeof buf, str, &n, false);
+ str = TerminateNumber(buf, sizeof buf, str, &n, false);
if (str[0] == '-') {
// strtoull() will silently accept negative numbers and parse
// them. This module is more strict and treats them as errors.
@@ -1285,7 +1285,7 @@ bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
}
char* end;
errno = 0;
- unsigned long long r = strtoull(str, &end, radix);
+ unsigned long long r = strtoull(str, &end, radix);
if (end != str + n) return false; // Leftover junk
if (errno) return false;
if (dest == NULL) return true;
diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h
index f8f8043daf..90cdf87880 100644
--- a/contrib/libs/re2/re2/re2.h
+++ b/contrib/libs/re2/re2/re2.h
@@ -1,35 +1,35 @@
-// Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_RE2_H_
-#define RE2_RE2_H_
-
-// C++ interface to the re2 regular-expression library.
-// RE2 supports Perl-style regular expressions (with extensions like
-// \d, \w, \s, ...).
-//
-// -----------------------------------------------------------------------
-// REGEXP SYNTAX:
-//
-// This module uses the re2 library and hence supports
-// its syntax for regular expressions, which is similar to Perl's with
-// some of the more complicated things thrown away. In particular,
-// backreferences and generalized assertions are not available, nor is \Z.
-//
-// See https://github.com/google/re2/wiki/Syntax for the syntax
-// supported by RE2, and a comparison with PCRE and PERL regexps.
-//
-// For those not familiar with Perl's regular expressions,
-// here are some examples of the most commonly used extensions:
-//
-// "hello (\\w+) world" -- \w matches a "word" character
-// "version (\\d+)" -- \d matches a digit
-// "hello\\s+world" -- \s matches any whitespace character
-// "\\b(\\w+)\\b" -- \b matches non-empty string at word boundary
-// "(?i)hello" -- (?i) turns on case-insensitive matching
-// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
-//
+// Copyright 2003-2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_RE2_H_
+#define RE2_RE2_H_
+
+// C++ interface to the re2 regular-expression library.
+// RE2 supports Perl-style regular expressions (with extensions like
+// \d, \w, \s, ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module uses the re2 library and hence supports
+// its syntax for regular expressions, which is similar to Perl's with
+// some of the more complicated things thrown away. In particular,
+// backreferences and generalized assertions are not available, nor is \Z.
+//
+// See https://github.com/google/re2/wiki/Syntax for the syntax
+// supported by RE2, and a comparison with PCRE and PERL regexps.
+//
+// For those not familiar with Perl's regular expressions,
+// here are some examples of the most commonly used extensions:
+//
+// "hello (\\w+) world" -- \w matches a "word" character
+// "version (\\d+)" -- \d matches a digit
+// "hello\\s+world" -- \s matches any whitespace character
+// "\\b(\\w+)\\b" -- \b matches the empty string at a word boundary
+// "(?i)hello" -- (?i) turns on case-insensitive matching
+// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
+//
// The double backslashes are needed when writing C++ string literals.
// However, they should NOT be used when writing C++11 raw string literals:
//
@@ -43,31 +43,31 @@
// When using UTF-8 encoding, case-insensitive matching will perform
// simple case folding, not full case folding.
//
-// -----------------------------------------------------------------------
-// MATCHING INTERFACE:
-//
-// The "FullMatch" operation checks that supplied text matches a
-// supplied pattern exactly.
-//
-// Example: successful match
-// CHECK(RE2::FullMatch("hello", "h.*o"));
-//
-// Example: unsuccessful match (requires full match):
-// CHECK(!RE2::FullMatch("hello", "e"));
-//
-// -----------------------------------------------------------------------
-// UTF-8 AND THE MATCHING INTERFACE:
-//
-// By default, the pattern and input text are interpreted as UTF-8.
-// The RE2::Latin1 option causes them to be interpreted as Latin-1.
-//
-// Example:
-// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
-// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
-//
-// -----------------------------------------------------------------------
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+// CHECK(RE2::FullMatch("hello", "h.*o"));
+//
+// Example: unsuccessful match (requires full match):
+// CHECK(!RE2::FullMatch("hello", "e"));
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, the pattern and input text are interpreted as UTF-8.
+// The RE2::Latin1 option causes them to be interpreted as Latin-1.
+//
+// Example:
+// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
+// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
+//
+// -----------------------------------------------------------------------
// MATCHING WITH SUBSTRING EXTRACTION:
-//
+//
// You can supply extra pointer arguments to extract matched substrings.
// On match failure, none of the pointees will have been modified.
// On match success, the substrings will be converted (as necessary) and
@@ -79,252 +79,252 @@
// that do not inspect the substring contents. Hence, in the common case
// where all of the pointees are of such types, failure is always due to
// match failure and thus none of the pointees will have been modified.
-//
-// Example: extracts "ruby" into "s" and 1234 into "i"
-// int i;
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+// int i;
// std::string s;
-// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
-//
-// Example: fails because string cannot be stored in integer
-// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
-//
+// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+//
+// Example: fails because string cannot be stored in integer
+// CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
+//
// Example: fails because there aren't enough sub-patterns
-// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
-//
-// Example: does not try to extract any extra sub-patterns
-// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
-//
-// Example: does not try to extract into NULL
-// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
-//
-// Example: integer overflow causes failure
-// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
-//
-// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
-// This may get a little faster in the future, but right now is slower
-// than PCRE. On the other hand, failed matches run *very* fast (faster
-// than PCRE), as do matches without substring extraction.
-//
-// -----------------------------------------------------------------------
-// PARTIAL MATCHES
-//
-// You can use the "PartialMatch" operation when you want the pattern
-// to match any substring of the text.
-//
-// Example: simple search for a string:
-// CHECK(RE2::PartialMatch("hello", "ell"));
-//
-// Example: find first number in a string
-// int number;
-// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
-// CHECK_EQ(number, 100);
-//
-// -----------------------------------------------------------------------
-// PRE-COMPILED REGULAR EXPRESSIONS
-//
-// RE2 makes it easy to use any string as a regular expression, without
-// requiring a separate compilation step.
-//
-// If speed is of the essence, you can create a pre-compiled "RE2"
-// object from the pattern and use it multiple times. If you do so,
-// you can typically parse text faster than with sscanf.
-//
-// Example: precompile pattern for faster matching:
-// RE2 pattern("h.*o");
-// while (ReadLine(&str)) {
-// if (RE2::FullMatch(str, pattern)) ...;
-// }
-//
-// -----------------------------------------------------------------------
-// SCANNING TEXT INCREMENTALLY
-//
-// The "Consume" operation may be useful if you want to repeatedly
-// match regular expressions at the front of a string and skip over
-// them as they match. This requires use of the "StringPiece" type,
-// which represents a sub-range of a real string.
-//
-// Example: read lines of the form "var = value" from a string.
+// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
+//
+// Example: does not try to extract any extra sub-patterns
+// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
+//
+// Example: does not try to extract into NULL
+// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
+//
+// Example: integer overflow causes failure
+// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
+//
+// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
+// This may get a little faster in the future, but right now is slower
+// than PCRE. On the other hand, failed matches run *very* fast (faster
+// than PCRE), as do matches without substring extraction.
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+// CHECK(RE2::PartialMatch("hello", "ell"));
+//
+// Example: find first number in a string
+// int number;
+// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
+// CHECK_EQ(number, 100);
+//
+// -----------------------------------------------------------------------
+// PRE-COMPILED REGULAR EXPRESSIONS
+//
+// RE2 makes it easy to use any string as a regular expression, without
+// requiring a separate compilation step.
+//
+// If speed is of the essence, you can create a pre-compiled "RE2"
+// object from the pattern and use it multiple times. If you do so,
+// you can typically parse text faster than with sscanf.
+//
+// Example: precompile pattern for faster matching:
+// RE2 pattern("h.*o");
+// while (ReadLine(&str)) {
+// if (RE2::FullMatch(str, pattern)) ...;
+// }
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match. This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string.
+//
+// Example: read lines of the form "var = value" from a string.
// std::string contents = ...; // Fill string somehow
-// StringPiece input(contents); // Wrap a StringPiece around it
-//
+// StringPiece input(contents); // Wrap a StringPiece around it
+//
// std::string var;
-// int value;
-// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
-// ...;
-// }
-//
-// Each successful call to "Consume" will set "var/value", and also
-// advance "input" so it points past the matched text. Note that if the
-// regular expression matches an empty string, input will advance
-// by 0 bytes. If the regular expression being used might match
-// an empty string, the loop body must check for this case and either
-// advance the string or break out of the loop.
-//
-// The "FindAndConsume" operation is similar to "Consume" but does not
-// anchor your match at the beginning of the string. For example, you
-// could extract all words from a string by repeatedly calling
-// RE2::FindAndConsume(&input, "(\\w+)", &word)
-//
-// -----------------------------------------------------------------------
-// USING VARIABLE NUMBER OF ARGUMENTS
-//
-// The above operations require you to know the number of arguments
-// when you write the code. This is not always possible or easy (for
-// example, the regular expression may be calculated at run time).
-// You can use the "N" version of the operations when the number of
-// match arguments are determined at run time.
-//
-// Example:
-// const RE2::Arg* args[10];
-// int n;
-// // ... populate args with pointers to RE2::Arg values ...
-// // ... set n to the number of RE2::Arg objects ...
-// bool match = RE2::FullMatchN(input, pattern, args, n);
-//
-// The last statement is equivalent to
-//
-// bool match = RE2::FullMatch(input, pattern,
-// *args[0], *args[1], ..., *args[n - 1]);
-//
-// -----------------------------------------------------------------------
-// PARSING HEX/OCTAL/C-RADIX NUMBERS
-//
-// By default, if you pass a pointer to a numeric value, the
-// corresponding text is interpreted as a base-10 number. You can
-// instead wrap the pointer with a call to one of the operators Hex(),
-// Octal(), or CRadix() to interpret the text in another base. The
-// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
-// prefixes, but defaults to base-10.
-//
-// Example:
-// int a, b, c, d;
-// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
-// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d));
-// will leave 64 in a, b, c, and d.
-
-#include <stddef.h>
-#include <stdint.h>
-#include <algorithm>
-#include <map>
-#include <mutex>
-#include <string>
+// int value;
+// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
+// ...;
+// }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text. Note that if the
+// regular expression matches an empty string, input will advance
+// by 0 bytes. If the regular expression being used might match
+// an empty string, the loop body must check for this case and either
+// advance the string or break out of the loop.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string. For example, you
+// could extract all words from a string by repeatedly calling
+// RE2::FindAndConsume(&input, "(\\w+)", &word)
+//
+// -----------------------------------------------------------------------
+// USING VARIABLE NUMBER OF ARGUMENTS
+//
+// The above operations require you to know the number of arguments
+// when you write the code. This is not always possible or easy (for
+// example, the regular expression may be calculated at run time).
+// You can use the "N" version of the operations when the number of
+// match arguments is determined at run time.
+//
+// Example:
+// const RE2::Arg* args[10];
+// int n;
+// // ... populate args with pointers to RE2::Arg values ...
+// // ... set n to the number of RE2::Arg objects ...
+// bool match = RE2::FullMatchN(input, pattern, args, n);
+//
+// The last statement is equivalent to
+//
+// bool match = RE2::FullMatch(input, pattern,
+// *args[0], *args[1], ..., *args[n - 1]);
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number. You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base. The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+// int a, b, c, d;
+// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
+// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)));
+// will leave 64 in a, b, c, and d.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <algorithm>
+#include <map>
+#include <mutex>
+#include <string>
#include <type_traits>
#include <vector>
#if defined(ARCADIA_ROOT)
-#include <util/generic/string.h>
+#include <util/generic/string.h>
#endif
-
+
#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif
-#include "re2/stringpiece.h"
-
-namespace re2 {
-class Prog;
-class Regexp;
-} // namespace re2
-
-namespace re2 {
-
-// Interface for regular expression matching. Also corresponds to a
-// pre-compiled regular expression. An "RE2" object is safe for
-// concurrent use by multiple threads.
-class RE2 {
- public:
- // We convert user-passed pointers into special Arg objects
- class Arg;
- class Options;
-
- // Defined in set.h.
- class Set;
-
- enum ErrorCode {
- NoError = 0,
-
- // Unexpected error
- ErrorInternal,
-
- // Parse errors
- ErrorBadEscape, // bad escape sequence
- ErrorBadCharClass, // bad character class
- ErrorBadCharRange, // bad character class range
- ErrorMissingBracket, // missing closing ]
- ErrorMissingParen, // missing closing )
+#include "re2/stringpiece.h"
+
+namespace re2 {
+class Prog;
+class Regexp;
+} // namespace re2
+
+namespace re2 {
+
+// Interface for regular expression matching. Also corresponds to a
+// pre-compiled regular expression. An "RE2" object is safe for
+// concurrent use by multiple threads.
+class RE2 {
+ public:
+ // We convert user-passed pointers into special Arg objects
+ class Arg;
+ class Options;
+
+ // Defined in set.h.
+ class Set;
+
+ enum ErrorCode {
+ NoError = 0,
+
+ // Unexpected error
+ ErrorInternal,
+
+ // Parse errors
+ ErrorBadEscape, // bad escape sequence
+ ErrorBadCharClass, // bad character class
+ ErrorBadCharRange, // bad character class range
+ ErrorMissingBracket, // missing closing ]
+ ErrorMissingParen, // missing closing )
ErrorUnexpectedParen, // unexpected closing )
- ErrorTrailingBackslash, // trailing \ at end of regexp
- ErrorRepeatArgument, // repeat argument missing, e.g. "*"
- ErrorRepeatSize, // bad repetition argument
- ErrorRepeatOp, // bad repetition operator
- ErrorBadPerlOp, // bad perl operator
- ErrorBadUTF8, // invalid UTF-8 in regexp
- ErrorBadNamedCapture, // bad named capture group
- ErrorPatternTooLarge // pattern too large (compile failed)
- };
-
- // Predefined common options.
- // If you need more complicated things, instantiate
- // an Option class, possibly passing one of these to
- // the Option constructor, change the settings, and pass that
- // Option class to the RE2 constructor.
- enum CannedOptions {
- DefaultOptions = 0,
- Latin1, // treat input as Latin-1 (default UTF-8)
- POSIX, // POSIX syntax, leftmost-longest match
- Quiet // do not log about regexp parse errors
- };
-
+ ErrorTrailingBackslash, // trailing \ at end of regexp
+ ErrorRepeatArgument, // repeat argument missing, e.g. "*"
+ ErrorRepeatSize, // bad repetition argument
+ ErrorRepeatOp, // bad repetition operator
+ ErrorBadPerlOp, // bad perl operator
+ ErrorBadUTF8, // invalid UTF-8 in regexp
+ ErrorBadNamedCapture, // bad named capture group
+ ErrorPatternTooLarge // pattern too large (compile failed)
+ };
+
+ // Predefined common options.
+ // If you need more complicated things, instantiate
+ // an Options object, possibly passing one of these to
+ // the Options constructor, change the settings, and pass that
+ // Options object to the RE2 constructor.
+ enum CannedOptions {
+ DefaultOptions = 0,
+ Latin1, // treat input as Latin-1 (default UTF-8)
+ POSIX, // POSIX syntax, leftmost-longest match
+ Quiet // do not log about regexp parse errors
+ };
+
// Need to have the const char* and const std::string& forms for implicit
- // conversions when passing string literals to FullMatch and PartialMatch.
- // Otherwise the StringPiece form would be sufficient.
-#ifndef SWIG
- RE2(const char* pattern);
+ // conversions when passing string literals to FullMatch and PartialMatch.
+ // Otherwise the StringPiece form would be sufficient.
+#ifndef SWIG
+ RE2(const char* pattern);
RE2(const std::string& pattern);
-#endif
- RE2(const StringPiece& pattern);
- RE2(const StringPiece& pattern, const Options& options);
+#endif
+ RE2(const StringPiece& pattern);
+ RE2(const StringPiece& pattern, const Options& options);
#if defined(ARCADIA_ROOT)
- // ambiguity resolution.
- RE2(const TString& pattern) : RE2(StringPiece(pattern)) {}
+ // ambiguity resolution.
+ RE2(const TString& pattern) : RE2(StringPiece(pattern)) {}
#endif
- ~RE2();
-
- // Returns whether RE2 was created properly.
- bool ok() const { return error_code() == NoError; }
-
- // The string specification for this RE2. E.g.
- // RE2 re("ab*c?d+");
- // re.pattern(); // "ab*c?d+"
+ ~RE2();
+
+ // Returns whether RE2 was created properly.
+ bool ok() const { return error_code() == NoError; }
+
+ // The string specification for this RE2. E.g.
+ // RE2 re("ab*c?d+");
+ // re.pattern(); // "ab*c?d+"
const std::string& pattern() const { return pattern_; }
-
- // If RE2 could not be created properly, returns an error string.
- // Else returns the empty string.
+
+ // If RE2 could not be created properly, returns an error string.
+ // Else returns the empty string.
const std::string& error() const { return *error_; }
-
- // If RE2 could not be created properly, returns an error code.
- // Else returns RE2::NoError (== 0).
- ErrorCode error_code() const { return error_code_; }
-
- // If RE2 could not be created properly, returns the offending
- // portion of the regexp.
+
+ // If RE2 could not be created properly, returns an error code.
+ // Else returns RE2::NoError (== 0).
+ ErrorCode error_code() const { return error_code_; }
+
+ // If RE2 could not be created properly, returns the offending
+ // portion of the regexp.
const std::string& error_arg() const { return error_arg_; }
-
- // Returns the program size, a very approximate measure of a regexp's "cost".
- // Larger numbers are more expensive than smaller numbers.
- int ProgramSize() const;
+
+ // Returns the program size, a very approximate measure of a regexp's "cost".
+ // Larger numbers are more expensive than smaller numbers.
+ int ProgramSize() const;
int ReverseProgramSize() const;
-
+
// If histogram is not null, outputs the program fanout
// as a histogram bucketed by powers of 2.
- // Returns the number of the largest non-empty bucket.
+ // Returns the number of the largest non-empty bucket.
int ProgramFanout(std::vector<int>* histogram) const;
int ReverseProgramFanout(std::vector<int>* histogram) const;
-
- // Returns the underlying Regexp; not for general use.
- // Returns entire_regexp_ so that callers don't need
- // to know about prefix_ and prefix_foldcase_.
- re2::Regexp* Regexp() const { return entire_regexp_; }
-
+
+ // Returns the underlying Regexp; not for general use.
+ // Returns entire_regexp_ so that callers don't need
+ // to know about prefix_ and prefix_foldcase_.
+ re2::Regexp* Regexp() const { return entire_regexp_; }
+
/***** The array-based matching interface ******/
// The functions here have names ending in 'N' and are used to implement
@@ -360,41 +360,41 @@ class RE2 {
// The first layer constructs the temporary Arg objects. The second layer
// (above) constructs the array of pointers to the temporary Arg objects.
- /***** The useful part: the matching interface *****/
-
- // Matches "text" against "re". If pointer arguments are
- // supplied, copies matched sub-patterns into them.
- //
+ /***** The useful part: the matching interface *****/
+
+ // Matches "text" against "re". If pointer arguments are
+ // supplied, copies matched sub-patterns into them.
+ //
// You can pass in a "const char*" or a "std::string" for "text".
// You can pass in a "const char*" or a "std::string" or a "RE2" for "re".
- //
- // The provided pointer arguments can be pointers to any scalar numeric
- // type, or one of:
+ //
+ // The provided pointer arguments can be pointers to any scalar numeric
+ // type, or one of:
// std::string (matched piece is copied to string)
- // StringPiece (StringPiece is mutated to point to matched piece)
- // T (where "bool T::ParseFrom(const char*, size_t)" exists)
- // (void*)NULL (the corresponding matched sub-pattern is not copied)
- //
- // Returns true iff all of the following conditions are satisfied:
+ // StringPiece (StringPiece is mutated to point to matched piece)
+ // T (where "bool T::ParseFrom(const char*, size_t)" exists)
+ // (void*)NULL (the corresponding matched sub-pattern is not copied)
+ //
+ // Returns true iff all of the following conditions are satisfied:
// a. "text" matches "re" fully - from the beginning to the end of "text".
// b. The number of matched sub-patterns is >= number of supplied pointers.
- // c. The "i"th argument has a suitable type for holding the
- // string captured as the "i"th sub-pattern. If you pass in
- // NULL for the "i"th argument, or pass fewer arguments than
+ // c. The "i"th argument has a suitable type for holding the
+ // string captured as the "i"th sub-pattern. If you pass in
+ // NULL for the "i"th argument, or pass fewer arguments than
// number of sub-patterns, the "i"th captured sub-pattern is
- // ignored.
- //
- // CAVEAT: An optional sub-pattern that does not exist in the
- // matched string is assigned the empty string. Therefore, the
- // following will return false (because the empty string is not a
- // valid number):
- // int number;
- // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
- template <typename... A>
- static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) {
- return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
- }
-
+ // ignored.
+ //
+ // CAVEAT: An optional sub-pattern that does not exist in the
+ // matched string is assigned the empty string. Therefore, the
+ // following will return false (because the empty string is not a
+ // valid number):
+ // int number;
+ // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+ template <typename... A>
+ static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) {
+ return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
+ }
+
// Like FullMatch(), except that "re" is allowed to match a substring
// of "text".
//
@@ -406,11 +406,11 @@ class RE2 {
// NULL for the "i"th argument, or pass fewer arguments than
// number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
- template <typename... A>
- static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
- return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
- }
-
+ template <typename... A>
+ static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
+ return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
+ }
+
// Like FullMatch() and PartialMatch(), except that "re" has to match
// a prefix of the text, and "input" is advanced past the matched
// text. Note: "input" is modified iff this routine returns true
@@ -424,11 +424,11 @@ class RE2 {
// NULL for the "i"th argument, or pass fewer arguments than
// number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
- template <typename... A>
- static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
- return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
- }
-
+ template <typename... A>
+ static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
+ return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
+ }
+
// Like Consume(), but does not anchor the match at the beginning of
// the text. That is, "re" need not start its match at the beginning
// of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds
@@ -442,374 +442,374 @@ class RE2 {
// NULL for the "i"th argument, or pass fewer arguments than
// number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
- template <typename... A>
- static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
- return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
- }
-#endif
-
+ template <typename... A>
+ static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
+ return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
+ }
+#endif
+
// Replace the first match of "re" in "str" with "rewrite".
- // Within "rewrite", backslash-escaped digits (\1 to \9) can be
- // used to insert text matching corresponding parenthesized group
- // from the pattern. \0 in "rewrite" refers to the entire matching
- // text. E.g.,
- //
+ // Within "rewrite", backslash-escaped digits (\1 to \9) can be
+ // used to insert text matching corresponding parenthesized group
+ // from the pattern. \0 in "rewrite" refers to the entire matching
+ // text. E.g.,
+ //
// std::string s = "yabba dabba doo";
- // CHECK(RE2::Replace(&s, "b+", "d"));
- //
- // will leave "s" containing "yada dabba doo"
- //
- // Returns true if the pattern matches and a replacement occurs,
- // false otherwise.
+ // CHECK(RE2::Replace(&s, "b+", "d"));
+ //
+ // will leave "s" containing "yada dabba doo"
+ //
+ // Returns true if the pattern matches and a replacement occurs,
+ // false otherwise.
static bool Replace(std::string* str,
const RE2& re,
- const StringPiece& rewrite);
+ const StringPiece& rewrite);
#if defined(ARCADIA_ROOT)
- static bool Replace(TString *str,
- const RE2& pattern,
- const StringPiece& rewrite) {
+ static bool Replace(TString *str,
+ const RE2& pattern,
+ const StringPiece& rewrite) {
std::string tmp(*str);
- bool res = Replace(&tmp, pattern, rewrite);
- *str = tmp;
- return res;
- }
+ bool res = Replace(&tmp, pattern, rewrite);
+ *str = tmp;
+ return res;
+ }
#endif
-
- // Like Replace(), except replaces successive non-overlapping occurrences
- // of the pattern in the string with the rewrite. E.g.
- //
+
+ // Like Replace(), except replaces successive non-overlapping occurrences
+ // of the pattern in the string with the rewrite. E.g.
+ //
// std::string s = "yabba dabba doo";
- // CHECK(RE2::GlobalReplace(&s, "b+", "d"));
- //
- // will leave "s" containing "yada dada doo"
- // Replacements are not subject to re-matching.
- //
- // Because GlobalReplace only replaces non-overlapping matches,
- // replacing "ana" within "banana" makes only one replacement, not two.
- //
- // Returns the number of replacements made.
+ // CHECK(RE2::GlobalReplace(&s, "b+", "d"));
+ //
+ // will leave "s" containing "yada dada doo"
+ // Replacements are not subject to re-matching.
+ //
+ // Because GlobalReplace only replaces non-overlapping matches,
+ // replacing "ana" within "banana" makes only one replacement, not two.
+ //
+ // Returns the number of replacements made.
static int GlobalReplace(std::string* str,
const RE2& re,
- const StringPiece& rewrite);
-
+ const StringPiece& rewrite);
+
#if defined(ARCADIA_ROOT)
static int GlobalReplace(TString* str,
const RE2& pattern,
const StringPiece& rewrite) {
std::string tmp(*str);
- int res = GlobalReplace(&tmp, pattern, rewrite);
- *str = tmp;
- return res;
- }
+ int res = GlobalReplace(&tmp, pattern, rewrite);
+ *str = tmp;
+ return res;
+ }
#endif
-
- // Like Replace, except that if the pattern matches, "rewrite"
- // is copied into "out" with substitutions. The non-matching
- // portions of "text" are ignored.
- //
- // Returns true iff a match occurred and the extraction happened
- // successfully; if no match occurs, the string is left unaffected.
- //
- // REQUIRES: "text" must not alias any part of "*out".
+
+ // Like Replace, except that if the pattern matches, "rewrite"
+ // is copied into "out" with substitutions. The non-matching
+ // portions of "text" are ignored.
+ //
+ // Returns true iff a match occurred and the extraction happened
+ // successfully; if no match occurs, the string is left unaffected.
+ //
+ // REQUIRES: "text" must not alias any part of "*out".
static bool Extract(const StringPiece& text,
const RE2& re,
const StringPiece& rewrite,
std::string* out);
-
+
#if defined(ARCADIA_ROOT)
static bool Extract(const StringPiece& text,
- const RE2& pattern,
+ const RE2& pattern,
const StringPiece& rewrite,
- TString *out) {
- std::string tmp;
- bool res = Extract(text, pattern, rewrite, &tmp);
- *out = tmp;
- return res;
- }
+ TString *out) {
+ std::string tmp;
+ bool res = Extract(text, pattern, rewrite, &tmp);
+ *out = tmp;
+ return res;
+ }
#endif
-
- // Escapes all potentially meaningful regexp characters in
- // 'unquoted'. The returned string, used as a regular expression,
+
+ // Escapes all potentially meaningful regexp characters in
+ // 'unquoted'. The returned string, used as a regular expression,
// will match exactly the original string. For example,
- // 1.5-2.0?
- // may become:
- // 1\.5\-2\.0\?
+ // 1.5-2.0?
+ // may become:
+ // 1\.5\-2\.0\?
static std::string QuoteMeta(const StringPiece& unquoted);
-
- // Computes range for any strings matching regexp. The min and max can in
- // some cases be arbitrarily precise, so the caller gets to specify the
- // maximum desired length of string returned.
- //
- // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
- // string s that is an anchored match for this regexp satisfies
- // min <= s && s <= max.
- //
- // Note that PossibleMatchRange() will only consider the first copy of an
- // infinitely repeated element (i.e., any regexp element followed by a '*' or
- // '+' operator). Regexps with "{N}" constructions are not affected, as those
- // do not compile down to infinite repetitions.
- //
- // Returns true on success, false on error.
+
+ // Computes range for any strings matching regexp. The min and max can in
+ // some cases be arbitrarily precise, so the caller gets to specify the
+ // maximum desired length of string returned.
+ //
+ // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
+ // string s that is an anchored match for this regexp satisfies
+ // min <= s && s <= max.
+ //
+ // Note that PossibleMatchRange() will only consider the first copy of an
+ // infinitely repeated element (i.e., any regexp element followed by a '*' or
+ // '+' operator). Regexps with "{N}" constructions are not affected, as those
+ // do not compile down to infinite repetitions.
+ //
+ // Returns true on success, false on error.
bool PossibleMatchRange(std::string* min, std::string* max,
int maxlen) const;
-
- // Generic matching interface
-
- // Type of match.
- enum Anchor {
- UNANCHORED, // No anchoring
- ANCHOR_START, // Anchor at start only
- ANCHOR_BOTH // Anchor at start and end
- };
-
- // Return the number of capturing subpatterns, or -1 if the
- // regexp wasn't valid on construction. The overall match ($0)
- // does not count: if the regexp is "(a)(b)", returns 2.
+
+ // Generic matching interface
+
+ // Type of match.
+ enum Anchor {
+ UNANCHORED, // No anchoring
+ ANCHOR_START, // Anchor at start only
+ ANCHOR_BOTH // Anchor at start and end
+ };
+
+ // Return the number of capturing subpatterns, or -1 if the
+ // regexp wasn't valid on construction. The overall match ($0)
+ // does not count: if the regexp is "(a)(b)", returns 2.
int NumberOfCapturingGroups() const { return num_captures_; }
-
- // Return a map from names to capturing indices.
- // The map records the index of the leftmost group
- // with the given name.
- // Only valid until the re is deleted.
+
+ // Return a map from names to capturing indices.
+ // The map records the index of the leftmost group
+ // with the given name.
+ // Only valid until the re is deleted.
const std::map<std::string, int>& NamedCapturingGroups() const;
-
- // Return a map from capturing indices to names.
- // The map has no entries for unnamed groups.
- // Only valid until the re is deleted.
+
+ // Return a map from capturing indices to names.
+ // The map has no entries for unnamed groups.
+ // Only valid until the re is deleted.
const std::map<int, std::string>& CapturingGroupNames() const;
-
- // General matching routine.
- // Match against text starting at offset startpos
- // and stopping the search at offset endpos.
- // Returns true if match found, false if not.
+
+ // General matching routine.
+ // Match against text starting at offset startpos
+ // and stopping the search at offset endpos.
+ // Returns true if match found, false if not.
// On a successful match, fills in submatch[] (up to nsubmatch entries)
- // with information about submatches.
+ // with information about submatches.
// I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with
// submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",
// submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.
// Caveat: submatch[] may be clobbered even on match failure.
- //
- // Don't ask for more match information than you will use:
+ //
+ // Don't ask for more match information than you will use:
// runs much faster with nsubmatch == 1 than nsubmatch > 1, and
// runs even faster if nsubmatch == 0.
// Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
- // but will be handled correctly.
- //
- // Passing text == StringPiece(NULL, 0) will be handled like any other
- // empty string, but note that on return, it will not be possible to tell
- // whether submatch i matched the empty string or did not match:
+ // but will be handled correctly.
+ //
+ // Passing text == StringPiece(NULL, 0) will be handled like any other
+ // empty string, but note that on return, it will not be possible to tell
+ // whether submatch i matched the empty string or did not match:
// either way, submatch[i].data() == NULL.
- bool Match(const StringPiece& text,
- size_t startpos,
- size_t endpos,
+ bool Match(const StringPiece& text,
+ size_t startpos,
+ size_t endpos,
Anchor re_anchor,
StringPiece* submatch,
int nsubmatch) const;
-
- // Check that the given rewrite string is suitable for use with this
- // regular expression. It checks that:
- // * The regular expression has enough parenthesized subexpressions
- // to satisfy all of the \N tokens in rewrite
- // * The rewrite string doesn't have any syntax errors. E.g.,
- // '\' followed by anything other than a digit or '\'.
- // A true return value guarantees that Replace() and Extract() won't
- // fail because of a bad rewrite string.
+
+ // Check that the given rewrite string is suitable for use with this
+ // regular expression. It checks that:
+ // * The regular expression has enough parenthesized subexpressions
+ // to satisfy all of the \N tokens in rewrite
+ // * The rewrite string doesn't have any syntax errors. E.g.,
+ // '\' followed by anything other than a digit or '\'.
+ // A true return value guarantees that Replace() and Extract() won't
+ // fail because of a bad rewrite string.
bool CheckRewriteString(const StringPiece& rewrite,
std::string* error) const;
-
+
bool CheckRewriteString(const StringPiece& rewrite, std::nullptr_t error) const {
return CheckRewriteString(rewrite, static_cast<std::string*>(error));
- }
-
+ }
+
#if defined(ARCADIA_ROOT)
- bool CheckRewriteString(const StringPiece& rewrite, TString* error) const {
- if (error) {
- std::string tmp;
- bool res = CheckRewriteString(rewrite, &tmp);
- error->assign(tmp.data(), tmp.size());
- return res;
- } else {
- return CheckRewriteString(rewrite, nullptr);
- }
- }
+ bool CheckRewriteString(const StringPiece& rewrite, TString* error) const {
+ if (error) {
+ std::string tmp;
+ bool res = CheckRewriteString(rewrite, &tmp);
+ error->assign(tmp.data(), tmp.size());
+ return res;
+ } else {
+ return CheckRewriteString(rewrite, nullptr);
+ }
+ }
#endif
-
- // Returns the maximum submatch needed for the rewrite to be done by
- // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
- static int MaxSubmatch(const StringPiece& rewrite);
-
- // Append the "rewrite" string, with backslash substitutions from "vec",
- // to string "out".
- // Returns true on success. This method can fail because of a malformed
- // rewrite string. CheckRewriteString guarantees that the rewrite will
- // be successful.
+
+ // Returns the maximum submatch needed for the rewrite to be done by
+ // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
+ static int MaxSubmatch(const StringPiece& rewrite);
+
+ // Append the "rewrite" string, with backslash substitutions from "vec",
+ // to string "out".
+ // Returns true on success. This method can fail because of a malformed
+ // rewrite string. CheckRewriteString guarantees that the rewrite will
+ // be successful.
bool Rewrite(std::string* out,
const StringPiece& rewrite,
- const StringPiece* vec,
- int veclen) const;
-
- // Constructor options
- class Options {
- public:
- // The options are (defaults in parentheses):
- //
- // utf8 (true) text and pattern are UTF-8; otherwise Latin-1
- // posix_syntax (false) restrict regexps to POSIX egrep syntax
- // longest_match (false) search for longest match, not first match
- // log_errors (true) log syntax and execution errors to ERROR
- // max_mem (see below) approx. max memory footprint of RE2
- // literal (false) interpret string as literal, not regexp
- // never_nl (false) never match \n, even if it is in regexp
- // dot_nl (false) dot matches everything including new line
- // never_capture (false) parse all parens as non-capturing
- // case_sensitive (true) match is case-sensitive (regexp can override
- // with (?i) unless in posix_syntax mode)
- //
- // The following options are only consulted when posix_syntax == true.
+ const StringPiece* vec,
+ int veclen) const;
+
+ // Constructor options
+ class Options {
+ public:
+ // The options are (defaults in parentheses):
+ //
+ // utf8 (true) text and pattern are UTF-8; otherwise Latin-1
+ // posix_syntax (false) restrict regexps to POSIX egrep syntax
+ // longest_match (false) search for longest match, not first match
+ // log_errors (true) log syntax and execution errors to ERROR
+ // max_mem (see below) approx. max memory footprint of RE2
+ // literal (false) interpret string as literal, not regexp
+ // never_nl (false) never match \n, even if it is in regexp
+ // dot_nl (false) dot matches everything including new line
+ // never_capture (false) parse all parens as non-capturing
+ // case_sensitive (true) match is case-sensitive (regexp can override
+ // with (?i) unless in posix_syntax mode)
+ //
+ // The following options are only consulted when posix_syntax == true.
// When posix_syntax == false, these features are always enabled and
// cannot be turned off; to perform multi-line matching in that case,
// begin the regexp with (?m).
- // perl_classes (false) allow Perl's \d \s \w \D \S \W
- // word_boundary (false) allow Perl's \b \B (word boundary and not)
- // one_line (false) ^ and $ only match beginning and end of text
- //
- // The max_mem option controls how much memory can be used
- // to hold the compiled form of the regexp (the Prog) and
- // its cached DFA graphs. Code Search placed limits on the number
- // of Prog instructions and DFA states: 10,000 for both.
- // In RE2, those limits would translate to about 240 KB per Prog
- // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a
- // better job of keeping them small than Code Search did).
- // Each RE2 has two Progs (one forward, one reverse), and each Prog
- // can have two DFAs (one first match, one longest match).
- // That makes 4 DFAs:
- //
+ // perl_classes (false) allow Perl's \d \s \w \D \S \W
+ // word_boundary (false) allow Perl's \b \B (word boundary and not)
+ // one_line (false) ^ and $ only match beginning and end of text
+ //
+ // The max_mem option controls how much memory can be used
+ // to hold the compiled form of the regexp (the Prog) and
+ // its cached DFA graphs. Code Search placed limits on the number
+ // of Prog instructions and DFA states: 10,000 for both.
+ // In RE2, those limits would translate to about 240 KB per Prog
+ // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a
+ // better job of keeping them small than Code Search did).
+ // Each RE2 has two Progs (one forward, one reverse), and each Prog
+ // can have two DFAs (one first match, one longest match).
+ // That makes 4 DFAs:
+ //
// forward, first-match - used for UNANCHORED or ANCHOR_START searches
- // if opt.longest_match() == false
- // forward, longest-match - used for all ANCHOR_BOTH searches,
- // and the other two kinds if
- // opt.longest_match() == true
- // reverse, first-match - never used
- // reverse, longest-match - used as second phase for unanchored searches
- //
- // The RE2 memory budget is statically divided between the two
- // Progs and then the DFAs: two thirds to the forward Prog
- // and one third to the reverse Prog. The forward Prog gives half
- // of what it has left over to each of its DFAs. The reverse Prog
- // gives it all to its longest-match DFA.
- //
- // Once a DFA fills its budget, it flushes its cache and starts over.
- // If this happens too often, RE2 falls back on the NFA implementation.
-
- // For now, make the default budget something close to Code Search.
- static const int kDefaultMaxMem = 8<<20;
-
- enum Encoding {
- EncodingUTF8 = 1,
- EncodingLatin1
- };
-
- Options() :
- encoding_(EncodingUTF8),
- posix_syntax_(false),
- longest_match_(false),
- log_errors_(true),
- max_mem_(kDefaultMaxMem),
- literal_(false),
- never_nl_(false),
- dot_nl_(false),
- never_capture_(false),
- case_sensitive_(true),
- perl_classes_(false),
- word_boundary_(false),
- one_line_(false) {
- }
-
- /*implicit*/ Options(CannedOptions);
-
- Encoding encoding() const { return encoding_; }
- void set_encoding(Encoding encoding) { encoding_ = encoding; }
-
- bool posix_syntax() const { return posix_syntax_; }
- void set_posix_syntax(bool b) { posix_syntax_ = b; }
-
- bool longest_match() const { return longest_match_; }
- void set_longest_match(bool b) { longest_match_ = b; }
-
- bool log_errors() const { return log_errors_; }
- void set_log_errors(bool b) { log_errors_ = b; }
-
- int64_t max_mem() const { return max_mem_; }
- void set_max_mem(int64_t m) { max_mem_ = m; }
-
- bool literal() const { return literal_; }
- void set_literal(bool b) { literal_ = b; }
-
- bool never_nl() const { return never_nl_; }
- void set_never_nl(bool b) { never_nl_ = b; }
-
- bool dot_nl() const { return dot_nl_; }
- void set_dot_nl(bool b) { dot_nl_ = b; }
-
- bool never_capture() const { return never_capture_; }
- void set_never_capture(bool b) { never_capture_ = b; }
-
- bool case_sensitive() const { return case_sensitive_; }
- void set_case_sensitive(bool b) { case_sensitive_ = b; }
-
- bool perl_classes() const { return perl_classes_; }
- void set_perl_classes(bool b) { perl_classes_ = b; }
-
- bool word_boundary() const { return word_boundary_; }
- void set_word_boundary(bool b) { word_boundary_ = b; }
-
- bool one_line() const { return one_line_; }
- void set_one_line(bool b) { one_line_ = b; }
-
- void Copy(const Options& src) {
- *this = src;
- }
-
- int ParseFlags() const;
-
- private:
- Encoding encoding_;
- bool posix_syntax_;
- bool longest_match_;
- bool log_errors_;
- int64_t max_mem_;
- bool literal_;
- bool never_nl_;
- bool dot_nl_;
- bool never_capture_;
- bool case_sensitive_;
- bool perl_classes_;
- bool word_boundary_;
- bool one_line_;
- };
-
- // Returns the options set in the constructor.
+ // if opt.longest_match() == false
+ // forward, longest-match - used for all ANCHOR_BOTH searches,
+ // and the other two kinds if
+ // opt.longest_match() == true
+ // reverse, first-match - never used
+ // reverse, longest-match - used as second phase for unanchored searches
+ //
+ // The RE2 memory budget is statically divided between the two
+ // Progs and then the DFAs: two thirds to the forward Prog
+ // and one third to the reverse Prog. The forward Prog gives half
+ // of what it has left over to each of its DFAs. The reverse Prog
+ // gives it all to its longest-match DFA.
+ //
+ // Once a DFA fills its budget, it flushes its cache and starts over.
+ // If this happens too often, RE2 falls back on the NFA implementation.
+
+ // For now, make the default budget something close to Code Search.
+ static const int kDefaultMaxMem = 8<<20;
+
+ enum Encoding {
+ EncodingUTF8 = 1,
+ EncodingLatin1
+ };
+
+ Options() :
+ encoding_(EncodingUTF8),
+ posix_syntax_(false),
+ longest_match_(false),
+ log_errors_(true),
+ max_mem_(kDefaultMaxMem),
+ literal_(false),
+ never_nl_(false),
+ dot_nl_(false),
+ never_capture_(false),
+ case_sensitive_(true),
+ perl_classes_(false),
+ word_boundary_(false),
+ one_line_(false) {
+ }
+
+ /*implicit*/ Options(CannedOptions);
+
+ Encoding encoding() const { return encoding_; }
+ void set_encoding(Encoding encoding) { encoding_ = encoding; }
+
+ bool posix_syntax() const { return posix_syntax_; }
+ void set_posix_syntax(bool b) { posix_syntax_ = b; }
+
+ bool longest_match() const { return longest_match_; }
+ void set_longest_match(bool b) { longest_match_ = b; }
+
+ bool log_errors() const { return log_errors_; }
+ void set_log_errors(bool b) { log_errors_ = b; }
+
+ int64_t max_mem() const { return max_mem_; }
+ void set_max_mem(int64_t m) { max_mem_ = m; }
+
+ bool literal() const { return literal_; }
+ void set_literal(bool b) { literal_ = b; }
+
+ bool never_nl() const { return never_nl_; }
+ void set_never_nl(bool b) { never_nl_ = b; }
+
+ bool dot_nl() const { return dot_nl_; }
+ void set_dot_nl(bool b) { dot_nl_ = b; }
+
+ bool never_capture() const { return never_capture_; }
+ void set_never_capture(bool b) { never_capture_ = b; }
+
+ bool case_sensitive() const { return case_sensitive_; }
+ void set_case_sensitive(bool b) { case_sensitive_ = b; }
+
+ bool perl_classes() const { return perl_classes_; }
+ void set_perl_classes(bool b) { perl_classes_ = b; }
+
+ bool word_boundary() const { return word_boundary_; }
+ void set_word_boundary(bool b) { word_boundary_ = b; }
+
+ bool one_line() const { return one_line_; }
+ void set_one_line(bool b) { one_line_ = b; }
+
+ void Copy(const Options& src) {
+ *this = src;
+ }
+
+ int ParseFlags() const;
+
+ private:
+ Encoding encoding_;
+ bool posix_syntax_;
+ bool longest_match_;
+ bool log_errors_;
+ int64_t max_mem_;
+ bool literal_;
+ bool never_nl_;
+ bool dot_nl_;
+ bool never_capture_;
+ bool case_sensitive_;
+ bool perl_classes_;
+ bool word_boundary_;
+ bool one_line_;
+ };
+
+ // Returns the options set in the constructor.
const Options& options() const { return options_; }
-
- // Argument converters; see below.
+
+ // Argument converters; see below.
template <typename T>
static Arg CRadix(T* ptr);
template <typename T>
static Arg Hex(T* ptr);
template <typename T>
static Arg Octal(T* ptr);
-
- private:
- void Init(const StringPiece& pattern, const Options& options);
-
- bool DoMatch(const StringPiece& text,
+
+ private:
+ void Init(const StringPiece& pattern, const Options& options);
+
+ bool DoMatch(const StringPiece& text,
Anchor re_anchor,
- size_t* consumed,
- const Arg* const args[],
- int n) const;
-
- re2::Prog* ReverseProg() const;
-
+ size_t* consumed,
+ const Arg* const args[],
+ int n) const;
+
+ re2::Prog* ReverseProg() const;
+
std::string pattern_; // string regular expression
Options options_; // option flags
re2::Regexp* entire_regexp_; // parsed regular expression
@@ -822,26 +822,26 @@ class RE2 {
re2::Prog* prog_; // compiled program for regexp
int num_captures_; // number of capturing groups
bool is_one_pass_; // can use prog_->SearchOnePass?
-
+
// Reverse Prog for DFA execution only
mutable re2::Prog* rprog_;
- // Map from capture names to indices
+ // Map from capture names to indices
mutable const std::map<std::string, int>* named_groups_;
- // Map from capture indices to names
+ // Map from capture indices to names
mutable const std::map<int, std::string>* group_names_;
-
- mutable std::once_flag rprog_once_;
- mutable std::once_flag named_groups_once_;
- mutable std::once_flag group_names_once_;
-
- RE2(const RE2&) = delete;
- RE2& operator=(const RE2&) = delete;
-};
-
-/***** Implementation details *****/
-
+
+ mutable std::once_flag rprog_once_;
+ mutable std::once_flag named_groups_once_;
+ mutable std::once_flag group_names_once_;
+
+ RE2(const RE2&) = delete;
+ RE2& operator=(const RE2&) = delete;
+};
+
+/***** Implementation details *****/
+
namespace re2_internal {
-
+
// Types for which the 3-ary Parse() function template has specializations.
template <typename T> struct Parse3ary : public std::false_type {};
template <> struct Parse3ary<void> : public std::true_type {};
@@ -855,7 +855,7 @@ template <> struct Parse3ary<signed char> : public std::true_type {};
template <> struct Parse3ary<unsigned char> : public std::true_type {};
template <> struct Parse3ary<float> : public std::true_type {};
template <> struct Parse3ary<double> : public std::true_type {};
-
+
template <typename T>
bool Parse(const char* str, size_t n, T* dest);
@@ -875,18 +875,18 @@ bool Parse(const char* str, size_t n, T* dest, int radix);
} // namespace re2_internal
-class RE2::Arg {
+class RE2::Arg {
private:
template <typename T>
using CanParse3ary = typename std::enable_if<
re2_internal::Parse3ary<T>::value,
int>::type;
-
+
template <typename T>
using CanParse4ary = typename std::enable_if<
re2_internal::Parse4ary<T>::value,
int>::type;
-
+
#if !defined(_MSC_VER)
template <typename T>
using CanParseFrom = typename std::enable_if<
@@ -895,46 +895,46 @@ class RE2::Arg {
&T::ParseFrom))>::value,
int>::type;
#endif
-
+
public:
Arg() : Arg(nullptr) {}
Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}
-
+
template <typename T, CanParse3ary<T> = 0>
Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
-
+
template <typename T, CanParse4ary<T> = 0>
Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
-
+
#if !defined(_MSC_VER)
template <typename T, CanParseFrom<T> = 0>
Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
#endif
-
+
typedef bool (*Parser)(const char* str, size_t n, void* dest);
-
+
template <typename T>
Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}
-
+
bool Parse(const char* str, size_t n) const {
return (*parser_)(str, n, arg_);
}
- private:
+ private:
static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
return true;
}
-
+
template <typename T>
static bool DoParse3ary(const char* str, size_t n, void* dest) {
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));
}
-
+
template <typename T>
static bool DoParse4ary(const char* str, size_t n, void* dest) {
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
}
-
+
#if !defined(_MSC_VER)
template <typename T>
static bool DoParseFrom(const char* str, size_t n, void* dest) {
@@ -942,85 +942,85 @@ class RE2::Arg {
return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
}
#endif
-
+
void* arg_;
Parser parser_;
-};
-
+};
+
template <typename T>
inline RE2::Arg RE2::CRadix(T* ptr) {
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0);
});
}
-
+
template <typename T>
inline RE2::Arg RE2::Hex(T* ptr) {
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16);
});
-}
-
+}
+
template <typename T>
inline RE2::Arg RE2::Octal(T* ptr) {
return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool {
return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8);
});
}
-
-#ifndef SWIG
-// Silence warnings about missing initializers for members of LazyRE2.
+
+#ifndef SWIG
+// Silence warnings about missing initializers for members of LazyRE2.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
-#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+
+// Helper for writing global or static RE2s safely.
+// Write
+// static LazyRE2 re = {".*"};
+// and then use *re instead of writing
+// static RE2 re(".*");
+// The former is more careful about multithreaded
+// situations than the latter.
+//
+// N.B. This class never deletes the RE2 object that
+// it constructs: that's a feature, so that it can be used
+// for global and function static variables.
+class LazyRE2 {
+ private:
+ struct NoArg {};
+
+ public:
+ typedef RE2 element_type; // support std::pointer_traits
+
+ // Constructor omitted to preserve braced initialization in C++98.
+
+ // Pretend to be a pointer to Type (never NULL due to on-demand creation):
+ RE2& operator*() const { return *get(); }
+ RE2* operator->() const { return get(); }
+
+ // Named accessor/initializer:
+ RE2* get() const {
+ std::call_once(once_, &LazyRE2::Init, this);
+ return ptr_;
+ }
+
+ // All data fields must be public to support {"foo"} initialization.
+ const char* pattern_;
+ RE2::CannedOptions options_;
+ NoArg barrier_against_excess_initializers_;
+
+ mutable RE2* ptr_;
+ mutable std::once_flag once_;
+
+ private:
+ static void Init(const LazyRE2* lazy_re2) {
+ lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_);
+ }
+
+ void operator=(const LazyRE2&); // disallowed
+};
#endif
-
-// Helper for writing global or static RE2s safely.
-// Write
-// static LazyRE2 re = {".*"};
-// and then use *re instead of writing
-// static RE2 re(".*");
-// The former is more careful about multithreaded
-// situations than the latter.
-//
-// N.B. This class never deletes the RE2 object that
-// it constructs: that's a feature, so that it can be used
-// for global and function static variables.
-class LazyRE2 {
- private:
- struct NoArg {};
-
- public:
- typedef RE2 element_type; // support std::pointer_traits
-
- // Constructor omitted to preserve braced initialization in C++98.
-
- // Pretend to be a pointer to Type (never NULL due to on-demand creation):
- RE2& operator*() const { return *get(); }
- RE2* operator->() const { return get(); }
-
- // Named accessor/initializer:
- RE2* get() const {
- std::call_once(once_, &LazyRE2::Init, this);
- return ptr_;
- }
-
- // All data fields must be public to support {"foo"} initialization.
- const char* pattern_;
- RE2::CannedOptions options_;
- NoArg barrier_against_excess_initializers_;
-
- mutable RE2* ptr_;
- mutable std::once_flag once_;
-
- private:
- static void Init(const LazyRE2* lazy_re2) {
- lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_);
- }
-
- void operator=(const LazyRE2&); // disallowed
-};
-#endif
-
+
namespace hooks {
// Most platforms support thread_local. Older versions of iOS don't support
@@ -1069,9 +1069,9 @@ DECLARE_HOOK(DFASearchFailure)
} // namespace hooks
-} // namespace re2
-
-using re2::RE2;
-using re2::LazyRE2;
-
-#endif // RE2_RE2_H_
+} // namespace re2
+
+using re2::RE2;
+using re2::LazyRE2;
+
+#endif // RE2_RE2_H_
diff --git a/contrib/libs/re2/re2/regexp.cc b/contrib/libs/re2/re2/regexp.cc
index ca1318b43d..c583f3e593 100644
--- a/contrib/libs/re2/re2/regexp.cc
+++ b/contrib/libs/re2/re2/regexp.cc
@@ -6,31 +6,31 @@
// Tested by parse_test.cc
#include "re2/regexp.h"
-
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <algorithm>
-#include <map>
-#include <mutex>
-#include <string>
-#include <vector>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/mutex.h"
-#include "util/utf.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <algorithm>
+#include <map>
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/mutex.h"
+#include "util/utf.h"
#include "re2/pod_array.h"
-#include "re2/stringpiece.h"
+#include "re2/stringpiece.h"
#include "re2/walker-inl.h"
namespace re2 {
// Constructor. Allocates vectors as appropriate for operator.
Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)
- : op_(static_cast<uint8_t>(op)),
+ : op_(static_cast<uint8_t>(op)),
simple_(false),
- parse_flags_(static_cast<uint16_t>(parse_flags)),
+ parse_flags_(static_cast<uint16_t>(parse_flags)),
ref_(1),
nsub_(0),
down_(NULL) {
@@ -57,8 +57,8 @@ Regexp::~Regexp() {
delete[] runes_;
break;
case kRegexpCharClass:
- if (cc_)
- cc_->Delete();
+ if (cc_)
+ cc_->Delete();
delete ccb_;
break;
}
@@ -74,36 +74,36 @@ bool Regexp::QuickDestroy() {
return false;
}
-// Lazily allocated.
-static Mutex* ref_mutex;
-static std::map<Regexp*, int>* ref_map;
+// Lazily allocated.
+static Mutex* ref_mutex;
+static std::map<Regexp*, int>* ref_map;
int Regexp::Ref() {
if (ref_ < kMaxRef)
return ref_;
- MutexLock l(ref_mutex);
- return (*ref_map)[this];
+ MutexLock l(ref_mutex);
+ return (*ref_map)[this];
}
// Increments reference count, returns object as convenience.
Regexp* Regexp::Incref() {
if (ref_ >= kMaxRef-1) {
- static std::once_flag ref_once;
- std::call_once(ref_once, []() {
- ref_mutex = new Mutex;
- ref_map = new std::map<Regexp*, int>;
- });
-
+ static std::once_flag ref_once;
+ std::call_once(ref_once, []() {
+ ref_mutex = new Mutex;
+ ref_map = new std::map<Regexp*, int>;
+ });
+
// Store ref count in overflow map.
- MutexLock l(ref_mutex);
- if (ref_ == kMaxRef) {
- // already overflowed
- (*ref_map)[this]++;
- } else {
- // overflowing now
- (*ref_map)[this] = kMaxRef;
- ref_ = kMaxRef;
+ MutexLock l(ref_mutex);
+ if (ref_ == kMaxRef) {
+ // already overflowed
+ (*ref_map)[this]++;
+ } else {
+ // overflowing now
+ (*ref_map)[this] = kMaxRef;
+ ref_ = kMaxRef;
}
return this;
}
@@ -116,13 +116,13 @@ Regexp* Regexp::Incref() {
void Regexp::Decref() {
if (ref_ == kMaxRef) {
// Ref count is stored in overflow map.
- MutexLock l(ref_mutex);
- int r = (*ref_map)[this] - 1;
+ MutexLock l(ref_mutex);
+ int r = (*ref_map)[this] - 1;
if (r < kMaxRef) {
- ref_ = static_cast<uint16_t>(r);
- ref_map->erase(this);
+ ref_ = static_cast<uint16_t>(r);
+ ref_map->erase(this);
} else {
- (*ref_map)[this] = r;
+ (*ref_map)[this] = r;
}
return;
}
@@ -191,45 +191,45 @@ Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) {
return re;
}
-Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) {
- // Squash **, ++ and ??.
- if (op == sub->op() && flags == sub->parse_flags())
+Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) {
+ // Squash **, ++ and ??.
+ if (op == sub->op() && flags == sub->parse_flags())
return sub;
-
- // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
- // op is Star/Plus/Quest, we just have to check that sub->op() is too.
- if ((sub->op() == kRegexpStar ||
- sub->op() == kRegexpPlus ||
- sub->op() == kRegexpQuest) &&
- flags == sub->parse_flags()) {
- // If sub is Star, no need to rewrite it.
- if (sub->op() == kRegexpStar)
- return sub;
-
- // Rewrite sub to Star.
- Regexp* re = new Regexp(kRegexpStar, flags);
- re->AllocSub(1);
- re->sub()[0] = sub->sub()[0]->Incref();
- sub->Decref(); // We didn't consume the reference after all.
- return re;
- }
-
- Regexp* re = new Regexp(op, flags);
+
+ // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
+ // op is Star/Plus/Quest, we just have to check that sub->op() is too.
+ if ((sub->op() == kRegexpStar ||
+ sub->op() == kRegexpPlus ||
+ sub->op() == kRegexpQuest) &&
+ flags == sub->parse_flags()) {
+ // If sub is Star, no need to rewrite it.
+ if (sub->op() == kRegexpStar)
+ return sub;
+
+ // Rewrite sub to Star.
+ Regexp* re = new Regexp(kRegexpStar, flags);
+ re->AllocSub(1);
+ re->sub()[0] = sub->sub()[0]->Incref();
+ sub->Decref(); // We didn't consume the reference after all.
+ return re;
+ }
+
+ Regexp* re = new Regexp(op, flags);
re->AllocSub(1);
re->sub()[0] = sub;
return re;
}
-Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
- return StarPlusOrQuest(kRegexpPlus, sub, flags);
-}
-
+Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
+ return StarPlusOrQuest(kRegexpPlus, sub, flags);
+}
+
Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {
- return StarPlusOrQuest(kRegexpStar, sub, flags);
+ return StarPlusOrQuest(kRegexpStar, sub, flags);
}
Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {
- return StarPlusOrQuest(kRegexpQuest, sub, flags);
+ return StarPlusOrQuest(kRegexpQuest, sub, flags);
}
Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
@@ -237,13 +237,13 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
if (nsub == 1)
return sub[0];
- if (nsub == 0) {
- if (op == kRegexpAlternate)
- return new Regexp(kRegexpNoMatch, flags);
- else
- return new Regexp(kRegexpEmptyMatch, flags);
- }
-
+ if (nsub == 0) {
+ if (op == kRegexpAlternate)
+ return new Regexp(kRegexpNoMatch, flags);
+ else
+ return new Regexp(kRegexpEmptyMatch, flags);
+ }
+
PODArray<Regexp*> subcopy;
if (op == kRegexpAlternate && can_factor) {
// Going to edit sub; make a copy so we don't step on caller.
@@ -436,7 +436,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) {
// The stack (vector) has pairs of regexps waiting to
// be compared. The regexps are only equal if
// all the pairs end up being equal.
- std::vector<Regexp*> stk;
+ std::vector<Regexp*> stk;
for (;;) {
// Invariant: TopEqual(a, b) == true.
@@ -476,11 +476,11 @@ bool Regexp::Equal(Regexp* a, Regexp* b) {
continue;
}
- size_t n = stk.size();
+ size_t n = stk.size();
if (n == 0)
break;
- DCHECK_GE(n, 2);
+ DCHECK_GE(n, 2);
a = stk[n-2];
b = stk[n-1];
stk.resize(n-2);
@@ -490,7 +490,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) {
}
// Keep in sync with enum RegexpStatusCode in regexp.h
-static const char *kErrorStrings[] = {
+static const char *kErrorStrings[] = {
"no error",
"unexpected error",
"invalid escape sequence",
@@ -553,9 +553,9 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> {
private:
int ncapture_;
-
- NumCapturesWalker(const NumCapturesWalker&) = delete;
- NumCapturesWalker& operator=(const NumCapturesWalker&) = delete;
+
+ NumCapturesWalker(const NumCapturesWalker&) = delete;
+ NumCapturesWalker& operator=(const NumCapturesWalker&) = delete;
};
int Regexp::NumCaptures() {
@@ -600,9 +600,9 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
private:
std::map<std::string, int>* map_;
-
- NamedCapturesWalker(const NamedCapturesWalker&) = delete;
- NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete;
+
+ NamedCapturesWalker(const NamedCapturesWalker&) = delete;
+ NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete;
};
std::map<std::string, int>* Regexp::NamedCaptures() {
@@ -644,9 +644,9 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
private:
std::map<int, std::string>* map_;
-
- CaptureNamesWalker(const CaptureNamesWalker&) = delete;
- CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete;
+
+ CaptureNamesWalker(const CaptureNamesWalker&) = delete;
+ CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete;
};
std::map<int, std::string>* Regexp::CaptureNames() {
@@ -690,13 +690,13 @@ bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
int i = 0;
while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
i++;
- if (i == 0 || i >= nsub_)
+ if (i == 0 || i >= nsub_)
return false;
Regexp* re = sub()[i];
if (re->op_ != kRegexpLiteral &&
re->op_ != kRegexpLiteralString)
return false;
- i++;
+ i++;
if (i < nsub_) {
for (int j = i; j < nsub_; j++)
sub()[j]->Incref();
@@ -761,13 +761,13 @@ bool CharClassBuilder::AddRange(Rune lo, Rune hi) {
if (lo <= 'z' && hi >= 'A') {
// Overlaps some alpha, maybe not all.
// Update bitmaps telling which ASCII letters are in the set.
- Rune lo1 = std::max<Rune>(lo, 'A');
- Rune hi1 = std::min<Rune>(hi, 'Z');
+ Rune lo1 = std::max<Rune>(lo, 'A');
+ Rune hi1 = std::min<Rune>(hi, 'Z');
if (lo1 <= hi1)
upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A');
- lo1 = std::max<Rune>(lo, 'a');
- hi1 = std::min<Rune>(hi, 'z');
+ lo1 = std::max<Rune>(lo, 'a');
+ hi1 = std::min<Rune>(hi, 'z');
if (lo1 <= hi1)
lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a');
}
@@ -883,7 +883,7 @@ void CharClassBuilder::RemoveAbove(Rune r) {
void CharClassBuilder::Negate() {
// Build up negation and then copy in.
// Could edit ranges in place, but C++ won't let me.
- std::vector<RuneRange> v;
+ std::vector<RuneRange> v;
v.reserve(ranges_.size() + 1);
// In negation, first range begins at 0, unless
@@ -906,7 +906,7 @@ void CharClassBuilder::Negate() {
}
ranges_.clear();
- for (size_t i = 0; i < v.size(); i++)
+ for (size_t i = 0; i < v.size(); i++)
ranges_.insert(v[i]);
upper_ = AlphaMask & ~upper_;
@@ -920,7 +920,7 @@ void CharClassBuilder::Negate() {
CharClass* CharClass::New(size_t maxranges) {
CharClass* cc;
- uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
+ uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
cc = reinterpret_cast<CharClass*>(data);
cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc);
cc->nranges_ = 0;
@@ -930,7 +930,7 @@ CharClass* CharClass::New(size_t maxranges) {
}
void CharClass::Delete() {
- uint8_t* data = reinterpret_cast<uint8_t*>(this);
+ uint8_t* data = reinterpret_cast<uint8_t*>(this);
delete[] data;
}
@@ -977,7 +977,7 @@ CharClass* CharClassBuilder::GetCharClass() {
for (iterator it = begin(); it != end(); ++it)
cc->ranges_[n++] = *it;
cc->nranges_ = n;
- DCHECK_LE(n, static_cast<int>(ranges_.size()));
+ DCHECK_LE(n, static_cast<int>(ranges_.size()));
cc->nrunes_ = nrunes_;
cc->folds_ascii_ = FoldsASCII();
return cc;
diff --git a/contrib/libs/re2/re2/regexp.h b/contrib/libs/re2/re2/regexp.h
index b6446f9fe5..164e93392a 100644
--- a/contrib/libs/re2/re2/regexp.h
+++ b/contrib/libs/re2/re2/regexp.h
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_REGEXP_H_
-#define RE2_REGEXP_H_
-
+#ifndef RE2_REGEXP_H_
+#define RE2_REGEXP_H_
+
// --- SPONSORED LINK --------------------------------------------------
// If you want to use this library for regular expression matching,
// you should use re2/re2.h, which provides a class RE2 that
@@ -87,15 +87,15 @@
// parsed regular expressions.
#include <stddef.h>
-#include <stdint.h>
-#include <map>
-#include <set>
-#include <string>
+#include <stdint.h>
+#include <map>
+#include <set>
+#include <string>
#include "util/util.h"
#include "util/logging.h"
#include "util/utf.h"
-#include "re2/stringpiece.h"
+#include "re2/stringpiece.h"
namespace re2 {
@@ -194,10 +194,10 @@ class RegexpStatus {
RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
~RegexpStatus() { delete tmp_; }
- void set_code(RegexpStatusCode code) { code_ = code; }
+ void set_code(RegexpStatusCode code) { code_ = code; }
void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
- RegexpStatusCode code() const { return code_; }
+ RegexpStatusCode code() const { return code_; }
const StringPiece& error_arg() const { return error_arg_; }
bool ok() const { return code() == kRegexpSuccess; }
@@ -213,12 +213,12 @@ class RegexpStatus {
std::string Text() const;
private:
- RegexpStatusCode code_; // Kind of error
+ RegexpStatusCode code_; // Kind of error
StringPiece error_arg_; // Piece of regexp containing syntax error.
std::string* tmp_; // Temporary storage, possibly where error_arg_ is.
- RegexpStatus(const RegexpStatus&) = delete;
- RegexpStatus& operator=(const RegexpStatus&) = delete;
+ RegexpStatus(const RegexpStatus&) = delete;
+ RegexpStatus& operator=(const RegexpStatus&) = delete;
};
// Compiled form; see prog.h
@@ -268,9 +268,9 @@ class CharClass {
int nrunes_;
RuneRange *ranges_;
int nranges_;
-
- CharClass(const CharClass&) = delete;
- CharClass& operator=(const CharClass&) = delete;
+
+ CharClass(const CharClass&) = delete;
+ CharClass& operator=(const CharClass&) = delete;
};
class Regexp {
@@ -278,52 +278,52 @@ class Regexp {
// Flags for parsing. Can be ORed together.
enum ParseFlags {
- NoParseFlags = 0,
- FoldCase = 1<<0, // Fold case during matching (case-insensitive).
- Literal = 1<<1, // Treat s as literal string instead of a regexp.
- ClassNL = 1<<2, // Allow char classes like [^a-z] and \D and \s
- // and [[:space:]] to match newline.
- DotNL = 1<<3, // Allow . to match newline.
- MatchNL = ClassNL | DotNL,
- OneLine = 1<<4, // Treat ^ and $ as only matching at beginning and
- // end of text, not around embedded newlines.
- // (Perl's default)
- Latin1 = 1<<5, // Regexp and text are in Latin1, not UTF-8.
- NonGreedy = 1<<6, // Repetition operators are non-greedy by default.
- PerlClasses = 1<<7, // Allow Perl character classes like \d.
- PerlB = 1<<8, // Allow Perl's \b and \B.
- PerlX = 1<<9, // Perl extensions:
- // non-capturing parens - (?: )
- // non-greedy operators - *? +? ?? {}?
- // flag edits - (?i) (?-i) (?i: )
- // i - FoldCase
- // m - !OneLine
- // s - DotNL
- // U - NonGreedy
- // line ends: \A \z
- // \Q and \E to disable/enable metacharacters
- // (?P<name>expr) for named captures
- // \C to match any single byte
- UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
- // and \P{Han} for its negation.
- NeverNL = 1<<11, // Never match NL, even if the regexp mentions
- // it explicitly.
- NeverCapture = 1<<12, // Parse all parens as non-capturing.
+ NoParseFlags = 0,
+ FoldCase = 1<<0, // Fold case during matching (case-insensitive).
+ Literal = 1<<1, // Treat s as literal string instead of a regexp.
+ ClassNL = 1<<2, // Allow char classes like [^a-z] and \D and \s
+ // and [[:space:]] to match newline.
+ DotNL = 1<<3, // Allow . to match newline.
+ MatchNL = ClassNL | DotNL,
+ OneLine = 1<<4, // Treat ^ and $ as only matching at beginning and
+ // end of text, not around embedded newlines.
+ // (Perl's default)
+ Latin1 = 1<<5, // Regexp and text are in Latin1, not UTF-8.
+ NonGreedy = 1<<6, // Repetition operators are non-greedy by default.
+ PerlClasses = 1<<7, // Allow Perl character classes like \d.
+ PerlB = 1<<8, // Allow Perl's \b and \B.
+ PerlX = 1<<9, // Perl extensions:
+ // non-capturing parens - (?: )
+ // non-greedy operators - *? +? ?? {}?
+ // flag edits - (?i) (?-i) (?i: )
+ // i - FoldCase
+ // m - !OneLine
+ // s - DotNL
+ // U - NonGreedy
+ // line ends: \A \z
+ // \Q and \E to disable/enable metacharacters
+ // (?P<name>expr) for named captures
+ // \C to match any single byte
+ UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
+ // and \P{Han} for its negation.
+ NeverNL = 1<<11, // Never match NL, even if the regexp mentions
+ // it explicitly.
+ NeverCapture = 1<<12, // Parse all parens as non-capturing.
// As close to Perl as we can get.
- LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
- UnicodeGroups,
+ LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
+ UnicodeGroups,
// Internal use only.
- WasDollar = 1<<13, // on kRegexpEndText: was $ in regexp text
- AllParseFlags = (1<<14)-1,
+ WasDollar = 1<<13, // on kRegexpEndText: was $ in regexp text
+ AllParseFlags = (1<<14)-1,
};
// Get. No set, Regexps are logically immutable once created.
RegexpOp op() { return static_cast<RegexpOp>(op_); }
int nsub() { return nsub_; }
- bool simple() { return simple_ != 0; }
- ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
+ bool simple() { return simple_ != 0; }
+ ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
int Ref(); // For testing.
Regexp** sub() {
@@ -363,7 +363,7 @@ class Regexp {
// removed. The result will capture exactly the same
// subexpressions the original did, unless formatted with ToString.
Regexp* Simplify();
- friend class CoalesceWalker;
+ friend class CoalesceWalker;
friend class SimplifyWalker;
// Parses the regexp src and then simplifies it and sets *dst to the
@@ -420,8 +420,8 @@ class Regexp {
// Construction and execution of prog will
// stay within approximately max_mem bytes of memory.
// If max_mem <= 0, a reasonable default is used.
- Prog* CompileToProg(int64_t max_mem);
- Prog* CompileToReverseProg(int64_t max_mem);
+ Prog* CompileToProg(int64_t max_mem);
+ Prog* CompileToReverseProg(int64_t max_mem);
// Whether to expect this library to find exactly the same answer as PCRE
// when running this regexp. Most regexps do mimic PCRE exactly, but a few
@@ -465,7 +465,7 @@ class Regexp {
// Helpers for Parse. Listed here so they can edit Regexps.
class ParseState;
-
+
friend class ParseState;
friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
RegexpStatus* status);
@@ -476,10 +476,10 @@ class Regexp {
// Computes whether Regexp is already simple.
bool ComputeSimple();
- // Constructor that generates a Star, Plus or Quest,
- // squashing the pair if sub is also a Star, Plus or Quest.
- static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags);
-
+ // Constructor that generates a Star, Plus or Quest,
+ // squashing the pair if sub is also a Star, Plus or Quest.
+ static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags);
+
// Constructor that generates a concatenation or alternation,
// enforcing the limit on the number of subexpressions for
// a particular Regexp.
@@ -516,7 +516,7 @@ class Regexp {
// Allocate space for n sub-regexps.
void AllocSub(int n) {
- DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
+ DCHECK(n >= 0 && static_cast<uint16_t>(n) == n);
if (n > 1)
submany_ = new Regexp*[n];
nsub_ = static_cast<uint16_t>(n);
@@ -529,38 +529,38 @@ class Regexp {
void Swap(Regexp *that);
// Operator. See description of operators above.
- // uint8_t instead of RegexpOp to control space usage.
- uint8_t op_;
+ // uint8_t instead of RegexpOp to control space usage.
+ uint8_t op_;
// Is this regexp structure already simple
// (has it been returned by Simplify)?
- // uint8_t instead of bool to control space usage.
- uint8_t simple_;
+ // uint8_t instead of bool to control space usage.
+ uint8_t simple_;
// Flags saved from parsing and used during execution.
// (Only FoldCase is used.)
- // uint16_t instead of ParseFlags to control space usage.
- uint16_t parse_flags_;
+ // uint16_t instead of ParseFlags to control space usage.
+ uint16_t parse_flags_;
// Reference count. Exists so that SimplifyRegexp can build
// regexp structures that are dags rather than trees to avoid
// exponential blowup in space requirements.
- // uint16_t to control space usage.
+ // uint16_t to control space usage.
// The standard regexp routines will never generate a
- // ref greater than the maximum repeat count (kMaxRepeat),
+ // ref greater than the maximum repeat count (kMaxRepeat),
// but even so, Incref and Decref consult an overflow map
// when ref_ reaches kMaxRef.
- uint16_t ref_;
- static const uint16_t kMaxRef = 0xffff;
+ uint16_t ref_;
+ static const uint16_t kMaxRef = 0xffff;
// Subexpressions.
- // uint16_t to control space usage.
+ // uint16_t to control space usage.
// Concat and Alternate handle larger numbers of subexpressions
// by building concatenation or alternation trees.
// Other routines should call Concat or Alternate instead of
// filling in sub() by hand.
- uint16_t nsub_;
- static const uint16_t kMaxNsub = 0xffff;
+ uint16_t nsub_;
+ static const uint16_t kMaxNsub = 0xffff;
union {
Regexp** submany_; // if nsub_ > 1
Regexp* subone_; // if nsub_ == 1
@@ -595,12 +595,12 @@ class Regexp {
void *the_union_[2]; // as big as any other element, for memset
};
- Regexp(const Regexp&) = delete;
- Regexp& operator=(const Regexp&) = delete;
+ Regexp(const Regexp&) = delete;
+ Regexp& operator=(const Regexp&) = delete;
};
// Character class set: contains non-overlapping, non-abutting RuneRanges.
-typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet;
+typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet;
class CharClassBuilder {
public:
@@ -625,41 +625,41 @@ class CharClassBuilder {
void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
private:
- static const uint32_t AlphaMask = (1<<26) - 1;
- uint32_t upper_; // bitmap of A-Z
- uint32_t lower_; // bitmap of a-z
+ static const uint32_t AlphaMask = (1<<26) - 1;
+ uint32_t upper_; // bitmap of A-Z
+ uint32_t lower_; // bitmap of a-z
int nrunes_;
RuneRangeSet ranges_;
-
- CharClassBuilder(const CharClassBuilder&) = delete;
- CharClassBuilder& operator=(const CharClassBuilder&) = delete;
+
+ CharClassBuilder(const CharClassBuilder&) = delete;
+ CharClassBuilder& operator=(const CharClassBuilder&) = delete;
};
-// Bitwise ops on ParseFlags produce ParseFlags.
-inline Regexp::ParseFlags operator|(Regexp::ParseFlags a,
- Regexp::ParseFlags b) {
- return static_cast<Regexp::ParseFlags>(
- static_cast<int>(a) | static_cast<int>(b));
+// Bitwise ops on ParseFlags produce ParseFlags.
+inline Regexp::ParseFlags operator|(Regexp::ParseFlags a,
+ Regexp::ParseFlags b) {
+ return static_cast<Regexp::ParseFlags>(
+ static_cast<int>(a) | static_cast<int>(b));
}
-inline Regexp::ParseFlags operator^(Regexp::ParseFlags a,
- Regexp::ParseFlags b) {
- return static_cast<Regexp::ParseFlags>(
- static_cast<int>(a) ^ static_cast<int>(b));
+inline Regexp::ParseFlags operator^(Regexp::ParseFlags a,
+ Regexp::ParseFlags b) {
+ return static_cast<Regexp::ParseFlags>(
+ static_cast<int>(a) ^ static_cast<int>(b));
}
-inline Regexp::ParseFlags operator&(Regexp::ParseFlags a,
- Regexp::ParseFlags b) {
- return static_cast<Regexp::ParseFlags>(
- static_cast<int>(a) & static_cast<int>(b));
+inline Regexp::ParseFlags operator&(Regexp::ParseFlags a,
+ Regexp::ParseFlags b) {
+ return static_cast<Regexp::ParseFlags>(
+ static_cast<int>(a) & static_cast<int>(b));
}
-inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) {
- // Attempting to produce a value out of enum's range has undefined behaviour.
- return static_cast<Regexp::ParseFlags>(
- ~static_cast<int>(a) & static_cast<int>(Regexp::AllParseFlags));
+inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) {
+ // Attempting to produce a value out of enum's range has undefined behaviour.
+ return static_cast<Regexp::ParseFlags>(
+ ~static_cast<int>(a) & static_cast<int>(Regexp::AllParseFlags));
}
} // namespace re2
-
-#endif // RE2_REGEXP_H_
+
+#endif // RE2_REGEXP_H_
diff --git a/contrib/libs/re2/re2/set.cc b/contrib/libs/re2/re2/set.cc
index 18705663a5..81b100c0d4 100644
--- a/contrib/libs/re2/re2/set.cc
+++ b/contrib/libs/re2/re2/set.cc
@@ -2,22 +2,22 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "re2/set.h"
+#include "re2/set.h"
-#include <stddef.h>
+#include <stddef.h>
#include <algorithm>
#include <memory>
#include <utility>
-
-#include "util/util.h"
-#include "util/logging.h"
+
+#include "util/util.h"
+#include "util/logging.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
-#include "re2/re2.h"
+#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/stringpiece.h"
-namespace re2 {
+namespace re2 {
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
: options_(options),
@@ -61,7 +61,7 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
options_.ParseFlags());
RegexpStatus status;
- re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
+ re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
if (re == NULL) {
if (error != NULL)
*error = status.Text();
@@ -72,7 +72,7 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
// Concatenate with match index and push on vector.
int n = static_cast<int>(elem_.size());
- re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
+ re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
if (re->op() == kRegexpConcat) {
int nsub = re->nsub();
PODArray<re2::Regexp*> sub(nsub + 1);
@@ -82,10 +82,10 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
re->Decref();
re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
} else {
- re2::Regexp* sub[2];
+ re2::Regexp* sub[2];
sub[0] = re;
sub[1] = m;
- re = re2::Regexp::Concat(sub, 2, pf);
+ re = re2::Regexp::Concat(sub, 2, pf);
}
elem_.emplace_back(std::string(pattern), re);
return n;
@@ -140,20 +140,20 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
std::unique_ptr<SparseSet> matches;
if (v != NULL) {
matches.reset(new SparseSet(size_));
- v->clear();
+ v->clear();
}
bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
NULL, &dfa_failed, matches.get());
- if (dfa_failed) {
- if (options_.log_errors())
+ if (dfa_failed) {
+ if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
if (error_info != NULL)
error_info->kind = kOutOfMemory;
- return false;
- }
+ return false;
+ }
if (ret == false) {
if (error_info != NULL)
error_info->kind = kNoError;
@@ -172,5 +172,5 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
error_info->kind = kNoError;
return true;
}
-
-} // namespace re2
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/set.h b/contrib/libs/re2/re2/set.h
index 8d64f30ccd..a23cc6cc21 100644
--- a/contrib/libs/re2/re2/set.h
+++ b/contrib/libs/re2/re2/set.h
@@ -1,28 +1,28 @@
-// Copyright 2010 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_SET_H_
-#define RE2_SET_H_
-
+// Copyright 2010 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_SET_H_
+#define RE2_SET_H_
+
#include <memory>
-#include <string>
+#include <string>
#include <utility>
-#include <vector>
-
-#include "re2/re2.h"
-
-namespace re2 {
-class Prog;
-class Regexp;
-} // namespace re2
-
-namespace re2 {
-
-// An RE2::Set represents a collection of regexps that can
-// be searched for simultaneously.
-class RE2::Set {
- public:
+#include <vector>
+
+#include "re2/re2.h"
+
+namespace re2 {
+class Prog;
+class Regexp;
+} // namespace re2
+
+namespace re2 {
+
+// An RE2::Set represents a collection of regexps that can
+// be searched for simultaneously.
+class RE2::Set {
+ public:
enum ErrorKind {
kNoError = 0,
kNotCompiled, // The set is not compiled.
@@ -34,9 +34,9 @@ class RE2::Set {
ErrorKind kind;
};
- Set(const RE2::Options& options, RE2::Anchor anchor);
- ~Set();
-
+ Set(const RE2::Options& options, RE2::Anchor anchor);
+ ~Set();
+
// Not copyable.
Set(const Set&) = delete;
Set& operator=(const Set&) = delete;
@@ -47,39 +47,39 @@ class RE2::Set {
// Adds pattern to the set using the options passed to the constructor.
// Returns the index that will identify the regexp in the output of Match(),
// or -1 if the regexp cannot be parsed.
- // Indices are assigned in sequential order starting from 0.
+ // Indices are assigned in sequential order starting from 0.
// Errors do not increment the index; if error is not NULL, *error will hold
// the error message from the parser.
int Add(const StringPiece& pattern, std::string* error);
-
+
// Compiles the set in preparation for matching.
// Returns false if the compiler runs out of memory.
// Add() must not be called again after Compile().
// Compile() must be called before Match().
- bool Compile();
-
+ bool Compile();
+
// Returns true if text matches at least one of the regexps in the set.
// Fills v (if not NULL) with the indices of the matching regexps.
- // Callers must not expect v to be sorted.
- bool Match(const StringPiece& text, std::vector<int>* v) const;
-
+ // Callers must not expect v to be sorted.
+ bool Match(const StringPiece& text, std::vector<int>* v) const;
+
// As above, but populates error_info (if not NULL) when none of the regexps
// in the set matched. This can inform callers when DFA execution fails, for
// example, because they might wish to handle that case differently.
bool Match(const StringPiece& text, std::vector<int>* v,
ErrorInfo* error_info) const;
- private:
+ private:
typedef std::pair<std::string, re2::Regexp*> Elem;
- RE2::Options options_;
- RE2::Anchor anchor_;
+ RE2::Options options_;
+ RE2::Anchor anchor_;
std::vector<Elem> elem_;
- bool compiled_;
+ bool compiled_;
int size_;
std::unique_ptr<re2::Prog> prog_;
-};
-
-} // namespace re2
-
-#endif // RE2_SET_H_
+};
+
+} // namespace re2
+
+#endif // RE2_SET_H_
diff --git a/contrib/libs/re2/re2/simplify.cc b/contrib/libs/re2/re2/simplify.cc
index 663d5fcd45..8a53ee2eac 100644
--- a/contrib/libs/re2/re2/simplify.cc
+++ b/contrib/libs/re2/re2/simplify.cc
@@ -6,11 +6,11 @@
// to use simple extended regular expression features.
// Also sort and simplify character classes.
-#include <string>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/utf.h"
+#include <string>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@@ -63,7 +63,7 @@ bool Regexp::ComputeSimple() {
// These are simple as long as the subpieces are simple.
subs = sub();
for (int i = 0; i < nsub_; i++)
- if (!subs[i]->simple())
+ if (!subs[i]->simple())
return false;
return true;
case kRegexpCharClass:
@@ -73,12 +73,12 @@ bool Regexp::ComputeSimple() {
return !cc_->empty() && !cc_->full();
case kRegexpCapture:
subs = sub();
- return subs[0]->simple();
+ return subs[0]->simple();
case kRegexpStar:
case kRegexpPlus:
case kRegexpQuest:
subs = sub();
- if (!subs[0]->simple())
+ if (!subs[0]->simple())
return false;
switch (subs[0]->op_) {
case kRegexpStar:
@@ -99,37 +99,37 @@ bool Regexp::ComputeSimple() {
}
// Walker subclass used by Simplify.
-// Coalesces runs of star/plus/quest/repeat of the same literal along with any
-// occurrences of that literal into repeats of that literal. It also works for
-// char classes, any char and any byte.
-// PostVisit creates the coalesced result, which should then be simplified.
-class CoalesceWalker : public Regexp::Walker<Regexp*> {
- public:
- CoalesceWalker() {}
- virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
- Regexp** child_args, int nchild_args);
- virtual Regexp* Copy(Regexp* re);
- virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
-
- private:
- // These functions are declared inside CoalesceWalker so that
- // they can edit the private fields of the Regexps they construct.
-
- // Returns true if r1 and r2 can be coalesced. In particular, ensures that
- // the parse flags are consistent. (They will not be checked again later.)
- static bool CanCoalesce(Regexp* r1, Regexp* r2);
-
- // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
- // will be empty match and the coalesced op. In other cases, where part of a
- // literal string was removed to be coalesced, the array elements afterwards
- // will be the coalesced op and the remainder of the literal string.
- static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
-
- CoalesceWalker(const CoalesceWalker&) = delete;
- CoalesceWalker& operator=(const CoalesceWalker&) = delete;
-};
-
-// Walker subclass used by Simplify.
+// Coalesces runs of star/plus/quest/repeat of the same literal along with any
+// occurrences of that literal into repeats of that literal. It also works for
+// char classes, any char and any byte.
+// PostVisit creates the coalesced result, which should then be simplified.
+class CoalesceWalker : public Regexp::Walker<Regexp*> {
+ public:
+ CoalesceWalker() {}
+ virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
+ Regexp** child_args, int nchild_args);
+ virtual Regexp* Copy(Regexp* re);
+ virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
+
+ private:
+ // These functions are declared inside CoalesceWalker so that
+ // they can edit the private fields of the Regexps they construct.
+
+ // Returns true if r1 and r2 can be coalesced. In particular, ensures that
+ // the parse flags are consistent. (They will not be checked again later.)
+ static bool CanCoalesce(Regexp* r1, Regexp* r2);
+
+ // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
+ // will be empty match and the coalesced op. In other cases, where part of a
+ // literal string was removed to be coalesced, the array elements afterwards
+ // will be the coalesced op and the remainder of the literal string.
+ static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
+
+ CoalesceWalker(const CoalesceWalker&) = delete;
+ CoalesceWalker& operator=(const CoalesceWalker&) = delete;
+};
+
+// Walker subclass used by Simplify.
// The simplify walk is purely post-recursive: given the simplified children,
// PostVisit creates the simplified result.
// The child_args are simplified Regexp*s.
@@ -137,7 +137,7 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> {
public:
SimplifyWalker() {}
virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
- virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
+ virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
Regexp** child_args, int nchild_args);
virtual Regexp* Copy(Regexp* re);
virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
@@ -161,8 +161,8 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> {
// Caller must Decref return value when done with it.
static Regexp* SimplifyCharClass(Regexp* re);
- SimplifyWalker(const SimplifyWalker&) = delete;
- SimplifyWalker& operator=(const SimplifyWalker&) = delete;
+ SimplifyWalker(const SimplifyWalker&) = delete;
+ SimplifyWalker& operator=(const SimplifyWalker&) = delete;
};
// Simplifies a regular expression, returning a new regexp.
@@ -175,272 +175,272 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> {
// Caller must Decref() return value when done with it.
Regexp* Regexp::Simplify() {
- CoalesceWalker cw;
- Regexp* cre = cw.Walk(this, NULL);
- if (cre == NULL)
+ CoalesceWalker cw;
+ Regexp* cre = cw.Walk(this, NULL);
+ if (cre == NULL)
return NULL;
if (cw.stopped_early()) {
cre->Decref();
return NULL;
}
- SimplifyWalker sw;
- Regexp* sre = sw.Walk(cre, NULL);
- cre->Decref();
+ SimplifyWalker sw;
+ Regexp* sre = sw.Walk(cre, NULL);
+ cre->Decref();
if (sre == NULL)
return NULL;
if (sw.stopped_early()) {
sre->Decref();
return NULL;
}
- return sre;
+ return sre;
}
#define Simplify DontCallSimplify // Avoid accidental recursion
-// Utility function for PostVisit implementations that compares re->sub() with
-// child_args to determine whether any child_args changed. In the common case,
-// where nothing changed, calls Decref() for all child_args and returns false,
-// so PostVisit must return re->Incref(). Otherwise, returns true.
-static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
- for (int i = 0; i < re->nsub(); i++) {
- Regexp* sub = re->sub()[i];
- Regexp* newsub = child_args[i];
- if (newsub != sub)
- return true;
- }
- for (int i = 0; i < re->nsub(); i++) {
- Regexp* newsub = child_args[i];
- newsub->Decref();
- }
- return false;
-}
-
-Regexp* CoalesceWalker::Copy(Regexp* re) {
- return re->Incref();
-}
-
-Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
+// Utility function for PostVisit implementations that compares re->sub() with
+// child_args to determine whether any child_args changed. In the common case,
+// where nothing changed, calls Decref() for all child_args and returns false,
+// so PostVisit must return re->Incref(). Otherwise, returns true.
+static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
+ for (int i = 0; i < re->nsub(); i++) {
+ Regexp* sub = re->sub()[i];
+ Regexp* newsub = child_args[i];
+ if (newsub != sub)
+ return true;
+ }
+ for (int i = 0; i < re->nsub(); i++) {
+ Regexp* newsub = child_args[i];
+ newsub->Decref();
+ }
+ return false;
+}
+
+Regexp* CoalesceWalker::Copy(Regexp* re) {
+ return re->Incref();
+}
+
+Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
// Should never be called: we use Walk(), not WalkExponential().
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
+ LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
#endif
- return re->Incref();
-}
-
-Regexp* CoalesceWalker::PostVisit(Regexp* re,
- Regexp* parent_arg,
- Regexp* pre_arg,
- Regexp** child_args,
- int nchild_args) {
- if (re->nsub() == 0)
- return re->Incref();
-
- if (re->op() != kRegexpConcat) {
- if (!ChildArgsChanged(re, child_args))
- return re->Incref();
-
- // Something changed. Build a new op.
- Regexp* nre = new Regexp(re->op(), re->parse_flags());
- nre->AllocSub(re->nsub());
- Regexp** nre_subs = nre->sub();
- for (int i = 0; i < re->nsub(); i++)
- nre_subs[i] = child_args[i];
- // Repeats and Captures have additional data that must be copied.
- if (re->op() == kRegexpRepeat) {
- nre->min_ = re->min();
- nre->max_ = re->max();
- } else if (re->op() == kRegexpCapture) {
- nre->cap_ = re->cap();
- }
- return nre;
- }
-
- bool can_coalesce = false;
- for (int i = 0; i < re->nsub(); i++) {
- if (i+1 < re->nsub() &&
- CanCoalesce(child_args[i], child_args[i+1])) {
- can_coalesce = true;
- break;
- }
- }
- if (!can_coalesce) {
- if (!ChildArgsChanged(re, child_args))
- return re->Incref();
-
- // Something changed. Build a new op.
- Regexp* nre = new Regexp(re->op(), re->parse_flags());
- nre->AllocSub(re->nsub());
- Regexp** nre_subs = nre->sub();
- for (int i = 0; i < re->nsub(); i++)
- nre_subs[i] = child_args[i];
- return nre;
- }
-
- for (int i = 0; i < re->nsub(); i++) {
- if (i+1 < re->nsub() &&
- CanCoalesce(child_args[i], child_args[i+1]))
- DoCoalesce(&child_args[i], &child_args[i+1]);
- }
- // Determine how many empty matches were left by DoCoalesce.
- int n = 0;
- for (int i = n; i < re->nsub(); i++) {
- if (child_args[i]->op() == kRegexpEmptyMatch)
- n++;
- }
- // Build a new op.
- Regexp* nre = new Regexp(re->op(), re->parse_flags());
- nre->AllocSub(re->nsub() - n);
- Regexp** nre_subs = nre->sub();
- for (int i = 0, j = 0; i < re->nsub(); i++) {
- if (child_args[i]->op() == kRegexpEmptyMatch) {
- child_args[i]->Decref();
- continue;
- }
- nre_subs[j] = child_args[i];
- j++;
- }
- return nre;
-}
-
-bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
- // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
- // any byte.
- if ((r1->op() == kRegexpStar ||
- r1->op() == kRegexpPlus ||
- r1->op() == kRegexpQuest ||
- r1->op() == kRegexpRepeat) &&
- (r1->sub()[0]->op() == kRegexpLiteral ||
- r1->sub()[0]->op() == kRegexpCharClass ||
- r1->sub()[0]->op() == kRegexpAnyChar ||
- r1->sub()[0]->op() == kRegexpAnyByte)) {
- // r2 must be a star/plus/quest/repeat of the same literal, char class,
- // any char or any byte.
- if ((r2->op() == kRegexpStar ||
- r2->op() == kRegexpPlus ||
- r2->op() == kRegexpQuest ||
- r2->op() == kRegexpRepeat) &&
- Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
- // The parse flags must be consistent.
- ((r1->parse_flags() & Regexp::NonGreedy) ==
- (r2->parse_flags() & Regexp::NonGreedy))) {
- return true;
- }
- // ... OR an occurrence of that literal, char class, any char or any byte
- if (Regexp::Equal(r1->sub()[0], r2)) {
- return true;
- }
- // ... OR a literal string that begins with that literal.
- if (r1->sub()[0]->op() == kRegexpLiteral &&
- r2->op() == kRegexpLiteralString &&
- r2->runes()[0] == r1->sub()[0]->rune() &&
- // The parse flags must be consistent.
- ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
- (r2->parse_flags() & Regexp::FoldCase))) {
- return true;
- }
- }
- return false;
-}
-
-void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
- Regexp* r1 = *r1ptr;
- Regexp* r2 = *r2ptr;
-
- Regexp* nre = Regexp::Repeat(
- r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
-
- switch (r1->op()) {
- case kRegexpStar:
- nre->min_ = 0;
- nre->max_ = -1;
- break;
-
- case kRegexpPlus:
- nre->min_ = 1;
- nre->max_ = -1;
- break;
-
- case kRegexpQuest:
- nre->min_ = 0;
- nre->max_ = 1;
- break;
-
- case kRegexpRepeat:
- nre->min_ = r1->min();
- nre->max_ = r1->max();
- break;
-
- default:
- LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
- nre->Decref();
- return;
- }
-
- switch (r2->op()) {
- case kRegexpStar:
- nre->max_ = -1;
- goto LeaveEmpty;
-
- case kRegexpPlus:
- nre->min_++;
- nre->max_ = -1;
- goto LeaveEmpty;
-
- case kRegexpQuest:
- if (nre->max() != -1)
- nre->max_++;
- goto LeaveEmpty;
-
- case kRegexpRepeat:
- nre->min_ += r2->min();
- if (r2->max() == -1)
- nre->max_ = -1;
- else if (nre->max() != -1)
- nre->max_ += r2->max();
- goto LeaveEmpty;
-
- case kRegexpLiteral:
- case kRegexpCharClass:
- case kRegexpAnyChar:
- case kRegexpAnyByte:
- nre->min_++;
- if (nre->max() != -1)
- nre->max_++;
- goto LeaveEmpty;
-
- LeaveEmpty:
- *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
- *r2ptr = nre;
- break;
-
- case kRegexpLiteralString: {
- Rune r = r1->sub()[0]->rune();
- // Determine how much of the literal string is removed.
- // We know that we have at least one rune. :)
- int n = 1;
- while (n < r2->nrunes() && r2->runes()[n] == r)
- n++;
- nre->min_ += n;
- if (nre->max() != -1)
- nre->max_ += n;
- if (n == r2->nrunes())
- goto LeaveEmpty;
- *r1ptr = nre;
- *r2ptr = Regexp::LiteralString(
- &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
- break;
- }
-
- default:
- LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
- nre->Decref();
- return;
- }
-
- r1->Decref();
- r2->Decref();
-}
-
+ return re->Incref();
+}
+
+Regexp* CoalesceWalker::PostVisit(Regexp* re,
+ Regexp* parent_arg,
+ Regexp* pre_arg,
+ Regexp** child_args,
+ int nchild_args) {
+ if (re->nsub() == 0)
+ return re->Incref();
+
+ if (re->op() != kRegexpConcat) {
+ if (!ChildArgsChanged(re, child_args))
+ return re->Incref();
+
+ // Something changed. Build a new op.
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
+ nre->AllocSub(re->nsub());
+ Regexp** nre_subs = nre->sub();
+ for (int i = 0; i < re->nsub(); i++)
+ nre_subs[i] = child_args[i];
+ // Repeats and Captures have additional data that must be copied.
+ if (re->op() == kRegexpRepeat) {
+ nre->min_ = re->min();
+ nre->max_ = re->max();
+ } else if (re->op() == kRegexpCapture) {
+ nre->cap_ = re->cap();
+ }
+ return nre;
+ }
+
+ bool can_coalesce = false;
+ for (int i = 0; i < re->nsub(); i++) {
+ if (i+1 < re->nsub() &&
+ CanCoalesce(child_args[i], child_args[i+1])) {
+ can_coalesce = true;
+ break;
+ }
+ }
+ if (!can_coalesce) {
+ if (!ChildArgsChanged(re, child_args))
+ return re->Incref();
+
+ // Something changed. Build a new op.
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
+ nre->AllocSub(re->nsub());
+ Regexp** nre_subs = nre->sub();
+ for (int i = 0; i < re->nsub(); i++)
+ nre_subs[i] = child_args[i];
+ return nre;
+ }
+
+ for (int i = 0; i < re->nsub(); i++) {
+ if (i+1 < re->nsub() &&
+ CanCoalesce(child_args[i], child_args[i+1]))
+ DoCoalesce(&child_args[i], &child_args[i+1]);
+ }
+ // Determine how many empty matches were left by DoCoalesce.
+ int n = 0;
+ for (int i = n; i < re->nsub(); i++) {
+ if (child_args[i]->op() == kRegexpEmptyMatch)
+ n++;
+ }
+ // Build a new op.
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
+ nre->AllocSub(re->nsub() - n);
+ Regexp** nre_subs = nre->sub();
+ for (int i = 0, j = 0; i < re->nsub(); i++) {
+ if (child_args[i]->op() == kRegexpEmptyMatch) {
+ child_args[i]->Decref();
+ continue;
+ }
+ nre_subs[j] = child_args[i];
+ j++;
+ }
+ return nre;
+}
+
+bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
+ // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
+ // any byte.
+ if ((r1->op() == kRegexpStar ||
+ r1->op() == kRegexpPlus ||
+ r1->op() == kRegexpQuest ||
+ r1->op() == kRegexpRepeat) &&
+ (r1->sub()[0]->op() == kRegexpLiteral ||
+ r1->sub()[0]->op() == kRegexpCharClass ||
+ r1->sub()[0]->op() == kRegexpAnyChar ||
+ r1->sub()[0]->op() == kRegexpAnyByte)) {
+ // r2 must be a star/plus/quest/repeat of the same literal, char class,
+ // any char or any byte.
+ if ((r2->op() == kRegexpStar ||
+ r2->op() == kRegexpPlus ||
+ r2->op() == kRegexpQuest ||
+ r2->op() == kRegexpRepeat) &&
+ Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
+ // The parse flags must be consistent.
+ ((r1->parse_flags() & Regexp::NonGreedy) ==
+ (r2->parse_flags() & Regexp::NonGreedy))) {
+ return true;
+ }
+ // ... OR an occurrence of that literal, char class, any char or any byte
+ if (Regexp::Equal(r1->sub()[0], r2)) {
+ return true;
+ }
+ // ... OR a literal string that begins with that literal.
+ if (r1->sub()[0]->op() == kRegexpLiteral &&
+ r2->op() == kRegexpLiteralString &&
+ r2->runes()[0] == r1->sub()[0]->rune() &&
+ // The parse flags must be consistent.
+ ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
+ (r2->parse_flags() & Regexp::FoldCase))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
+ Regexp* r1 = *r1ptr;
+ Regexp* r2 = *r2ptr;
+
+ Regexp* nre = Regexp::Repeat(
+ r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
+
+ switch (r1->op()) {
+ case kRegexpStar:
+ nre->min_ = 0;
+ nre->max_ = -1;
+ break;
+
+ case kRegexpPlus:
+ nre->min_ = 1;
+ nre->max_ = -1;
+ break;
+
+ case kRegexpQuest:
+ nre->min_ = 0;
+ nre->max_ = 1;
+ break;
+
+ case kRegexpRepeat:
+ nre->min_ = r1->min();
+ nre->max_ = r1->max();
+ break;
+
+ default:
+ LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
+ nre->Decref();
+ return;
+ }
+
+ switch (r2->op()) {
+ case kRegexpStar:
+ nre->max_ = -1;
+ goto LeaveEmpty;
+
+ case kRegexpPlus:
+ nre->min_++;
+ nre->max_ = -1;
+ goto LeaveEmpty;
+
+ case kRegexpQuest:
+ if (nre->max() != -1)
+ nre->max_++;
+ goto LeaveEmpty;
+
+ case kRegexpRepeat:
+ nre->min_ += r2->min();
+ if (r2->max() == -1)
+ nre->max_ = -1;
+ else if (nre->max() != -1)
+ nre->max_ += r2->max();
+ goto LeaveEmpty;
+
+ case kRegexpLiteral:
+ case kRegexpCharClass:
+ case kRegexpAnyChar:
+ case kRegexpAnyByte:
+ nre->min_++;
+ if (nre->max() != -1)
+ nre->max_++;
+ goto LeaveEmpty;
+
+ LeaveEmpty:
+ *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
+ *r2ptr = nre;
+ break;
+
+ case kRegexpLiteralString: {
+ Rune r = r1->sub()[0]->rune();
+ // Determine how much of the literal string is removed.
+ // We know that we have at least one rune. :)
+ int n = 1;
+ while (n < r2->nrunes() && r2->runes()[n] == r)
+ n++;
+ nre->min_ += n;
+ if (nre->max() != -1)
+ nre->max_ += n;
+ if (n == r2->nrunes())
+ goto LeaveEmpty;
+ *r1ptr = nre;
+ *r2ptr = Regexp::LiteralString(
+ &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
+ break;
+ }
+
+ default:
+ LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
+ nre->Decref();
+ return;
+ }
+
+ r1->Decref();
+ r2->Decref();
+}
+
Regexp* SimplifyWalker::Copy(Regexp* re) {
return re->Incref();
}
@@ -454,7 +454,7 @@ Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
}
Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
- if (re->simple()) {
+ if (re->simple()) {
*stop = true;
return re->Incref();
}
@@ -487,14 +487,14 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re,
case kRegexpConcat:
case kRegexpAlternate: {
// These are simple as long as the subpieces are simple.
- if (!ChildArgsChanged(re, child_args)) {
+ if (!ChildArgsChanged(re, child_args)) {
re->simple_ = true;
return re->Incref();
}
Regexp* nre = new Regexp(re->op(), re->parse_flags());
- nre->AllocSub(re->nsub());
+ nre->AllocSub(re->nsub());
Regexp** nre_subs = nre->sub();
- for (int i = 0; i < re->nsub(); i++)
+ for (int i = 0; i < re->nsub(); i++)
nre_subs[i] = child_args[i];
nre->simple_ = true;
return nre;
@@ -510,7 +510,7 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re,
Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
nre->AllocSub(1);
nre->sub()[0] = newsub;
- nre->cap_ = re->cap();
+ nre->cap_ = re->cap();
nre->simple_ = true;
return nre;
}
diff --git a/contrib/libs/re2/re2/sparse_array.h b/contrib/libs/re2/re2/sparse_array.h
index 09ffe086b7..d519912bdc 100644
--- a/contrib/libs/re2/re2/sparse_array.h
+++ b/contrib/libs/re2/re2/sparse_array.h
@@ -4,51 +4,51 @@
#ifndef RE2_SPARSE_ARRAY_H_
#define RE2_SPARSE_ARRAY_H_
-
+
// DESCRIPTION
-//
+//
// SparseArray<T>(m) is a map from integers in [0, m) to T values.
// It requires (sizeof(T)+sizeof(int))*m memory, but it provides
// fast iteration through the elements in the array and fast clearing
// of the array. The array has a concept of certain elements being
// uninitialized (having no value).
-//
+//
// Insertion and deletion are constant time operations.
-//
-// Allocating the array is a constant time operation
+//
+// Allocating the array is a constant time operation
// when memory allocation is a constant time operation.
-//
+//
// Clearing the array is a constant time operation (unusual!).
-//
+//
// Iterating through the array is an O(n) operation, where n
// is the number of items in the array (not O(m)).
//
-// The array iterator visits entries in the order they were first
+// The array iterator visits entries in the order they were first
// inserted into the array. It is safe to add items to the array while
// using an iterator: the iterator will visit indices added to the array
// during the iteration, but will not re-visit indices whose values
// change after visiting. Thus SparseArray can be a convenient
// implementation of a work queue.
-//
+//
// The SparseArray implementation is NOT thread-safe. It is up to the
// caller to make sure only one thread is accessing the array. (Typically
// these arrays are temporary values and used in situations where speed is
// important.)
-//
+//
// The SparseArray interface does not present all the usual STL bells and
// whistles.
-//
+//
// Implemented with reference to Briggs & Torczon, An Efficient
// Representation for Sparse Sets, ACM Letters on Programming Languages
// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69.
-//
+//
// Briggs & Torczon popularized this technique, but it had been known
// long before their paper. They point out that Aho, Hopcroft, and
// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's
// 1986 Programming Pearls both hint at the technique in exercises to the
// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1
// exercise 8).
-//
+//
// Briggs & Torczon describe a sparse set implementation. I have
// trivially generalized it to create a sparse array (actually the original
// target of the AHU and Bentley exercises).
@@ -57,7 +57,7 @@
//
// SparseArray is an array dense_ and an array sparse_ of identical size.
// At any point, the number of elements in the sparse array is size_.
-//
+//
// The array dense_ contains the size_ elements in the sparse array (with
// their indices),
// in the order that the elements were first inserted. This array is dense:
@@ -67,13 +67,13 @@
// For indices present in the array, dense_[sparse_[i]].index_ == i.
// For indices not present in the array, sparse_ can contain any value at all,
// perhaps outside the range [0, size_) but perhaps not.
-//
+//
// The lax requirement on sparse_ values makes clearing the array very easy:
// set size_ to 0. Lookups are slightly more complicated.
// An index i has a value in the array if and only if:
// sparse_[i] is in [0, size_) AND
// dense_[sparse_[i]].index_ == i.
-// If both these properties hold, only then it is safe to refer to
+// If both these properties hold, only then it is safe to refer to
// dense_[sparse_[i]].value_
// as the value associated with index i.
//
@@ -85,22 +85,22 @@
// array through a call to resize(). They immediately become inaccessible, but
// they are only guaranteed to be destroyed when the SparseArray destructor is
// called.
-//
-// A moved-from SparseArray will be empty.
+//
+// A moved-from SparseArray will be empty.
// Doing this simplifies the logic below.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
-#include <assert.h>
-#include <stdint.h>
+#include <assert.h>
+#include <stdint.h>
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
-#include <algorithm>
-#include <memory>
-#include <utility>
+#include <algorithm>
+#include <memory>
+#include <utility>
#include "re2/pod_array.h"
@@ -110,7 +110,7 @@ template<typename Value>
class SparseArray {
public:
SparseArray();
- explicit SparseArray(int max_size);
+ explicit SparseArray(int max_size);
~SparseArray();
// IndexValue pairs: exposed in SparseArray::iterator.
@@ -119,22 +119,22 @@ class SparseArray {
typedef IndexValue* iterator;
typedef const IndexValue* const_iterator;
- SparseArray(const SparseArray& src);
+ SparseArray(const SparseArray& src);
SparseArray(SparseArray&& src);
- SparseArray& operator=(const SparseArray& src);
+ SparseArray& operator=(const SparseArray& src);
SparseArray& operator=(SparseArray&& src);
-
+
// Return the number of entries in the array.
int size() const {
return size_;
}
- // Indicate whether the array is empty.
- int empty() const {
- return size_ == 0;
- }
-
+ // Indicate whether the array is empty.
+ int empty() const {
+ return size_ == 0;
+ }
+
// Iterate over the array.
iterator begin() {
return dense_.data();
@@ -169,82 +169,82 @@ class SparseArray {
}
// Check whether index i is in the array.
- bool has_index(int i) const;
+ bool has_index(int i) const;
// Comparison function for sorting.
// Can sort the sparse array so that future iterations
// will visit indices in increasing order using
- // std::sort(arr.begin(), arr.end(), arr.less);
+ // std::sort(arr.begin(), arr.end(), arr.less);
static bool less(const IndexValue& a, const IndexValue& b);
public:
// Set the value at index i to v.
- iterator set(int i, const Value& v) {
- return SetInternal(true, i, v);
- }
+ iterator set(int i, const Value& v) {
+ return SetInternal(true, i, v);
+ }
// Set the value at new index i to v.
// Fast but unsafe: only use if has_index(i) is false.
iterator set_new(int i, const Value& v) {
return SetInternal(false, i, v);
- }
+ }
// Set the value at index i to v.
// Fast but unsafe: only use if has_index(i) is true.
- iterator set_existing(int i, const Value& v) {
- return SetExistingInternal(i, v);
- }
+ iterator set_existing(int i, const Value& v) {
+ return SetExistingInternal(i, v);
+ }
// Get the value at index i.
// Fast but unsafe: only use if has_index(i) is true.
Value& get_existing(int i) {
assert(has_index(i));
return dense_[sparse_[i]].value_;
- }
+ }
const Value& get_existing(int i) const {
assert(has_index(i));
return dense_[sparse_[i]].value_;
- }
+ }
private:
iterator SetInternal(bool allow_existing, int i, const Value& v) {
- DebugCheckInvariants();
+ DebugCheckInvariants();
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
- assert(false && "illegal index");
- // Semantically, end() would be better here, but we already know
- // the user did something stupid, so begin() insulates them from
- // dereferencing an invalid pointer.
- return begin();
- }
+ assert(false && "illegal index");
+ // Semantically, end() would be better here, but we already know
+ // the user did something stupid, so begin() insulates them from
+ // dereferencing an invalid pointer.
+ return begin();
+ }
if (!allow_existing) {
- assert(!has_index(i));
- create_index(i);
- } else {
- if (!has_index(i))
- create_index(i);
- }
+ assert(!has_index(i));
+ create_index(i);
+ } else {
+ if (!has_index(i))
+ create_index(i);
+ }
return SetExistingInternal(i, v);
- }
-
+ }
+
iterator SetExistingInternal(int i, const Value& v) {
- DebugCheckInvariants();
- assert(has_index(i));
+ DebugCheckInvariants();
+ assert(has_index(i));
dense_[sparse_[i]].value_ = v;
- DebugCheckInvariants();
+ DebugCheckInvariants();
return dense_.data() + sparse_[i];
- }
-
+ }
+
// Add the index i to the array.
// Only use if has_index(i) is known to be false.
// Since it doesn't set the value associated with i,
// this function is private, only intended as a helper
// for other methods.
- void create_index(int i);
+ void create_index(int i);
// In debug mode, verify that some invariant properties of the class
// are being maintained. This is called at the end of the constructor
// and at the beginning and end of all public non-const member functions.
- void DebugCheckInvariants() const;
+ void DebugCheckInvariants() const;
// Initializes memory for elements [min, max).
void MaybeInitializeMemory(int min, int max) {
@@ -257,54 +257,54 @@ class SparseArray {
#endif
}
- int size_ = 0;
+ int size_ = 0;
PODArray<int> sparse_;
PODArray<IndexValue> dense_;
};
template<typename Value>
-SparseArray<Value>::SparseArray() = default;
+SparseArray<Value>::SparseArray() = default;
-template<typename Value>
-SparseArray<Value>::SparseArray(const SparseArray& src)
- : size_(src.size_),
+template<typename Value>
+SparseArray<Value>::SparseArray(const SparseArray& src)
+ : size_(src.size_),
sparse_(src.max_size()),
dense_(src.max_size()) {
std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
-}
-
-template<typename Value>
+}
+
+template<typename Value>
SparseArray<Value>::SparseArray(SparseArray&& src)
- : size_(src.size_),
+ : size_(src.size_),
sparse_(std::move(src.sparse_)),
- dense_(std::move(src.dense_)) {
- src.size_ = 0;
-}
-
-template<typename Value>
-SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
+ dense_(std::move(src.dense_)) {
+ src.size_ = 0;
+}
+
+template<typename Value>
+SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) {
// Construct these first for exception safety.
PODArray<int> a(src.max_size());
PODArray<IndexValue> b(src.max_size());
- size_ = src.size_;
+ size_ = src.size_;
sparse_ = std::move(a);
dense_ = std::move(b);
std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data());
std::copy_n(src.dense_.data(), src.max_size(), dense_.data());
- return *this;
-}
-
-template<typename Value>
+ return *this;
+}
+
+template<typename Value>
SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) {
- size_ = src.size_;
+ size_ = src.size_;
sparse_ = std::move(src.sparse_);
- dense_ = std::move(src.dense_);
- src.size_ = 0;
- return *this;
-}
-
+ dense_ = std::move(src.dense_);
+ src.size_ = 0;
+ return *this;
+}
+
// IndexValue pairs: exposed in SparseArray::iterator.
template<typename Value>
class SparseArray<Value>::IndexValue {
@@ -313,9 +313,9 @@ class SparseArray<Value>::IndexValue {
Value& value() { return value_; }
const Value& value() const { return value_; }
- private:
+ private:
friend class SparseArray;
- int index_;
+ int index_;
Value value_;
};
@@ -330,7 +330,7 @@ void SparseArray<Value>::resize(int new_max_size) {
// Construct these first for exception safety.
PODArray<int> a(new_max_size);
PODArray<IndexValue> b(new_max_size);
-
+
std::copy_n(sparse_.data(), old_max_size, a.data());
std::copy_n(dense_.data(), old_max_size, b.data());
@@ -347,7 +347,7 @@ void SparseArray<Value>::resize(int new_max_size) {
// Check whether index i is in the array.
template<typename Value>
bool SparseArray<Value>::has_index(int i) const {
- assert(i >= 0);
+ assert(i >= 0);
assert(i < max_size());
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
return false;
@@ -359,7 +359,7 @@ bool SparseArray<Value>::has_index(int i) const {
template<typename Value>
void SparseArray<Value>::create_index(int i) {
- assert(!has_index(i));
+ assert(!has_index(i));
assert(size_ < max_size());
sparse_[i] = size_;
dense_[size_].index_ = i;
@@ -377,7 +377,7 @@ template<typename Value> SparseArray<Value>::~SparseArray() {
}
template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const {
- assert(0 <= size_);
+ assert(0 <= size_);
assert(size_ <= max_size());
}
diff --git a/contrib/libs/re2/re2/sparse_set.h b/contrib/libs/re2/re2/sparse_set.h
index 06ed88d81b..6f4b6fb926 100644
--- a/contrib/libs/re2/re2/sparse_set.h
+++ b/contrib/libs/re2/re2/sparse_set.h
@@ -4,172 +4,172 @@
#ifndef RE2_SPARSE_SET_H_
#define RE2_SPARSE_SET_H_
-
+
// DESCRIPTION
-//
-// SparseSet(m) is a set of integers in [0, m).
+//
+// SparseSet(m) is a set of integers in [0, m).
// It requires sizeof(int)*m memory, but it provides
// fast iteration through the elements in the set and fast clearing
// of the set.
-//
+//
// Insertion and deletion are constant time operations.
-//
-// Allocating the set is a constant time operation
+//
+// Allocating the set is a constant time operation
// when memory allocation is a constant time operation.
-//
+//
// Clearing the set is a constant time operation (unusual!).
-//
+//
// Iterating through the set is an O(n) operation, where n
// is the number of items in the set (not O(m)).
//
-// The set iterator visits entries in the order they were first
-// inserted into the set. It is safe to add items to the set while
+// The set iterator visits entries in the order they were first
+// inserted into the set. It is safe to add items to the set while
// using an iterator: the iterator will visit indices added to the set
// during the iteration, but will not re-visit indices whose values
// change after visiting. Thus SparseSet can be a convenient
// implementation of a work queue.
-//
+//
// The SparseSet implementation is NOT thread-safe. It is up to the
// caller to make sure only one thread is accessing the set. (Typically
// these sets are temporary values and used in situations where speed is
// important.)
-//
+//
// The SparseSet interface does not present all the usual STL bells and
// whistles.
-//
+//
// Implemented with reference to Briggs & Torczon, An Efficient
// Representation for Sparse Sets, ACM Letters on Programming Languages
// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69.
-//
-// This is a specialization of sparse array; see sparse_array.h.
+//
+// This is a specialization of sparse array; see sparse_array.h.
// IMPLEMENTATION
//
-// See sparse_array.h for implementation details.
+// See sparse_array.h for implementation details.
// Doing this simplifies the logic below.
#ifndef __has_feature
#define __has_feature(x) 0
#endif
-#include <assert.h>
-#include <stdint.h>
+#include <assert.h>
+#include <stdint.h>
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
-#include <algorithm>
-#include <memory>
-#include <utility>
+#include <algorithm>
+#include <memory>
+#include <utility>
#include "re2/pod_array.h"
namespace re2 {
-template<typename Value>
-class SparseSetT {
+template<typename Value>
+class SparseSetT {
public:
- SparseSetT();
- explicit SparseSetT(int max_size);
- ~SparseSetT();
+ SparseSetT();
+ explicit SparseSetT(int max_size);
+ ~SparseSetT();
typedef int* iterator;
typedef const int* const_iterator;
-
- // Return the number of entries in the set.
- int size() const {
- return size_;
+
+ // Return the number of entries in the set.
+ int size() const {
+ return size_;
}
- // Indicate whether the set is empty.
- int empty() const {
- return size_ == 0;
+ // Indicate whether the set is empty.
+ int empty() const {
+ return size_ == 0;
}
- // Iterate over the set.
- iterator begin() {
+ // Iterate over the set.
+ iterator begin() {
return dense_.data();
- }
- iterator end() {
+ }
+ iterator end() {
return dense_.data() + size_;
- }
+ }
- const_iterator begin() const {
+ const_iterator begin() const {
return dense_.data();
- }
- const_iterator end() const {
+ }
+ const_iterator end() const {
return dense_.data() + size_;
- }
+ }
- // Change the maximum size of the set.
+ // Change the maximum size of the set.
// Invalidates all iterators.
void resize(int new_max_size);
- // Return the maximum size of the set.
- // Indices can be in the range [0, max_size).
- int max_size() const {
+ // Return the maximum size of the set.
+ // Indices can be in the range [0, max_size).
+ int max_size() const {
if (dense_.data() != NULL)
return dense_.size();
else
return 0;
}
- // Clear the set.
- void clear() {
- size_ = 0;
- }
-
- // Check whether index i is in the set.
- bool contains(int i) const;
-
- // Comparison function for sorting.
- // Can sort the sparse set so that future iterations
- // will visit indices in increasing order using
- // std::sort(arr.begin(), arr.end(), arr.less);
- static bool less(int a, int b);
-
- public:
- // Insert index i into the set.
- iterator insert(int i) {
- return InsertInternal(true, i);
+ // Clear the set.
+ void clear() {
+ size_ = 0;
+ }
+
+ // Check whether index i is in the set.
+ bool contains(int i) const;
+
+ // Comparison function for sorting.
+ // Can sort the sparse set so that future iterations
+ // will visit indices in increasing order using
+ // std::sort(arr.begin(), arr.end(), arr.less);
+ static bool less(int a, int b);
+
+ public:
+ // Insert index i into the set.
+ iterator insert(int i) {
+ return InsertInternal(true, i);
}
- // Insert index i into the set.
- // Fast but unsafe: only use if contains(i) is false.
- iterator insert_new(int i) {
- return InsertInternal(false, i);
+ // Insert index i into the set.
+ // Fast but unsafe: only use if contains(i) is false.
+ iterator insert_new(int i) {
+ return InsertInternal(false, i);
}
- private:
- iterator InsertInternal(bool allow_existing, int i) {
- DebugCheckInvariants();
+ private:
+ iterator InsertInternal(bool allow_existing, int i) {
+ DebugCheckInvariants();
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
- assert(false && "illegal index");
+ assert(false && "illegal index");
// Semantically, end() would be better here, but we already know
// the user did something stupid, so begin() insulates them from
// dereferencing an invalid pointer.
- return begin();
+ return begin();
}
- if (!allow_existing) {
- assert(!contains(i));
- create_index(i);
- } else {
- if (!contains(i))
- create_index(i);
- }
- DebugCheckInvariants();
+ if (!allow_existing) {
+ assert(!contains(i));
+ create_index(i);
+ } else {
+ if (!contains(i))
+ create_index(i);
+ }
+ DebugCheckInvariants();
return dense_.data() + sparse_[i];
}
- // Add the index i to the set.
- // Only use if contains(i) is known to be false.
- // This function is private, only intended as a helper
- // for other methods.
- void create_index(int i);
+ // Add the index i to the set.
+ // Only use if contains(i) is known to be false.
+ // This function is private, only intended as a helper
+ // for other methods.
+ void create_index(int i);
- // In debug mode, verify that some invariant properties of the class
- // are being maintained. This is called at the end of the constructor
- // and at the beginning and end of all public non-const member functions.
- void DebugCheckInvariants() const;
+ // In debug mode, verify that some invariant properties of the class
+ // are being maintained. This is called at the end of the constructor
+ // and at the beginning and end of all public non-const member functions.
+ void DebugCheckInvariants() const;
// Initializes memory for elements [min, max).
void MaybeInitializeMemory(int min, int max) {
@@ -182,26 +182,26 @@ class SparseSetT {
#endif
}
- int size_ = 0;
+ int size_ = 0;
PODArray<int> sparse_;
PODArray<int> dense_;
};
-template<typename Value>
-SparseSetT<Value>::SparseSetT() = default;
-
-// Change the maximum size of the set.
-// Invalidates all iterators.
-template<typename Value>
+template<typename Value>
+SparseSetT<Value>::SparseSetT() = default;
+
+// Change the maximum size of the set.
+// Invalidates all iterators.
+template<typename Value>
void SparseSetT<Value>::resize(int new_max_size) {
- DebugCheckInvariants();
+ DebugCheckInvariants();
if (new_max_size > max_size()) {
const int old_max_size = max_size();
-
+
// Construct these first for exception safety.
PODArray<int> a(new_max_size);
PODArray<int> b(new_max_size);
-
+
std::copy_n(sparse_.data(), old_max_size, a.data());
std::copy_n(dense_.data(), old_max_size, b.data());
@@ -209,56 +209,56 @@ void SparseSetT<Value>::resize(int new_max_size) {
dense_ = std::move(b);
MaybeInitializeMemory(old_max_size, new_max_size);
- }
+ }
if (size_ > new_max_size)
size_ = new_max_size;
- DebugCheckInvariants();
-}
-
-// Check whether index i is in the set.
-template<typename Value>
-bool SparseSetT<Value>::contains(int i) const {
- assert(i >= 0);
+ DebugCheckInvariants();
+}
+
+// Check whether index i is in the set.
+template<typename Value>
+bool SparseSetT<Value>::contains(int i) const {
+ assert(i >= 0);
assert(i < max_size());
if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
- return false;
- }
+ return false;
+ }
// Unsigned comparison avoids checking sparse_[i] < 0.
return (uint32_t)sparse_[i] < (uint32_t)size_ &&
dense_[sparse_[i]] == i;
-}
-
-template<typename Value>
-void SparseSetT<Value>::create_index(int i) {
- assert(!contains(i));
+}
+
+template<typename Value>
+void SparseSetT<Value>::create_index(int i) {
+ assert(!contains(i));
assert(size_ < max_size());
sparse_[i] = size_;
- dense_[size_] = i;
- size_++;
-}
-
+ dense_[size_] = i;
+ size_++;
+}
+
template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
sparse_(max_size), dense_(max_size) {
MaybeInitializeMemory(size_, max_size);
- DebugCheckInvariants();
-}
-
-template<typename Value> SparseSetT<Value>::~SparseSetT() {
- DebugCheckInvariants();
-}
-
-template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
- assert(0 <= size_);
+ DebugCheckInvariants();
+}
+
+template<typename Value> SparseSetT<Value>::~SparseSetT() {
+ DebugCheckInvariants();
+}
+
+template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
+ assert(0 <= size_);
assert(size_ <= max_size());
-}
-
-// Comparison function for sorting.
-template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
- return a < b;
-}
-
-typedef SparseSetT<void> SparseSet;
-
+}
+
+// Comparison function for sorting.
+template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
+ return a < b;
+}
+
+typedef SparseSetT<void> SparseSet;
+
} // namespace re2
#endif // RE2_SPARSE_SET_H_
diff --git a/contrib/libs/re2/re2/stringpiece.cc b/contrib/libs/re2/re2/stringpiece.cc
index ef2e2874ea..61721c19cd 100644
--- a/contrib/libs/re2/re2/stringpiece.cc
+++ b/contrib/libs/re2/re2/stringpiece.cc
@@ -1,65 +1,65 @@
-// Copyright 2004 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "re2/stringpiece.h"
-
-#include <ostream>
-
-#include "util/util.h"
-
-namespace re2 {
-
-const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h
-
-StringPiece::size_type StringPiece::copy(char* buf, size_type n,
- size_type pos) const {
- size_type ret = std::min(size_ - pos, n);
- memcpy(buf, data_ + pos, ret);
- return ret;
-}
-
-StringPiece StringPiece::substr(size_type pos, size_type n) const {
- if (pos > size_) pos = size_;
- if (n > size_ - pos) n = size_ - pos;
- return StringPiece(data_ + pos, n);
-}
-
-StringPiece::size_type StringPiece::find(const StringPiece& s,
- size_type pos) const {
- if (pos > size_) return npos;
- const_pointer result = std::search(data_ + pos, data_ + size_,
- s.data_, s.data_ + s.size_);
- size_type xpos = result - data_;
- return xpos + s.size_ <= size_ ? xpos : npos;
-}
-
-StringPiece::size_type StringPiece::find(char c, size_type pos) const {
- if (size_ <= 0 || pos >= size_) return npos;
- const_pointer result = std::find(data_ + pos, data_ + size_, c);
- return result != data_ + size_ ? result - data_ : npos;
-}
-
-StringPiece::size_type StringPiece::rfind(const StringPiece& s,
- size_type pos) const {
- if (size_ < s.size_) return npos;
- if (s.size_ == 0) return std::min(size_, pos);
- const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_;
- const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_);
- return result != last ? result - data_ : npos;
-}
-
-StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
- if (size_ <= 0) return npos;
- for (size_t i = std::min(pos + 1, size_); i != 0;) {
- if (data_[--i] == c) return i;
- }
- return npos;
-}
-
-std::ostream& operator<<(std::ostream& o, const StringPiece& p) {
- o.write(p.data(), p.size());
- return o;
-}
-
-} // namespace re2
+// Copyright 2004 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/stringpiece.h"
+
+#include <ostream>
+
+#include "util/util.h"
+
+namespace re2 {
+
+const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h
+
+StringPiece::size_type StringPiece::copy(char* buf, size_type n,
+ size_type pos) const {
+ size_type ret = std::min(size_ - pos, n);
+ memcpy(buf, data_ + pos, ret);
+ return ret;
+}
+
+StringPiece StringPiece::substr(size_type pos, size_type n) const {
+ if (pos > size_) pos = size_;
+ if (n > size_ - pos) n = size_ - pos;
+ return StringPiece(data_ + pos, n);
+}
+
+StringPiece::size_type StringPiece::find(const StringPiece& s,
+ size_type pos) const {
+ if (pos > size_) return npos;
+ const_pointer result = std::search(data_ + pos, data_ + size_,
+ s.data_, s.data_ + s.size_);
+ size_type xpos = result - data_;
+ return xpos + s.size_ <= size_ ? xpos : npos;
+}
+
+StringPiece::size_type StringPiece::find(char c, size_type pos) const {
+ if (size_ <= 0 || pos >= size_) return npos;
+ const_pointer result = std::find(data_ + pos, data_ + size_, c);
+ return result != data_ + size_ ? result - data_ : npos;
+}
+
+StringPiece::size_type StringPiece::rfind(const StringPiece& s,
+ size_type pos) const {
+ if (size_ < s.size_) return npos;
+ if (s.size_ == 0) return std::min(size_, pos);
+ const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_;
+ const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_);
+ return result != last ? result - data_ : npos;
+}
+
+StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
+ if (size_ <= 0) return npos;
+ for (size_t i = std::min(pos + 1, size_); i != 0;) {
+ if (data_[--i] == c) return i;
+ }
+ return npos;
+}
+
+std::ostream& operator<<(std::ostream& o, const StringPiece& p) {
+ o.write(p.data(), p.size());
+ return o;
+}
+
+} // namespace re2
diff --git a/contrib/libs/re2/re2/stringpiece.h b/contrib/libs/re2/re2/stringpiece.h
index ef73683401..0c74c73a41 100644
--- a/contrib/libs/re2/re2/stringpiece.h
+++ b/contrib/libs/re2/re2/stringpiece.h
@@ -1,115 +1,115 @@
-// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef RE2_STRINGPIECE_H_
-#define RE2_STRINGPIECE_H_
-
-// A string-like object that points to a sized piece of memory.
-//
-// Functions or methods may use const StringPiece& parameters to accept either
-// a "const char*" or a "string" value that will be implicitly converted to
-// a StringPiece. The implicit conversion means that it is often appropriate
-// to include this .h file in other files rather than forward-declaring
-// StringPiece as would be appropriate for most other Google classes.
-//
-// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
-// conversions from "const char*" to "string" and back again.
-//
-//
-// Arghh! I wish C++ literals were "string".
-
+// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_STRINGPIECE_H_
+#define RE2_STRINGPIECE_H_
+
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece. The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other Google classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+//
+// Arghh! I wish C++ literals were "string".
+
// Doing this simplifies the logic below.
#ifndef __has_include
#define __has_include(x) 0
#endif
-#include <stddef.h>
-#include <string.h>
-#include <algorithm>
-#include <iosfwd>
-#include <iterator>
-#include <string>
+#include <stddef.h>
+#include <string.h>
+#include <algorithm>
+#include <iosfwd>
+#include <iterator>
+#include <string>
#if __has_include(<string_view>) && __cplusplus >= 201703L
#include <string_view>
#endif
#if defined(ARCADIA_ROOT)
-#include <util/generic/string.h>
+#include <util/generic/string.h>
#endif
-
-namespace re2 {
-
-class StringPiece {
- public:
+
+namespace re2 {
+
+class StringPiece {
+ public:
typedef std::char_traits<char> traits_type;
- typedef char value_type;
- typedef char* pointer;
- typedef const char* const_pointer;
- typedef char& reference;
- typedef const char& const_reference;
- typedef const char* const_iterator;
- typedef const_iterator iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef const_reverse_iterator reverse_iterator;
- typedef size_t size_type;
- typedef ptrdiff_t difference_type;
- static const size_type npos = static_cast<size_type>(-1);
-
- // We provide non-explicit singleton constructors so users can pass
- // in a "const char*" or a "string" wherever a "StringPiece" is
- // expected.
- StringPiece()
- : data_(NULL), size_(0) {}
+ typedef char value_type;
+ typedef char* pointer;
+ typedef const char* const_pointer;
+ typedef char& reference;
+ typedef const char& const_reference;
+ typedef const char* const_iterator;
+ typedef const_iterator iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef const_reverse_iterator reverse_iterator;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ static const size_type npos = static_cast<size_type>(-1);
+
+ // We provide non-explicit singleton constructors so users can pass
+ // in a "const char*" or a "string" wherever a "StringPiece" is
+ // expected.
+ StringPiece()
+ : data_(NULL), size_(0) {}
#if __has_include(<string_view>) && __cplusplus >= 201703L
StringPiece(const std::string_view& str)
: data_(str.data()), size_(str.size()) {}
#endif
- StringPiece(const std::string& str)
- : data_(str.data()), size_(str.size()) {}
- StringPiece(const char* str)
- : data_(str), size_(str == NULL ? 0 : strlen(str)) {}
- StringPiece(const char* str, size_type len)
- : data_(str), size_(len) {}
+ StringPiece(const std::string& str)
+ : data_(str.data()), size_(str.size()) {}
+ StringPiece(const char* str)
+ : data_(str), size_(str == NULL ? 0 : strlen(str)) {}
+ StringPiece(const char* str, size_type len)
+ : data_(str), size_(len) {}
#if defined(ARCADIA_ROOT)
StringPiece(const TString& str)
: StringPiece(str.data(), str.size()) {}
#endif
-
- const_iterator begin() const { return data_; }
- const_iterator end() const { return data_ + size_; }
- const_reverse_iterator rbegin() const {
- return const_reverse_iterator(data_ + size_);
- }
- const_reverse_iterator rend() const {
- return const_reverse_iterator(data_);
- }
-
- size_type size() const { return size_; }
- size_type length() const { return size_; }
- bool empty() const { return size_ == 0; }
-
- const_reference operator[](size_type i) const { return data_[i]; }
- const_pointer data() const { return data_; }
-
- void remove_prefix(size_type n) {
- data_ += n;
- size_ -= n;
- }
-
- void remove_suffix(size_type n) {
- size_ -= n;
- }
-
- void set(const char* str) {
- data_ = str;
- size_ = str == NULL ? 0 : strlen(str);
- }
-
- void set(const char* str, size_type len) {
- data_ = str;
- size_ = len;
- }
-
+
+ const_iterator begin() const { return data_; }
+ const_iterator end() const { return data_ + size_; }
+ const_reverse_iterator rbegin() const {
+ return const_reverse_iterator(data_ + size_);
+ }
+ const_reverse_iterator rend() const {
+ return const_reverse_iterator(data_);
+ }
+
+ size_type size() const { return size_; }
+ size_type length() const { return size_; }
+ bool empty() const { return size_ == 0; }
+
+ const_reference operator[](size_type i) const { return data_[i]; }
+ const_pointer data() const { return data_; }
+
+ void remove_prefix(size_type n) {
+ data_ += n;
+ size_ -= n;
+ }
+
+ void remove_suffix(size_type n) {
+ size_ -= n;
+ }
+
+ void set(const char* str) {
+ data_ = str;
+ size_ = str == NULL ? 0 : strlen(str);
+ }
+
+ void set(const char* str, size_type len) {
+ data_ = str;
+ size_ = len;
+ }
+
// Converts to `std::basic_string`.
template <typename A>
explicit operator std::basic_string<char, traits_type, A>() const {
@@ -117,101 +117,101 @@ class StringPiece {
return std::basic_string<char, traits_type, A>(data_, size_);
}
- std::string as_string() const {
- return std::string(data_, size_);
- }
-
- // We also define ToString() here, since many other string-like
- // interfaces name the routine that converts to a C++ string
- // "ToString", and it's confusing to have the method that does that
- // for a StringPiece be called "as_string()". We also leave the
- // "as_string()" method defined here for existing code.
- std::string ToString() const {
- return std::string(data_, size_);
- }
-
- void CopyToString(std::string* target) const {
- target->assign(data_, size_);
- }
-
- void AppendToString(std::string* target) const {
- target->append(data_, size_);
- }
-
- size_type copy(char* buf, size_type n, size_type pos = 0) const;
- StringPiece substr(size_type pos = 0, size_type n = npos) const;
-
- int compare(const StringPiece& x) const {
- size_type min_size = std::min(size(), x.size());
- if (min_size > 0) {
- int r = memcmp(data(), x.data(), min_size);
- if (r < 0) return -1;
- if (r > 0) return 1;
- }
- if (size() < x.size()) return -1;
- if (size() > x.size()) return 1;
- return 0;
- }
-
- // Does "this" start with "x"?
- bool starts_with(const StringPiece& x) const {
- return x.empty() ||
- (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
- }
-
- // Does "this" end with "x"?
- bool ends_with(const StringPiece& x) const {
- return x.empty() ||
- (size() >= x.size() &&
- memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
- }
-
- bool contains(const StringPiece& s) const {
- return find(s) != npos;
- }
-
- size_type find(const StringPiece& s, size_type pos = 0) const;
- size_type find(char c, size_type pos = 0) const;
- size_type rfind(const StringPiece& s, size_type pos = npos) const;
- size_type rfind(char c, size_type pos = npos) const;
-
- private:
- const_pointer data_;
- size_type size_;
-};
-
-inline bool operator==(const StringPiece& x, const StringPiece& y) {
- StringPiece::size_type len = x.size();
- if (len != y.size()) return false;
- return x.data() == y.data() || len == 0 ||
- memcmp(x.data(), y.data(), len) == 0;
-}
-
-inline bool operator!=(const StringPiece& x, const StringPiece& y) {
- return !(x == y);
-}
-
-inline bool operator<(const StringPiece& x, const StringPiece& y) {
- StringPiece::size_type min_size = std::min(x.size(), y.size());
- int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
- return (r < 0) || (r == 0 && x.size() < y.size());
-}
-
-inline bool operator>(const StringPiece& x, const StringPiece& y) {
- return y < x;
-}
-
-inline bool operator<=(const StringPiece& x, const StringPiece& y) {
- return !(x > y);
-}
-
-inline bool operator>=(const StringPiece& x, const StringPiece& y) {
- return !(x < y);
-}
-
-// Allow StringPiece to be logged.
-std::ostream& operator<<(std::ostream& o, const StringPiece& p);
-
-} // namespace re2
-
-#endif // RE2_STRINGPIECE_H_
+ std::string as_string() const {
+ return std::string(data_, size_);
+ }
+
+ // We also define ToString() here, since many other string-like
+ // interfaces name the routine that converts to a C++ string
+ // "ToString", and it's confusing to have the method that does that
+ // for a StringPiece be called "as_string()". We also leave the
+ // "as_string()" method defined here for existing code.
+ std::string ToString() const {
+ return std::string(data_, size_);
+ }
+
+ void CopyToString(std::string* target) const {
+ target->assign(data_, size_);
+ }
+
+ void AppendToString(std::string* target) const {
+ target->append(data_, size_);
+ }
+
+ size_type copy(char* buf, size_type n, size_type pos = 0) const;
+ StringPiece substr(size_type pos = 0, size_type n = npos) const;
+
+ int compare(const StringPiece& x) const {
+ size_type min_size = std::min(size(), x.size());
+ if (min_size > 0) {
+ int r = memcmp(data(), x.data(), min_size);
+ if (r < 0) return -1;
+ if (r > 0) return 1;
+ }
+ if (size() < x.size()) return -1;
+ if (size() > x.size()) return 1;
+ return 0;
+ }
+
+ // Does "this" start with "x"?
+ bool starts_with(const StringPiece& x) const {
+ return x.empty() ||
+ (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
+ }
+
+ // Does "this" end with "x"?
+ bool ends_with(const StringPiece& x) const {
+ return x.empty() ||
+ (size() >= x.size() &&
+ memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
+ }
+
+ bool contains(const StringPiece& s) const {
+ return find(s) != npos;
+ }
+
+ size_type find(const StringPiece& s, size_type pos = 0) const;
+ size_type find(char c, size_type pos = 0) const;
+ size_type rfind(const StringPiece& s, size_type pos = npos) const;
+ size_type rfind(char c, size_type pos = npos) const;
+
+ private:
+ const_pointer data_;
+ size_type size_;
+};
+
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+ StringPiece::size_type len = x.size();
+ if (len != y.size()) return false;
+ return x.data() == y.data() || len == 0 ||
+ memcmp(x.data(), y.data(), len) == 0;
+}
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y);
+}
+
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+ StringPiece::size_type min_size = std::min(x.size(), y.size());
+ int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
+ return (r < 0) || (r == 0 && x.size() < y.size());
+}
+
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+ return y < x;
+}
+
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+ return !(x > y);
+}
+
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+ return !(x < y);
+}
+
+// Allow StringPiece to be logged.
+std::ostream& operator<<(std::ostream& o, const StringPiece& p);
+
+} // namespace re2
+
+#endif // RE2_STRINGPIECE_H_
diff --git a/contrib/libs/re2/re2/tostring.cc b/contrib/libs/re2/re2/tostring.cc
index 9c1c038ca6..edc6375584 100644
--- a/contrib/libs/re2/re2/tostring.cc
+++ b/contrib/libs/re2/re2/tostring.cc
@@ -5,13 +5,13 @@
// Format a regular expression structure as a string.
// Tested by parse_test.cc
-#include <string.h>
-#include <string>
-
-#include "util/util.h"
-#include "util/logging.h"
-#include "util/strutil.h"
-#include "util/utf.h"
+#include <string.h>
+#include <string>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@@ -48,8 +48,8 @@ class ToStringWalker : public Regexp::Walker<int> {
private:
std::string* t_; // The string the walker appends to.
- ToStringWalker(const ToStringWalker&) = delete;
- ToStringWalker& operator=(const ToStringWalker&) = delete;
+ ToStringWalker(const ToStringWalker&) = delete;
+ ToStringWalker& operator=(const ToStringWalker&) = delete;
};
std::string Regexp::ToString() {
@@ -101,8 +101,8 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
case kRegexpCapture:
t_->append("(");
- if (re->cap() == 0)
- LOG(DFATAL) << "kRegexpCapture cap() == 0";
+ if (re->cap() == 0)
+ LOG(DFATAL) << "kRegexpCapture cap() == 0";
if (re->name()) {
t_->append("?P<");
t_->append(*re->name());
@@ -129,12 +129,12 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
t->append(1, '\\');
- t->append(1, static_cast<char>(r));
+ t->append(1, static_cast<char>(r));
} else if (foldcase && 'a' <= r && r <= 'z') {
- r -= 'a' - 'A';
+ r -= 'a' - 'A';
t->append(1, '[');
- t->append(1, static_cast<char>(r));
- t->append(1, static_cast<char>(r) + 'a' - 'A');
+ t->append(1, static_cast<char>(r));
+ t->append(1, static_cast<char>(r) + 'a' - 'A');
t->append(1, ']');
} else {
AppendCCRange(t, r, r);
@@ -162,14 +162,14 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
break;
case kRegexpLiteral:
- AppendLiteral(t_, re->rune(),
- (re->parse_flags() & Regexp::FoldCase) != 0);
+ AppendLiteral(t_, re->rune(),
+ (re->parse_flags() & Regexp::FoldCase) != 0);
break;
case kRegexpLiteralString:
for (int i = 0; i < re->nrunes(); i++)
- AppendLiteral(t_, re->runes()[i],
- (re->parse_flags() & Regexp::FoldCase) != 0);
+ AppendLiteral(t_, re->runes()[i],
+ (re->parse_flags() & Regexp::FoldCase) != 0);
if (prec < PrecConcat)
t_->append(")");
break;
@@ -307,7 +307,7 @@ static void AppendCCChar(std::string* t, Rune r) {
if (0x20 <= r && r <= 0x7E) {
if (strchr("[]^-\\", r))
t->append("\\");
- t->append(1, static_cast<char>(r));
+ t->append(1, static_cast<char>(r));
return;
}
switch (r) {
diff --git a/contrib/libs/re2/re2/unicode_casefold.cc b/contrib/libs/re2/re2/unicode_casefold.cc
index d9de2821d5..72e0645652 100644
--- a/contrib/libs/re2/re2/unicode_casefold.cc
+++ b/contrib/libs/re2/re2/unicode_casefold.cc
@@ -1,191 +1,191 @@
-
-// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
-// make_unicode_casefold.py >unicode_casefold.cc
-
-#include "re2/unicode_casefold.h"
-
-namespace re2 {
-
-
+
+// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
+// make_unicode_casefold.py >unicode_casefold.cc
+
+#include "re2/unicode_casefold.h"
+
+namespace re2 {
+
+
// 1424 groups, 2878 pairs, 367 ranges
-const CaseFold unicode_casefold[] = {
- { 65, 90, 32 },
- { 97, 106, -32 },
- { 107, 107, 8383 },
- { 108, 114, -32 },
- { 115, 115, 268 },
- { 116, 122, -32 },
- { 181, 181, 743 },
- { 192, 214, 32 },
- { 216, 222, 32 },
- { 223, 223, 7615 },
- { 224, 228, -32 },
- { 229, 229, 8262 },
- { 230, 246, -32 },
- { 248, 254, -32 },
- { 255, 255, 121 },
- { 256, 303, EvenOdd },
- { 306, 311, EvenOdd },
- { 313, 328, OddEven },
- { 330, 375, EvenOdd },
- { 376, 376, -121 },
- { 377, 382, OddEven },
- { 383, 383, -300 },
- { 384, 384, 195 },
- { 385, 385, 210 },
- { 386, 389, EvenOdd },
- { 390, 390, 206 },
- { 391, 392, OddEven },
- { 393, 394, 205 },
- { 395, 396, OddEven },
- { 398, 398, 79 },
- { 399, 399, 202 },
- { 400, 400, 203 },
- { 401, 402, OddEven },
- { 403, 403, 205 },
- { 404, 404, 207 },
- { 405, 405, 97 },
- { 406, 406, 211 },
- { 407, 407, 209 },
- { 408, 409, EvenOdd },
- { 410, 410, 163 },
- { 412, 412, 211 },
- { 413, 413, 213 },
- { 414, 414, 130 },
- { 415, 415, 214 },
- { 416, 421, EvenOdd },
- { 422, 422, 218 },
- { 423, 424, OddEven },
- { 425, 425, 218 },
- { 428, 429, EvenOdd },
- { 430, 430, 218 },
- { 431, 432, OddEven },
- { 433, 434, 217 },
- { 435, 438, OddEven },
- { 439, 439, 219 },
- { 440, 441, EvenOdd },
- { 444, 445, EvenOdd },
- { 447, 447, 56 },
- { 452, 452, EvenOdd },
- { 453, 453, OddEven },
- { 454, 454, -2 },
- { 455, 455, OddEven },
- { 456, 456, EvenOdd },
- { 457, 457, -2 },
- { 458, 458, EvenOdd },
- { 459, 459, OddEven },
- { 460, 460, -2 },
- { 461, 476, OddEven },
- { 477, 477, -79 },
- { 478, 495, EvenOdd },
- { 497, 497, OddEven },
- { 498, 498, EvenOdd },
- { 499, 499, -2 },
- { 500, 501, EvenOdd },
- { 502, 502, -97 },
- { 503, 503, -56 },
- { 504, 543, EvenOdd },
- { 544, 544, -130 },
- { 546, 563, EvenOdd },
- { 570, 570, 10795 },
- { 571, 572, OddEven },
- { 573, 573, -163 },
- { 574, 574, 10792 },
- { 575, 576, 10815 },
- { 577, 578, OddEven },
- { 579, 579, -195 },
- { 580, 580, 69 },
- { 581, 581, 71 },
- { 582, 591, EvenOdd },
- { 592, 592, 10783 },
- { 593, 593, 10780 },
- { 594, 594, 10782 },
- { 595, 595, -210 },
- { 596, 596, -206 },
- { 598, 599, -205 },
- { 601, 601, -202 },
- { 603, 603, -203 },
- { 604, 604, 42319 },
- { 608, 608, -205 },
- { 609, 609, 42315 },
- { 611, 611, -207 },
- { 613, 613, 42280 },
- { 614, 614, 42308 },
- { 616, 616, -209 },
- { 617, 617, -211 },
+const CaseFold unicode_casefold[] = {
+ { 65, 90, 32 },
+ { 97, 106, -32 },
+ { 107, 107, 8383 },
+ { 108, 114, -32 },
+ { 115, 115, 268 },
+ { 116, 122, -32 },
+ { 181, 181, 743 },
+ { 192, 214, 32 },
+ { 216, 222, 32 },
+ { 223, 223, 7615 },
+ { 224, 228, -32 },
+ { 229, 229, 8262 },
+ { 230, 246, -32 },
+ { 248, 254, -32 },
+ { 255, 255, 121 },
+ { 256, 303, EvenOdd },
+ { 306, 311, EvenOdd },
+ { 313, 328, OddEven },
+ { 330, 375, EvenOdd },
+ { 376, 376, -121 },
+ { 377, 382, OddEven },
+ { 383, 383, -300 },
+ { 384, 384, 195 },
+ { 385, 385, 210 },
+ { 386, 389, EvenOdd },
+ { 390, 390, 206 },
+ { 391, 392, OddEven },
+ { 393, 394, 205 },
+ { 395, 396, OddEven },
+ { 398, 398, 79 },
+ { 399, 399, 202 },
+ { 400, 400, 203 },
+ { 401, 402, OddEven },
+ { 403, 403, 205 },
+ { 404, 404, 207 },
+ { 405, 405, 97 },
+ { 406, 406, 211 },
+ { 407, 407, 209 },
+ { 408, 409, EvenOdd },
+ { 410, 410, 163 },
+ { 412, 412, 211 },
+ { 413, 413, 213 },
+ { 414, 414, 130 },
+ { 415, 415, 214 },
+ { 416, 421, EvenOdd },
+ { 422, 422, 218 },
+ { 423, 424, OddEven },
+ { 425, 425, 218 },
+ { 428, 429, EvenOdd },
+ { 430, 430, 218 },
+ { 431, 432, OddEven },
+ { 433, 434, 217 },
+ { 435, 438, OddEven },
+ { 439, 439, 219 },
+ { 440, 441, EvenOdd },
+ { 444, 445, EvenOdd },
+ { 447, 447, 56 },
+ { 452, 452, EvenOdd },
+ { 453, 453, OddEven },
+ { 454, 454, -2 },
+ { 455, 455, OddEven },
+ { 456, 456, EvenOdd },
+ { 457, 457, -2 },
+ { 458, 458, EvenOdd },
+ { 459, 459, OddEven },
+ { 460, 460, -2 },
+ { 461, 476, OddEven },
+ { 477, 477, -79 },
+ { 478, 495, EvenOdd },
+ { 497, 497, OddEven },
+ { 498, 498, EvenOdd },
+ { 499, 499, -2 },
+ { 500, 501, EvenOdd },
+ { 502, 502, -97 },
+ { 503, 503, -56 },
+ { 504, 543, EvenOdd },
+ { 544, 544, -130 },
+ { 546, 563, EvenOdd },
+ { 570, 570, 10795 },
+ { 571, 572, OddEven },
+ { 573, 573, -163 },
+ { 574, 574, 10792 },
+ { 575, 576, 10815 },
+ { 577, 578, OddEven },
+ { 579, 579, -195 },
+ { 580, 580, 69 },
+ { 581, 581, 71 },
+ { 582, 591, EvenOdd },
+ { 592, 592, 10783 },
+ { 593, 593, 10780 },
+ { 594, 594, 10782 },
+ { 595, 595, -210 },
+ { 596, 596, -206 },
+ { 598, 599, -205 },
+ { 601, 601, -202 },
+ { 603, 603, -203 },
+ { 604, 604, 42319 },
+ { 608, 608, -205 },
+ { 609, 609, 42315 },
+ { 611, 611, -207 },
+ { 613, 613, 42280 },
+ { 614, 614, 42308 },
+ { 616, 616, -209 },
+ { 617, 617, -211 },
{ 618, 618, 42308 },
- { 619, 619, 10743 },
- { 620, 620, 42305 },
- { 623, 623, -211 },
- { 625, 625, 10749 },
- { 626, 626, -213 },
- { 629, 629, -214 },
- { 637, 637, 10727 },
- { 640, 640, -218 },
+ { 619, 619, 10743 },
+ { 620, 620, 42305 },
+ { 623, 623, -211 },
+ { 625, 625, 10749 },
+ { 626, 626, -213 },
+ { 629, 629, -214 },
+ { 637, 637, 10727 },
+ { 640, 640, -218 },
{ 642, 642, 42307 },
- { 643, 643, -218 },
- { 647, 647, 42282 },
- { 648, 648, -218 },
- { 649, 649, -69 },
- { 650, 651, -217 },
- { 652, 652, -71 },
- { 658, 658, -219 },
- { 669, 669, 42261 },
- { 670, 670, 42258 },
- { 837, 837, 84 },
- { 880, 883, EvenOdd },
- { 886, 887, EvenOdd },
- { 891, 893, 130 },
- { 895, 895, 116 },
- { 902, 902, 38 },
- { 904, 906, 37 },
- { 908, 908, 64 },
- { 910, 911, 63 },
- { 913, 929, 32 },
- { 931, 931, 31 },
- { 932, 939, 32 },
- { 940, 940, -38 },
- { 941, 943, -37 },
- { 945, 945, -32 },
- { 946, 946, 30 },
- { 947, 948, -32 },
- { 949, 949, 64 },
- { 950, 951, -32 },
- { 952, 952, 25 },
- { 953, 953, 7173 },
- { 954, 954, 54 },
- { 955, 955, -32 },
- { 956, 956, -775 },
- { 957, 959, -32 },
- { 960, 960, 22 },
- { 961, 961, 48 },
- { 962, 962, EvenOdd },
- { 963, 965, -32 },
- { 966, 966, 15 },
- { 967, 968, -32 },
- { 969, 969, 7517 },
- { 970, 971, -32 },
- { 972, 972, -64 },
- { 973, 974, -63 },
- { 975, 975, 8 },
- { 976, 976, -62 },
- { 977, 977, 35 },
- { 981, 981, -47 },
- { 982, 982, -54 },
- { 983, 983, -8 },
- { 984, 1007, EvenOdd },
- { 1008, 1008, -86 },
- { 1009, 1009, -80 },
- { 1010, 1010, 7 },
- { 1011, 1011, -116 },
- { 1012, 1012, -92 },
- { 1013, 1013, -96 },
- { 1015, 1016, OddEven },
- { 1017, 1017, -7 },
- { 1018, 1019, EvenOdd },
- { 1021, 1023, -130 },
- { 1024, 1039, 80 },
- { 1040, 1071, 32 },
+ { 643, 643, -218 },
+ { 647, 647, 42282 },
+ { 648, 648, -218 },
+ { 649, 649, -69 },
+ { 650, 651, -217 },
+ { 652, 652, -71 },
+ { 658, 658, -219 },
+ { 669, 669, 42261 },
+ { 670, 670, 42258 },
+ { 837, 837, 84 },
+ { 880, 883, EvenOdd },
+ { 886, 887, EvenOdd },
+ { 891, 893, 130 },
+ { 895, 895, 116 },
+ { 902, 902, 38 },
+ { 904, 906, 37 },
+ { 908, 908, 64 },
+ { 910, 911, 63 },
+ { 913, 929, 32 },
+ { 931, 931, 31 },
+ { 932, 939, 32 },
+ { 940, 940, -38 },
+ { 941, 943, -37 },
+ { 945, 945, -32 },
+ { 946, 946, 30 },
+ { 947, 948, -32 },
+ { 949, 949, 64 },
+ { 950, 951, -32 },
+ { 952, 952, 25 },
+ { 953, 953, 7173 },
+ { 954, 954, 54 },
+ { 955, 955, -32 },
+ { 956, 956, -775 },
+ { 957, 959, -32 },
+ { 960, 960, 22 },
+ { 961, 961, 48 },
+ { 962, 962, EvenOdd },
+ { 963, 965, -32 },
+ { 966, 966, 15 },
+ { 967, 968, -32 },
+ { 969, 969, 7517 },
+ { 970, 971, -32 },
+ { 972, 972, -64 },
+ { 973, 974, -63 },
+ { 975, 975, 8 },
+ { 976, 976, -62 },
+ { 977, 977, 35 },
+ { 981, 981, -47 },
+ { 982, 982, -54 },
+ { 983, 983, -8 },
+ { 984, 1007, EvenOdd },
+ { 1008, 1008, -86 },
+ { 1009, 1009, -80 },
+ { 1010, 1010, 7 },
+ { 1011, 1011, -116 },
+ { 1012, 1012, -92 },
+ { 1013, 1013, -96 },
+ { 1015, 1016, OddEven },
+ { 1017, 1017, -7 },
+ { 1018, 1019, EvenOdd },
+ { 1021, 1023, -130 },
+ { 1024, 1039, 80 },
+ { 1040, 1071, 32 },
{ 1072, 1073, -32 },
{ 1074, 1074, 6222 },
{ 1075, 1075, -32 },
@@ -197,25 +197,25 @@ const CaseFold unicode_casefold[] = {
{ 1091, 1097, -32 },
{ 1098, 1098, 6204 },
{ 1099, 1103, -32 },
- { 1104, 1119, -80 },
+ { 1104, 1119, -80 },
{ 1120, 1122, EvenOdd },
{ 1123, 1123, 6180 },
{ 1124, 1153, EvenOdd },
- { 1162, 1215, EvenOdd },
- { 1216, 1216, 15 },
- { 1217, 1230, OddEven },
- { 1231, 1231, -15 },
- { 1232, 1327, EvenOdd },
- { 1329, 1366, 48 },
- { 1377, 1414, -48 },
- { 4256, 4293, 7264 },
- { 4295, 4295, 7264 },
- { 4301, 4301, 7264 },
+ { 1162, 1215, EvenOdd },
+ { 1216, 1216, 15 },
+ { 1217, 1230, OddEven },
+ { 1231, 1231, -15 },
+ { 1232, 1327, EvenOdd },
+ { 1329, 1366, 48 },
+ { 1377, 1414, -48 },
+ { 4256, 4293, 7264 },
+ { 4295, 4295, 7264 },
+ { 4301, 4301, 7264 },
{ 4304, 4346, 3008 },
{ 4349, 4351, 3008 },
- { 5024, 5103, 38864 },
- { 5104, 5109, 8 },
- { 5112, 5117, -8 },
+ { 5024, 5103, 38864 },
+ { 5104, 5109, 8 },
+ { 5112, 5117, -8 },
{ 7296, 7296, -6254 },
{ 7297, 7297, -6253 },
{ 7298, 7298, -6244 },
@@ -227,123 +227,123 @@ const CaseFold unicode_casefold[] = {
{ 7304, 7304, 35266 },
{ 7312, 7354, -3008 },
{ 7357, 7359, -3008 },
- { 7545, 7545, 35332 },
- { 7549, 7549, 3814 },
+ { 7545, 7545, 35332 },
+ { 7549, 7549, 3814 },
{ 7566, 7566, 35384 },
- { 7680, 7776, EvenOdd },
- { 7777, 7777, 58 },
- { 7778, 7829, EvenOdd },
- { 7835, 7835, -59 },
- { 7838, 7838, -7615 },
- { 7840, 7935, EvenOdd },
- { 7936, 7943, 8 },
- { 7944, 7951, -8 },
- { 7952, 7957, 8 },
- { 7960, 7965, -8 },
- { 7968, 7975, 8 },
- { 7976, 7983, -8 },
- { 7984, 7991, 8 },
- { 7992, 7999, -8 },
- { 8000, 8005, 8 },
- { 8008, 8013, -8 },
- { 8017, 8017, 8 },
- { 8019, 8019, 8 },
- { 8021, 8021, 8 },
- { 8023, 8023, 8 },
- { 8025, 8025, -8 },
- { 8027, 8027, -8 },
- { 8029, 8029, -8 },
- { 8031, 8031, -8 },
- { 8032, 8039, 8 },
- { 8040, 8047, -8 },
- { 8048, 8049, 74 },
- { 8050, 8053, 86 },
- { 8054, 8055, 100 },
- { 8056, 8057, 128 },
- { 8058, 8059, 112 },
- { 8060, 8061, 126 },
- { 8064, 8071, 8 },
- { 8072, 8079, -8 },
- { 8080, 8087, 8 },
- { 8088, 8095, -8 },
- { 8096, 8103, 8 },
- { 8104, 8111, -8 },
- { 8112, 8113, 8 },
- { 8115, 8115, 9 },
- { 8120, 8121, -8 },
- { 8122, 8123, -74 },
- { 8124, 8124, -9 },
- { 8126, 8126, -7289 },
- { 8131, 8131, 9 },
- { 8136, 8139, -86 },
- { 8140, 8140, -9 },
- { 8144, 8145, 8 },
- { 8152, 8153, -8 },
- { 8154, 8155, -100 },
- { 8160, 8161, 8 },
- { 8165, 8165, 7 },
- { 8168, 8169, -8 },
- { 8170, 8171, -112 },
- { 8172, 8172, -7 },
- { 8179, 8179, 9 },
- { 8184, 8185, -128 },
- { 8186, 8187, -126 },
- { 8188, 8188, -9 },
- { 8486, 8486, -7549 },
- { 8490, 8490, -8415 },
- { 8491, 8491, -8294 },
- { 8498, 8498, 28 },
- { 8526, 8526, -28 },
- { 8544, 8559, 16 },
- { 8560, 8575, -16 },
- { 8579, 8580, OddEven },
- { 9398, 9423, 26 },
- { 9424, 9449, -26 },
+ { 7680, 7776, EvenOdd },
+ { 7777, 7777, 58 },
+ { 7778, 7829, EvenOdd },
+ { 7835, 7835, -59 },
+ { 7838, 7838, -7615 },
+ { 7840, 7935, EvenOdd },
+ { 7936, 7943, 8 },
+ { 7944, 7951, -8 },
+ { 7952, 7957, 8 },
+ { 7960, 7965, -8 },
+ { 7968, 7975, 8 },
+ { 7976, 7983, -8 },
+ { 7984, 7991, 8 },
+ { 7992, 7999, -8 },
+ { 8000, 8005, 8 },
+ { 8008, 8013, -8 },
+ { 8017, 8017, 8 },
+ { 8019, 8019, 8 },
+ { 8021, 8021, 8 },
+ { 8023, 8023, 8 },
+ { 8025, 8025, -8 },
+ { 8027, 8027, -8 },
+ { 8029, 8029, -8 },
+ { 8031, 8031, -8 },
+ { 8032, 8039, 8 },
+ { 8040, 8047, -8 },
+ { 8048, 8049, 74 },
+ { 8050, 8053, 86 },
+ { 8054, 8055, 100 },
+ { 8056, 8057, 128 },
+ { 8058, 8059, 112 },
+ { 8060, 8061, 126 },
+ { 8064, 8071, 8 },
+ { 8072, 8079, -8 },
+ { 8080, 8087, 8 },
+ { 8088, 8095, -8 },
+ { 8096, 8103, 8 },
+ { 8104, 8111, -8 },
+ { 8112, 8113, 8 },
+ { 8115, 8115, 9 },
+ { 8120, 8121, -8 },
+ { 8122, 8123, -74 },
+ { 8124, 8124, -9 },
+ { 8126, 8126, -7289 },
+ { 8131, 8131, 9 },
+ { 8136, 8139, -86 },
+ { 8140, 8140, -9 },
+ { 8144, 8145, 8 },
+ { 8152, 8153, -8 },
+ { 8154, 8155, -100 },
+ { 8160, 8161, 8 },
+ { 8165, 8165, 7 },
+ { 8168, 8169, -8 },
+ { 8170, 8171, -112 },
+ { 8172, 8172, -7 },
+ { 8179, 8179, 9 },
+ { 8184, 8185, -128 },
+ { 8186, 8187, -126 },
+ { 8188, 8188, -9 },
+ { 8486, 8486, -7549 },
+ { 8490, 8490, -8415 },
+ { 8491, 8491, -8294 },
+ { 8498, 8498, 28 },
+ { 8526, 8526, -28 },
+ { 8544, 8559, 16 },
+ { 8560, 8575, -16 },
+ { 8579, 8580, OddEven },
+ { 9398, 9423, 26 },
+ { 9424, 9449, -26 },
{ 11264, 11311, 48 },
{ 11312, 11359, -48 },
- { 11360, 11361, EvenOdd },
- { 11362, 11362, -10743 },
- { 11363, 11363, -3814 },
- { 11364, 11364, -10727 },
- { 11365, 11365, -10795 },
- { 11366, 11366, -10792 },
- { 11367, 11372, OddEven },
- { 11373, 11373, -10780 },
- { 11374, 11374, -10749 },
- { 11375, 11375, -10783 },
- { 11376, 11376, -10782 },
- { 11378, 11379, EvenOdd },
- { 11381, 11382, OddEven },
- { 11390, 11391, -10815 },
- { 11392, 11491, EvenOdd },
- { 11499, 11502, OddEven },
- { 11506, 11507, EvenOdd },
- { 11520, 11557, -7264 },
- { 11559, 11559, -7264 },
- { 11565, 11565, -7264 },
+ { 11360, 11361, EvenOdd },
+ { 11362, 11362, -10743 },
+ { 11363, 11363, -3814 },
+ { 11364, 11364, -10727 },
+ { 11365, 11365, -10795 },
+ { 11366, 11366, -10792 },
+ { 11367, 11372, OddEven },
+ { 11373, 11373, -10780 },
+ { 11374, 11374, -10749 },
+ { 11375, 11375, -10783 },
+ { 11376, 11376, -10782 },
+ { 11378, 11379, EvenOdd },
+ { 11381, 11382, OddEven },
+ { 11390, 11391, -10815 },
+ { 11392, 11491, EvenOdd },
+ { 11499, 11502, OddEven },
+ { 11506, 11507, EvenOdd },
+ { 11520, 11557, -7264 },
+ { 11559, 11559, -7264 },
+ { 11565, 11565, -7264 },
{ 42560, 42570, EvenOdd },
{ 42571, 42571, -35267 },
{ 42572, 42605, EvenOdd },
- { 42624, 42651, EvenOdd },
- { 42786, 42799, EvenOdd },
- { 42802, 42863, EvenOdd },
- { 42873, 42876, OddEven },
- { 42877, 42877, -35332 },
- { 42878, 42887, EvenOdd },
- { 42891, 42892, OddEven },
- { 42893, 42893, -42280 },
- { 42896, 42899, EvenOdd },
+ { 42624, 42651, EvenOdd },
+ { 42786, 42799, EvenOdd },
+ { 42802, 42863, EvenOdd },
+ { 42873, 42876, OddEven },
+ { 42877, 42877, -35332 },
+ { 42878, 42887, EvenOdd },
+ { 42891, 42892, OddEven },
+ { 42893, 42893, -42280 },
+ { 42896, 42899, EvenOdd },
{ 42900, 42900, 48 },
- { 42902, 42921, EvenOdd },
- { 42922, 42922, -42308 },
- { 42923, 42923, -42319 },
- { 42924, 42924, -42315 },
- { 42925, 42925, -42305 },
+ { 42902, 42921, EvenOdd },
+ { 42922, 42922, -42308 },
+ { 42923, 42923, -42319 },
+ { 42924, 42924, -42315 },
+ { 42925, 42925, -42305 },
{ 42926, 42926, -42308 },
- { 42928, 42928, -42258 },
- { 42929, 42929, -42282 },
- { 42930, 42930, -42261 },
- { 42931, 42931, 928 },
+ { 42928, 42928, -42258 },
+ { 42929, 42929, -42282 },
+ { 42930, 42930, -42261 },
+ { 42931, 42931, 928 },
{ 42932, 42947, EvenOdd },
{ 42948, 42948, -48 },
{ 42949, 42949, -42307 },
@@ -352,12 +352,12 @@ const CaseFold unicode_casefold[] = {
{ 42960, 42961, EvenOdd },
{ 42966, 42969, EvenOdd },
{ 42997, 42998, OddEven },
- { 43859, 43859, -928 },
- { 43888, 43967, -38864 },
- { 65313, 65338, 32 },
- { 65345, 65370, -32 },
- { 66560, 66599, 40 },
- { 66600, 66639, -40 },
+ { 43859, 43859, -928 },
+ { 43888, 43967, -38864 },
+ { 65313, 65338, 32 },
+ { 65345, 65370, -32 },
+ { 66560, 66599, 40 },
+ { 66600, 66639, -40 },
{ 66736, 66771, 40 },
{ 66776, 66811, -40 },
{ 66928, 66938, 39 },
@@ -368,120 +368,120 @@ const CaseFold unicode_casefold[] = {
{ 66979, 66993, -39 },
{ 66995, 67001, -39 },
{ 67003, 67004, -39 },
- { 68736, 68786, 64 },
- { 68800, 68850, -64 },
- { 71840, 71871, 32 },
- { 71872, 71903, -32 },
+ { 68736, 68786, 64 },
+ { 68800, 68850, -64 },
+ { 71840, 71871, 32 },
+ { 71872, 71903, -32 },
{ 93760, 93791, 32 },
{ 93792, 93823, -32 },
{ 125184, 125217, 34 },
{ 125218, 125251, -34 },
-};
+};
const int num_unicode_casefold = 367;
-
+
// 1424 groups, 1454 pairs, 205 ranges
-const CaseFold unicode_tolower[] = {
- { 65, 90, 32 },
- { 181, 181, 775 },
- { 192, 214, 32 },
- { 216, 222, 32 },
- { 256, 302, EvenOddSkip },
- { 306, 310, EvenOddSkip },
- { 313, 327, OddEvenSkip },
- { 330, 374, EvenOddSkip },
- { 376, 376, -121 },
- { 377, 381, OddEvenSkip },
- { 383, 383, -268 },
- { 385, 385, 210 },
- { 386, 388, EvenOddSkip },
- { 390, 390, 206 },
- { 391, 391, OddEven },
- { 393, 394, 205 },
- { 395, 395, OddEven },
- { 398, 398, 79 },
- { 399, 399, 202 },
- { 400, 400, 203 },
- { 401, 401, OddEven },
- { 403, 403, 205 },
- { 404, 404, 207 },
- { 406, 406, 211 },
- { 407, 407, 209 },
- { 408, 408, EvenOdd },
- { 412, 412, 211 },
- { 413, 413, 213 },
- { 415, 415, 214 },
- { 416, 420, EvenOddSkip },
- { 422, 422, 218 },
- { 423, 423, OddEven },
- { 425, 425, 218 },
- { 428, 428, EvenOdd },
- { 430, 430, 218 },
- { 431, 431, OddEven },
- { 433, 434, 217 },
- { 435, 437, OddEvenSkip },
- { 439, 439, 219 },
- { 440, 440, EvenOdd },
- { 444, 444, EvenOdd },
- { 452, 452, 2 },
- { 453, 453, OddEven },
- { 455, 455, 2 },
- { 456, 456, EvenOdd },
- { 458, 458, 2 },
- { 459, 475, OddEvenSkip },
- { 478, 494, EvenOddSkip },
- { 497, 497, 2 },
- { 498, 500, EvenOddSkip },
- { 502, 502, -97 },
- { 503, 503, -56 },
- { 504, 542, EvenOddSkip },
- { 544, 544, -130 },
- { 546, 562, EvenOddSkip },
- { 570, 570, 10795 },
- { 571, 571, OddEven },
- { 573, 573, -163 },
- { 574, 574, 10792 },
- { 577, 577, OddEven },
- { 579, 579, -195 },
- { 580, 580, 69 },
- { 581, 581, 71 },
- { 582, 590, EvenOddSkip },
- { 837, 837, 116 },
- { 880, 882, EvenOddSkip },
- { 886, 886, EvenOdd },
- { 895, 895, 116 },
- { 902, 902, 38 },
- { 904, 906, 37 },
- { 908, 908, 64 },
- { 910, 911, 63 },
- { 913, 929, 32 },
- { 931, 939, 32 },
- { 962, 962, EvenOdd },
- { 975, 975, 8 },
- { 976, 976, -30 },
- { 977, 977, -25 },
- { 981, 981, -15 },
- { 982, 982, -22 },
- { 984, 1006, EvenOddSkip },
- { 1008, 1008, -54 },
- { 1009, 1009, -48 },
- { 1012, 1012, -60 },
- { 1013, 1013, -64 },
- { 1015, 1015, OddEven },
- { 1017, 1017, -7 },
- { 1018, 1018, EvenOdd },
- { 1021, 1023, -130 },
- { 1024, 1039, 80 },
- { 1040, 1071, 32 },
- { 1120, 1152, EvenOddSkip },
- { 1162, 1214, EvenOddSkip },
- { 1216, 1216, 15 },
- { 1217, 1229, OddEvenSkip },
- { 1232, 1326, EvenOddSkip },
- { 1329, 1366, 48 },
- { 4256, 4293, 7264 },
- { 4295, 4295, 7264 },
- { 4301, 4301, 7264 },
- { 5112, 5117, -8 },
+const CaseFold unicode_tolower[] = {
+ { 65, 90, 32 },
+ { 181, 181, 775 },
+ { 192, 214, 32 },
+ { 216, 222, 32 },
+ { 256, 302, EvenOddSkip },
+ { 306, 310, EvenOddSkip },
+ { 313, 327, OddEvenSkip },
+ { 330, 374, EvenOddSkip },
+ { 376, 376, -121 },
+ { 377, 381, OddEvenSkip },
+ { 383, 383, -268 },
+ { 385, 385, 210 },
+ { 386, 388, EvenOddSkip },
+ { 390, 390, 206 },
+ { 391, 391, OddEven },
+ { 393, 394, 205 },
+ { 395, 395, OddEven },
+ { 398, 398, 79 },
+ { 399, 399, 202 },
+ { 400, 400, 203 },
+ { 401, 401, OddEven },
+ { 403, 403, 205 },
+ { 404, 404, 207 },
+ { 406, 406, 211 },
+ { 407, 407, 209 },
+ { 408, 408, EvenOdd },
+ { 412, 412, 211 },
+ { 413, 413, 213 },
+ { 415, 415, 214 },
+ { 416, 420, EvenOddSkip },
+ { 422, 422, 218 },
+ { 423, 423, OddEven },
+ { 425, 425, 218 },
+ { 428, 428, EvenOdd },
+ { 430, 430, 218 },
+ { 431, 431, OddEven },
+ { 433, 434, 217 },
+ { 435, 437, OddEvenSkip },
+ { 439, 439, 219 },
+ { 440, 440, EvenOdd },
+ { 444, 444, EvenOdd },
+ { 452, 452, 2 },
+ { 453, 453, OddEven },
+ { 455, 455, 2 },
+ { 456, 456, EvenOdd },
+ { 458, 458, 2 },
+ { 459, 475, OddEvenSkip },
+ { 478, 494, EvenOddSkip },
+ { 497, 497, 2 },
+ { 498, 500, EvenOddSkip },
+ { 502, 502, -97 },
+ { 503, 503, -56 },
+ { 504, 542, EvenOddSkip },
+ { 544, 544, -130 },
+ { 546, 562, EvenOddSkip },
+ { 570, 570, 10795 },
+ { 571, 571, OddEven },
+ { 573, 573, -163 },
+ { 574, 574, 10792 },
+ { 577, 577, OddEven },
+ { 579, 579, -195 },
+ { 580, 580, 69 },
+ { 581, 581, 71 },
+ { 582, 590, EvenOddSkip },
+ { 837, 837, 116 },
+ { 880, 882, EvenOddSkip },
+ { 886, 886, EvenOdd },
+ { 895, 895, 116 },
+ { 902, 902, 38 },
+ { 904, 906, 37 },
+ { 908, 908, 64 },
+ { 910, 911, 63 },
+ { 913, 929, 32 },
+ { 931, 939, 32 },
+ { 962, 962, EvenOdd },
+ { 975, 975, 8 },
+ { 976, 976, -30 },
+ { 977, 977, -25 },
+ { 981, 981, -15 },
+ { 982, 982, -22 },
+ { 984, 1006, EvenOddSkip },
+ { 1008, 1008, -54 },
+ { 1009, 1009, -48 },
+ { 1012, 1012, -60 },
+ { 1013, 1013, -64 },
+ { 1015, 1015, OddEven },
+ { 1017, 1017, -7 },
+ { 1018, 1018, EvenOdd },
+ { 1021, 1023, -130 },
+ { 1024, 1039, 80 },
+ { 1040, 1071, 32 },
+ { 1120, 1152, EvenOddSkip },
+ { 1162, 1214, EvenOddSkip },
+ { 1216, 1216, 15 },
+ { 1217, 1229, OddEvenSkip },
+ { 1232, 1326, EvenOddSkip },
+ { 1329, 1366, 48 },
+ { 4256, 4293, 7264 },
+ { 4295, 4295, 7264 },
+ { 4301, 4301, 7264 },
+ { 5112, 5117, -8 },
{ 7296, 7296, -6222 },
{ 7297, 7297, -6221 },
{ 7298, 7298, -6212 },
@@ -492,80 +492,80 @@ const CaseFold unicode_tolower[] = {
{ 7304, 7304, 35267 },
{ 7312, 7354, -3008 },
{ 7357, 7359, -3008 },
- { 7680, 7828, EvenOddSkip },
- { 7835, 7835, -58 },
- { 7838, 7838, -7615 },
- { 7840, 7934, EvenOddSkip },
- { 7944, 7951, -8 },
- { 7960, 7965, -8 },
- { 7976, 7983, -8 },
- { 7992, 7999, -8 },
- { 8008, 8013, -8 },
- { 8025, 8025, -8 },
- { 8027, 8027, -8 },
- { 8029, 8029, -8 },
- { 8031, 8031, -8 },
- { 8040, 8047, -8 },
- { 8072, 8079, -8 },
- { 8088, 8095, -8 },
- { 8104, 8111, -8 },
- { 8120, 8121, -8 },
- { 8122, 8123, -74 },
- { 8124, 8124, -9 },
- { 8126, 8126, -7173 },
- { 8136, 8139, -86 },
- { 8140, 8140, -9 },
- { 8152, 8153, -8 },
- { 8154, 8155, -100 },
- { 8168, 8169, -8 },
- { 8170, 8171, -112 },
- { 8172, 8172, -7 },
- { 8184, 8185, -128 },
- { 8186, 8187, -126 },
- { 8188, 8188, -9 },
- { 8486, 8486, -7517 },
- { 8490, 8490, -8383 },
- { 8491, 8491, -8262 },
- { 8498, 8498, 28 },
- { 8544, 8559, 16 },
- { 8579, 8579, OddEven },
- { 9398, 9423, 26 },
+ { 7680, 7828, EvenOddSkip },
+ { 7835, 7835, -58 },
+ { 7838, 7838, -7615 },
+ { 7840, 7934, EvenOddSkip },
+ { 7944, 7951, -8 },
+ { 7960, 7965, -8 },
+ { 7976, 7983, -8 },
+ { 7992, 7999, -8 },
+ { 8008, 8013, -8 },
+ { 8025, 8025, -8 },
+ { 8027, 8027, -8 },
+ { 8029, 8029, -8 },
+ { 8031, 8031, -8 },
+ { 8040, 8047, -8 },
+ { 8072, 8079, -8 },
+ { 8088, 8095, -8 },
+ { 8104, 8111, -8 },
+ { 8120, 8121, -8 },
+ { 8122, 8123, -74 },
+ { 8124, 8124, -9 },
+ { 8126, 8126, -7173 },
+ { 8136, 8139, -86 },
+ { 8140, 8140, -9 },
+ { 8152, 8153, -8 },
+ { 8154, 8155, -100 },
+ { 8168, 8169, -8 },
+ { 8170, 8171, -112 },
+ { 8172, 8172, -7 },
+ { 8184, 8185, -128 },
+ { 8186, 8187, -126 },
+ { 8188, 8188, -9 },
+ { 8486, 8486, -7517 },
+ { 8490, 8490, -8383 },
+ { 8491, 8491, -8262 },
+ { 8498, 8498, 28 },
+ { 8544, 8559, 16 },
+ { 8579, 8579, OddEven },
+ { 9398, 9423, 26 },
{ 11264, 11311, 48 },
- { 11360, 11360, EvenOdd },
- { 11362, 11362, -10743 },
- { 11363, 11363, -3814 },
- { 11364, 11364, -10727 },
- { 11367, 11371, OddEvenSkip },
- { 11373, 11373, -10780 },
- { 11374, 11374, -10749 },
- { 11375, 11375, -10783 },
- { 11376, 11376, -10782 },
- { 11378, 11378, EvenOdd },
- { 11381, 11381, OddEven },
- { 11390, 11391, -10815 },
- { 11392, 11490, EvenOddSkip },
- { 11499, 11501, OddEvenSkip },
- { 11506, 11506, EvenOdd },
- { 42560, 42604, EvenOddSkip },
- { 42624, 42650, EvenOddSkip },
- { 42786, 42798, EvenOddSkip },
- { 42802, 42862, EvenOddSkip },
- { 42873, 42875, OddEvenSkip },
- { 42877, 42877, -35332 },
- { 42878, 42886, EvenOddSkip },
- { 42891, 42891, OddEven },
- { 42893, 42893, -42280 },
- { 42896, 42898, EvenOddSkip },
- { 42902, 42920, EvenOddSkip },
- { 42922, 42922, -42308 },
- { 42923, 42923, -42319 },
- { 42924, 42924, -42315 },
- { 42925, 42925, -42305 },
+ { 11360, 11360, EvenOdd },
+ { 11362, 11362, -10743 },
+ { 11363, 11363, -3814 },
+ { 11364, 11364, -10727 },
+ { 11367, 11371, OddEvenSkip },
+ { 11373, 11373, -10780 },
+ { 11374, 11374, -10749 },
+ { 11375, 11375, -10783 },
+ { 11376, 11376, -10782 },
+ { 11378, 11378, EvenOdd },
+ { 11381, 11381, OddEven },
+ { 11390, 11391, -10815 },
+ { 11392, 11490, EvenOddSkip },
+ { 11499, 11501, OddEvenSkip },
+ { 11506, 11506, EvenOdd },
+ { 42560, 42604, EvenOddSkip },
+ { 42624, 42650, EvenOddSkip },
+ { 42786, 42798, EvenOddSkip },
+ { 42802, 42862, EvenOddSkip },
+ { 42873, 42875, OddEvenSkip },
+ { 42877, 42877, -35332 },
+ { 42878, 42886, EvenOddSkip },
+ { 42891, 42891, OddEven },
+ { 42893, 42893, -42280 },
+ { 42896, 42898, EvenOddSkip },
+ { 42902, 42920, EvenOddSkip },
+ { 42922, 42922, -42308 },
+ { 42923, 42923, -42319 },
+ { 42924, 42924, -42315 },
+ { 42925, 42925, -42305 },
{ 42926, 42926, -42308 },
- { 42928, 42928, -42258 },
- { 42929, 42929, -42282 },
- { 42930, 42930, -42261 },
- { 42931, 42931, 928 },
+ { 42928, 42928, -42258 },
+ { 42929, 42929, -42282 },
+ { 42930, 42930, -42261 },
+ { 42931, 42931, 928 },
{ 42932, 42946, EvenOddSkip },
{ 42948, 42948, -48 },
{ 42949, 42949, -42307 },
@@ -574,23 +574,23 @@ const CaseFold unicode_tolower[] = {
{ 42960, 42960, EvenOdd },
{ 42966, 42968, EvenOddSkip },
{ 42997, 42997, OddEven },
- { 43888, 43967, -38864 },
- { 65313, 65338, 32 },
- { 66560, 66599, 40 },
+ { 43888, 43967, -38864 },
+ { 65313, 65338, 32 },
+ { 66560, 66599, 40 },
{ 66736, 66771, 40 },
{ 66928, 66938, 39 },
{ 66940, 66954, 39 },
{ 66956, 66962, 39 },
{ 66964, 66965, 39 },
- { 68736, 68786, 64 },
- { 71840, 71871, 32 },
+ { 68736, 68786, 64 },
+ { 71840, 71871, 32 },
{ 93760, 93791, 32 },
{ 125184, 125217, 34 },
-};
+};
const int num_unicode_tolower = 205;
-
-
-
-} // namespace re2
-
-
+
+
+
+} // namespace re2
+
+
diff --git a/contrib/libs/re2/re2/unicode_casefold.h b/contrib/libs/re2/re2/unicode_casefold.h
index 8bdbb42fbc..e8158740a8 100644
--- a/contrib/libs/re2/re2/unicode_casefold.h
+++ b/contrib/libs/re2/re2/unicode_casefold.h
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_UNICODE_CASEFOLD_H_
-#define RE2_UNICODE_CASEFOLD_H_
-
+#ifndef RE2_UNICODE_CASEFOLD_H_
+#define RE2_UNICODE_CASEFOLD_H_
+
// Unicode case folding tables.
// The Unicode case folding tables encode the mapping from one Unicode point
@@ -19,7 +19,7 @@
// 'K' -> 'K'
//
// Like everything Unicode, these tables are big. If we represent the table
-// as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB.
+// as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB.
// Most table entries look like the ones around them:
// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc.
// Instead of listing all the pairs explicitly, we make a list of ranges
@@ -39,40 +39,40 @@
// The grouped form also allows for efficient fold range calculations
// rather than looping one character at a time.
-#include <stdint.h>
+#include <stdint.h>
-#include "util/util.h"
-#include "util/utf.h"
+#include "util/util.h"
+#include "util/utf.h"
namespace re2 {
enum {
EvenOdd = 1,
- OddEven = -1,
- EvenOddSkip = 1<<30,
- OddEvenSkip,
+ OddEven = -1,
+ EvenOddSkip = 1<<30,
+ OddEvenSkip,
};
struct CaseFold {
- Rune lo;
- Rune hi;
- int32_t delta;
+ Rune lo;
+ Rune hi;
+ int32_t delta;
};
extern const CaseFold unicode_casefold[];
extern const int num_unicode_casefold;
-extern const CaseFold unicode_tolower[];
-extern const int num_unicode_tolower;
-
+extern const CaseFold unicode_tolower[];
+extern const int num_unicode_tolower;
+
// Returns the CaseFold* in the tables that contains rune.
// If rune is not in the tables, returns the first CaseFold* after rune.
// If rune is larger than any value in the tables, returns NULL.
-extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune);
-
-// Returns the result of applying the fold f to the rune r.
-extern Rune ApplyFold(const CaseFold *f, Rune r);
+extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune);
+// Returns the result of applying the fold f to the rune r.
+extern Rune ApplyFold(const CaseFold *f, Rune r);
+
} // namespace re2
-#endif // RE2_UNICODE_CASEFOLD_H_
+#endif // RE2_UNICODE_CASEFOLD_H_
diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc
index 2a8d7dae1f..d564572650 100644
--- a/contrib/libs/re2/re2/unicode_groups.cc
+++ b/contrib/libs/re2/re2/unicode_groups.cc
@@ -1,12 +1,12 @@
-
-// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
-// make_unicode_groups.py >unicode_groups.cc
-
-#include "re2/unicode_groups.h"
-
-namespace re2 {
-
-
+
+// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
+// make_unicode_groups.py >unicode_groups.cc
+
+#include "re2/unicode_groups.h"
+
+namespace re2 {
+
+
static const URange16 C_range16[] = {
{ 0, 31 },
{ 127, 159 },
@@ -25,7 +25,7 @@ static const URange16 C_range16[] = {
{ 55296, 63743 },
{ 65279, 65279 },
{ 65529, 65531 },
-};
+};
static const URange32 C_range32[] = {
{ 69821, 69821 },
{ 69837, 69837 },
@@ -36,11 +36,11 @@ static const URange32 C_range32[] = {
{ 917536, 917631 },
{ 983040, 1048573 },
{ 1048576, 1114109 },
-};
+};
static const URange16 Cc_range16[] = {
{ 0, 31 },
{ 127, 159 },
-};
+};
static const URange16 Cf_range16[] = {
{ 173, 173 },
{ 1536, 1541 },
@@ -56,7 +56,7 @@ static const URange16 Cf_range16[] = {
{ 8294, 8303 },
{ 65279, 65279 },
{ 65529, 65531 },
-};
+};
static const URange32 Cf_range32[] = {
{ 69821, 69821 },
{ 69837, 69837 },
@@ -65,7 +65,7 @@ static const URange32 Cf_range32[] = {
{ 119155, 119162 },
{ 917505, 917505 },
{ 917536, 917631 },
-};
+};
static const URange16 Co_range16[] = {
{ 57344, 63743 },
};
@@ -79,9 +79,9 @@ static const URange16 Cs_range16[] = {
static const URange16 L_range16[] = {
{ 65, 90 },
{ 97, 122 },
- { 170, 170 },
+ { 170, 170 },
{ 181, 181 },
- { 186, 186 },
+ { 186, 186 },
{ 192, 214 },
{ 216, 246 },
{ 248, 705 },
@@ -103,211 +103,211 @@ static const URange16 L_range16[] = {
{ 1329, 1366 },
{ 1369, 1369 },
{ 1376, 1416 },
- { 1488, 1514 },
+ { 1488, 1514 },
{ 1519, 1522 },
{ 1568, 1610 },
- { 1646, 1647 },
- { 1649, 1747 },
- { 1749, 1749 },
+ { 1646, 1647 },
+ { 1649, 1747 },
+ { 1749, 1749 },
{ 1765, 1766 },
- { 1774, 1775 },
- { 1786, 1788 },
- { 1791, 1791 },
- { 1808, 1808 },
- { 1810, 1839 },
- { 1869, 1957 },
- { 1969, 1969 },
- { 1994, 2026 },
+ { 1774, 1775 },
+ { 1786, 1788 },
+ { 1791, 1791 },
+ { 1808, 1808 },
+ { 1810, 1839 },
+ { 1869, 1957 },
+ { 1969, 1969 },
+ { 1994, 2026 },
{ 2036, 2037 },
{ 2042, 2042 },
- { 2048, 2069 },
+ { 2048, 2069 },
{ 2074, 2074 },
{ 2084, 2084 },
{ 2088, 2088 },
- { 2112, 2136 },
+ { 2112, 2136 },
{ 2144, 2154 },
{ 2160, 2183 },
{ 2185, 2190 },
{ 2208, 2249 },
- { 2308, 2361 },
- { 2365, 2365 },
- { 2384, 2384 },
- { 2392, 2401 },
+ { 2308, 2361 },
+ { 2365, 2365 },
+ { 2384, 2384 },
+ { 2392, 2401 },
{ 2417, 2432 },
- { 2437, 2444 },
- { 2447, 2448 },
- { 2451, 2472 },
- { 2474, 2480 },
- { 2482, 2482 },
- { 2486, 2489 },
- { 2493, 2493 },
- { 2510, 2510 },
- { 2524, 2525 },
- { 2527, 2529 },
- { 2544, 2545 },
+ { 2437, 2444 },
+ { 2447, 2448 },
+ { 2451, 2472 },
+ { 2474, 2480 },
+ { 2482, 2482 },
+ { 2486, 2489 },
+ { 2493, 2493 },
+ { 2510, 2510 },
+ { 2524, 2525 },
+ { 2527, 2529 },
+ { 2544, 2545 },
{ 2556, 2556 },
- { 2565, 2570 },
- { 2575, 2576 },
- { 2579, 2600 },
- { 2602, 2608 },
- { 2610, 2611 },
- { 2613, 2614 },
- { 2616, 2617 },
- { 2649, 2652 },
- { 2654, 2654 },
- { 2674, 2676 },
- { 2693, 2701 },
- { 2703, 2705 },
- { 2707, 2728 },
- { 2730, 2736 },
- { 2738, 2739 },
- { 2741, 2745 },
- { 2749, 2749 },
- { 2768, 2768 },
- { 2784, 2785 },
- { 2809, 2809 },
- { 2821, 2828 },
- { 2831, 2832 },
- { 2835, 2856 },
- { 2858, 2864 },
- { 2866, 2867 },
- { 2869, 2873 },
- { 2877, 2877 },
- { 2908, 2909 },
- { 2911, 2913 },
- { 2929, 2929 },
- { 2947, 2947 },
- { 2949, 2954 },
- { 2958, 2960 },
- { 2962, 2965 },
- { 2969, 2970 },
- { 2972, 2972 },
- { 2974, 2975 },
- { 2979, 2980 },
- { 2984, 2986 },
- { 2990, 3001 },
- { 3024, 3024 },
- { 3077, 3084 },
- { 3086, 3088 },
- { 3090, 3112 },
- { 3114, 3129 },
- { 3133, 3133 },
- { 3160, 3162 },
+ { 2565, 2570 },
+ { 2575, 2576 },
+ { 2579, 2600 },
+ { 2602, 2608 },
+ { 2610, 2611 },
+ { 2613, 2614 },
+ { 2616, 2617 },
+ { 2649, 2652 },
+ { 2654, 2654 },
+ { 2674, 2676 },
+ { 2693, 2701 },
+ { 2703, 2705 },
+ { 2707, 2728 },
+ { 2730, 2736 },
+ { 2738, 2739 },
+ { 2741, 2745 },
+ { 2749, 2749 },
+ { 2768, 2768 },
+ { 2784, 2785 },
+ { 2809, 2809 },
+ { 2821, 2828 },
+ { 2831, 2832 },
+ { 2835, 2856 },
+ { 2858, 2864 },
+ { 2866, 2867 },
+ { 2869, 2873 },
+ { 2877, 2877 },
+ { 2908, 2909 },
+ { 2911, 2913 },
+ { 2929, 2929 },
+ { 2947, 2947 },
+ { 2949, 2954 },
+ { 2958, 2960 },
+ { 2962, 2965 },
+ { 2969, 2970 },
+ { 2972, 2972 },
+ { 2974, 2975 },
+ { 2979, 2980 },
+ { 2984, 2986 },
+ { 2990, 3001 },
+ { 3024, 3024 },
+ { 3077, 3084 },
+ { 3086, 3088 },
+ { 3090, 3112 },
+ { 3114, 3129 },
+ { 3133, 3133 },
+ { 3160, 3162 },
{ 3165, 3165 },
- { 3168, 3169 },
+ { 3168, 3169 },
{ 3200, 3200 },
- { 3205, 3212 },
- { 3214, 3216 },
- { 3218, 3240 },
- { 3242, 3251 },
- { 3253, 3257 },
- { 3261, 3261 },
+ { 3205, 3212 },
+ { 3214, 3216 },
+ { 3218, 3240 },
+ { 3242, 3251 },
+ { 3253, 3257 },
+ { 3261, 3261 },
{ 3293, 3294 },
- { 3296, 3297 },
- { 3313, 3314 },
+ { 3296, 3297 },
+ { 3313, 3314 },
{ 3332, 3340 },
- { 3342, 3344 },
- { 3346, 3386 },
- { 3389, 3389 },
- { 3406, 3406 },
+ { 3342, 3344 },
+ { 3346, 3386 },
+ { 3389, 3389 },
+ { 3406, 3406 },
{ 3412, 3414 },
- { 3423, 3425 },
- { 3450, 3455 },
- { 3461, 3478 },
- { 3482, 3505 },
- { 3507, 3515 },
- { 3517, 3517 },
- { 3520, 3526 },
- { 3585, 3632 },
- { 3634, 3635 },
+ { 3423, 3425 },
+ { 3450, 3455 },
+ { 3461, 3478 },
+ { 3482, 3505 },
+ { 3507, 3515 },
+ { 3517, 3517 },
+ { 3520, 3526 },
+ { 3585, 3632 },
+ { 3634, 3635 },
{ 3648, 3654 },
- { 3713, 3714 },
- { 3716, 3716 },
+ { 3713, 3714 },
+ { 3716, 3716 },
{ 3718, 3722 },
{ 3724, 3747 },
- { 3749, 3749 },
+ { 3749, 3749 },
{ 3751, 3760 },
- { 3762, 3763 },
- { 3773, 3773 },
- { 3776, 3780 },
+ { 3762, 3763 },
+ { 3773, 3773 },
+ { 3776, 3780 },
{ 3782, 3782 },
- { 3804, 3807 },
- { 3840, 3840 },
- { 3904, 3911 },
- { 3913, 3948 },
- { 3976, 3980 },
- { 4096, 4138 },
- { 4159, 4159 },
- { 4176, 4181 },
- { 4186, 4189 },
- { 4193, 4193 },
- { 4197, 4198 },
- { 4206, 4208 },
- { 4213, 4225 },
- { 4238, 4238 },
+ { 3804, 3807 },
+ { 3840, 3840 },
+ { 3904, 3911 },
+ { 3913, 3948 },
+ { 3976, 3980 },
+ { 4096, 4138 },
+ { 4159, 4159 },
+ { 4176, 4181 },
+ { 4186, 4189 },
+ { 4193, 4193 },
+ { 4197, 4198 },
+ { 4206, 4208 },
+ { 4213, 4225 },
+ { 4238, 4238 },
{ 4256, 4293 },
{ 4295, 4295 },
{ 4301, 4301 },
- { 4304, 4346 },
+ { 4304, 4346 },
{ 4348, 4680 },
- { 4682, 4685 },
- { 4688, 4694 },
- { 4696, 4696 },
- { 4698, 4701 },
- { 4704, 4744 },
- { 4746, 4749 },
- { 4752, 4784 },
- { 4786, 4789 },
- { 4792, 4798 },
- { 4800, 4800 },
- { 4802, 4805 },
- { 4808, 4822 },
- { 4824, 4880 },
- { 4882, 4885 },
- { 4888, 4954 },
- { 4992, 5007 },
+ { 4682, 4685 },
+ { 4688, 4694 },
+ { 4696, 4696 },
+ { 4698, 4701 },
+ { 4704, 4744 },
+ { 4746, 4749 },
+ { 4752, 4784 },
+ { 4786, 4789 },
+ { 4792, 4798 },
+ { 4800, 4800 },
+ { 4802, 4805 },
+ { 4808, 4822 },
+ { 4824, 4880 },
+ { 4882, 4885 },
+ { 4888, 4954 },
+ { 4992, 5007 },
{ 5024, 5109 },
{ 5112, 5117 },
- { 5121, 5740 },
- { 5743, 5759 },
- { 5761, 5786 },
- { 5792, 5866 },
- { 5873, 5880 },
+ { 5121, 5740 },
+ { 5743, 5759 },
+ { 5761, 5786 },
+ { 5792, 5866 },
+ { 5873, 5880 },
{ 5888, 5905 },
{ 5919, 5937 },
- { 5952, 5969 },
- { 5984, 5996 },
- { 5998, 6000 },
- { 6016, 6067 },
+ { 5952, 5969 },
+ { 5984, 5996 },
+ { 5998, 6000 },
+ { 6016, 6067 },
{ 6103, 6103 },
- { 6108, 6108 },
+ { 6108, 6108 },
{ 6176, 6264 },
{ 6272, 6276 },
{ 6279, 6312 },
- { 6314, 6314 },
- { 6320, 6389 },
- { 6400, 6430 },
- { 6480, 6509 },
- { 6512, 6516 },
- { 6528, 6571 },
- { 6576, 6601 },
- { 6656, 6678 },
- { 6688, 6740 },
+ { 6314, 6314 },
+ { 6320, 6389 },
+ { 6400, 6430 },
+ { 6480, 6509 },
+ { 6512, 6516 },
+ { 6528, 6571 },
+ { 6576, 6601 },
+ { 6656, 6678 },
+ { 6688, 6740 },
{ 6823, 6823 },
- { 6917, 6963 },
+ { 6917, 6963 },
{ 6981, 6988 },
- { 7043, 7072 },
- { 7086, 7087 },
- { 7098, 7141 },
- { 7168, 7203 },
- { 7245, 7247 },
+ { 7043, 7072 },
+ { 7086, 7087 },
+ { 7098, 7141 },
+ { 7168, 7203 },
+ { 7245, 7247 },
{ 7258, 7293 },
{ 7296, 7304 },
{ 7312, 7354 },
{ 7357, 7359 },
- { 7401, 7404 },
+ { 7401, 7404 },
{ 7406, 7411 },
- { 7413, 7414 },
+ { 7413, 7414 },
{ 7418, 7418 },
{ 7424, 7615 },
{ 7680, 7957 },
@@ -352,38 +352,38 @@ static const URange16 L_range16[] = {
{ 11520, 11557 },
{ 11559, 11559 },
{ 11565, 11565 },
- { 11568, 11623 },
+ { 11568, 11623 },
{ 11631, 11631 },
- { 11648, 11670 },
- { 11680, 11686 },
- { 11688, 11694 },
- { 11696, 11702 },
- { 11704, 11710 },
- { 11712, 11718 },
- { 11720, 11726 },
- { 11728, 11734 },
- { 11736, 11742 },
+ { 11648, 11670 },
+ { 11680, 11686 },
+ { 11688, 11694 },
+ { 11696, 11702 },
+ { 11704, 11710 },
+ { 11712, 11718 },
+ { 11720, 11726 },
+ { 11728, 11734 },
+ { 11736, 11742 },
{ 11823, 11823 },
{ 12293, 12294 },
{ 12337, 12341 },
{ 12347, 12348 },
- { 12353, 12438 },
+ { 12353, 12438 },
{ 12445, 12447 },
- { 12449, 12538 },
+ { 12449, 12538 },
{ 12540, 12543 },
{ 12549, 12591 },
- { 12593, 12686 },
+ { 12593, 12686 },
{ 12704, 12735 },
- { 12784, 12799 },
+ { 12784, 12799 },
{ 13312, 19903 },
{ 19968, 42124 },
{ 42192, 42237 },
{ 42240, 42508 },
- { 42512, 42527 },
- { 42538, 42539 },
+ { 42512, 42527 },
+ { 42538, 42539 },
{ 42560, 42606 },
{ 42623, 42653 },
- { 42656, 42725 },
+ { 42656, 42725 },
{ 42775, 42783 },
{ 42786, 42888 },
{ 42891, 42954 },
@@ -391,95 +391,95 @@ static const URange16 L_range16[] = {
{ 42963, 42963 },
{ 42965, 42969 },
{ 42994, 43009 },
- { 43011, 43013 },
- { 43015, 43018 },
- { 43020, 43042 },
- { 43072, 43123 },
- { 43138, 43187 },
- { 43250, 43255 },
- { 43259, 43259 },
+ { 43011, 43013 },
+ { 43015, 43018 },
+ { 43020, 43042 },
+ { 43072, 43123 },
+ { 43138, 43187 },
+ { 43250, 43255 },
+ { 43259, 43259 },
{ 43261, 43262 },
- { 43274, 43301 },
- { 43312, 43334 },
- { 43360, 43388 },
- { 43396, 43442 },
+ { 43274, 43301 },
+ { 43312, 43334 },
+ { 43360, 43388 },
+ { 43396, 43442 },
{ 43471, 43471 },
- { 43488, 43492 },
+ { 43488, 43492 },
{ 43494, 43503 },
- { 43514, 43518 },
- { 43520, 43560 },
- { 43584, 43586 },
- { 43588, 43595 },
+ { 43514, 43518 },
+ { 43520, 43560 },
+ { 43584, 43586 },
+ { 43588, 43595 },
{ 43616, 43638 },
- { 43642, 43642 },
- { 43646, 43695 },
- { 43697, 43697 },
- { 43701, 43702 },
- { 43705, 43709 },
- { 43712, 43712 },
- { 43714, 43714 },
+ { 43642, 43642 },
+ { 43646, 43695 },
+ { 43697, 43697 },
+ { 43701, 43702 },
+ { 43705, 43709 },
+ { 43712, 43712 },
+ { 43714, 43714 },
{ 43739, 43741 },
- { 43744, 43754 },
+ { 43744, 43754 },
{ 43762, 43764 },
- { 43777, 43782 },
- { 43785, 43790 },
- { 43793, 43798 },
- { 43808, 43814 },
- { 43816, 43822 },
+ { 43777, 43782 },
+ { 43785, 43790 },
+ { 43793, 43798 },
+ { 43808, 43814 },
+ { 43816, 43822 },
{ 43824, 43866 },
{ 43868, 43881 },
{ 43888, 44002 },
- { 44032, 55203 },
- { 55216, 55238 },
- { 55243, 55291 },
- { 63744, 64109 },
- { 64112, 64217 },
+ { 44032, 55203 },
+ { 55216, 55238 },
+ { 55243, 55291 },
+ { 63744, 64109 },
+ { 64112, 64217 },
{ 64256, 64262 },
{ 64275, 64279 },
- { 64285, 64285 },
- { 64287, 64296 },
- { 64298, 64310 },
- { 64312, 64316 },
- { 64318, 64318 },
- { 64320, 64321 },
- { 64323, 64324 },
- { 64326, 64433 },
- { 64467, 64829 },
- { 64848, 64911 },
- { 64914, 64967 },
- { 65008, 65019 },
- { 65136, 65140 },
- { 65142, 65276 },
+ { 64285, 64285 },
+ { 64287, 64296 },
+ { 64298, 64310 },
+ { 64312, 64316 },
+ { 64318, 64318 },
+ { 64320, 64321 },
+ { 64323, 64324 },
+ { 64326, 64433 },
+ { 64467, 64829 },
+ { 64848, 64911 },
+ { 64914, 64967 },
+ { 65008, 65019 },
+ { 65136, 65140 },
+ { 65142, 65276 },
{ 65313, 65338 },
{ 65345, 65370 },
{ 65382, 65470 },
- { 65474, 65479 },
- { 65482, 65487 },
- { 65490, 65495 },
- { 65498, 65500 },
-};
+ { 65474, 65479 },
+ { 65482, 65487 },
+ { 65490, 65495 },
+ { 65498, 65500 },
+};
static const URange32 L_range32[] = {
- { 65536, 65547 },
- { 65549, 65574 },
- { 65576, 65594 },
- { 65596, 65597 },
- { 65599, 65613 },
- { 65616, 65629 },
- { 65664, 65786 },
- { 66176, 66204 },
- { 66208, 66256 },
- { 66304, 66335 },
+ { 65536, 65547 },
+ { 65549, 65574 },
+ { 65576, 65594 },
+ { 65596, 65597 },
+ { 65599, 65613 },
+ { 65616, 65629 },
+ { 65664, 65786 },
+ { 66176, 66204 },
+ { 66208, 66256 },
+ { 66304, 66335 },
{ 66349, 66368 },
- { 66370, 66377 },
- { 66384, 66421 },
- { 66432, 66461 },
- { 66464, 66499 },
- { 66504, 66511 },
+ { 66370, 66377 },
+ { 66384, 66421 },
+ { 66432, 66461 },
+ { 66464, 66499 },
+ { 66504, 66511 },
{ 66560, 66717 },
{ 66736, 66771 },
{ 66776, 66811 },
- { 66816, 66855 },
- { 66864, 66915 },
+ { 66816, 66855 },
+ { 66864, 66915 },
{ 66928, 66938 },
{ 66940, 66954 },
{ 66956, 66962 },
@@ -488,39 +488,39 @@ static const URange32 L_range32[] = {
{ 66979, 66993 },
{ 66995, 67001 },
{ 67003, 67004 },
- { 67072, 67382 },
- { 67392, 67413 },
- { 67424, 67431 },
+ { 67072, 67382 },
+ { 67392, 67413 },
+ { 67424, 67431 },
{ 67456, 67461 },
{ 67463, 67504 },
{ 67506, 67514 },
- { 67584, 67589 },
- { 67592, 67592 },
- { 67594, 67637 },
- { 67639, 67640 },
- { 67644, 67644 },
- { 67647, 67669 },
- { 67680, 67702 },
- { 67712, 67742 },
- { 67808, 67826 },
- { 67828, 67829 },
- { 67840, 67861 },
- { 67872, 67897 },
- { 67968, 68023 },
- { 68030, 68031 },
- { 68096, 68096 },
- { 68112, 68115 },
- { 68117, 68119 },
+ { 67584, 67589 },
+ { 67592, 67592 },
+ { 67594, 67637 },
+ { 67639, 67640 },
+ { 67644, 67644 },
+ { 67647, 67669 },
+ { 67680, 67702 },
+ { 67712, 67742 },
+ { 67808, 67826 },
+ { 67828, 67829 },
+ { 67840, 67861 },
+ { 67872, 67897 },
+ { 67968, 68023 },
+ { 68030, 68031 },
+ { 68096, 68096 },
+ { 68112, 68115 },
+ { 68117, 68119 },
{ 68121, 68149 },
- { 68192, 68220 },
- { 68224, 68252 },
- { 68288, 68295 },
- { 68297, 68324 },
- { 68352, 68405 },
- { 68416, 68437 },
- { 68448, 68466 },
- { 68480, 68497 },
- { 68608, 68680 },
+ { 68192, 68220 },
+ { 68224, 68252 },
+ { 68288, 68295 },
+ { 68297, 68324 },
+ { 68352, 68405 },
+ { 68416, 68437 },
+ { 68448, 68466 },
+ { 68480, 68497 },
+ { 68608, 68680 },
{ 68736, 68786 },
{ 68800, 68850 },
{ 68864, 68899 },
@@ -532,48 +532,48 @@ static const URange32 L_range32[] = {
{ 69488, 69505 },
{ 69552, 69572 },
{ 69600, 69622 },
- { 69635, 69687 },
+ { 69635, 69687 },
{ 69745, 69746 },
{ 69749, 69749 },
- { 69763, 69807 },
- { 69840, 69864 },
- { 69891, 69926 },
+ { 69763, 69807 },
+ { 69840, 69864 },
+ { 69891, 69926 },
{ 69956, 69956 },
{ 69959, 69959 },
- { 69968, 70002 },
- { 70006, 70006 },
- { 70019, 70066 },
- { 70081, 70084 },
- { 70106, 70106 },
- { 70108, 70108 },
- { 70144, 70161 },
- { 70163, 70187 },
- { 70272, 70278 },
- { 70280, 70280 },
- { 70282, 70285 },
- { 70287, 70301 },
- { 70303, 70312 },
- { 70320, 70366 },
- { 70405, 70412 },
- { 70415, 70416 },
- { 70419, 70440 },
- { 70442, 70448 },
- { 70450, 70451 },
- { 70453, 70457 },
- { 70461, 70461 },
- { 70480, 70480 },
- { 70493, 70497 },
+ { 69968, 70002 },
+ { 70006, 70006 },
+ { 70019, 70066 },
+ { 70081, 70084 },
+ { 70106, 70106 },
+ { 70108, 70108 },
+ { 70144, 70161 },
+ { 70163, 70187 },
+ { 70272, 70278 },
+ { 70280, 70280 },
+ { 70282, 70285 },
+ { 70287, 70301 },
+ { 70303, 70312 },
+ { 70320, 70366 },
+ { 70405, 70412 },
+ { 70415, 70416 },
+ { 70419, 70440 },
+ { 70442, 70448 },
+ { 70450, 70451 },
+ { 70453, 70457 },
+ { 70461, 70461 },
+ { 70480, 70480 },
+ { 70493, 70497 },
{ 70656, 70708 },
{ 70727, 70730 },
{ 70751, 70753 },
- { 70784, 70831 },
- { 70852, 70853 },
- { 70855, 70855 },
- { 71040, 71086 },
- { 71128, 71131 },
- { 71168, 71215 },
- { 71236, 71236 },
- { 71296, 71338 },
+ { 70784, 70831 },
+ { 70852, 70853 },
+ { 70855, 70855 },
+ { 71040, 71086 },
+ { 71128, 71131 },
+ { 71168, 71215 },
+ { 71236, 71236 },
+ { 71296, 71338 },
{ 71352, 71352 },
{ 71424, 71450 },
{ 71488, 71494 },
@@ -611,22 +611,22 @@ static const URange32 L_range32[] = {
{ 73112, 73112 },
{ 73440, 73458 },
{ 73648, 73648 },
- { 73728, 74649 },
- { 74880, 75075 },
+ { 73728, 74649 },
+ { 74880, 75075 },
{ 77712, 77808 },
- { 77824, 78894 },
- { 82944, 83526 },
- { 92160, 92728 },
- { 92736, 92766 },
+ { 77824, 78894 },
+ { 82944, 83526 },
+ { 92160, 92728 },
+ { 92736, 92766 },
{ 92784, 92862 },
- { 92880, 92909 },
- { 92928, 92975 },
+ { 92880, 92909 },
+ { 92928, 92975 },
{ 92992, 92995 },
- { 93027, 93047 },
- { 93053, 93071 },
+ { 93027, 93047 },
+ { 93053, 93071 },
{ 93760, 93823 },
{ 93952, 94026 },
- { 94032, 94032 },
+ { 94032, 94032 },
{ 94099, 94111 },
{ 94176, 94177 },
{ 94179, 94179 },
@@ -640,10 +640,10 @@ static const URange32 L_range32[] = {
{ 110928, 110930 },
{ 110948, 110951 },
{ 110960, 111355 },
- { 113664, 113770 },
- { 113776, 113788 },
- { 113792, 113800 },
- { 113808, 113817 },
+ { 113664, 113770 },
+ { 113776, 113788 },
+ { 113792, 113800 },
+ { 113808, 113817 },
{ 119808, 119892 },
{ 119894, 119964 },
{ 119966, 119967 },
@@ -684,647 +684,647 @@ static const URange32 L_range32[] = {
{ 124904, 124907 },
{ 124909, 124910 },
{ 124912, 124926 },
- { 124928, 125124 },
+ { 124928, 125124 },
{ 125184, 125251 },
{ 125259, 125259 },
- { 126464, 126467 },
- { 126469, 126495 },
- { 126497, 126498 },
- { 126500, 126500 },
- { 126503, 126503 },
- { 126505, 126514 },
- { 126516, 126519 },
- { 126521, 126521 },
- { 126523, 126523 },
- { 126530, 126530 },
- { 126535, 126535 },
- { 126537, 126537 },
- { 126539, 126539 },
- { 126541, 126543 },
- { 126545, 126546 },
- { 126548, 126548 },
- { 126551, 126551 },
- { 126553, 126553 },
- { 126555, 126555 },
- { 126557, 126557 },
- { 126559, 126559 },
- { 126561, 126562 },
- { 126564, 126564 },
- { 126567, 126570 },
- { 126572, 126578 },
- { 126580, 126583 },
- { 126585, 126588 },
- { 126590, 126590 },
- { 126592, 126601 },
- { 126603, 126619 },
- { 126625, 126627 },
- { 126629, 126633 },
- { 126635, 126651 },
+ { 126464, 126467 },
+ { 126469, 126495 },
+ { 126497, 126498 },
+ { 126500, 126500 },
+ { 126503, 126503 },
+ { 126505, 126514 },
+ { 126516, 126519 },
+ { 126521, 126521 },
+ { 126523, 126523 },
+ { 126530, 126530 },
+ { 126535, 126535 },
+ { 126537, 126537 },
+ { 126539, 126539 },
+ { 126541, 126543 },
+ { 126545, 126546 },
+ { 126548, 126548 },
+ { 126551, 126551 },
+ { 126553, 126553 },
+ { 126555, 126555 },
+ { 126557, 126557 },
+ { 126559, 126559 },
+ { 126561, 126562 },
+ { 126564, 126564 },
+ { 126567, 126570 },
+ { 126572, 126578 },
+ { 126580, 126583 },
+ { 126585, 126588 },
+ { 126590, 126590 },
+ { 126592, 126601 },
+ { 126603, 126619 },
+ { 126625, 126627 },
+ { 126629, 126633 },
+ { 126635, 126651 },
{ 131072, 173791 },
{ 173824, 177976 },
- { 177984, 178205 },
- { 178208, 183969 },
+ { 177984, 178205 },
+ { 178208, 183969 },
{ 183984, 191456 },
- { 194560, 195101 },
+ { 194560, 195101 },
{ 196608, 201546 },
-};
-static const URange16 Ll_range16[] = {
- { 97, 122 },
- { 181, 181 },
- { 223, 246 },
- { 248, 255 },
- { 257, 257 },
- { 259, 259 },
- { 261, 261 },
- { 263, 263 },
- { 265, 265 },
- { 267, 267 },
- { 269, 269 },
- { 271, 271 },
- { 273, 273 },
- { 275, 275 },
- { 277, 277 },
- { 279, 279 },
- { 281, 281 },
- { 283, 283 },
- { 285, 285 },
- { 287, 287 },
- { 289, 289 },
- { 291, 291 },
- { 293, 293 },
- { 295, 295 },
- { 297, 297 },
- { 299, 299 },
- { 301, 301 },
- { 303, 303 },
- { 305, 305 },
- { 307, 307 },
- { 309, 309 },
- { 311, 312 },
- { 314, 314 },
- { 316, 316 },
- { 318, 318 },
- { 320, 320 },
- { 322, 322 },
- { 324, 324 },
- { 326, 326 },
- { 328, 329 },
- { 331, 331 },
- { 333, 333 },
- { 335, 335 },
- { 337, 337 },
- { 339, 339 },
- { 341, 341 },
- { 343, 343 },
- { 345, 345 },
- { 347, 347 },
- { 349, 349 },
- { 351, 351 },
- { 353, 353 },
- { 355, 355 },
- { 357, 357 },
- { 359, 359 },
- { 361, 361 },
- { 363, 363 },
- { 365, 365 },
- { 367, 367 },
- { 369, 369 },
- { 371, 371 },
- { 373, 373 },
- { 375, 375 },
- { 378, 378 },
- { 380, 380 },
- { 382, 384 },
- { 387, 387 },
- { 389, 389 },
- { 392, 392 },
- { 396, 397 },
- { 402, 402 },
- { 405, 405 },
- { 409, 411 },
- { 414, 414 },
- { 417, 417 },
- { 419, 419 },
- { 421, 421 },
- { 424, 424 },
- { 426, 427 },
- { 429, 429 },
- { 432, 432 },
- { 436, 436 },
- { 438, 438 },
- { 441, 442 },
- { 445, 447 },
- { 454, 454 },
- { 457, 457 },
- { 460, 460 },
- { 462, 462 },
- { 464, 464 },
- { 466, 466 },
- { 468, 468 },
- { 470, 470 },
- { 472, 472 },
- { 474, 474 },
- { 476, 477 },
- { 479, 479 },
- { 481, 481 },
- { 483, 483 },
- { 485, 485 },
- { 487, 487 },
- { 489, 489 },
- { 491, 491 },
- { 493, 493 },
- { 495, 496 },
- { 499, 499 },
- { 501, 501 },
- { 505, 505 },
- { 507, 507 },
- { 509, 509 },
- { 511, 511 },
- { 513, 513 },
- { 515, 515 },
- { 517, 517 },
- { 519, 519 },
- { 521, 521 },
- { 523, 523 },
- { 525, 525 },
- { 527, 527 },
- { 529, 529 },
- { 531, 531 },
- { 533, 533 },
- { 535, 535 },
- { 537, 537 },
- { 539, 539 },
- { 541, 541 },
- { 543, 543 },
- { 545, 545 },
- { 547, 547 },
- { 549, 549 },
- { 551, 551 },
- { 553, 553 },
- { 555, 555 },
- { 557, 557 },
- { 559, 559 },
- { 561, 561 },
- { 563, 569 },
- { 572, 572 },
- { 575, 576 },
- { 578, 578 },
- { 583, 583 },
- { 585, 585 },
- { 587, 587 },
- { 589, 589 },
- { 591, 659 },
- { 661, 687 },
- { 881, 881 },
- { 883, 883 },
- { 887, 887 },
- { 891, 893 },
- { 912, 912 },
- { 940, 974 },
- { 976, 977 },
- { 981, 983 },
- { 985, 985 },
- { 987, 987 },
- { 989, 989 },
- { 991, 991 },
- { 993, 993 },
- { 995, 995 },
- { 997, 997 },
- { 999, 999 },
- { 1001, 1001 },
- { 1003, 1003 },
- { 1005, 1005 },
- { 1007, 1011 },
- { 1013, 1013 },
- { 1016, 1016 },
- { 1019, 1020 },
- { 1072, 1119 },
- { 1121, 1121 },
- { 1123, 1123 },
- { 1125, 1125 },
- { 1127, 1127 },
- { 1129, 1129 },
- { 1131, 1131 },
- { 1133, 1133 },
- { 1135, 1135 },
- { 1137, 1137 },
- { 1139, 1139 },
- { 1141, 1141 },
- { 1143, 1143 },
- { 1145, 1145 },
- { 1147, 1147 },
- { 1149, 1149 },
- { 1151, 1151 },
- { 1153, 1153 },
- { 1163, 1163 },
- { 1165, 1165 },
- { 1167, 1167 },
- { 1169, 1169 },
- { 1171, 1171 },
- { 1173, 1173 },
- { 1175, 1175 },
- { 1177, 1177 },
- { 1179, 1179 },
- { 1181, 1181 },
- { 1183, 1183 },
- { 1185, 1185 },
- { 1187, 1187 },
- { 1189, 1189 },
- { 1191, 1191 },
- { 1193, 1193 },
- { 1195, 1195 },
- { 1197, 1197 },
- { 1199, 1199 },
- { 1201, 1201 },
- { 1203, 1203 },
- { 1205, 1205 },
- { 1207, 1207 },
- { 1209, 1209 },
- { 1211, 1211 },
- { 1213, 1213 },
- { 1215, 1215 },
- { 1218, 1218 },
- { 1220, 1220 },
- { 1222, 1222 },
- { 1224, 1224 },
- { 1226, 1226 },
- { 1228, 1228 },
- { 1230, 1231 },
- { 1233, 1233 },
- { 1235, 1235 },
- { 1237, 1237 },
- { 1239, 1239 },
- { 1241, 1241 },
- { 1243, 1243 },
- { 1245, 1245 },
- { 1247, 1247 },
- { 1249, 1249 },
- { 1251, 1251 },
- { 1253, 1253 },
- { 1255, 1255 },
- { 1257, 1257 },
- { 1259, 1259 },
- { 1261, 1261 },
- { 1263, 1263 },
- { 1265, 1265 },
- { 1267, 1267 },
- { 1269, 1269 },
- { 1271, 1271 },
- { 1273, 1273 },
- { 1275, 1275 },
- { 1277, 1277 },
- { 1279, 1279 },
- { 1281, 1281 },
- { 1283, 1283 },
- { 1285, 1285 },
- { 1287, 1287 },
- { 1289, 1289 },
- { 1291, 1291 },
- { 1293, 1293 },
- { 1295, 1295 },
- { 1297, 1297 },
- { 1299, 1299 },
- { 1301, 1301 },
- { 1303, 1303 },
- { 1305, 1305 },
- { 1307, 1307 },
- { 1309, 1309 },
- { 1311, 1311 },
- { 1313, 1313 },
- { 1315, 1315 },
- { 1317, 1317 },
- { 1319, 1319 },
- { 1321, 1321 },
- { 1323, 1323 },
- { 1325, 1325 },
- { 1327, 1327 },
+};
+static const URange16 Ll_range16[] = {
+ { 97, 122 },
+ { 181, 181 },
+ { 223, 246 },
+ { 248, 255 },
+ { 257, 257 },
+ { 259, 259 },
+ { 261, 261 },
+ { 263, 263 },
+ { 265, 265 },
+ { 267, 267 },
+ { 269, 269 },
+ { 271, 271 },
+ { 273, 273 },
+ { 275, 275 },
+ { 277, 277 },
+ { 279, 279 },
+ { 281, 281 },
+ { 283, 283 },
+ { 285, 285 },
+ { 287, 287 },
+ { 289, 289 },
+ { 291, 291 },
+ { 293, 293 },
+ { 295, 295 },
+ { 297, 297 },
+ { 299, 299 },
+ { 301, 301 },
+ { 303, 303 },
+ { 305, 305 },
+ { 307, 307 },
+ { 309, 309 },
+ { 311, 312 },
+ { 314, 314 },
+ { 316, 316 },
+ { 318, 318 },
+ { 320, 320 },
+ { 322, 322 },
+ { 324, 324 },
+ { 326, 326 },
+ { 328, 329 },
+ { 331, 331 },
+ { 333, 333 },
+ { 335, 335 },
+ { 337, 337 },
+ { 339, 339 },
+ { 341, 341 },
+ { 343, 343 },
+ { 345, 345 },
+ { 347, 347 },
+ { 349, 349 },
+ { 351, 351 },
+ { 353, 353 },
+ { 355, 355 },
+ { 357, 357 },
+ { 359, 359 },
+ { 361, 361 },
+ { 363, 363 },
+ { 365, 365 },
+ { 367, 367 },
+ { 369, 369 },
+ { 371, 371 },
+ { 373, 373 },
+ { 375, 375 },
+ { 378, 378 },
+ { 380, 380 },
+ { 382, 384 },
+ { 387, 387 },
+ { 389, 389 },
+ { 392, 392 },
+ { 396, 397 },
+ { 402, 402 },
+ { 405, 405 },
+ { 409, 411 },
+ { 414, 414 },
+ { 417, 417 },
+ { 419, 419 },
+ { 421, 421 },
+ { 424, 424 },
+ { 426, 427 },
+ { 429, 429 },
+ { 432, 432 },
+ { 436, 436 },
+ { 438, 438 },
+ { 441, 442 },
+ { 445, 447 },
+ { 454, 454 },
+ { 457, 457 },
+ { 460, 460 },
+ { 462, 462 },
+ { 464, 464 },
+ { 466, 466 },
+ { 468, 468 },
+ { 470, 470 },
+ { 472, 472 },
+ { 474, 474 },
+ { 476, 477 },
+ { 479, 479 },
+ { 481, 481 },
+ { 483, 483 },
+ { 485, 485 },
+ { 487, 487 },
+ { 489, 489 },
+ { 491, 491 },
+ { 493, 493 },
+ { 495, 496 },
+ { 499, 499 },
+ { 501, 501 },
+ { 505, 505 },
+ { 507, 507 },
+ { 509, 509 },
+ { 511, 511 },
+ { 513, 513 },
+ { 515, 515 },
+ { 517, 517 },
+ { 519, 519 },
+ { 521, 521 },
+ { 523, 523 },
+ { 525, 525 },
+ { 527, 527 },
+ { 529, 529 },
+ { 531, 531 },
+ { 533, 533 },
+ { 535, 535 },
+ { 537, 537 },
+ { 539, 539 },
+ { 541, 541 },
+ { 543, 543 },
+ { 545, 545 },
+ { 547, 547 },
+ { 549, 549 },
+ { 551, 551 },
+ { 553, 553 },
+ { 555, 555 },
+ { 557, 557 },
+ { 559, 559 },
+ { 561, 561 },
+ { 563, 569 },
+ { 572, 572 },
+ { 575, 576 },
+ { 578, 578 },
+ { 583, 583 },
+ { 585, 585 },
+ { 587, 587 },
+ { 589, 589 },
+ { 591, 659 },
+ { 661, 687 },
+ { 881, 881 },
+ { 883, 883 },
+ { 887, 887 },
+ { 891, 893 },
+ { 912, 912 },
+ { 940, 974 },
+ { 976, 977 },
+ { 981, 983 },
+ { 985, 985 },
+ { 987, 987 },
+ { 989, 989 },
+ { 991, 991 },
+ { 993, 993 },
+ { 995, 995 },
+ { 997, 997 },
+ { 999, 999 },
+ { 1001, 1001 },
+ { 1003, 1003 },
+ { 1005, 1005 },
+ { 1007, 1011 },
+ { 1013, 1013 },
+ { 1016, 1016 },
+ { 1019, 1020 },
+ { 1072, 1119 },
+ { 1121, 1121 },
+ { 1123, 1123 },
+ { 1125, 1125 },
+ { 1127, 1127 },
+ { 1129, 1129 },
+ { 1131, 1131 },
+ { 1133, 1133 },
+ { 1135, 1135 },
+ { 1137, 1137 },
+ { 1139, 1139 },
+ { 1141, 1141 },
+ { 1143, 1143 },
+ { 1145, 1145 },
+ { 1147, 1147 },
+ { 1149, 1149 },
+ { 1151, 1151 },
+ { 1153, 1153 },
+ { 1163, 1163 },
+ { 1165, 1165 },
+ { 1167, 1167 },
+ { 1169, 1169 },
+ { 1171, 1171 },
+ { 1173, 1173 },
+ { 1175, 1175 },
+ { 1177, 1177 },
+ { 1179, 1179 },
+ { 1181, 1181 },
+ { 1183, 1183 },
+ { 1185, 1185 },
+ { 1187, 1187 },
+ { 1189, 1189 },
+ { 1191, 1191 },
+ { 1193, 1193 },
+ { 1195, 1195 },
+ { 1197, 1197 },
+ { 1199, 1199 },
+ { 1201, 1201 },
+ { 1203, 1203 },
+ { 1205, 1205 },
+ { 1207, 1207 },
+ { 1209, 1209 },
+ { 1211, 1211 },
+ { 1213, 1213 },
+ { 1215, 1215 },
+ { 1218, 1218 },
+ { 1220, 1220 },
+ { 1222, 1222 },
+ { 1224, 1224 },
+ { 1226, 1226 },
+ { 1228, 1228 },
+ { 1230, 1231 },
+ { 1233, 1233 },
+ { 1235, 1235 },
+ { 1237, 1237 },
+ { 1239, 1239 },
+ { 1241, 1241 },
+ { 1243, 1243 },
+ { 1245, 1245 },
+ { 1247, 1247 },
+ { 1249, 1249 },
+ { 1251, 1251 },
+ { 1253, 1253 },
+ { 1255, 1255 },
+ { 1257, 1257 },
+ { 1259, 1259 },
+ { 1261, 1261 },
+ { 1263, 1263 },
+ { 1265, 1265 },
+ { 1267, 1267 },
+ { 1269, 1269 },
+ { 1271, 1271 },
+ { 1273, 1273 },
+ { 1275, 1275 },
+ { 1277, 1277 },
+ { 1279, 1279 },
+ { 1281, 1281 },
+ { 1283, 1283 },
+ { 1285, 1285 },
+ { 1287, 1287 },
+ { 1289, 1289 },
+ { 1291, 1291 },
+ { 1293, 1293 },
+ { 1295, 1295 },
+ { 1297, 1297 },
+ { 1299, 1299 },
+ { 1301, 1301 },
+ { 1303, 1303 },
+ { 1305, 1305 },
+ { 1307, 1307 },
+ { 1309, 1309 },
+ { 1311, 1311 },
+ { 1313, 1313 },
+ { 1315, 1315 },
+ { 1317, 1317 },
+ { 1319, 1319 },
+ { 1321, 1321 },
+ { 1323, 1323 },
+ { 1325, 1325 },
+ { 1327, 1327 },
{ 1376, 1416 },
{ 4304, 4346 },
{ 4349, 4351 },
- { 5112, 5117 },
+ { 5112, 5117 },
{ 7296, 7304 },
- { 7424, 7467 },
- { 7531, 7543 },
- { 7545, 7578 },
- { 7681, 7681 },
- { 7683, 7683 },
- { 7685, 7685 },
- { 7687, 7687 },
- { 7689, 7689 },
- { 7691, 7691 },
- { 7693, 7693 },
- { 7695, 7695 },
- { 7697, 7697 },
- { 7699, 7699 },
- { 7701, 7701 },
- { 7703, 7703 },
- { 7705, 7705 },
- { 7707, 7707 },
- { 7709, 7709 },
- { 7711, 7711 },
- { 7713, 7713 },
- { 7715, 7715 },
- { 7717, 7717 },
- { 7719, 7719 },
- { 7721, 7721 },
- { 7723, 7723 },
- { 7725, 7725 },
- { 7727, 7727 },
- { 7729, 7729 },
- { 7731, 7731 },
- { 7733, 7733 },
- { 7735, 7735 },
- { 7737, 7737 },
- { 7739, 7739 },
- { 7741, 7741 },
- { 7743, 7743 },
- { 7745, 7745 },
- { 7747, 7747 },
- { 7749, 7749 },
- { 7751, 7751 },
- { 7753, 7753 },
- { 7755, 7755 },
- { 7757, 7757 },
- { 7759, 7759 },
- { 7761, 7761 },
- { 7763, 7763 },
- { 7765, 7765 },
- { 7767, 7767 },
- { 7769, 7769 },
- { 7771, 7771 },
- { 7773, 7773 },
- { 7775, 7775 },
- { 7777, 7777 },
- { 7779, 7779 },
- { 7781, 7781 },
- { 7783, 7783 },
- { 7785, 7785 },
- { 7787, 7787 },
- { 7789, 7789 },
- { 7791, 7791 },
- { 7793, 7793 },
- { 7795, 7795 },
- { 7797, 7797 },
- { 7799, 7799 },
- { 7801, 7801 },
- { 7803, 7803 },
- { 7805, 7805 },
- { 7807, 7807 },
- { 7809, 7809 },
- { 7811, 7811 },
- { 7813, 7813 },
- { 7815, 7815 },
- { 7817, 7817 },
- { 7819, 7819 },
- { 7821, 7821 },
- { 7823, 7823 },
- { 7825, 7825 },
- { 7827, 7827 },
- { 7829, 7837 },
- { 7839, 7839 },
- { 7841, 7841 },
- { 7843, 7843 },
- { 7845, 7845 },
- { 7847, 7847 },
- { 7849, 7849 },
- { 7851, 7851 },
- { 7853, 7853 },
- { 7855, 7855 },
- { 7857, 7857 },
- { 7859, 7859 },
- { 7861, 7861 },
- { 7863, 7863 },
- { 7865, 7865 },
- { 7867, 7867 },
- { 7869, 7869 },
- { 7871, 7871 },
- { 7873, 7873 },
- { 7875, 7875 },
- { 7877, 7877 },
- { 7879, 7879 },
- { 7881, 7881 },
- { 7883, 7883 },
- { 7885, 7885 },
- { 7887, 7887 },
- { 7889, 7889 },
- { 7891, 7891 },
- { 7893, 7893 },
- { 7895, 7895 },
- { 7897, 7897 },
- { 7899, 7899 },
- { 7901, 7901 },
- { 7903, 7903 },
- { 7905, 7905 },
- { 7907, 7907 },
- { 7909, 7909 },
- { 7911, 7911 },
- { 7913, 7913 },
- { 7915, 7915 },
- { 7917, 7917 },
- { 7919, 7919 },
- { 7921, 7921 },
- { 7923, 7923 },
- { 7925, 7925 },
- { 7927, 7927 },
- { 7929, 7929 },
- { 7931, 7931 },
- { 7933, 7933 },
- { 7935, 7943 },
- { 7952, 7957 },
- { 7968, 7975 },
- { 7984, 7991 },
- { 8000, 8005 },
- { 8016, 8023 },
- { 8032, 8039 },
- { 8048, 8061 },
- { 8064, 8071 },
- { 8080, 8087 },
- { 8096, 8103 },
- { 8112, 8116 },
- { 8118, 8119 },
- { 8126, 8126 },
- { 8130, 8132 },
- { 8134, 8135 },
- { 8144, 8147 },
- { 8150, 8151 },
- { 8160, 8167 },
- { 8178, 8180 },
- { 8182, 8183 },
- { 8458, 8458 },
- { 8462, 8463 },
- { 8467, 8467 },
- { 8495, 8495 },
- { 8500, 8500 },
- { 8505, 8505 },
- { 8508, 8509 },
- { 8518, 8521 },
- { 8526, 8526 },
- { 8580, 8580 },
+ { 7424, 7467 },
+ { 7531, 7543 },
+ { 7545, 7578 },
+ { 7681, 7681 },
+ { 7683, 7683 },
+ { 7685, 7685 },
+ { 7687, 7687 },
+ { 7689, 7689 },
+ { 7691, 7691 },
+ { 7693, 7693 },
+ { 7695, 7695 },
+ { 7697, 7697 },
+ { 7699, 7699 },
+ { 7701, 7701 },
+ { 7703, 7703 },
+ { 7705, 7705 },
+ { 7707, 7707 },
+ { 7709, 7709 },
+ { 7711, 7711 },
+ { 7713, 7713 },
+ { 7715, 7715 },
+ { 7717, 7717 },
+ { 7719, 7719 },
+ { 7721, 7721 },
+ { 7723, 7723 },
+ { 7725, 7725 },
+ { 7727, 7727 },
+ { 7729, 7729 },
+ { 7731, 7731 },
+ { 7733, 7733 },
+ { 7735, 7735 },
+ { 7737, 7737 },
+ { 7739, 7739 },
+ { 7741, 7741 },
+ { 7743, 7743 },
+ { 7745, 7745 },
+ { 7747, 7747 },
+ { 7749, 7749 },
+ { 7751, 7751 },
+ { 7753, 7753 },
+ { 7755, 7755 },
+ { 7757, 7757 },
+ { 7759, 7759 },
+ { 7761, 7761 },
+ { 7763, 7763 },
+ { 7765, 7765 },
+ { 7767, 7767 },
+ { 7769, 7769 },
+ { 7771, 7771 },
+ { 7773, 7773 },
+ { 7775, 7775 },
+ { 7777, 7777 },
+ { 7779, 7779 },
+ { 7781, 7781 },
+ { 7783, 7783 },
+ { 7785, 7785 },
+ { 7787, 7787 },
+ { 7789, 7789 },
+ { 7791, 7791 },
+ { 7793, 7793 },
+ { 7795, 7795 },
+ { 7797, 7797 },
+ { 7799, 7799 },
+ { 7801, 7801 },
+ { 7803, 7803 },
+ { 7805, 7805 },
+ { 7807, 7807 },
+ { 7809, 7809 },
+ { 7811, 7811 },
+ { 7813, 7813 },
+ { 7815, 7815 },
+ { 7817, 7817 },
+ { 7819, 7819 },
+ { 7821, 7821 },
+ { 7823, 7823 },
+ { 7825, 7825 },
+ { 7827, 7827 },
+ { 7829, 7837 },
+ { 7839, 7839 },
+ { 7841, 7841 },
+ { 7843, 7843 },
+ { 7845, 7845 },
+ { 7847, 7847 },
+ { 7849, 7849 },
+ { 7851, 7851 },
+ { 7853, 7853 },
+ { 7855, 7855 },
+ { 7857, 7857 },
+ { 7859, 7859 },
+ { 7861, 7861 },
+ { 7863, 7863 },
+ { 7865, 7865 },
+ { 7867, 7867 },
+ { 7869, 7869 },
+ { 7871, 7871 },
+ { 7873, 7873 },
+ { 7875, 7875 },
+ { 7877, 7877 },
+ { 7879, 7879 },
+ { 7881, 7881 },
+ { 7883, 7883 },
+ { 7885, 7885 },
+ { 7887, 7887 },
+ { 7889, 7889 },
+ { 7891, 7891 },
+ { 7893, 7893 },
+ { 7895, 7895 },
+ { 7897, 7897 },
+ { 7899, 7899 },
+ { 7901, 7901 },
+ { 7903, 7903 },
+ { 7905, 7905 },
+ { 7907, 7907 },
+ { 7909, 7909 },
+ { 7911, 7911 },
+ { 7913, 7913 },
+ { 7915, 7915 },
+ { 7917, 7917 },
+ { 7919, 7919 },
+ { 7921, 7921 },
+ { 7923, 7923 },
+ { 7925, 7925 },
+ { 7927, 7927 },
+ { 7929, 7929 },
+ { 7931, 7931 },
+ { 7933, 7933 },
+ { 7935, 7943 },
+ { 7952, 7957 },
+ { 7968, 7975 },
+ { 7984, 7991 },
+ { 8000, 8005 },
+ { 8016, 8023 },
+ { 8032, 8039 },
+ { 8048, 8061 },
+ { 8064, 8071 },
+ { 8080, 8087 },
+ { 8096, 8103 },
+ { 8112, 8116 },
+ { 8118, 8119 },
+ { 8126, 8126 },
+ { 8130, 8132 },
+ { 8134, 8135 },
+ { 8144, 8147 },
+ { 8150, 8151 },
+ { 8160, 8167 },
+ { 8178, 8180 },
+ { 8182, 8183 },
+ { 8458, 8458 },
+ { 8462, 8463 },
+ { 8467, 8467 },
+ { 8495, 8495 },
+ { 8500, 8500 },
+ { 8505, 8505 },
+ { 8508, 8509 },
+ { 8518, 8521 },
+ { 8526, 8526 },
+ { 8580, 8580 },
{ 11312, 11359 },
- { 11361, 11361 },
- { 11365, 11366 },
- { 11368, 11368 },
- { 11370, 11370 },
- { 11372, 11372 },
- { 11377, 11377 },
- { 11379, 11380 },
- { 11382, 11387 },
- { 11393, 11393 },
- { 11395, 11395 },
- { 11397, 11397 },
- { 11399, 11399 },
- { 11401, 11401 },
- { 11403, 11403 },
- { 11405, 11405 },
- { 11407, 11407 },
- { 11409, 11409 },
- { 11411, 11411 },
- { 11413, 11413 },
- { 11415, 11415 },
- { 11417, 11417 },
- { 11419, 11419 },
- { 11421, 11421 },
- { 11423, 11423 },
- { 11425, 11425 },
- { 11427, 11427 },
- { 11429, 11429 },
- { 11431, 11431 },
- { 11433, 11433 },
- { 11435, 11435 },
- { 11437, 11437 },
- { 11439, 11439 },
- { 11441, 11441 },
- { 11443, 11443 },
- { 11445, 11445 },
- { 11447, 11447 },
- { 11449, 11449 },
- { 11451, 11451 },
- { 11453, 11453 },
- { 11455, 11455 },
- { 11457, 11457 },
- { 11459, 11459 },
- { 11461, 11461 },
- { 11463, 11463 },
- { 11465, 11465 },
- { 11467, 11467 },
- { 11469, 11469 },
- { 11471, 11471 },
- { 11473, 11473 },
- { 11475, 11475 },
- { 11477, 11477 },
- { 11479, 11479 },
- { 11481, 11481 },
- { 11483, 11483 },
- { 11485, 11485 },
- { 11487, 11487 },
- { 11489, 11489 },
- { 11491, 11492 },
- { 11500, 11500 },
- { 11502, 11502 },
- { 11507, 11507 },
- { 11520, 11557 },
- { 11559, 11559 },
- { 11565, 11565 },
- { 42561, 42561 },
- { 42563, 42563 },
- { 42565, 42565 },
- { 42567, 42567 },
- { 42569, 42569 },
- { 42571, 42571 },
- { 42573, 42573 },
- { 42575, 42575 },
- { 42577, 42577 },
- { 42579, 42579 },
- { 42581, 42581 },
- { 42583, 42583 },
- { 42585, 42585 },
- { 42587, 42587 },
- { 42589, 42589 },
- { 42591, 42591 },
- { 42593, 42593 },
- { 42595, 42595 },
- { 42597, 42597 },
- { 42599, 42599 },
- { 42601, 42601 },
- { 42603, 42603 },
- { 42605, 42605 },
- { 42625, 42625 },
- { 42627, 42627 },
- { 42629, 42629 },
- { 42631, 42631 },
- { 42633, 42633 },
- { 42635, 42635 },
- { 42637, 42637 },
- { 42639, 42639 },
- { 42641, 42641 },
- { 42643, 42643 },
- { 42645, 42645 },
- { 42647, 42647 },
- { 42649, 42649 },
- { 42651, 42651 },
- { 42787, 42787 },
- { 42789, 42789 },
- { 42791, 42791 },
- { 42793, 42793 },
- { 42795, 42795 },
- { 42797, 42797 },
- { 42799, 42801 },
- { 42803, 42803 },
- { 42805, 42805 },
- { 42807, 42807 },
- { 42809, 42809 },
- { 42811, 42811 },
- { 42813, 42813 },
- { 42815, 42815 },
- { 42817, 42817 },
- { 42819, 42819 },
- { 42821, 42821 },
- { 42823, 42823 },
- { 42825, 42825 },
- { 42827, 42827 },
- { 42829, 42829 },
- { 42831, 42831 },
- { 42833, 42833 },
- { 42835, 42835 },
- { 42837, 42837 },
- { 42839, 42839 },
- { 42841, 42841 },
- { 42843, 42843 },
- { 42845, 42845 },
- { 42847, 42847 },
- { 42849, 42849 },
- { 42851, 42851 },
- { 42853, 42853 },
- { 42855, 42855 },
- { 42857, 42857 },
- { 42859, 42859 },
- { 42861, 42861 },
- { 42863, 42863 },
- { 42865, 42872 },
- { 42874, 42874 },
- { 42876, 42876 },
- { 42879, 42879 },
- { 42881, 42881 },
- { 42883, 42883 },
- { 42885, 42885 },
- { 42887, 42887 },
- { 42892, 42892 },
- { 42894, 42894 },
- { 42897, 42897 },
- { 42899, 42901 },
- { 42903, 42903 },
- { 42905, 42905 },
- { 42907, 42907 },
- { 42909, 42909 },
- { 42911, 42911 },
- { 42913, 42913 },
- { 42915, 42915 },
- { 42917, 42917 },
- { 42919, 42919 },
- { 42921, 42921 },
+ { 11361, 11361 },
+ { 11365, 11366 },
+ { 11368, 11368 },
+ { 11370, 11370 },
+ { 11372, 11372 },
+ { 11377, 11377 },
+ { 11379, 11380 },
+ { 11382, 11387 },
+ { 11393, 11393 },
+ { 11395, 11395 },
+ { 11397, 11397 },
+ { 11399, 11399 },
+ { 11401, 11401 },
+ { 11403, 11403 },
+ { 11405, 11405 },
+ { 11407, 11407 },
+ { 11409, 11409 },
+ { 11411, 11411 },
+ { 11413, 11413 },
+ { 11415, 11415 },
+ { 11417, 11417 },
+ { 11419, 11419 },
+ { 11421, 11421 },
+ { 11423, 11423 },
+ { 11425, 11425 },
+ { 11427, 11427 },
+ { 11429, 11429 },
+ { 11431, 11431 },
+ { 11433, 11433 },
+ { 11435, 11435 },
+ { 11437, 11437 },
+ { 11439, 11439 },
+ { 11441, 11441 },
+ { 11443, 11443 },
+ { 11445, 11445 },
+ { 11447, 11447 },
+ { 11449, 11449 },
+ { 11451, 11451 },
+ { 11453, 11453 },
+ { 11455, 11455 },
+ { 11457, 11457 },
+ { 11459, 11459 },
+ { 11461, 11461 },
+ { 11463, 11463 },
+ { 11465, 11465 },
+ { 11467, 11467 },
+ { 11469, 11469 },
+ { 11471, 11471 },
+ { 11473, 11473 },
+ { 11475, 11475 },
+ { 11477, 11477 },
+ { 11479, 11479 },
+ { 11481, 11481 },
+ { 11483, 11483 },
+ { 11485, 11485 },
+ { 11487, 11487 },
+ { 11489, 11489 },
+ { 11491, 11492 },
+ { 11500, 11500 },
+ { 11502, 11502 },
+ { 11507, 11507 },
+ { 11520, 11557 },
+ { 11559, 11559 },
+ { 11565, 11565 },
+ { 42561, 42561 },
+ { 42563, 42563 },
+ { 42565, 42565 },
+ { 42567, 42567 },
+ { 42569, 42569 },
+ { 42571, 42571 },
+ { 42573, 42573 },
+ { 42575, 42575 },
+ { 42577, 42577 },
+ { 42579, 42579 },
+ { 42581, 42581 },
+ { 42583, 42583 },
+ { 42585, 42585 },
+ { 42587, 42587 },
+ { 42589, 42589 },
+ { 42591, 42591 },
+ { 42593, 42593 },
+ { 42595, 42595 },
+ { 42597, 42597 },
+ { 42599, 42599 },
+ { 42601, 42601 },
+ { 42603, 42603 },
+ { 42605, 42605 },
+ { 42625, 42625 },
+ { 42627, 42627 },
+ { 42629, 42629 },
+ { 42631, 42631 },
+ { 42633, 42633 },
+ { 42635, 42635 },
+ { 42637, 42637 },
+ { 42639, 42639 },
+ { 42641, 42641 },
+ { 42643, 42643 },
+ { 42645, 42645 },
+ { 42647, 42647 },
+ { 42649, 42649 },
+ { 42651, 42651 },
+ { 42787, 42787 },
+ { 42789, 42789 },
+ { 42791, 42791 },
+ { 42793, 42793 },
+ { 42795, 42795 },
+ { 42797, 42797 },
+ { 42799, 42801 },
+ { 42803, 42803 },
+ { 42805, 42805 },
+ { 42807, 42807 },
+ { 42809, 42809 },
+ { 42811, 42811 },
+ { 42813, 42813 },
+ { 42815, 42815 },
+ { 42817, 42817 },
+ { 42819, 42819 },
+ { 42821, 42821 },
+ { 42823, 42823 },
+ { 42825, 42825 },
+ { 42827, 42827 },
+ { 42829, 42829 },
+ { 42831, 42831 },
+ { 42833, 42833 },
+ { 42835, 42835 },
+ { 42837, 42837 },
+ { 42839, 42839 },
+ { 42841, 42841 },
+ { 42843, 42843 },
+ { 42845, 42845 },
+ { 42847, 42847 },
+ { 42849, 42849 },
+ { 42851, 42851 },
+ { 42853, 42853 },
+ { 42855, 42855 },
+ { 42857, 42857 },
+ { 42859, 42859 },
+ { 42861, 42861 },
+ { 42863, 42863 },
+ { 42865, 42872 },
+ { 42874, 42874 },
+ { 42876, 42876 },
+ { 42879, 42879 },
+ { 42881, 42881 },
+ { 42883, 42883 },
+ { 42885, 42885 },
+ { 42887, 42887 },
+ { 42892, 42892 },
+ { 42894, 42894 },
+ { 42897, 42897 },
+ { 42899, 42901 },
+ { 42903, 42903 },
+ { 42905, 42905 },
+ { 42907, 42907 },
+ { 42909, 42909 },
+ { 42911, 42911 },
+ { 42913, 42913 },
+ { 42915, 42915 },
+ { 42917, 42917 },
+ { 42919, 42919 },
+ { 42921, 42921 },
{ 42927, 42927 },
- { 42933, 42933 },
- { 42935, 42935 },
+ { 42933, 42933 },
+ { 42935, 42935 },
{ 42937, 42937 },
{ 42939, 42939 },
{ 42941, 42941 },
@@ -1339,121 +1339,121 @@ static const URange16 Ll_range16[] = {
{ 42967, 42967 },
{ 42969, 42969 },
{ 42998, 42998 },
- { 43002, 43002 },
- { 43824, 43866 },
+ { 43002, 43002 },
+ { 43824, 43866 },
{ 43872, 43880 },
- { 43888, 43967 },
- { 64256, 64262 },
- { 64275, 64279 },
- { 65345, 65370 },
-};
-static const URange32 Ll_range32[] = {
- { 66600, 66639 },
+ { 43888, 43967 },
+ { 64256, 64262 },
+ { 64275, 64279 },
+ { 65345, 65370 },
+};
+static const URange32 Ll_range32[] = {
+ { 66600, 66639 },
{ 66776, 66811 },
{ 66967, 66977 },
{ 66979, 66993 },
{ 66995, 67001 },
{ 67003, 67004 },
- { 68800, 68850 },
- { 71872, 71903 },
+ { 68800, 68850 },
+ { 71872, 71903 },
{ 93792, 93823 },
- { 119834, 119859 },
- { 119886, 119892 },
- { 119894, 119911 },
- { 119938, 119963 },
- { 119990, 119993 },
- { 119995, 119995 },
- { 119997, 120003 },
- { 120005, 120015 },
- { 120042, 120067 },
- { 120094, 120119 },
- { 120146, 120171 },
- { 120198, 120223 },
- { 120250, 120275 },
- { 120302, 120327 },
- { 120354, 120379 },
- { 120406, 120431 },
- { 120458, 120485 },
- { 120514, 120538 },
- { 120540, 120545 },
- { 120572, 120596 },
- { 120598, 120603 },
- { 120630, 120654 },
- { 120656, 120661 },
- { 120688, 120712 },
- { 120714, 120719 },
- { 120746, 120770 },
- { 120772, 120777 },
- { 120779, 120779 },
+ { 119834, 119859 },
+ { 119886, 119892 },
+ { 119894, 119911 },
+ { 119938, 119963 },
+ { 119990, 119993 },
+ { 119995, 119995 },
+ { 119997, 120003 },
+ { 120005, 120015 },
+ { 120042, 120067 },
+ { 120094, 120119 },
+ { 120146, 120171 },
+ { 120198, 120223 },
+ { 120250, 120275 },
+ { 120302, 120327 },
+ { 120354, 120379 },
+ { 120406, 120431 },
+ { 120458, 120485 },
+ { 120514, 120538 },
+ { 120540, 120545 },
+ { 120572, 120596 },
+ { 120598, 120603 },
+ { 120630, 120654 },
+ { 120656, 120661 },
+ { 120688, 120712 },
+ { 120714, 120719 },
+ { 120746, 120770 },
+ { 120772, 120777 },
+ { 120779, 120779 },
{ 122624, 122633 },
{ 122635, 122654 },
{ 125218, 125251 },
-};
-static const URange16 Lm_range16[] = {
- { 688, 705 },
- { 710, 721 },
- { 736, 740 },
- { 748, 748 },
- { 750, 750 },
- { 884, 884 },
- { 890, 890 },
- { 1369, 1369 },
- { 1600, 1600 },
- { 1765, 1766 },
- { 2036, 2037 },
- { 2042, 2042 },
- { 2074, 2074 },
- { 2084, 2084 },
- { 2088, 2088 },
+};
+static const URange16 Lm_range16[] = {
+ { 688, 705 },
+ { 710, 721 },
+ { 736, 740 },
+ { 748, 748 },
+ { 750, 750 },
+ { 884, 884 },
+ { 890, 890 },
+ { 1369, 1369 },
+ { 1600, 1600 },
+ { 1765, 1766 },
+ { 2036, 2037 },
+ { 2042, 2042 },
+ { 2074, 2074 },
+ { 2084, 2084 },
+ { 2088, 2088 },
{ 2249, 2249 },
- { 2417, 2417 },
- { 3654, 3654 },
- { 3782, 3782 },
- { 4348, 4348 },
- { 6103, 6103 },
- { 6211, 6211 },
- { 6823, 6823 },
- { 7288, 7293 },
- { 7468, 7530 },
- { 7544, 7544 },
- { 7579, 7615 },
- { 8305, 8305 },
- { 8319, 8319 },
- { 8336, 8348 },
- { 11388, 11389 },
- { 11631, 11631 },
- { 11823, 11823 },
- { 12293, 12293 },
- { 12337, 12341 },
- { 12347, 12347 },
- { 12445, 12446 },
- { 12540, 12542 },
- { 40981, 40981 },
- { 42232, 42237 },
- { 42508, 42508 },
- { 42623, 42623 },
- { 42652, 42653 },
- { 42775, 42783 },
- { 42864, 42864 },
- { 42888, 42888 },
+ { 2417, 2417 },
+ { 3654, 3654 },
+ { 3782, 3782 },
+ { 4348, 4348 },
+ { 6103, 6103 },
+ { 6211, 6211 },
+ { 6823, 6823 },
+ { 7288, 7293 },
+ { 7468, 7530 },
+ { 7544, 7544 },
+ { 7579, 7615 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 11388, 11389 },
+ { 11631, 11631 },
+ { 11823, 11823 },
+ { 12293, 12293 },
+ { 12337, 12341 },
+ { 12347, 12347 },
+ { 12445, 12446 },
+ { 12540, 12542 },
+ { 40981, 40981 },
+ { 42232, 42237 },
+ { 42508, 42508 },
+ { 42623, 42623 },
+ { 42652, 42653 },
+ { 42775, 42783 },
+ { 42864, 42864 },
+ { 42888, 42888 },
{ 42994, 42996 },
- { 43000, 43001 },
- { 43471, 43471 },
- { 43494, 43494 },
- { 43632, 43632 },
- { 43741, 43741 },
- { 43763, 43764 },
- { 43868, 43871 },
+ { 43000, 43001 },
+ { 43471, 43471 },
+ { 43494, 43494 },
+ { 43632, 43632 },
+ { 43741, 43741 },
+ { 43763, 43764 },
+ { 43868, 43871 },
{ 43881, 43881 },
- { 65392, 65392 },
- { 65438, 65439 },
-};
-static const URange32 Lm_range32[] = {
+ { 65392, 65392 },
+ { 65438, 65439 },
+};
+static const URange32 Lm_range32[] = {
{ 67456, 67461 },
{ 67463, 67504 },
{ 67506, 67514 },
- { 92992, 92995 },
- { 94099, 94111 },
+ { 92992, 92995 },
+ { 94099, 94111 },
{ 94176, 94177 },
{ 94179, 94179 },
{ 110576, 110579 },
@@ -1461,7 +1461,7 @@ static const URange32 Lm_range32[] = {
{ 110589, 110590 },
{ 123191, 123197 },
{ 125259, 125259 },
-};
+};
static const URange16 Lo_range16[] = {
{ 170, 170 },
{ 186, 186 },
@@ -1753,7 +1753,7 @@ static const URange16 Lo_range16[] = {
{ 65482, 65487 },
{ 65490, 65495 },
{ 65498, 65500 },
-};
+};
static const URange32 Lo_range32[] = {
{ 65536, 65547 },
{ 65549, 65574 },
@@ -1966,612 +1966,612 @@ static const URange32 Lo_range32[] = {
{ 183984, 191456 },
{ 194560, 195101 },
{ 196608, 201546 },
-};
-static const URange16 Lt_range16[] = {
- { 453, 453 },
- { 456, 456 },
- { 459, 459 },
- { 498, 498 },
- { 8072, 8079 },
- { 8088, 8095 },
- { 8104, 8111 },
- { 8124, 8124 },
- { 8140, 8140 },
- { 8188, 8188 },
-};
-static const URange16 Lu_range16[] = {
- { 65, 90 },
- { 192, 214 },
- { 216, 222 },
- { 256, 256 },
- { 258, 258 },
- { 260, 260 },
- { 262, 262 },
- { 264, 264 },
- { 266, 266 },
- { 268, 268 },
- { 270, 270 },
- { 272, 272 },
- { 274, 274 },
- { 276, 276 },
- { 278, 278 },
- { 280, 280 },
- { 282, 282 },
- { 284, 284 },
- { 286, 286 },
- { 288, 288 },
- { 290, 290 },
- { 292, 292 },
- { 294, 294 },
- { 296, 296 },
- { 298, 298 },
- { 300, 300 },
- { 302, 302 },
- { 304, 304 },
- { 306, 306 },
- { 308, 308 },
- { 310, 310 },
- { 313, 313 },
- { 315, 315 },
- { 317, 317 },
- { 319, 319 },
- { 321, 321 },
- { 323, 323 },
- { 325, 325 },
- { 327, 327 },
- { 330, 330 },
- { 332, 332 },
- { 334, 334 },
- { 336, 336 },
- { 338, 338 },
- { 340, 340 },
- { 342, 342 },
- { 344, 344 },
- { 346, 346 },
- { 348, 348 },
- { 350, 350 },
- { 352, 352 },
- { 354, 354 },
- { 356, 356 },
- { 358, 358 },
- { 360, 360 },
- { 362, 362 },
- { 364, 364 },
- { 366, 366 },
- { 368, 368 },
- { 370, 370 },
- { 372, 372 },
- { 374, 374 },
- { 376, 377 },
- { 379, 379 },
- { 381, 381 },
- { 385, 386 },
- { 388, 388 },
- { 390, 391 },
- { 393, 395 },
- { 398, 401 },
- { 403, 404 },
- { 406, 408 },
- { 412, 413 },
- { 415, 416 },
- { 418, 418 },
- { 420, 420 },
- { 422, 423 },
- { 425, 425 },
- { 428, 428 },
- { 430, 431 },
- { 433, 435 },
- { 437, 437 },
- { 439, 440 },
- { 444, 444 },
- { 452, 452 },
- { 455, 455 },
- { 458, 458 },
- { 461, 461 },
- { 463, 463 },
- { 465, 465 },
- { 467, 467 },
- { 469, 469 },
- { 471, 471 },
- { 473, 473 },
- { 475, 475 },
- { 478, 478 },
- { 480, 480 },
- { 482, 482 },
- { 484, 484 },
- { 486, 486 },
- { 488, 488 },
- { 490, 490 },
- { 492, 492 },
- { 494, 494 },
- { 497, 497 },
- { 500, 500 },
- { 502, 504 },
- { 506, 506 },
- { 508, 508 },
- { 510, 510 },
- { 512, 512 },
- { 514, 514 },
- { 516, 516 },
- { 518, 518 },
- { 520, 520 },
- { 522, 522 },
- { 524, 524 },
- { 526, 526 },
- { 528, 528 },
- { 530, 530 },
- { 532, 532 },
- { 534, 534 },
- { 536, 536 },
- { 538, 538 },
- { 540, 540 },
- { 542, 542 },
- { 544, 544 },
- { 546, 546 },
- { 548, 548 },
- { 550, 550 },
- { 552, 552 },
- { 554, 554 },
- { 556, 556 },
- { 558, 558 },
- { 560, 560 },
- { 562, 562 },
- { 570, 571 },
- { 573, 574 },
- { 577, 577 },
- { 579, 582 },
- { 584, 584 },
- { 586, 586 },
- { 588, 588 },
- { 590, 590 },
- { 880, 880 },
- { 882, 882 },
- { 886, 886 },
- { 895, 895 },
- { 902, 902 },
- { 904, 906 },
- { 908, 908 },
- { 910, 911 },
- { 913, 929 },
- { 931, 939 },
- { 975, 975 },
- { 978, 980 },
- { 984, 984 },
- { 986, 986 },
- { 988, 988 },
- { 990, 990 },
- { 992, 992 },
- { 994, 994 },
- { 996, 996 },
- { 998, 998 },
- { 1000, 1000 },
- { 1002, 1002 },
- { 1004, 1004 },
- { 1006, 1006 },
- { 1012, 1012 },
- { 1015, 1015 },
- { 1017, 1018 },
- { 1021, 1071 },
- { 1120, 1120 },
- { 1122, 1122 },
- { 1124, 1124 },
- { 1126, 1126 },
- { 1128, 1128 },
- { 1130, 1130 },
- { 1132, 1132 },
- { 1134, 1134 },
- { 1136, 1136 },
- { 1138, 1138 },
- { 1140, 1140 },
- { 1142, 1142 },
- { 1144, 1144 },
- { 1146, 1146 },
- { 1148, 1148 },
- { 1150, 1150 },
- { 1152, 1152 },
- { 1162, 1162 },
- { 1164, 1164 },
- { 1166, 1166 },
- { 1168, 1168 },
- { 1170, 1170 },
- { 1172, 1172 },
- { 1174, 1174 },
- { 1176, 1176 },
- { 1178, 1178 },
- { 1180, 1180 },
- { 1182, 1182 },
- { 1184, 1184 },
- { 1186, 1186 },
- { 1188, 1188 },
- { 1190, 1190 },
- { 1192, 1192 },
- { 1194, 1194 },
- { 1196, 1196 },
- { 1198, 1198 },
- { 1200, 1200 },
- { 1202, 1202 },
- { 1204, 1204 },
- { 1206, 1206 },
- { 1208, 1208 },
- { 1210, 1210 },
- { 1212, 1212 },
- { 1214, 1214 },
- { 1216, 1217 },
- { 1219, 1219 },
- { 1221, 1221 },
- { 1223, 1223 },
- { 1225, 1225 },
- { 1227, 1227 },
- { 1229, 1229 },
- { 1232, 1232 },
- { 1234, 1234 },
- { 1236, 1236 },
- { 1238, 1238 },
- { 1240, 1240 },
- { 1242, 1242 },
- { 1244, 1244 },
- { 1246, 1246 },
- { 1248, 1248 },
- { 1250, 1250 },
- { 1252, 1252 },
- { 1254, 1254 },
- { 1256, 1256 },
- { 1258, 1258 },
- { 1260, 1260 },
- { 1262, 1262 },
- { 1264, 1264 },
- { 1266, 1266 },
- { 1268, 1268 },
- { 1270, 1270 },
- { 1272, 1272 },
- { 1274, 1274 },
- { 1276, 1276 },
- { 1278, 1278 },
- { 1280, 1280 },
- { 1282, 1282 },
- { 1284, 1284 },
- { 1286, 1286 },
- { 1288, 1288 },
- { 1290, 1290 },
- { 1292, 1292 },
- { 1294, 1294 },
- { 1296, 1296 },
- { 1298, 1298 },
- { 1300, 1300 },
- { 1302, 1302 },
- { 1304, 1304 },
- { 1306, 1306 },
- { 1308, 1308 },
- { 1310, 1310 },
- { 1312, 1312 },
- { 1314, 1314 },
- { 1316, 1316 },
- { 1318, 1318 },
- { 1320, 1320 },
- { 1322, 1322 },
- { 1324, 1324 },
- { 1326, 1326 },
- { 1329, 1366 },
- { 4256, 4293 },
- { 4295, 4295 },
- { 4301, 4301 },
- { 5024, 5109 },
+};
+static const URange16 Lt_range16[] = {
+ { 453, 453 },
+ { 456, 456 },
+ { 459, 459 },
+ { 498, 498 },
+ { 8072, 8079 },
+ { 8088, 8095 },
+ { 8104, 8111 },
+ { 8124, 8124 },
+ { 8140, 8140 },
+ { 8188, 8188 },
+};
+static const URange16 Lu_range16[] = {
+ { 65, 90 },
+ { 192, 214 },
+ { 216, 222 },
+ { 256, 256 },
+ { 258, 258 },
+ { 260, 260 },
+ { 262, 262 },
+ { 264, 264 },
+ { 266, 266 },
+ { 268, 268 },
+ { 270, 270 },
+ { 272, 272 },
+ { 274, 274 },
+ { 276, 276 },
+ { 278, 278 },
+ { 280, 280 },
+ { 282, 282 },
+ { 284, 284 },
+ { 286, 286 },
+ { 288, 288 },
+ { 290, 290 },
+ { 292, 292 },
+ { 294, 294 },
+ { 296, 296 },
+ { 298, 298 },
+ { 300, 300 },
+ { 302, 302 },
+ { 304, 304 },
+ { 306, 306 },
+ { 308, 308 },
+ { 310, 310 },
+ { 313, 313 },
+ { 315, 315 },
+ { 317, 317 },
+ { 319, 319 },
+ { 321, 321 },
+ { 323, 323 },
+ { 325, 325 },
+ { 327, 327 },
+ { 330, 330 },
+ { 332, 332 },
+ { 334, 334 },
+ { 336, 336 },
+ { 338, 338 },
+ { 340, 340 },
+ { 342, 342 },
+ { 344, 344 },
+ { 346, 346 },
+ { 348, 348 },
+ { 350, 350 },
+ { 352, 352 },
+ { 354, 354 },
+ { 356, 356 },
+ { 358, 358 },
+ { 360, 360 },
+ { 362, 362 },
+ { 364, 364 },
+ { 366, 366 },
+ { 368, 368 },
+ { 370, 370 },
+ { 372, 372 },
+ { 374, 374 },
+ { 376, 377 },
+ { 379, 379 },
+ { 381, 381 },
+ { 385, 386 },
+ { 388, 388 },
+ { 390, 391 },
+ { 393, 395 },
+ { 398, 401 },
+ { 403, 404 },
+ { 406, 408 },
+ { 412, 413 },
+ { 415, 416 },
+ { 418, 418 },
+ { 420, 420 },
+ { 422, 423 },
+ { 425, 425 },
+ { 428, 428 },
+ { 430, 431 },
+ { 433, 435 },
+ { 437, 437 },
+ { 439, 440 },
+ { 444, 444 },
+ { 452, 452 },
+ { 455, 455 },
+ { 458, 458 },
+ { 461, 461 },
+ { 463, 463 },
+ { 465, 465 },
+ { 467, 467 },
+ { 469, 469 },
+ { 471, 471 },
+ { 473, 473 },
+ { 475, 475 },
+ { 478, 478 },
+ { 480, 480 },
+ { 482, 482 },
+ { 484, 484 },
+ { 486, 486 },
+ { 488, 488 },
+ { 490, 490 },
+ { 492, 492 },
+ { 494, 494 },
+ { 497, 497 },
+ { 500, 500 },
+ { 502, 504 },
+ { 506, 506 },
+ { 508, 508 },
+ { 510, 510 },
+ { 512, 512 },
+ { 514, 514 },
+ { 516, 516 },
+ { 518, 518 },
+ { 520, 520 },
+ { 522, 522 },
+ { 524, 524 },
+ { 526, 526 },
+ { 528, 528 },
+ { 530, 530 },
+ { 532, 532 },
+ { 534, 534 },
+ { 536, 536 },
+ { 538, 538 },
+ { 540, 540 },
+ { 542, 542 },
+ { 544, 544 },
+ { 546, 546 },
+ { 548, 548 },
+ { 550, 550 },
+ { 552, 552 },
+ { 554, 554 },
+ { 556, 556 },
+ { 558, 558 },
+ { 560, 560 },
+ { 562, 562 },
+ { 570, 571 },
+ { 573, 574 },
+ { 577, 577 },
+ { 579, 582 },
+ { 584, 584 },
+ { 586, 586 },
+ { 588, 588 },
+ { 590, 590 },
+ { 880, 880 },
+ { 882, 882 },
+ { 886, 886 },
+ { 895, 895 },
+ { 902, 902 },
+ { 904, 906 },
+ { 908, 908 },
+ { 910, 911 },
+ { 913, 929 },
+ { 931, 939 },
+ { 975, 975 },
+ { 978, 980 },
+ { 984, 984 },
+ { 986, 986 },
+ { 988, 988 },
+ { 990, 990 },
+ { 992, 992 },
+ { 994, 994 },
+ { 996, 996 },
+ { 998, 998 },
+ { 1000, 1000 },
+ { 1002, 1002 },
+ { 1004, 1004 },
+ { 1006, 1006 },
+ { 1012, 1012 },
+ { 1015, 1015 },
+ { 1017, 1018 },
+ { 1021, 1071 },
+ { 1120, 1120 },
+ { 1122, 1122 },
+ { 1124, 1124 },
+ { 1126, 1126 },
+ { 1128, 1128 },
+ { 1130, 1130 },
+ { 1132, 1132 },
+ { 1134, 1134 },
+ { 1136, 1136 },
+ { 1138, 1138 },
+ { 1140, 1140 },
+ { 1142, 1142 },
+ { 1144, 1144 },
+ { 1146, 1146 },
+ { 1148, 1148 },
+ { 1150, 1150 },
+ { 1152, 1152 },
+ { 1162, 1162 },
+ { 1164, 1164 },
+ { 1166, 1166 },
+ { 1168, 1168 },
+ { 1170, 1170 },
+ { 1172, 1172 },
+ { 1174, 1174 },
+ { 1176, 1176 },
+ { 1178, 1178 },
+ { 1180, 1180 },
+ { 1182, 1182 },
+ { 1184, 1184 },
+ { 1186, 1186 },
+ { 1188, 1188 },
+ { 1190, 1190 },
+ { 1192, 1192 },
+ { 1194, 1194 },
+ { 1196, 1196 },
+ { 1198, 1198 },
+ { 1200, 1200 },
+ { 1202, 1202 },
+ { 1204, 1204 },
+ { 1206, 1206 },
+ { 1208, 1208 },
+ { 1210, 1210 },
+ { 1212, 1212 },
+ { 1214, 1214 },
+ { 1216, 1217 },
+ { 1219, 1219 },
+ { 1221, 1221 },
+ { 1223, 1223 },
+ { 1225, 1225 },
+ { 1227, 1227 },
+ { 1229, 1229 },
+ { 1232, 1232 },
+ { 1234, 1234 },
+ { 1236, 1236 },
+ { 1238, 1238 },
+ { 1240, 1240 },
+ { 1242, 1242 },
+ { 1244, 1244 },
+ { 1246, 1246 },
+ { 1248, 1248 },
+ { 1250, 1250 },
+ { 1252, 1252 },
+ { 1254, 1254 },
+ { 1256, 1256 },
+ { 1258, 1258 },
+ { 1260, 1260 },
+ { 1262, 1262 },
+ { 1264, 1264 },
+ { 1266, 1266 },
+ { 1268, 1268 },
+ { 1270, 1270 },
+ { 1272, 1272 },
+ { 1274, 1274 },
+ { 1276, 1276 },
+ { 1278, 1278 },
+ { 1280, 1280 },
+ { 1282, 1282 },
+ { 1284, 1284 },
+ { 1286, 1286 },
+ { 1288, 1288 },
+ { 1290, 1290 },
+ { 1292, 1292 },
+ { 1294, 1294 },
+ { 1296, 1296 },
+ { 1298, 1298 },
+ { 1300, 1300 },
+ { 1302, 1302 },
+ { 1304, 1304 },
+ { 1306, 1306 },
+ { 1308, 1308 },
+ { 1310, 1310 },
+ { 1312, 1312 },
+ { 1314, 1314 },
+ { 1316, 1316 },
+ { 1318, 1318 },
+ { 1320, 1320 },
+ { 1322, 1322 },
+ { 1324, 1324 },
+ { 1326, 1326 },
+ { 1329, 1366 },
+ { 4256, 4293 },
+ { 4295, 4295 },
+ { 4301, 4301 },
+ { 5024, 5109 },
{ 7312, 7354 },
{ 7357, 7359 },
- { 7680, 7680 },
- { 7682, 7682 },
- { 7684, 7684 },
- { 7686, 7686 },
- { 7688, 7688 },
- { 7690, 7690 },
- { 7692, 7692 },
- { 7694, 7694 },
- { 7696, 7696 },
- { 7698, 7698 },
- { 7700, 7700 },
- { 7702, 7702 },
- { 7704, 7704 },
- { 7706, 7706 },
- { 7708, 7708 },
- { 7710, 7710 },
- { 7712, 7712 },
- { 7714, 7714 },
- { 7716, 7716 },
- { 7718, 7718 },
- { 7720, 7720 },
- { 7722, 7722 },
- { 7724, 7724 },
- { 7726, 7726 },
- { 7728, 7728 },
- { 7730, 7730 },
- { 7732, 7732 },
- { 7734, 7734 },
- { 7736, 7736 },
- { 7738, 7738 },
- { 7740, 7740 },
- { 7742, 7742 },
- { 7744, 7744 },
- { 7746, 7746 },
- { 7748, 7748 },
- { 7750, 7750 },
- { 7752, 7752 },
- { 7754, 7754 },
- { 7756, 7756 },
- { 7758, 7758 },
- { 7760, 7760 },
- { 7762, 7762 },
- { 7764, 7764 },
- { 7766, 7766 },
- { 7768, 7768 },
- { 7770, 7770 },
- { 7772, 7772 },
- { 7774, 7774 },
- { 7776, 7776 },
- { 7778, 7778 },
- { 7780, 7780 },
- { 7782, 7782 },
- { 7784, 7784 },
- { 7786, 7786 },
- { 7788, 7788 },
- { 7790, 7790 },
- { 7792, 7792 },
- { 7794, 7794 },
- { 7796, 7796 },
- { 7798, 7798 },
- { 7800, 7800 },
- { 7802, 7802 },
- { 7804, 7804 },
- { 7806, 7806 },
- { 7808, 7808 },
- { 7810, 7810 },
- { 7812, 7812 },
- { 7814, 7814 },
- { 7816, 7816 },
- { 7818, 7818 },
- { 7820, 7820 },
- { 7822, 7822 },
- { 7824, 7824 },
- { 7826, 7826 },
- { 7828, 7828 },
- { 7838, 7838 },
- { 7840, 7840 },
- { 7842, 7842 },
- { 7844, 7844 },
- { 7846, 7846 },
- { 7848, 7848 },
- { 7850, 7850 },
- { 7852, 7852 },
- { 7854, 7854 },
- { 7856, 7856 },
- { 7858, 7858 },
- { 7860, 7860 },
- { 7862, 7862 },
- { 7864, 7864 },
- { 7866, 7866 },
- { 7868, 7868 },
- { 7870, 7870 },
- { 7872, 7872 },
- { 7874, 7874 },
- { 7876, 7876 },
- { 7878, 7878 },
- { 7880, 7880 },
- { 7882, 7882 },
- { 7884, 7884 },
- { 7886, 7886 },
- { 7888, 7888 },
- { 7890, 7890 },
- { 7892, 7892 },
- { 7894, 7894 },
- { 7896, 7896 },
- { 7898, 7898 },
- { 7900, 7900 },
- { 7902, 7902 },
- { 7904, 7904 },
- { 7906, 7906 },
- { 7908, 7908 },
- { 7910, 7910 },
- { 7912, 7912 },
- { 7914, 7914 },
- { 7916, 7916 },
- { 7918, 7918 },
- { 7920, 7920 },
- { 7922, 7922 },
- { 7924, 7924 },
- { 7926, 7926 },
- { 7928, 7928 },
- { 7930, 7930 },
- { 7932, 7932 },
- { 7934, 7934 },
- { 7944, 7951 },
- { 7960, 7965 },
- { 7976, 7983 },
- { 7992, 7999 },
- { 8008, 8013 },
- { 8025, 8025 },
- { 8027, 8027 },
- { 8029, 8029 },
- { 8031, 8031 },
- { 8040, 8047 },
- { 8120, 8123 },
- { 8136, 8139 },
- { 8152, 8155 },
- { 8168, 8172 },
- { 8184, 8187 },
- { 8450, 8450 },
- { 8455, 8455 },
- { 8459, 8461 },
- { 8464, 8466 },
- { 8469, 8469 },
- { 8473, 8477 },
- { 8484, 8484 },
- { 8486, 8486 },
- { 8488, 8488 },
- { 8490, 8493 },
- { 8496, 8499 },
- { 8510, 8511 },
- { 8517, 8517 },
- { 8579, 8579 },
+ { 7680, 7680 },
+ { 7682, 7682 },
+ { 7684, 7684 },
+ { 7686, 7686 },
+ { 7688, 7688 },
+ { 7690, 7690 },
+ { 7692, 7692 },
+ { 7694, 7694 },
+ { 7696, 7696 },
+ { 7698, 7698 },
+ { 7700, 7700 },
+ { 7702, 7702 },
+ { 7704, 7704 },
+ { 7706, 7706 },
+ { 7708, 7708 },
+ { 7710, 7710 },
+ { 7712, 7712 },
+ { 7714, 7714 },
+ { 7716, 7716 },
+ { 7718, 7718 },
+ { 7720, 7720 },
+ { 7722, 7722 },
+ { 7724, 7724 },
+ { 7726, 7726 },
+ { 7728, 7728 },
+ { 7730, 7730 },
+ { 7732, 7732 },
+ { 7734, 7734 },
+ { 7736, 7736 },
+ { 7738, 7738 },
+ { 7740, 7740 },
+ { 7742, 7742 },
+ { 7744, 7744 },
+ { 7746, 7746 },
+ { 7748, 7748 },
+ { 7750, 7750 },
+ { 7752, 7752 },
+ { 7754, 7754 },
+ { 7756, 7756 },
+ { 7758, 7758 },
+ { 7760, 7760 },
+ { 7762, 7762 },
+ { 7764, 7764 },
+ { 7766, 7766 },
+ { 7768, 7768 },
+ { 7770, 7770 },
+ { 7772, 7772 },
+ { 7774, 7774 },
+ { 7776, 7776 },
+ { 7778, 7778 },
+ { 7780, 7780 },
+ { 7782, 7782 },
+ { 7784, 7784 },
+ { 7786, 7786 },
+ { 7788, 7788 },
+ { 7790, 7790 },
+ { 7792, 7792 },
+ { 7794, 7794 },
+ { 7796, 7796 },
+ { 7798, 7798 },
+ { 7800, 7800 },
+ { 7802, 7802 },
+ { 7804, 7804 },
+ { 7806, 7806 },
+ { 7808, 7808 },
+ { 7810, 7810 },
+ { 7812, 7812 },
+ { 7814, 7814 },
+ { 7816, 7816 },
+ { 7818, 7818 },
+ { 7820, 7820 },
+ { 7822, 7822 },
+ { 7824, 7824 },
+ { 7826, 7826 },
+ { 7828, 7828 },
+ { 7838, 7838 },
+ { 7840, 7840 },
+ { 7842, 7842 },
+ { 7844, 7844 },
+ { 7846, 7846 },
+ { 7848, 7848 },
+ { 7850, 7850 },
+ { 7852, 7852 },
+ { 7854, 7854 },
+ { 7856, 7856 },
+ { 7858, 7858 },
+ { 7860, 7860 },
+ { 7862, 7862 },
+ { 7864, 7864 },
+ { 7866, 7866 },
+ { 7868, 7868 },
+ { 7870, 7870 },
+ { 7872, 7872 },
+ { 7874, 7874 },
+ { 7876, 7876 },
+ { 7878, 7878 },
+ { 7880, 7880 },
+ { 7882, 7882 },
+ { 7884, 7884 },
+ { 7886, 7886 },
+ { 7888, 7888 },
+ { 7890, 7890 },
+ { 7892, 7892 },
+ { 7894, 7894 },
+ { 7896, 7896 },
+ { 7898, 7898 },
+ { 7900, 7900 },
+ { 7902, 7902 },
+ { 7904, 7904 },
+ { 7906, 7906 },
+ { 7908, 7908 },
+ { 7910, 7910 },
+ { 7912, 7912 },
+ { 7914, 7914 },
+ { 7916, 7916 },
+ { 7918, 7918 },
+ { 7920, 7920 },
+ { 7922, 7922 },
+ { 7924, 7924 },
+ { 7926, 7926 },
+ { 7928, 7928 },
+ { 7930, 7930 },
+ { 7932, 7932 },
+ { 7934, 7934 },
+ { 7944, 7951 },
+ { 7960, 7965 },
+ { 7976, 7983 },
+ { 7992, 7999 },
+ { 8008, 8013 },
+ { 8025, 8025 },
+ { 8027, 8027 },
+ { 8029, 8029 },
+ { 8031, 8031 },
+ { 8040, 8047 },
+ { 8120, 8123 },
+ { 8136, 8139 },
+ { 8152, 8155 },
+ { 8168, 8172 },
+ { 8184, 8187 },
+ { 8450, 8450 },
+ { 8455, 8455 },
+ { 8459, 8461 },
+ { 8464, 8466 },
+ { 8469, 8469 },
+ { 8473, 8477 },
+ { 8484, 8484 },
+ { 8486, 8486 },
+ { 8488, 8488 },
+ { 8490, 8493 },
+ { 8496, 8499 },
+ { 8510, 8511 },
+ { 8517, 8517 },
+ { 8579, 8579 },
{ 11264, 11311 },
- { 11360, 11360 },
- { 11362, 11364 },
- { 11367, 11367 },
- { 11369, 11369 },
- { 11371, 11371 },
- { 11373, 11376 },
- { 11378, 11378 },
- { 11381, 11381 },
- { 11390, 11392 },
- { 11394, 11394 },
- { 11396, 11396 },
- { 11398, 11398 },
- { 11400, 11400 },
- { 11402, 11402 },
- { 11404, 11404 },
- { 11406, 11406 },
- { 11408, 11408 },
- { 11410, 11410 },
- { 11412, 11412 },
- { 11414, 11414 },
- { 11416, 11416 },
- { 11418, 11418 },
- { 11420, 11420 },
- { 11422, 11422 },
- { 11424, 11424 },
- { 11426, 11426 },
- { 11428, 11428 },
- { 11430, 11430 },
- { 11432, 11432 },
- { 11434, 11434 },
- { 11436, 11436 },
- { 11438, 11438 },
- { 11440, 11440 },
- { 11442, 11442 },
- { 11444, 11444 },
- { 11446, 11446 },
- { 11448, 11448 },
- { 11450, 11450 },
- { 11452, 11452 },
- { 11454, 11454 },
- { 11456, 11456 },
- { 11458, 11458 },
- { 11460, 11460 },
- { 11462, 11462 },
- { 11464, 11464 },
- { 11466, 11466 },
- { 11468, 11468 },
- { 11470, 11470 },
- { 11472, 11472 },
- { 11474, 11474 },
- { 11476, 11476 },
- { 11478, 11478 },
- { 11480, 11480 },
- { 11482, 11482 },
- { 11484, 11484 },
- { 11486, 11486 },
- { 11488, 11488 },
- { 11490, 11490 },
- { 11499, 11499 },
- { 11501, 11501 },
- { 11506, 11506 },
- { 42560, 42560 },
- { 42562, 42562 },
- { 42564, 42564 },
- { 42566, 42566 },
- { 42568, 42568 },
- { 42570, 42570 },
- { 42572, 42572 },
- { 42574, 42574 },
- { 42576, 42576 },
- { 42578, 42578 },
- { 42580, 42580 },
- { 42582, 42582 },
- { 42584, 42584 },
- { 42586, 42586 },
- { 42588, 42588 },
- { 42590, 42590 },
- { 42592, 42592 },
- { 42594, 42594 },
- { 42596, 42596 },
- { 42598, 42598 },
- { 42600, 42600 },
- { 42602, 42602 },
- { 42604, 42604 },
- { 42624, 42624 },
- { 42626, 42626 },
- { 42628, 42628 },
- { 42630, 42630 },
- { 42632, 42632 },
- { 42634, 42634 },
- { 42636, 42636 },
- { 42638, 42638 },
- { 42640, 42640 },
- { 42642, 42642 },
- { 42644, 42644 },
- { 42646, 42646 },
- { 42648, 42648 },
- { 42650, 42650 },
- { 42786, 42786 },
- { 42788, 42788 },
- { 42790, 42790 },
- { 42792, 42792 },
- { 42794, 42794 },
- { 42796, 42796 },
- { 42798, 42798 },
- { 42802, 42802 },
- { 42804, 42804 },
- { 42806, 42806 },
- { 42808, 42808 },
- { 42810, 42810 },
- { 42812, 42812 },
- { 42814, 42814 },
- { 42816, 42816 },
- { 42818, 42818 },
- { 42820, 42820 },
- { 42822, 42822 },
- { 42824, 42824 },
- { 42826, 42826 },
- { 42828, 42828 },
- { 42830, 42830 },
- { 42832, 42832 },
- { 42834, 42834 },
- { 42836, 42836 },
- { 42838, 42838 },
- { 42840, 42840 },
- { 42842, 42842 },
- { 42844, 42844 },
- { 42846, 42846 },
- { 42848, 42848 },
- { 42850, 42850 },
- { 42852, 42852 },
- { 42854, 42854 },
- { 42856, 42856 },
- { 42858, 42858 },
- { 42860, 42860 },
- { 42862, 42862 },
- { 42873, 42873 },
- { 42875, 42875 },
- { 42877, 42878 },
- { 42880, 42880 },
- { 42882, 42882 },
- { 42884, 42884 },
- { 42886, 42886 },
- { 42891, 42891 },
- { 42893, 42893 },
- { 42896, 42896 },
- { 42898, 42898 },
- { 42902, 42902 },
- { 42904, 42904 },
- { 42906, 42906 },
- { 42908, 42908 },
- { 42910, 42910 },
- { 42912, 42912 },
- { 42914, 42914 },
- { 42916, 42916 },
- { 42918, 42918 },
- { 42920, 42920 },
+ { 11360, 11360 },
+ { 11362, 11364 },
+ { 11367, 11367 },
+ { 11369, 11369 },
+ { 11371, 11371 },
+ { 11373, 11376 },
+ { 11378, 11378 },
+ { 11381, 11381 },
+ { 11390, 11392 },
+ { 11394, 11394 },
+ { 11396, 11396 },
+ { 11398, 11398 },
+ { 11400, 11400 },
+ { 11402, 11402 },
+ { 11404, 11404 },
+ { 11406, 11406 },
+ { 11408, 11408 },
+ { 11410, 11410 },
+ { 11412, 11412 },
+ { 11414, 11414 },
+ { 11416, 11416 },
+ { 11418, 11418 },
+ { 11420, 11420 },
+ { 11422, 11422 },
+ { 11424, 11424 },
+ { 11426, 11426 },
+ { 11428, 11428 },
+ { 11430, 11430 },
+ { 11432, 11432 },
+ { 11434, 11434 },
+ { 11436, 11436 },
+ { 11438, 11438 },
+ { 11440, 11440 },
+ { 11442, 11442 },
+ { 11444, 11444 },
+ { 11446, 11446 },
+ { 11448, 11448 },
+ { 11450, 11450 },
+ { 11452, 11452 },
+ { 11454, 11454 },
+ { 11456, 11456 },
+ { 11458, 11458 },
+ { 11460, 11460 },
+ { 11462, 11462 },
+ { 11464, 11464 },
+ { 11466, 11466 },
+ { 11468, 11468 },
+ { 11470, 11470 },
+ { 11472, 11472 },
+ { 11474, 11474 },
+ { 11476, 11476 },
+ { 11478, 11478 },
+ { 11480, 11480 },
+ { 11482, 11482 },
+ { 11484, 11484 },
+ { 11486, 11486 },
+ { 11488, 11488 },
+ { 11490, 11490 },
+ { 11499, 11499 },
+ { 11501, 11501 },
+ { 11506, 11506 },
+ { 42560, 42560 },
+ { 42562, 42562 },
+ { 42564, 42564 },
+ { 42566, 42566 },
+ { 42568, 42568 },
+ { 42570, 42570 },
+ { 42572, 42572 },
+ { 42574, 42574 },
+ { 42576, 42576 },
+ { 42578, 42578 },
+ { 42580, 42580 },
+ { 42582, 42582 },
+ { 42584, 42584 },
+ { 42586, 42586 },
+ { 42588, 42588 },
+ { 42590, 42590 },
+ { 42592, 42592 },
+ { 42594, 42594 },
+ { 42596, 42596 },
+ { 42598, 42598 },
+ { 42600, 42600 },
+ { 42602, 42602 },
+ { 42604, 42604 },
+ { 42624, 42624 },
+ { 42626, 42626 },
+ { 42628, 42628 },
+ { 42630, 42630 },
+ { 42632, 42632 },
+ { 42634, 42634 },
+ { 42636, 42636 },
+ { 42638, 42638 },
+ { 42640, 42640 },
+ { 42642, 42642 },
+ { 42644, 42644 },
+ { 42646, 42646 },
+ { 42648, 42648 },
+ { 42650, 42650 },
+ { 42786, 42786 },
+ { 42788, 42788 },
+ { 42790, 42790 },
+ { 42792, 42792 },
+ { 42794, 42794 },
+ { 42796, 42796 },
+ { 42798, 42798 },
+ { 42802, 42802 },
+ { 42804, 42804 },
+ { 42806, 42806 },
+ { 42808, 42808 },
+ { 42810, 42810 },
+ { 42812, 42812 },
+ { 42814, 42814 },
+ { 42816, 42816 },
+ { 42818, 42818 },
+ { 42820, 42820 },
+ { 42822, 42822 },
+ { 42824, 42824 },
+ { 42826, 42826 },
+ { 42828, 42828 },
+ { 42830, 42830 },
+ { 42832, 42832 },
+ { 42834, 42834 },
+ { 42836, 42836 },
+ { 42838, 42838 },
+ { 42840, 42840 },
+ { 42842, 42842 },
+ { 42844, 42844 },
+ { 42846, 42846 },
+ { 42848, 42848 },
+ { 42850, 42850 },
+ { 42852, 42852 },
+ { 42854, 42854 },
+ { 42856, 42856 },
+ { 42858, 42858 },
+ { 42860, 42860 },
+ { 42862, 42862 },
+ { 42873, 42873 },
+ { 42875, 42875 },
+ { 42877, 42878 },
+ { 42880, 42880 },
+ { 42882, 42882 },
+ { 42884, 42884 },
+ { 42886, 42886 },
+ { 42891, 42891 },
+ { 42893, 42893 },
+ { 42896, 42896 },
+ { 42898, 42898 },
+ { 42902, 42902 },
+ { 42904, 42904 },
+ { 42906, 42906 },
+ { 42908, 42908 },
+ { 42910, 42910 },
+ { 42912, 42912 },
+ { 42914, 42914 },
+ { 42916, 42916 },
+ { 42918, 42918 },
+ { 42920, 42920 },
{ 42922, 42926 },
- { 42928, 42932 },
- { 42934, 42934 },
+ { 42928, 42932 },
+ { 42934, 42934 },
{ 42936, 42936 },
{ 42938, 42938 },
{ 42940, 42940 },
@@ -2584,51 +2584,51 @@ static const URange16 Lu_range16[] = {
{ 42966, 42966 },
{ 42968, 42968 },
{ 42997, 42997 },
- { 65313, 65338 },
-};
-static const URange32 Lu_range32[] = {
- { 66560, 66599 },
+ { 65313, 65338 },
+};
+static const URange32 Lu_range32[] = {
+ { 66560, 66599 },
{ 66736, 66771 },
{ 66928, 66938 },
{ 66940, 66954 },
{ 66956, 66962 },
{ 66964, 66965 },
- { 68736, 68786 },
- { 71840, 71871 },
+ { 68736, 68786 },
+ { 71840, 71871 },
{ 93760, 93791 },
- { 119808, 119833 },
- { 119860, 119885 },
- { 119912, 119937 },
- { 119964, 119964 },
- { 119966, 119967 },
- { 119970, 119970 },
- { 119973, 119974 },
- { 119977, 119980 },
- { 119982, 119989 },
- { 120016, 120041 },
- { 120068, 120069 },
- { 120071, 120074 },
- { 120077, 120084 },
- { 120086, 120092 },
- { 120120, 120121 },
- { 120123, 120126 },
- { 120128, 120132 },
- { 120134, 120134 },
- { 120138, 120144 },
- { 120172, 120197 },
- { 120224, 120249 },
- { 120276, 120301 },
- { 120328, 120353 },
- { 120380, 120405 },
- { 120432, 120457 },
- { 120488, 120512 },
- { 120546, 120570 },
- { 120604, 120628 },
- { 120662, 120686 },
- { 120720, 120744 },
- { 120778, 120778 },
+ { 119808, 119833 },
+ { 119860, 119885 },
+ { 119912, 119937 },
+ { 119964, 119964 },
+ { 119966, 119967 },
+ { 119970, 119970 },
+ { 119973, 119974 },
+ { 119977, 119980 },
+ { 119982, 119989 },
+ { 120016, 120041 },
+ { 120068, 120069 },
+ { 120071, 120074 },
+ { 120077, 120084 },
+ { 120086, 120092 },
+ { 120120, 120121 },
+ { 120123, 120126 },
+ { 120128, 120132 },
+ { 120134, 120134 },
+ { 120138, 120144 },
+ { 120172, 120197 },
+ { 120224, 120249 },
+ { 120276, 120301 },
+ { 120328, 120353 },
+ { 120380, 120405 },
+ { 120432, 120457 },
+ { 120488, 120512 },
+ { 120546, 120570 },
+ { 120604, 120628 },
+ { 120662, 120686 },
+ { 120720, 120744 },
+ { 120778, 120778 },
{ 125184, 125217 },
-};
+};
static const URange16 M_range16[] = {
{ 768, 879 },
{ 1155, 1161 },
@@ -2819,7 +2819,7 @@ static const URange16 M_range16[] = {
{ 64286, 64286 },
{ 65024, 65039 },
{ 65056, 65071 },
-};
+};
static const URange32 M_range32[] = {
{ 66045, 66045 },
{ 66272, 66272 },
@@ -2931,161 +2931,161 @@ static const URange32 M_range32[] = {
{ 125136, 125142 },
{ 125252, 125258 },
{ 917760, 917999 },
-};
-static const URange16 Mc_range16[] = {
- { 2307, 2307 },
- { 2363, 2363 },
- { 2366, 2368 },
- { 2377, 2380 },
- { 2382, 2383 },
- { 2434, 2435 },
- { 2494, 2496 },
- { 2503, 2504 },
- { 2507, 2508 },
- { 2519, 2519 },
- { 2563, 2563 },
- { 2622, 2624 },
- { 2691, 2691 },
- { 2750, 2752 },
- { 2761, 2761 },
- { 2763, 2764 },
- { 2818, 2819 },
- { 2878, 2878 },
- { 2880, 2880 },
- { 2887, 2888 },
- { 2891, 2892 },
- { 2903, 2903 },
- { 3006, 3007 },
- { 3009, 3010 },
- { 3014, 3016 },
- { 3018, 3020 },
- { 3031, 3031 },
- { 3073, 3075 },
- { 3137, 3140 },
- { 3202, 3203 },
- { 3262, 3262 },
- { 3264, 3268 },
- { 3271, 3272 },
- { 3274, 3275 },
- { 3285, 3286 },
- { 3330, 3331 },
- { 3390, 3392 },
- { 3398, 3400 },
- { 3402, 3404 },
- { 3415, 3415 },
- { 3458, 3459 },
- { 3535, 3537 },
- { 3544, 3551 },
- { 3570, 3571 },
- { 3902, 3903 },
- { 3967, 3967 },
- { 4139, 4140 },
- { 4145, 4145 },
- { 4152, 4152 },
- { 4155, 4156 },
- { 4182, 4183 },
- { 4194, 4196 },
- { 4199, 4205 },
- { 4227, 4228 },
- { 4231, 4236 },
- { 4239, 4239 },
- { 4250, 4252 },
+};
+static const URange16 Mc_range16[] = {
+ { 2307, 2307 },
+ { 2363, 2363 },
+ { 2366, 2368 },
+ { 2377, 2380 },
+ { 2382, 2383 },
+ { 2434, 2435 },
+ { 2494, 2496 },
+ { 2503, 2504 },
+ { 2507, 2508 },
+ { 2519, 2519 },
+ { 2563, 2563 },
+ { 2622, 2624 },
+ { 2691, 2691 },
+ { 2750, 2752 },
+ { 2761, 2761 },
+ { 2763, 2764 },
+ { 2818, 2819 },
+ { 2878, 2878 },
+ { 2880, 2880 },
+ { 2887, 2888 },
+ { 2891, 2892 },
+ { 2903, 2903 },
+ { 3006, 3007 },
+ { 3009, 3010 },
+ { 3014, 3016 },
+ { 3018, 3020 },
+ { 3031, 3031 },
+ { 3073, 3075 },
+ { 3137, 3140 },
+ { 3202, 3203 },
+ { 3262, 3262 },
+ { 3264, 3268 },
+ { 3271, 3272 },
+ { 3274, 3275 },
+ { 3285, 3286 },
+ { 3330, 3331 },
+ { 3390, 3392 },
+ { 3398, 3400 },
+ { 3402, 3404 },
+ { 3415, 3415 },
+ { 3458, 3459 },
+ { 3535, 3537 },
+ { 3544, 3551 },
+ { 3570, 3571 },
+ { 3902, 3903 },
+ { 3967, 3967 },
+ { 4139, 4140 },
+ { 4145, 4145 },
+ { 4152, 4152 },
+ { 4155, 4156 },
+ { 4182, 4183 },
+ { 4194, 4196 },
+ { 4199, 4205 },
+ { 4227, 4228 },
+ { 4231, 4236 },
+ { 4239, 4239 },
+ { 4250, 4252 },
{ 5909, 5909 },
{ 5940, 5940 },
- { 6070, 6070 },
- { 6078, 6085 },
- { 6087, 6088 },
- { 6435, 6438 },
- { 6441, 6443 },
- { 6448, 6449 },
- { 6451, 6456 },
- { 6681, 6682 },
- { 6741, 6741 },
- { 6743, 6743 },
- { 6753, 6753 },
- { 6755, 6756 },
- { 6765, 6770 },
- { 6916, 6916 },
- { 6965, 6965 },
- { 6971, 6971 },
- { 6973, 6977 },
- { 6979, 6980 },
- { 7042, 7042 },
- { 7073, 7073 },
- { 7078, 7079 },
- { 7082, 7082 },
- { 7143, 7143 },
- { 7146, 7148 },
- { 7150, 7150 },
- { 7154, 7155 },
- { 7204, 7211 },
- { 7220, 7221 },
- { 7393, 7393 },
+ { 6070, 6070 },
+ { 6078, 6085 },
+ { 6087, 6088 },
+ { 6435, 6438 },
+ { 6441, 6443 },
+ { 6448, 6449 },
+ { 6451, 6456 },
+ { 6681, 6682 },
+ { 6741, 6741 },
+ { 6743, 6743 },
+ { 6753, 6753 },
+ { 6755, 6756 },
+ { 6765, 6770 },
+ { 6916, 6916 },
+ { 6965, 6965 },
+ { 6971, 6971 },
+ { 6973, 6977 },
+ { 6979, 6980 },
+ { 7042, 7042 },
+ { 7073, 7073 },
+ { 7078, 7079 },
+ { 7082, 7082 },
+ { 7143, 7143 },
+ { 7146, 7148 },
+ { 7150, 7150 },
+ { 7154, 7155 },
+ { 7204, 7211 },
+ { 7220, 7221 },
+ { 7393, 7393 },
{ 7415, 7415 },
- { 12334, 12335 },
- { 43043, 43044 },
- { 43047, 43047 },
- { 43136, 43137 },
- { 43188, 43203 },
- { 43346, 43347 },
- { 43395, 43395 },
- { 43444, 43445 },
- { 43450, 43451 },
+ { 12334, 12335 },
+ { 43043, 43044 },
+ { 43047, 43047 },
+ { 43136, 43137 },
+ { 43188, 43203 },
+ { 43346, 43347 },
+ { 43395, 43395 },
+ { 43444, 43445 },
+ { 43450, 43451 },
{ 43454, 43456 },
- { 43567, 43568 },
- { 43571, 43572 },
- { 43597, 43597 },
- { 43643, 43643 },
- { 43645, 43645 },
- { 43755, 43755 },
- { 43758, 43759 },
- { 43765, 43765 },
- { 44003, 44004 },
- { 44006, 44007 },
- { 44009, 44010 },
- { 44012, 44012 },
-};
-static const URange32 Mc_range32[] = {
- { 69632, 69632 },
- { 69634, 69634 },
- { 69762, 69762 },
- { 69808, 69810 },
- { 69815, 69816 },
- { 69932, 69932 },
+ { 43567, 43568 },
+ { 43571, 43572 },
+ { 43597, 43597 },
+ { 43643, 43643 },
+ { 43645, 43645 },
+ { 43755, 43755 },
+ { 43758, 43759 },
+ { 43765, 43765 },
+ { 44003, 44004 },
+ { 44006, 44007 },
+ { 44009, 44010 },
+ { 44012, 44012 },
+};
+static const URange32 Mc_range32[] = {
+ { 69632, 69632 },
+ { 69634, 69634 },
+ { 69762, 69762 },
+ { 69808, 69810 },
+ { 69815, 69816 },
+ { 69932, 69932 },
{ 69957, 69958 },
- { 70018, 70018 },
- { 70067, 70069 },
- { 70079, 70080 },
+ { 70018, 70018 },
+ { 70067, 70069 },
+ { 70079, 70080 },
{ 70094, 70094 },
- { 70188, 70190 },
- { 70194, 70195 },
- { 70197, 70197 },
- { 70368, 70370 },
- { 70402, 70403 },
- { 70462, 70463 },
- { 70465, 70468 },
- { 70471, 70472 },
- { 70475, 70477 },
- { 70487, 70487 },
- { 70498, 70499 },
+ { 70188, 70190 },
+ { 70194, 70195 },
+ { 70197, 70197 },
+ { 70368, 70370 },
+ { 70402, 70403 },
+ { 70462, 70463 },
+ { 70465, 70468 },
+ { 70471, 70472 },
+ { 70475, 70477 },
+ { 70487, 70487 },
+ { 70498, 70499 },
{ 70709, 70711 },
{ 70720, 70721 },
{ 70725, 70725 },
- { 70832, 70834 },
- { 70841, 70841 },
- { 70843, 70846 },
- { 70849, 70849 },
- { 71087, 71089 },
- { 71096, 71099 },
- { 71102, 71102 },
- { 71216, 71218 },
- { 71227, 71228 },
- { 71230, 71230 },
- { 71340, 71340 },
- { 71342, 71343 },
- { 71350, 71350 },
- { 71456, 71457 },
- { 71462, 71462 },
+ { 70832, 70834 },
+ { 70841, 70841 },
+ { 70843, 70846 },
+ { 70849, 70849 },
+ { 71087, 71089 },
+ { 71096, 71099 },
+ { 71102, 71102 },
+ { 71216, 71218 },
+ { 71227, 71228 },
+ { 71230, 71230 },
+ { 71340, 71340 },
+ { 71342, 71343 },
+ { 71350, 71350 },
+ { 71456, 71457 },
+ { 71462, 71462 },
{ 71724, 71726 },
{ 71736, 71736 },
{ 71984, 71989 },
@@ -3110,9 +3110,9 @@ static const URange32 Mc_range32[] = {
{ 73461, 73462 },
{ 94033, 94087 },
{ 94192, 94193 },
- { 119141, 119142 },
- { 119149, 119154 },
-};
+ { 119141, 119142 },
+ { 119149, 119154 },
+};
static const URange16 Me_range16[] = {
{ 1160, 1161 },
{ 6846, 6846 },
@@ -3120,283 +3120,283 @@ static const URange16 Me_range16[] = {
{ 8418, 8420 },
{ 42608, 42610 },
};
-static const URange16 Mn_range16[] = {
- { 768, 879 },
- { 1155, 1159 },
- { 1425, 1469 },
- { 1471, 1471 },
- { 1473, 1474 },
- { 1476, 1477 },
- { 1479, 1479 },
- { 1552, 1562 },
- { 1611, 1631 },
- { 1648, 1648 },
- { 1750, 1756 },
- { 1759, 1764 },
- { 1767, 1768 },
- { 1770, 1773 },
- { 1809, 1809 },
- { 1840, 1866 },
- { 1958, 1968 },
- { 2027, 2035 },
+static const URange16 Mn_range16[] = {
+ { 768, 879 },
+ { 1155, 1159 },
+ { 1425, 1469 },
+ { 1471, 1471 },
+ { 1473, 1474 },
+ { 1476, 1477 },
+ { 1479, 1479 },
+ { 1552, 1562 },
+ { 1611, 1631 },
+ { 1648, 1648 },
+ { 1750, 1756 },
+ { 1759, 1764 },
+ { 1767, 1768 },
+ { 1770, 1773 },
+ { 1809, 1809 },
+ { 1840, 1866 },
+ { 1958, 1968 },
+ { 2027, 2035 },
{ 2045, 2045 },
- { 2070, 2073 },
- { 2075, 2083 },
- { 2085, 2087 },
- { 2089, 2093 },
- { 2137, 2139 },
+ { 2070, 2073 },
+ { 2075, 2083 },
+ { 2085, 2087 },
+ { 2089, 2093 },
+ { 2137, 2139 },
{ 2200, 2207 },
{ 2250, 2273 },
- { 2275, 2306 },
- { 2362, 2362 },
- { 2364, 2364 },
- { 2369, 2376 },
- { 2381, 2381 },
- { 2385, 2391 },
- { 2402, 2403 },
- { 2433, 2433 },
- { 2492, 2492 },
- { 2497, 2500 },
- { 2509, 2509 },
- { 2530, 2531 },
+ { 2275, 2306 },
+ { 2362, 2362 },
+ { 2364, 2364 },
+ { 2369, 2376 },
+ { 2381, 2381 },
+ { 2385, 2391 },
+ { 2402, 2403 },
+ { 2433, 2433 },
+ { 2492, 2492 },
+ { 2497, 2500 },
+ { 2509, 2509 },
+ { 2530, 2531 },
{ 2558, 2558 },
- { 2561, 2562 },
- { 2620, 2620 },
- { 2625, 2626 },
- { 2631, 2632 },
- { 2635, 2637 },
- { 2641, 2641 },
- { 2672, 2673 },
- { 2677, 2677 },
- { 2689, 2690 },
- { 2748, 2748 },
- { 2753, 2757 },
- { 2759, 2760 },
- { 2765, 2765 },
- { 2786, 2787 },
+ { 2561, 2562 },
+ { 2620, 2620 },
+ { 2625, 2626 },
+ { 2631, 2632 },
+ { 2635, 2637 },
+ { 2641, 2641 },
+ { 2672, 2673 },
+ { 2677, 2677 },
+ { 2689, 2690 },
+ { 2748, 2748 },
+ { 2753, 2757 },
+ { 2759, 2760 },
+ { 2765, 2765 },
+ { 2786, 2787 },
{ 2810, 2815 },
- { 2817, 2817 },
- { 2876, 2876 },
- { 2879, 2879 },
- { 2881, 2884 },
- { 2893, 2893 },
+ { 2817, 2817 },
+ { 2876, 2876 },
+ { 2879, 2879 },
+ { 2881, 2884 },
+ { 2893, 2893 },
{ 2901, 2902 },
- { 2914, 2915 },
- { 2946, 2946 },
- { 3008, 3008 },
- { 3021, 3021 },
- { 3072, 3072 },
+ { 2914, 2915 },
+ { 2946, 2946 },
+ { 3008, 3008 },
+ { 3021, 3021 },
+ { 3072, 3072 },
{ 3076, 3076 },
{ 3132, 3132 },
- { 3134, 3136 },
- { 3142, 3144 },
- { 3146, 3149 },
- { 3157, 3158 },
- { 3170, 3171 },
- { 3201, 3201 },
- { 3260, 3260 },
- { 3263, 3263 },
- { 3270, 3270 },
- { 3276, 3277 },
- { 3298, 3299 },
+ { 3134, 3136 },
+ { 3142, 3144 },
+ { 3146, 3149 },
+ { 3157, 3158 },
+ { 3170, 3171 },
+ { 3201, 3201 },
+ { 3260, 3260 },
+ { 3263, 3263 },
+ { 3270, 3270 },
+ { 3276, 3277 },
+ { 3298, 3299 },
{ 3328, 3329 },
{ 3387, 3388 },
- { 3393, 3396 },
- { 3405, 3405 },
- { 3426, 3427 },
+ { 3393, 3396 },
+ { 3405, 3405 },
+ { 3426, 3427 },
{ 3457, 3457 },
- { 3530, 3530 },
- { 3538, 3540 },
- { 3542, 3542 },
- { 3633, 3633 },
- { 3636, 3642 },
- { 3655, 3662 },
- { 3761, 3761 },
+ { 3530, 3530 },
+ { 3538, 3540 },
+ { 3542, 3542 },
+ { 3633, 3633 },
+ { 3636, 3642 },
+ { 3655, 3662 },
+ { 3761, 3761 },
{ 3764, 3772 },
- { 3784, 3789 },
- { 3864, 3865 },
- { 3893, 3893 },
- { 3895, 3895 },
- { 3897, 3897 },
- { 3953, 3966 },
- { 3968, 3972 },
- { 3974, 3975 },
- { 3981, 3991 },
- { 3993, 4028 },
- { 4038, 4038 },
- { 4141, 4144 },
- { 4146, 4151 },
- { 4153, 4154 },
- { 4157, 4158 },
- { 4184, 4185 },
- { 4190, 4192 },
- { 4209, 4212 },
- { 4226, 4226 },
- { 4229, 4230 },
- { 4237, 4237 },
- { 4253, 4253 },
- { 4957, 4959 },
- { 5906, 5908 },
+ { 3784, 3789 },
+ { 3864, 3865 },
+ { 3893, 3893 },
+ { 3895, 3895 },
+ { 3897, 3897 },
+ { 3953, 3966 },
+ { 3968, 3972 },
+ { 3974, 3975 },
+ { 3981, 3991 },
+ { 3993, 4028 },
+ { 4038, 4038 },
+ { 4141, 4144 },
+ { 4146, 4151 },
+ { 4153, 4154 },
+ { 4157, 4158 },
+ { 4184, 4185 },
+ { 4190, 4192 },
+ { 4209, 4212 },
+ { 4226, 4226 },
+ { 4229, 4230 },
+ { 4237, 4237 },
+ { 4253, 4253 },
+ { 4957, 4959 },
+ { 5906, 5908 },
{ 5938, 5939 },
- { 5970, 5971 },
- { 6002, 6003 },
- { 6068, 6069 },
- { 6071, 6077 },
- { 6086, 6086 },
- { 6089, 6099 },
- { 6109, 6109 },
- { 6155, 6157 },
+ { 5970, 5971 },
+ { 6002, 6003 },
+ { 6068, 6069 },
+ { 6071, 6077 },
+ { 6086, 6086 },
+ { 6089, 6099 },
+ { 6109, 6109 },
+ { 6155, 6157 },
{ 6159, 6159 },
{ 6277, 6278 },
- { 6313, 6313 },
- { 6432, 6434 },
- { 6439, 6440 },
- { 6450, 6450 },
- { 6457, 6459 },
- { 6679, 6680 },
- { 6683, 6683 },
- { 6742, 6742 },
- { 6744, 6750 },
- { 6752, 6752 },
- { 6754, 6754 },
- { 6757, 6764 },
- { 6771, 6780 },
- { 6783, 6783 },
- { 6832, 6845 },
+ { 6313, 6313 },
+ { 6432, 6434 },
+ { 6439, 6440 },
+ { 6450, 6450 },
+ { 6457, 6459 },
+ { 6679, 6680 },
+ { 6683, 6683 },
+ { 6742, 6742 },
+ { 6744, 6750 },
+ { 6752, 6752 },
+ { 6754, 6754 },
+ { 6757, 6764 },
+ { 6771, 6780 },
+ { 6783, 6783 },
+ { 6832, 6845 },
{ 6847, 6862 },
- { 6912, 6915 },
- { 6964, 6964 },
- { 6966, 6970 },
- { 6972, 6972 },
- { 6978, 6978 },
- { 7019, 7027 },
- { 7040, 7041 },
- { 7074, 7077 },
- { 7080, 7081 },
- { 7083, 7085 },
- { 7142, 7142 },
- { 7144, 7145 },
- { 7149, 7149 },
- { 7151, 7153 },
- { 7212, 7219 },
- { 7222, 7223 },
- { 7376, 7378 },
- { 7380, 7392 },
- { 7394, 7400 },
- { 7405, 7405 },
- { 7412, 7412 },
- { 7416, 7417 },
+ { 6912, 6915 },
+ { 6964, 6964 },
+ { 6966, 6970 },
+ { 6972, 6972 },
+ { 6978, 6978 },
+ { 7019, 7027 },
+ { 7040, 7041 },
+ { 7074, 7077 },
+ { 7080, 7081 },
+ { 7083, 7085 },
+ { 7142, 7142 },
+ { 7144, 7145 },
+ { 7149, 7149 },
+ { 7151, 7153 },
+ { 7212, 7219 },
+ { 7222, 7223 },
+ { 7376, 7378 },
+ { 7380, 7392 },
+ { 7394, 7400 },
+ { 7405, 7405 },
+ { 7412, 7412 },
+ { 7416, 7417 },
{ 7616, 7679 },
- { 8400, 8412 },
- { 8417, 8417 },
- { 8421, 8432 },
- { 11503, 11505 },
- { 11647, 11647 },
- { 11744, 11775 },
- { 12330, 12333 },
- { 12441, 12442 },
- { 42607, 42607 },
- { 42612, 42621 },
- { 42654, 42655 },
- { 42736, 42737 },
- { 43010, 43010 },
- { 43014, 43014 },
- { 43019, 43019 },
- { 43045, 43046 },
+ { 8400, 8412 },
+ { 8417, 8417 },
+ { 8421, 8432 },
+ { 11503, 11505 },
+ { 11647, 11647 },
+ { 11744, 11775 },
+ { 12330, 12333 },
+ { 12441, 12442 },
+ { 42607, 42607 },
+ { 42612, 42621 },
+ { 42654, 42655 },
+ { 42736, 42737 },
+ { 43010, 43010 },
+ { 43014, 43014 },
+ { 43019, 43019 },
+ { 43045, 43046 },
{ 43052, 43052 },
{ 43204, 43205 },
- { 43232, 43249 },
+ { 43232, 43249 },
{ 43263, 43263 },
- { 43302, 43309 },
- { 43335, 43345 },
- { 43392, 43394 },
- { 43443, 43443 },
- { 43446, 43449 },
+ { 43302, 43309 },
+ { 43335, 43345 },
+ { 43392, 43394 },
+ { 43443, 43443 },
+ { 43446, 43449 },
{ 43452, 43453 },
- { 43493, 43493 },
- { 43561, 43566 },
- { 43569, 43570 },
- { 43573, 43574 },
- { 43587, 43587 },
- { 43596, 43596 },
- { 43644, 43644 },
- { 43696, 43696 },
- { 43698, 43700 },
- { 43703, 43704 },
- { 43710, 43711 },
- { 43713, 43713 },
- { 43756, 43757 },
- { 43766, 43766 },
- { 44005, 44005 },
- { 44008, 44008 },
- { 44013, 44013 },
- { 64286, 64286 },
- { 65024, 65039 },
- { 65056, 65071 },
-};
-static const URange32 Mn_range32[] = {
- { 66045, 66045 },
- { 66272, 66272 },
- { 66422, 66426 },
- { 68097, 68099 },
- { 68101, 68102 },
- { 68108, 68111 },
- { 68152, 68154 },
- { 68159, 68159 },
- { 68325, 68326 },
+ { 43493, 43493 },
+ { 43561, 43566 },
+ { 43569, 43570 },
+ { 43573, 43574 },
+ { 43587, 43587 },
+ { 43596, 43596 },
+ { 43644, 43644 },
+ { 43696, 43696 },
+ { 43698, 43700 },
+ { 43703, 43704 },
+ { 43710, 43711 },
+ { 43713, 43713 },
+ { 43756, 43757 },
+ { 43766, 43766 },
+ { 44005, 44005 },
+ { 44008, 44008 },
+ { 44013, 44013 },
+ { 64286, 64286 },
+ { 65024, 65039 },
+ { 65056, 65071 },
+};
+static const URange32 Mn_range32[] = {
+ { 66045, 66045 },
+ { 66272, 66272 },
+ { 66422, 66426 },
+ { 68097, 68099 },
+ { 68101, 68102 },
+ { 68108, 68111 },
+ { 68152, 68154 },
+ { 68159, 68159 },
+ { 68325, 68326 },
{ 68900, 68903 },
{ 69291, 69292 },
{ 69446, 69456 },
{ 69506, 69509 },
- { 69633, 69633 },
- { 69688, 69702 },
+ { 69633, 69633 },
+ { 69688, 69702 },
{ 69744, 69744 },
{ 69747, 69748 },
- { 69759, 69761 },
- { 69811, 69814 },
- { 69817, 69818 },
+ { 69759, 69761 },
+ { 69811, 69814 },
+ { 69817, 69818 },
{ 69826, 69826 },
- { 69888, 69890 },
- { 69927, 69931 },
- { 69933, 69940 },
- { 70003, 70003 },
- { 70016, 70017 },
- { 70070, 70078 },
+ { 69888, 69890 },
+ { 69927, 69931 },
+ { 69933, 69940 },
+ { 70003, 70003 },
+ { 70016, 70017 },
+ { 70070, 70078 },
{ 70089, 70092 },
{ 70095, 70095 },
- { 70191, 70193 },
- { 70196, 70196 },
- { 70198, 70199 },
+ { 70191, 70193 },
+ { 70196, 70196 },
+ { 70198, 70199 },
{ 70206, 70206 },
- { 70367, 70367 },
- { 70371, 70378 },
- { 70400, 70401 },
+ { 70367, 70367 },
+ { 70371, 70378 },
+ { 70400, 70401 },
{ 70459, 70460 },
- { 70464, 70464 },
- { 70502, 70508 },
- { 70512, 70516 },
+ { 70464, 70464 },
+ { 70502, 70508 },
+ { 70512, 70516 },
{ 70712, 70719 },
{ 70722, 70724 },
{ 70726, 70726 },
{ 70750, 70750 },
- { 70835, 70840 },
- { 70842, 70842 },
- { 70847, 70848 },
- { 70850, 70851 },
- { 71090, 71093 },
- { 71100, 71101 },
- { 71103, 71104 },
- { 71132, 71133 },
- { 71219, 71226 },
- { 71229, 71229 },
- { 71231, 71232 },
- { 71339, 71339 },
- { 71341, 71341 },
- { 71344, 71349 },
- { 71351, 71351 },
- { 71453, 71455 },
- { 71458, 71461 },
- { 71463, 71467 },
+ { 70835, 70840 },
+ { 70842, 70842 },
+ { 70847, 70848 },
+ { 70850, 70851 },
+ { 71090, 71093 },
+ { 71100, 71101 },
+ { 71103, 71104 },
+ { 71132, 71133 },
+ { 71219, 71226 },
+ { 71229, 71229 },
+ { 71231, 71232 },
+ { 71339, 71339 },
+ { 71341, 71341 },
+ { 71344, 71349 },
+ { 71351, 71351 },
+ { 71453, 71455 },
+ { 71458, 71461 },
+ { 71463, 71467 },
{ 71727, 71735 },
{ 71737, 71738 },
{ 71995, 71996 },
@@ -3429,25 +3429,25 @@ static const URange32 Mn_range32[] = {
{ 73109, 73109 },
{ 73111, 73111 },
{ 73459, 73460 },
- { 92912, 92916 },
- { 92976, 92982 },
+ { 92912, 92916 },
+ { 92976, 92982 },
{ 94031, 94031 },
- { 94095, 94098 },
+ { 94095, 94098 },
{ 94180, 94180 },
- { 113821, 113822 },
+ { 113821, 113822 },
{ 118528, 118573 },
{ 118576, 118598 },
- { 119143, 119145 },
- { 119163, 119170 },
- { 119173, 119179 },
- { 119210, 119213 },
- { 119362, 119364 },
- { 121344, 121398 },
- { 121403, 121452 },
- { 121461, 121461 },
- { 121476, 121476 },
- { 121499, 121503 },
- { 121505, 121519 },
+ { 119143, 119145 },
+ { 119163, 119170 },
+ { 119173, 119179 },
+ { 119210, 119213 },
+ { 119362, 119364 },
+ { 121344, 121398 },
+ { 121403, 121452 },
+ { 121461, 121461 },
+ { 121476, 121476 },
+ { 121499, 121503 },
+ { 121505, 121519 },
{ 122880, 122886 },
{ 122888, 122904 },
{ 122907, 122913 },
@@ -3456,148 +3456,148 @@ static const URange32 Mn_range32[] = {
{ 123184, 123190 },
{ 123566, 123566 },
{ 123628, 123631 },
- { 125136, 125142 },
+ { 125136, 125142 },
{ 125252, 125258 },
- { 917760, 917999 },
-};
-static const URange16 N_range16[] = {
- { 48, 57 },
- { 178, 179 },
- { 185, 185 },
- { 188, 190 },
- { 1632, 1641 },
- { 1776, 1785 },
- { 1984, 1993 },
- { 2406, 2415 },
- { 2534, 2543 },
- { 2548, 2553 },
- { 2662, 2671 },
- { 2790, 2799 },
- { 2918, 2927 },
- { 2930, 2935 },
- { 3046, 3058 },
- { 3174, 3183 },
- { 3192, 3198 },
- { 3302, 3311 },
+ { 917760, 917999 },
+};
+static const URange16 N_range16[] = {
+ { 48, 57 },
+ { 178, 179 },
+ { 185, 185 },
+ { 188, 190 },
+ { 1632, 1641 },
+ { 1776, 1785 },
+ { 1984, 1993 },
+ { 2406, 2415 },
+ { 2534, 2543 },
+ { 2548, 2553 },
+ { 2662, 2671 },
+ { 2790, 2799 },
+ { 2918, 2927 },
+ { 2930, 2935 },
+ { 3046, 3058 },
+ { 3174, 3183 },
+ { 3192, 3198 },
+ { 3302, 3311 },
{ 3416, 3422 },
{ 3430, 3448 },
- { 3558, 3567 },
- { 3664, 3673 },
- { 3792, 3801 },
- { 3872, 3891 },
- { 4160, 4169 },
- { 4240, 4249 },
- { 4969, 4988 },
- { 5870, 5872 },
- { 6112, 6121 },
- { 6128, 6137 },
- { 6160, 6169 },
- { 6470, 6479 },
- { 6608, 6618 },
- { 6784, 6793 },
- { 6800, 6809 },
- { 6992, 7001 },
- { 7088, 7097 },
- { 7232, 7241 },
- { 7248, 7257 },
- { 8304, 8304 },
- { 8308, 8313 },
- { 8320, 8329 },
- { 8528, 8578 },
- { 8581, 8585 },
- { 9312, 9371 },
- { 9450, 9471 },
- { 10102, 10131 },
- { 11517, 11517 },
- { 12295, 12295 },
- { 12321, 12329 },
- { 12344, 12346 },
- { 12690, 12693 },
- { 12832, 12841 },
- { 12872, 12879 },
- { 12881, 12895 },
- { 12928, 12937 },
- { 12977, 12991 },
- { 42528, 42537 },
- { 42726, 42735 },
- { 43056, 43061 },
- { 43216, 43225 },
- { 43264, 43273 },
- { 43472, 43481 },
- { 43504, 43513 },
- { 43600, 43609 },
- { 44016, 44025 },
- { 65296, 65305 },
-};
-static const URange32 N_range32[] = {
- { 65799, 65843 },
- { 65856, 65912 },
- { 65930, 65931 },
- { 66273, 66299 },
- { 66336, 66339 },
- { 66369, 66369 },
- { 66378, 66378 },
- { 66513, 66517 },
- { 66720, 66729 },
- { 67672, 67679 },
- { 67705, 67711 },
- { 67751, 67759 },
- { 67835, 67839 },
- { 67862, 67867 },
- { 68028, 68029 },
- { 68032, 68047 },
- { 68050, 68095 },
+ { 3558, 3567 },
+ { 3664, 3673 },
+ { 3792, 3801 },
+ { 3872, 3891 },
+ { 4160, 4169 },
+ { 4240, 4249 },
+ { 4969, 4988 },
+ { 5870, 5872 },
+ { 6112, 6121 },
+ { 6128, 6137 },
+ { 6160, 6169 },
+ { 6470, 6479 },
+ { 6608, 6618 },
+ { 6784, 6793 },
+ { 6800, 6809 },
+ { 6992, 7001 },
+ { 7088, 7097 },
+ { 7232, 7241 },
+ { 7248, 7257 },
+ { 8304, 8304 },
+ { 8308, 8313 },
+ { 8320, 8329 },
+ { 8528, 8578 },
+ { 8581, 8585 },
+ { 9312, 9371 },
+ { 9450, 9471 },
+ { 10102, 10131 },
+ { 11517, 11517 },
+ { 12295, 12295 },
+ { 12321, 12329 },
+ { 12344, 12346 },
+ { 12690, 12693 },
+ { 12832, 12841 },
+ { 12872, 12879 },
+ { 12881, 12895 },
+ { 12928, 12937 },
+ { 12977, 12991 },
+ { 42528, 42537 },
+ { 42726, 42735 },
+ { 43056, 43061 },
+ { 43216, 43225 },
+ { 43264, 43273 },
+ { 43472, 43481 },
+ { 43504, 43513 },
+ { 43600, 43609 },
+ { 44016, 44025 },
+ { 65296, 65305 },
+};
+static const URange32 N_range32[] = {
+ { 65799, 65843 },
+ { 65856, 65912 },
+ { 65930, 65931 },
+ { 66273, 66299 },
+ { 66336, 66339 },
+ { 66369, 66369 },
+ { 66378, 66378 },
+ { 66513, 66517 },
+ { 66720, 66729 },
+ { 67672, 67679 },
+ { 67705, 67711 },
+ { 67751, 67759 },
+ { 67835, 67839 },
+ { 67862, 67867 },
+ { 68028, 68029 },
+ { 68032, 68047 },
+ { 68050, 68095 },
{ 68160, 68168 },
- { 68221, 68222 },
- { 68253, 68255 },
- { 68331, 68335 },
- { 68440, 68447 },
- { 68472, 68479 },
- { 68521, 68527 },
- { 68858, 68863 },
+ { 68221, 68222 },
+ { 68253, 68255 },
+ { 68331, 68335 },
+ { 68440, 68447 },
+ { 68472, 68479 },
+ { 68521, 68527 },
+ { 68858, 68863 },
{ 68912, 68921 },
- { 69216, 69246 },
+ { 69216, 69246 },
{ 69405, 69414 },
{ 69457, 69460 },
{ 69573, 69579 },
- { 69714, 69743 },
- { 69872, 69881 },
- { 69942, 69951 },
- { 70096, 70105 },
- { 70113, 70132 },
- { 70384, 70393 },
+ { 69714, 69743 },
+ { 69872, 69881 },
+ { 69942, 69951 },
+ { 70096, 70105 },
+ { 70113, 70132 },
+ { 70384, 70393 },
{ 70736, 70745 },
- { 70864, 70873 },
- { 71248, 71257 },
- { 71360, 71369 },
- { 71472, 71483 },
- { 71904, 71922 },
+ { 70864, 70873 },
+ { 71248, 71257 },
+ { 71360, 71369 },
+ { 71472, 71483 },
+ { 71904, 71922 },
{ 72016, 72025 },
{ 72784, 72812 },
{ 73040, 73049 },
{ 73120, 73129 },
{ 73664, 73684 },
- { 74752, 74862 },
- { 92768, 92777 },
+ { 74752, 74862 },
+ { 92768, 92777 },
{ 92864, 92873 },
- { 93008, 93017 },
- { 93019, 93025 },
+ { 93008, 93017 },
+ { 93019, 93025 },
{ 93824, 93846 },
{ 119520, 119539 },
{ 119648, 119672 },
- { 120782, 120831 },
+ { 120782, 120831 },
{ 123200, 123209 },
{ 123632, 123641 },
- { 125127, 125135 },
+ { 125127, 125135 },
{ 125264, 125273 },
{ 126065, 126123 },
{ 126125, 126127 },
{ 126129, 126132 },
{ 126209, 126253 },
{ 126255, 126269 },
- { 127232, 127244 },
+ { 127232, 127244 },
{ 130032, 130041 },
-};
+};
static const URange16 Nd_range16[] = {
{ 48, 57 },
{ 1632, 1641 },
@@ -3636,7 +3636,7 @@ static const URange16 Nd_range16[] = {
{ 43600, 43609 },
{ 44016, 44025 },
{ 65296, 65305 },
-};
+};
static const URange32 Nd_range32[] = {
{ 66720, 66729 },
{ 68912, 68921 },
@@ -3663,7 +3663,7 @@ static const URange32 Nd_range32[] = {
{ 123632, 123641 },
{ 125264, 125273 },
{ 130032, 130041 },
-};
+};
static const URange16 Nl_range16[] = {
{ 5870, 5872 },
{ 8544, 8578 },
@@ -3755,177 +3755,177 @@ static const URange32 No_range32[] = {
{ 126255, 126269 },
{ 127232, 127244 },
};
-static const URange16 P_range16[] = {
- { 33, 35 },
- { 37, 42 },
- { 44, 47 },
- { 58, 59 },
- { 63, 64 },
- { 91, 93 },
- { 95, 95 },
- { 123, 123 },
- { 125, 125 },
- { 161, 161 },
- { 167, 167 },
- { 171, 171 },
- { 182, 183 },
- { 187, 187 },
- { 191, 191 },
- { 894, 894 },
- { 903, 903 },
- { 1370, 1375 },
- { 1417, 1418 },
- { 1470, 1470 },
- { 1472, 1472 },
- { 1475, 1475 },
- { 1478, 1478 },
- { 1523, 1524 },
- { 1545, 1546 },
- { 1548, 1549 },
- { 1563, 1563 },
+static const URange16 P_range16[] = {
+ { 33, 35 },
+ { 37, 42 },
+ { 44, 47 },
+ { 58, 59 },
+ { 63, 64 },
+ { 91, 93 },
+ { 95, 95 },
+ { 123, 123 },
+ { 125, 125 },
+ { 161, 161 },
+ { 167, 167 },
+ { 171, 171 },
+ { 182, 183 },
+ { 187, 187 },
+ { 191, 191 },
+ { 894, 894 },
+ { 903, 903 },
+ { 1370, 1375 },
+ { 1417, 1418 },
+ { 1470, 1470 },
+ { 1472, 1472 },
+ { 1475, 1475 },
+ { 1478, 1478 },
+ { 1523, 1524 },
+ { 1545, 1546 },
+ { 1548, 1549 },
+ { 1563, 1563 },
{ 1565, 1567 },
- { 1642, 1645 },
- { 1748, 1748 },
- { 1792, 1805 },
- { 2039, 2041 },
- { 2096, 2110 },
- { 2142, 2142 },
- { 2404, 2405 },
- { 2416, 2416 },
+ { 1642, 1645 },
+ { 1748, 1748 },
+ { 1792, 1805 },
+ { 2039, 2041 },
+ { 2096, 2110 },
+ { 2142, 2142 },
+ { 2404, 2405 },
+ { 2416, 2416 },
{ 2557, 2557 },
{ 2678, 2678 },
- { 2800, 2800 },
+ { 2800, 2800 },
{ 3191, 3191 },
{ 3204, 3204 },
- { 3572, 3572 },
- { 3663, 3663 },
- { 3674, 3675 },
- { 3844, 3858 },
- { 3860, 3860 },
- { 3898, 3901 },
- { 3973, 3973 },
- { 4048, 4052 },
- { 4057, 4058 },
- { 4170, 4175 },
- { 4347, 4347 },
- { 4960, 4968 },
- { 5120, 5120 },
+ { 3572, 3572 },
+ { 3663, 3663 },
+ { 3674, 3675 },
+ { 3844, 3858 },
+ { 3860, 3860 },
+ { 3898, 3901 },
+ { 3973, 3973 },
+ { 4048, 4052 },
+ { 4057, 4058 },
+ { 4170, 4175 },
+ { 4347, 4347 },
+ { 4960, 4968 },
+ { 5120, 5120 },
{ 5742, 5742 },
- { 5787, 5788 },
- { 5867, 5869 },
- { 5941, 5942 },
- { 6100, 6102 },
- { 6104, 6106 },
- { 6144, 6154 },
- { 6468, 6469 },
- { 6686, 6687 },
- { 6816, 6822 },
- { 6824, 6829 },
- { 7002, 7008 },
+ { 5787, 5788 },
+ { 5867, 5869 },
+ { 5941, 5942 },
+ { 6100, 6102 },
+ { 6104, 6106 },
+ { 6144, 6154 },
+ { 6468, 6469 },
+ { 6686, 6687 },
+ { 6816, 6822 },
+ { 6824, 6829 },
+ { 7002, 7008 },
{ 7037, 7038 },
- { 7164, 7167 },
- { 7227, 7231 },
- { 7294, 7295 },
- { 7360, 7367 },
- { 7379, 7379 },
- { 8208, 8231 },
- { 8240, 8259 },
- { 8261, 8273 },
- { 8275, 8286 },
- { 8317, 8318 },
- { 8333, 8334 },
- { 8968, 8971 },
- { 9001, 9002 },
- { 10088, 10101 },
- { 10181, 10182 },
- { 10214, 10223 },
- { 10627, 10648 },
- { 10712, 10715 },
- { 10748, 10749 },
- { 11513, 11516 },
- { 11518, 11519 },
- { 11632, 11632 },
- { 11776, 11822 },
+ { 7164, 7167 },
+ { 7227, 7231 },
+ { 7294, 7295 },
+ { 7360, 7367 },
+ { 7379, 7379 },
+ { 8208, 8231 },
+ { 8240, 8259 },
+ { 8261, 8273 },
+ { 8275, 8286 },
+ { 8317, 8318 },
+ { 8333, 8334 },
+ { 8968, 8971 },
+ { 9001, 9002 },
+ { 10088, 10101 },
+ { 10181, 10182 },
+ { 10214, 10223 },
+ { 10627, 10648 },
+ { 10712, 10715 },
+ { 10748, 10749 },
+ { 11513, 11516 },
+ { 11518, 11519 },
+ { 11632, 11632 },
+ { 11776, 11822 },
{ 11824, 11855 },
{ 11858, 11869 },
- { 12289, 12291 },
- { 12296, 12305 },
- { 12308, 12319 },
- { 12336, 12336 },
- { 12349, 12349 },
- { 12448, 12448 },
- { 12539, 12539 },
- { 42238, 42239 },
- { 42509, 42511 },
- { 42611, 42611 },
- { 42622, 42622 },
- { 42738, 42743 },
- { 43124, 43127 },
- { 43214, 43215 },
- { 43256, 43258 },
- { 43260, 43260 },
- { 43310, 43311 },
- { 43359, 43359 },
- { 43457, 43469 },
- { 43486, 43487 },
- { 43612, 43615 },
- { 43742, 43743 },
- { 43760, 43761 },
- { 44011, 44011 },
- { 64830, 64831 },
- { 65040, 65049 },
- { 65072, 65106 },
- { 65108, 65121 },
- { 65123, 65123 },
- { 65128, 65128 },
- { 65130, 65131 },
- { 65281, 65283 },
- { 65285, 65290 },
- { 65292, 65295 },
- { 65306, 65307 },
- { 65311, 65312 },
- { 65339, 65341 },
- { 65343, 65343 },
- { 65371, 65371 },
- { 65373, 65373 },
- { 65375, 65381 },
-};
-static const URange32 P_range32[] = {
- { 65792, 65794 },
- { 66463, 66463 },
- { 66512, 66512 },
- { 66927, 66927 },
- { 67671, 67671 },
- { 67871, 67871 },
- { 67903, 67903 },
- { 68176, 68184 },
- { 68223, 68223 },
- { 68336, 68342 },
- { 68409, 68415 },
- { 68505, 68508 },
+ { 12289, 12291 },
+ { 12296, 12305 },
+ { 12308, 12319 },
+ { 12336, 12336 },
+ { 12349, 12349 },
+ { 12448, 12448 },
+ { 12539, 12539 },
+ { 42238, 42239 },
+ { 42509, 42511 },
+ { 42611, 42611 },
+ { 42622, 42622 },
+ { 42738, 42743 },
+ { 43124, 43127 },
+ { 43214, 43215 },
+ { 43256, 43258 },
+ { 43260, 43260 },
+ { 43310, 43311 },
+ { 43359, 43359 },
+ { 43457, 43469 },
+ { 43486, 43487 },
+ { 43612, 43615 },
+ { 43742, 43743 },
+ { 43760, 43761 },
+ { 44011, 44011 },
+ { 64830, 64831 },
+ { 65040, 65049 },
+ { 65072, 65106 },
+ { 65108, 65121 },
+ { 65123, 65123 },
+ { 65128, 65128 },
+ { 65130, 65131 },
+ { 65281, 65283 },
+ { 65285, 65290 },
+ { 65292, 65295 },
+ { 65306, 65307 },
+ { 65311, 65312 },
+ { 65339, 65341 },
+ { 65343, 65343 },
+ { 65371, 65371 },
+ { 65373, 65373 },
+ { 65375, 65381 },
+};
+static const URange32 P_range32[] = {
+ { 65792, 65794 },
+ { 66463, 66463 },
+ { 66512, 66512 },
+ { 66927, 66927 },
+ { 67671, 67671 },
+ { 67871, 67871 },
+ { 67903, 67903 },
+ { 68176, 68184 },
+ { 68223, 68223 },
+ { 68336, 68342 },
+ { 68409, 68415 },
+ { 68505, 68508 },
{ 69293, 69293 },
{ 69461, 69465 },
{ 69510, 69513 },
- { 69703, 69709 },
- { 69819, 69820 },
- { 69822, 69825 },
- { 69952, 69955 },
- { 70004, 70005 },
+ { 69703, 69709 },
+ { 69819, 69820 },
+ { 69822, 69825 },
+ { 69952, 69955 },
+ { 70004, 70005 },
{ 70085, 70088 },
- { 70093, 70093 },
- { 70107, 70107 },
- { 70109, 70111 },
- { 70200, 70205 },
- { 70313, 70313 },
+ { 70093, 70093 },
+ { 70107, 70107 },
+ { 70109, 70111 },
+ { 70200, 70205 },
+ { 70313, 70313 },
{ 70731, 70735 },
{ 70746, 70747 },
{ 70749, 70749 },
- { 70854, 70854 },
- { 71105, 71127 },
- { 71233, 71235 },
+ { 70854, 70854 },
+ { 71105, 71127 },
+ { 71233, 71235 },
{ 71264, 71276 },
{ 71353, 71353 },
- { 71484, 71486 },
+ { 71484, 71486 },
{ 71739, 71739 },
{ 72004, 72006 },
{ 72162, 72162 },
@@ -3936,18 +3936,18 @@ static const URange32 P_range32[] = {
{ 72816, 72817 },
{ 73463, 73464 },
{ 73727, 73727 },
- { 74864, 74868 },
+ { 74864, 74868 },
{ 77809, 77810 },
- { 92782, 92783 },
- { 92917, 92917 },
- { 92983, 92987 },
- { 92996, 92996 },
+ { 92782, 92783 },
+ { 92917, 92917 },
+ { 92983, 92987 },
+ { 92996, 92996 },
{ 93847, 93850 },
{ 94178, 94178 },
- { 113823, 113823 },
- { 121479, 121483 },
+ { 113823, 113823 },
+ { 121479, 121483 },
{ 125278, 125279 },
-};
+};
static const URange16 Pc_range16[] = {
{ 95, 95 },
{ 8255, 8256 },
@@ -4352,229 +4352,229 @@ static const URange16 Ps_range16[] = {
{ 65375, 65375 },
{ 65378, 65378 },
};
-static const URange16 S_range16[] = {
- { 36, 36 },
- { 43, 43 },
- { 60, 62 },
- { 94, 94 },
- { 96, 96 },
- { 124, 124 },
- { 126, 126 },
- { 162, 166 },
- { 168, 169 },
- { 172, 172 },
- { 174, 177 },
- { 180, 180 },
- { 184, 184 },
- { 215, 215 },
- { 247, 247 },
- { 706, 709 },
- { 722, 735 },
- { 741, 747 },
- { 749, 749 },
- { 751, 767 },
- { 885, 885 },
- { 900, 901 },
- { 1014, 1014 },
- { 1154, 1154 },
- { 1421, 1423 },
- { 1542, 1544 },
- { 1547, 1547 },
- { 1550, 1551 },
- { 1758, 1758 },
- { 1769, 1769 },
- { 1789, 1790 },
- { 2038, 2038 },
+static const URange16 S_range16[] = {
+ { 36, 36 },
+ { 43, 43 },
+ { 60, 62 },
+ { 94, 94 },
+ { 96, 96 },
+ { 124, 124 },
+ { 126, 126 },
+ { 162, 166 },
+ { 168, 169 },
+ { 172, 172 },
+ { 174, 177 },
+ { 180, 180 },
+ { 184, 184 },
+ { 215, 215 },
+ { 247, 247 },
+ { 706, 709 },
+ { 722, 735 },
+ { 741, 747 },
+ { 749, 749 },
+ { 751, 767 },
+ { 885, 885 },
+ { 900, 901 },
+ { 1014, 1014 },
+ { 1154, 1154 },
+ { 1421, 1423 },
+ { 1542, 1544 },
+ { 1547, 1547 },
+ { 1550, 1551 },
+ { 1758, 1758 },
+ { 1769, 1769 },
+ { 1789, 1790 },
+ { 2038, 2038 },
{ 2046, 2047 },
{ 2184, 2184 },
- { 2546, 2547 },
- { 2554, 2555 },
- { 2801, 2801 },
- { 2928, 2928 },
- { 3059, 3066 },
- { 3199, 3199 },
+ { 2546, 2547 },
+ { 2554, 2555 },
+ { 2801, 2801 },
+ { 2928, 2928 },
+ { 3059, 3066 },
+ { 3199, 3199 },
{ 3407, 3407 },
- { 3449, 3449 },
- { 3647, 3647 },
- { 3841, 3843 },
- { 3859, 3859 },
- { 3861, 3863 },
- { 3866, 3871 },
- { 3892, 3892 },
- { 3894, 3894 },
- { 3896, 3896 },
- { 4030, 4037 },
- { 4039, 4044 },
- { 4046, 4047 },
- { 4053, 4056 },
- { 4254, 4255 },
- { 5008, 5017 },
+ { 3449, 3449 },
+ { 3647, 3647 },
+ { 3841, 3843 },
+ { 3859, 3859 },
+ { 3861, 3863 },
+ { 3866, 3871 },
+ { 3892, 3892 },
+ { 3894, 3894 },
+ { 3896, 3896 },
+ { 4030, 4037 },
+ { 4039, 4044 },
+ { 4046, 4047 },
+ { 4053, 4056 },
+ { 4254, 4255 },
+ { 5008, 5017 },
{ 5741, 5741 },
- { 6107, 6107 },
- { 6464, 6464 },
- { 6622, 6655 },
- { 7009, 7018 },
- { 7028, 7036 },
- { 8125, 8125 },
- { 8127, 8129 },
- { 8141, 8143 },
- { 8157, 8159 },
- { 8173, 8175 },
- { 8189, 8190 },
- { 8260, 8260 },
- { 8274, 8274 },
- { 8314, 8316 },
- { 8330, 8332 },
+ { 6107, 6107 },
+ { 6464, 6464 },
+ { 6622, 6655 },
+ { 7009, 7018 },
+ { 7028, 7036 },
+ { 8125, 8125 },
+ { 8127, 8129 },
+ { 8141, 8143 },
+ { 8157, 8159 },
+ { 8173, 8175 },
+ { 8189, 8190 },
+ { 8260, 8260 },
+ { 8274, 8274 },
+ { 8314, 8316 },
+ { 8330, 8332 },
{ 8352, 8384 },
- { 8448, 8449 },
- { 8451, 8454 },
- { 8456, 8457 },
- { 8468, 8468 },
- { 8470, 8472 },
- { 8478, 8483 },
- { 8485, 8485 },
- { 8487, 8487 },
- { 8489, 8489 },
- { 8494, 8494 },
- { 8506, 8507 },
- { 8512, 8516 },
- { 8522, 8525 },
- { 8527, 8527 },
- { 8586, 8587 },
- { 8592, 8967 },
- { 8972, 9000 },
+ { 8448, 8449 },
+ { 8451, 8454 },
+ { 8456, 8457 },
+ { 8468, 8468 },
+ { 8470, 8472 },
+ { 8478, 8483 },
+ { 8485, 8485 },
+ { 8487, 8487 },
+ { 8489, 8489 },
+ { 8494, 8494 },
+ { 8506, 8507 },
+ { 8512, 8516 },
+ { 8522, 8525 },
+ { 8527, 8527 },
+ { 8586, 8587 },
+ { 8592, 8967 },
+ { 8972, 9000 },
{ 9003, 9254 },
- { 9280, 9290 },
- { 9372, 9449 },
- { 9472, 10087 },
- { 10132, 10180 },
- { 10183, 10213 },
- { 10224, 10626 },
- { 10649, 10711 },
- { 10716, 10747 },
- { 10750, 11123 },
- { 11126, 11157 },
+ { 9280, 9290 },
+ { 9372, 9449 },
+ { 9472, 10087 },
+ { 10132, 10180 },
+ { 10183, 10213 },
+ { 10224, 10626 },
+ { 10649, 10711 },
+ { 10716, 10747 },
+ { 10750, 11123 },
+ { 11126, 11157 },
{ 11159, 11263 },
- { 11493, 11498 },
+ { 11493, 11498 },
{ 11856, 11857 },
- { 11904, 11929 },
- { 11931, 12019 },
- { 12032, 12245 },
- { 12272, 12283 },
- { 12292, 12292 },
- { 12306, 12307 },
- { 12320, 12320 },
- { 12342, 12343 },
- { 12350, 12351 },
- { 12443, 12444 },
- { 12688, 12689 },
- { 12694, 12703 },
- { 12736, 12771 },
- { 12800, 12830 },
- { 12842, 12871 },
- { 12880, 12880 },
- { 12896, 12927 },
- { 12938, 12976 },
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12272, 12283 },
+ { 12292, 12292 },
+ { 12306, 12307 },
+ { 12320, 12320 },
+ { 12342, 12343 },
+ { 12350, 12351 },
+ { 12443, 12444 },
+ { 12688, 12689 },
+ { 12694, 12703 },
+ { 12736, 12771 },
+ { 12800, 12830 },
+ { 12842, 12871 },
+ { 12880, 12880 },
+ { 12896, 12927 },
+ { 12938, 12976 },
{ 12992, 13311 },
- { 19904, 19967 },
- { 42128, 42182 },
- { 42752, 42774 },
- { 42784, 42785 },
- { 42889, 42890 },
- { 43048, 43051 },
- { 43062, 43065 },
- { 43639, 43641 },
- { 43867, 43867 },
+ { 19904, 19967 },
+ { 42128, 42182 },
+ { 42752, 42774 },
+ { 42784, 42785 },
+ { 42889, 42890 },
+ { 43048, 43051 },
+ { 43062, 43065 },
+ { 43639, 43641 },
+ { 43867, 43867 },
{ 43882, 43883 },
- { 64297, 64297 },
+ { 64297, 64297 },
{ 64434, 64450 },
{ 64832, 64847 },
{ 64975, 64975 },
{ 65020, 65023 },
- { 65122, 65122 },
- { 65124, 65126 },
- { 65129, 65129 },
- { 65284, 65284 },
- { 65291, 65291 },
- { 65308, 65310 },
- { 65342, 65342 },
- { 65344, 65344 },
- { 65372, 65372 },
- { 65374, 65374 },
- { 65504, 65510 },
- { 65512, 65518 },
- { 65532, 65533 },
-};
-static const URange32 S_range32[] = {
- { 65847, 65855 },
- { 65913, 65929 },
+ { 65122, 65122 },
+ { 65124, 65126 },
+ { 65129, 65129 },
+ { 65284, 65284 },
+ { 65291, 65291 },
+ { 65308, 65310 },
+ { 65342, 65342 },
+ { 65344, 65344 },
+ { 65372, 65372 },
+ { 65374, 65374 },
+ { 65504, 65510 },
+ { 65512, 65518 },
+ { 65532, 65533 },
+};
+static const URange32 S_range32[] = {
+ { 65847, 65855 },
+ { 65913, 65929 },
{ 65932, 65934 },
{ 65936, 65948 },
- { 65952, 65952 },
- { 66000, 66044 },
- { 67703, 67704 },
- { 68296, 68296 },
- { 71487, 71487 },
+ { 65952, 65952 },
+ { 66000, 66044 },
+ { 67703, 67704 },
+ { 68296, 68296 },
+ { 71487, 71487 },
{ 73685, 73713 },
- { 92988, 92991 },
- { 92997, 92997 },
- { 113820, 113820 },
+ { 92988, 92991 },
+ { 92997, 92997 },
+ { 113820, 113820 },
{ 118608, 118723 },
- { 118784, 119029 },
- { 119040, 119078 },
- { 119081, 119140 },
- { 119146, 119148 },
- { 119171, 119172 },
- { 119180, 119209 },
+ { 118784, 119029 },
+ { 119040, 119078 },
+ { 119081, 119140 },
+ { 119146, 119148 },
+ { 119171, 119172 },
+ { 119180, 119209 },
{ 119214, 119274 },
- { 119296, 119361 },
- { 119365, 119365 },
- { 119552, 119638 },
- { 120513, 120513 },
- { 120539, 120539 },
- { 120571, 120571 },
- { 120597, 120597 },
- { 120629, 120629 },
- { 120655, 120655 },
- { 120687, 120687 },
- { 120713, 120713 },
- { 120745, 120745 },
- { 120771, 120771 },
- { 120832, 121343 },
- { 121399, 121402 },
- { 121453, 121460 },
- { 121462, 121475 },
- { 121477, 121478 },
+ { 119296, 119361 },
+ { 119365, 119365 },
+ { 119552, 119638 },
+ { 120513, 120513 },
+ { 120539, 120539 },
+ { 120571, 120571 },
+ { 120597, 120597 },
+ { 120629, 120629 },
+ { 120655, 120655 },
+ { 120687, 120687 },
+ { 120713, 120713 },
+ { 120745, 120745 },
+ { 120771, 120771 },
+ { 120832, 121343 },
+ { 121399, 121402 },
+ { 121453, 121460 },
+ { 121462, 121475 },
+ { 121477, 121478 },
{ 123215, 123215 },
{ 123647, 123647 },
{ 126124, 126124 },
{ 126128, 126128 },
{ 126254, 126254 },
- { 126704, 126705 },
- { 126976, 127019 },
- { 127024, 127123 },
- { 127136, 127150 },
- { 127153, 127167 },
- { 127169, 127183 },
- { 127185, 127221 },
+ { 126704, 126705 },
+ { 126976, 127019 },
+ { 127024, 127123 },
+ { 127136, 127150 },
+ { 127153, 127167 },
+ { 127169, 127183 },
+ { 127185, 127221 },
{ 127245, 127405 },
- { 127462, 127490 },
+ { 127462, 127490 },
{ 127504, 127547 },
- { 127552, 127560 },
- { 127568, 127569 },
+ { 127552, 127560 },
+ { 127568, 127569 },
{ 127584, 127589 },
{ 127744, 128727 },
{ 128733, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
+ { 128768, 128883 },
{ 128896, 128984 },
{ 128992, 129003 },
{ 129008, 129008 },
- { 129024, 129035 },
- { 129040, 129095 },
- { 129104, 129113 },
- { 129120, 129159 },
- { 129168, 129197 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
@@ -4589,7 +4589,7 @@ static const URange32 S_range32[] = {
{ 129776, 129782 },
{ 129792, 129938 },
{ 129940, 129994 },
-};
+};
static const URange16 Sc_range16[] = {
{ 36, 36 },
{ 162, 165 },
@@ -4718,181 +4718,181 @@ static const URange32 Sm_range32[] = {
{ 120771, 120771 },
{ 126704, 126705 },
};
-static const URange16 So_range16[] = {
- { 166, 166 },
- { 169, 169 },
- { 174, 174 },
- { 176, 176 },
- { 1154, 1154 },
- { 1421, 1422 },
- { 1550, 1551 },
- { 1758, 1758 },
- { 1769, 1769 },
- { 1789, 1790 },
- { 2038, 2038 },
- { 2554, 2554 },
- { 2928, 2928 },
- { 3059, 3064 },
- { 3066, 3066 },
- { 3199, 3199 },
+static const URange16 So_range16[] = {
+ { 166, 166 },
+ { 169, 169 },
+ { 174, 174 },
+ { 176, 176 },
+ { 1154, 1154 },
+ { 1421, 1422 },
+ { 1550, 1551 },
+ { 1758, 1758 },
+ { 1769, 1769 },
+ { 1789, 1790 },
+ { 2038, 2038 },
+ { 2554, 2554 },
+ { 2928, 2928 },
+ { 3059, 3064 },
+ { 3066, 3066 },
+ { 3199, 3199 },
{ 3407, 3407 },
- { 3449, 3449 },
- { 3841, 3843 },
- { 3859, 3859 },
- { 3861, 3863 },
- { 3866, 3871 },
- { 3892, 3892 },
- { 3894, 3894 },
- { 3896, 3896 },
- { 4030, 4037 },
- { 4039, 4044 },
- { 4046, 4047 },
- { 4053, 4056 },
- { 4254, 4255 },
- { 5008, 5017 },
+ { 3449, 3449 },
+ { 3841, 3843 },
+ { 3859, 3859 },
+ { 3861, 3863 },
+ { 3866, 3871 },
+ { 3892, 3892 },
+ { 3894, 3894 },
+ { 3896, 3896 },
+ { 4030, 4037 },
+ { 4039, 4044 },
+ { 4046, 4047 },
+ { 4053, 4056 },
+ { 4254, 4255 },
+ { 5008, 5017 },
{ 5741, 5741 },
- { 6464, 6464 },
- { 6622, 6655 },
- { 7009, 7018 },
- { 7028, 7036 },
- { 8448, 8449 },
- { 8451, 8454 },
- { 8456, 8457 },
- { 8468, 8468 },
- { 8470, 8471 },
- { 8478, 8483 },
- { 8485, 8485 },
- { 8487, 8487 },
- { 8489, 8489 },
- { 8494, 8494 },
- { 8506, 8507 },
- { 8522, 8522 },
- { 8524, 8525 },
- { 8527, 8527 },
- { 8586, 8587 },
- { 8597, 8601 },
- { 8604, 8607 },
- { 8609, 8610 },
- { 8612, 8613 },
- { 8615, 8621 },
- { 8623, 8653 },
- { 8656, 8657 },
- { 8659, 8659 },
- { 8661, 8691 },
- { 8960, 8967 },
- { 8972, 8991 },
- { 8994, 9000 },
- { 9003, 9083 },
- { 9085, 9114 },
- { 9140, 9179 },
+ { 6464, 6464 },
+ { 6622, 6655 },
+ { 7009, 7018 },
+ { 7028, 7036 },
+ { 8448, 8449 },
+ { 8451, 8454 },
+ { 8456, 8457 },
+ { 8468, 8468 },
+ { 8470, 8471 },
+ { 8478, 8483 },
+ { 8485, 8485 },
+ { 8487, 8487 },
+ { 8489, 8489 },
+ { 8494, 8494 },
+ { 8506, 8507 },
+ { 8522, 8522 },
+ { 8524, 8525 },
+ { 8527, 8527 },
+ { 8586, 8587 },
+ { 8597, 8601 },
+ { 8604, 8607 },
+ { 8609, 8610 },
+ { 8612, 8613 },
+ { 8615, 8621 },
+ { 8623, 8653 },
+ { 8656, 8657 },
+ { 8659, 8659 },
+ { 8661, 8691 },
+ { 8960, 8967 },
+ { 8972, 8991 },
+ { 8994, 9000 },
+ { 9003, 9083 },
+ { 9085, 9114 },
+ { 9140, 9179 },
{ 9186, 9254 },
- { 9280, 9290 },
- { 9372, 9449 },
- { 9472, 9654 },
- { 9656, 9664 },
- { 9666, 9719 },
- { 9728, 9838 },
- { 9840, 10087 },
- { 10132, 10175 },
- { 10240, 10495 },
- { 11008, 11055 },
- { 11077, 11078 },
- { 11085, 11123 },
- { 11126, 11157 },
+ { 9280, 9290 },
+ { 9372, 9449 },
+ { 9472, 9654 },
+ { 9656, 9664 },
+ { 9666, 9719 },
+ { 9728, 9838 },
+ { 9840, 10087 },
+ { 10132, 10175 },
+ { 10240, 10495 },
+ { 11008, 11055 },
+ { 11077, 11078 },
+ { 11085, 11123 },
+ { 11126, 11157 },
{ 11159, 11263 },
- { 11493, 11498 },
+ { 11493, 11498 },
{ 11856, 11857 },
- { 11904, 11929 },
- { 11931, 12019 },
- { 12032, 12245 },
- { 12272, 12283 },
- { 12292, 12292 },
- { 12306, 12307 },
- { 12320, 12320 },
- { 12342, 12343 },
- { 12350, 12351 },
- { 12688, 12689 },
- { 12694, 12703 },
- { 12736, 12771 },
- { 12800, 12830 },
- { 12842, 12871 },
- { 12880, 12880 },
- { 12896, 12927 },
- { 12938, 12976 },
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12272, 12283 },
+ { 12292, 12292 },
+ { 12306, 12307 },
+ { 12320, 12320 },
+ { 12342, 12343 },
+ { 12350, 12351 },
+ { 12688, 12689 },
+ { 12694, 12703 },
+ { 12736, 12771 },
+ { 12800, 12830 },
+ { 12842, 12871 },
+ { 12880, 12880 },
+ { 12896, 12927 },
+ { 12938, 12976 },
{ 12992, 13311 },
- { 19904, 19967 },
- { 42128, 42182 },
- { 43048, 43051 },
- { 43062, 43063 },
- { 43065, 43065 },
- { 43639, 43641 },
+ { 19904, 19967 },
+ { 42128, 42182 },
+ { 43048, 43051 },
+ { 43062, 43063 },
+ { 43065, 43065 },
+ { 43639, 43641 },
{ 64832, 64847 },
{ 64975, 64975 },
{ 65021, 65023 },
- { 65508, 65508 },
- { 65512, 65512 },
- { 65517, 65518 },
- { 65532, 65533 },
-};
-static const URange32 So_range32[] = {
- { 65847, 65855 },
- { 65913, 65929 },
+ { 65508, 65508 },
+ { 65512, 65512 },
+ { 65517, 65518 },
+ { 65532, 65533 },
+};
+static const URange32 So_range32[] = {
+ { 65847, 65855 },
+ { 65913, 65929 },
{ 65932, 65934 },
{ 65936, 65948 },
- { 65952, 65952 },
- { 66000, 66044 },
- { 67703, 67704 },
- { 68296, 68296 },
- { 71487, 71487 },
+ { 65952, 65952 },
+ { 66000, 66044 },
+ { 67703, 67704 },
+ { 68296, 68296 },
+ { 71487, 71487 },
{ 73685, 73692 },
{ 73697, 73713 },
- { 92988, 92991 },
- { 92997, 92997 },
- { 113820, 113820 },
+ { 92988, 92991 },
+ { 92997, 92997 },
+ { 113820, 113820 },
{ 118608, 118723 },
- { 118784, 119029 },
- { 119040, 119078 },
- { 119081, 119140 },
- { 119146, 119148 },
- { 119171, 119172 },
- { 119180, 119209 },
+ { 118784, 119029 },
+ { 119040, 119078 },
+ { 119081, 119140 },
+ { 119146, 119148 },
+ { 119171, 119172 },
+ { 119180, 119209 },
{ 119214, 119274 },
- { 119296, 119361 },
- { 119365, 119365 },
- { 119552, 119638 },
- { 120832, 121343 },
- { 121399, 121402 },
- { 121453, 121460 },
- { 121462, 121475 },
- { 121477, 121478 },
+ { 119296, 119361 },
+ { 119365, 119365 },
+ { 119552, 119638 },
+ { 120832, 121343 },
+ { 121399, 121402 },
+ { 121453, 121460 },
+ { 121462, 121475 },
+ { 121477, 121478 },
{ 123215, 123215 },
{ 126124, 126124 },
{ 126254, 126254 },
- { 126976, 127019 },
- { 127024, 127123 },
- { 127136, 127150 },
- { 127153, 127167 },
- { 127169, 127183 },
- { 127185, 127221 },
+ { 126976, 127019 },
+ { 127024, 127123 },
+ { 127136, 127150 },
+ { 127153, 127167 },
+ { 127169, 127183 },
+ { 127185, 127221 },
{ 127245, 127405 },
- { 127462, 127490 },
+ { 127462, 127490 },
{ 127504, 127547 },
- { 127552, 127560 },
- { 127568, 127569 },
+ { 127552, 127560 },
+ { 127568, 127569 },
{ 127584, 127589 },
- { 127744, 127994 },
+ { 127744, 127994 },
{ 128000, 128727 },
{ 128733, 128748 },
{ 128752, 128764 },
- { 128768, 128883 },
+ { 128768, 128883 },
{ 128896, 128984 },
{ 128992, 129003 },
{ 129008, 129008 },
- { 129024, 129035 },
- { 129040, 129095 },
- { 129104, 129113 },
- { 129120, 129159 },
- { 129168, 129197 },
+ { 129024, 129035 },
+ { 129040, 129095 },
+ { 129104, 129113 },
+ { 129120, 129159 },
+ { 129168, 129197 },
{ 129200, 129201 },
{ 129280, 129619 },
{ 129632, 129645 },
@@ -4907,23 +4907,23 @@ static const URange32 So_range32[] = {
{ 129776, 129782 },
{ 129792, 129938 },
{ 129940, 129994 },
-};
-static const URange16 Z_range16[] = {
- { 32, 32 },
- { 160, 160 },
- { 5760, 5760 },
- { 8192, 8202 },
- { 8232, 8233 },
- { 8239, 8239 },
- { 8287, 8287 },
- { 12288, 12288 },
-};
-static const URange16 Zl_range16[] = {
- { 8232, 8232 },
-};
+};
+static const URange16 Z_range16[] = {
+ { 32, 32 },
+ { 160, 160 },
+ { 5760, 5760 },
+ { 8192, 8202 },
+ { 8232, 8233 },
+ { 8239, 8239 },
+ { 8287, 8287 },
+ { 12288, 12288 },
+};
+static const URange16 Zl_range16[] = {
+ { 8232, 8232 },
+};
static const URange16 Zp_range16[] = {
{ 8233, 8233 },
-};
+};
static const URange16 Zs_range16[] = {
{ 32, 32 },
{ 160, 160 },
@@ -4932,12 +4932,12 @@ static const URange16 Zs_range16[] = {
{ 8239, 8239 },
{ 8287, 8287 },
{ 12288, 12288 },
-};
+};
static const URange32 Adlam_range32[] = {
{ 125184, 125259 },
{ 125264, 125273 },
{ 125278, 125279 },
-};
+};
static const URange32 Ahom_range32[] = {
{ 71424, 71450 },
{ 71453, 71467 },
@@ -5129,7 +5129,7 @@ static const URange16 Common_range16[] = {
{ 1563, 1563 },
{ 1567, 1567 },
{ 1600, 1600 },
- { 1757, 1757 },
+ { 1757, 1757 },
{ 2274, 2274 },
{ 2404, 2405 },
{ 3647, 3647 },
@@ -5192,7 +5192,7 @@ static const URange16 Common_range16[] = {
{ 65072, 65106 },
{ 65108, 65126 },
{ 65128, 65131 },
- { 65279, 65279 },
+ { 65279, 65279 },
{ 65281, 65312 },
{ 65339, 65344 },
{ 65371, 65381 },
@@ -5201,7 +5201,7 @@ static const URange16 Common_range16[] = {
{ 65504, 65510 },
{ 65512, 65518 },
{ 65529, 65533 },
-};
+};
static const URange32 Common_range32[] = {
{ 65792, 65794 },
{ 65799, 65843 },
@@ -5209,7 +5209,7 @@ static const URange32 Common_range32[] = {
{ 65936, 65948 },
{ 66000, 66044 },
{ 66273, 66299 },
- { 113824, 113827 },
+ { 113824, 113827 },
{ 118608, 118723 },
{ 118784, 119029 },
{ 119040, 119078 },
@@ -5284,20 +5284,20 @@ static const URange32 Common_range32[] = {
{ 129792, 129938 },
{ 129940, 129994 },
{ 130032, 130041 },
- { 917505, 917505 },
- { 917536, 917631 },
-};
+ { 917505, 917505 },
+ { 917536, 917631 },
+};
static const URange16 Coptic_range16[] = {
{ 994, 1007 },
{ 11392, 11507 },
{ 11513, 11519 },
-};
+};
static const URange32 Cuneiform_range32[] = {
{ 73728, 74649 },
{ 74752, 74862 },
{ 74864, 74868 },
{ 74880, 75075 },
-};
+};
static const URange32 Cypriot_range32[] = {
{ 67584, 67589 },
{ 67592, 67592 },
@@ -5305,29 +5305,29 @@ static const URange32 Cypriot_range32[] = {
{ 67639, 67640 },
{ 67644, 67644 },
{ 67647, 67647 },
-};
+};
static const URange32 Cypro_Minoan_range32[] = {
{ 77712, 77810 },
};
-static const URange16 Cyrillic_range16[] = {
- { 1024, 1156 },
- { 1159, 1327 },
+static const URange16 Cyrillic_range16[] = {
+ { 1024, 1156 },
+ { 1159, 1327 },
{ 7296, 7304 },
- { 7467, 7467 },
- { 7544, 7544 },
- { 11744, 11775 },
- { 42560, 42655 },
- { 65070, 65071 },
-};
+ { 7467, 7467 },
+ { 7544, 7544 },
+ { 11744, 11775 },
+ { 42560, 42655 },
+ { 65070, 65071 },
+};
static const URange32 Deseret_range32[] = {
{ 66560, 66639 },
-};
+};
static const URange16 Devanagari_range16[] = {
{ 2304, 2384 },
{ 2389, 2403 },
{ 2406, 2431 },
{ 43232, 43263 },
-};
+};
static const URange32 Dives_Akuru_range32[] = {
{ 71936, 71942 },
{ 71945, 71945 },
@@ -5358,40 +5358,40 @@ static const URange32 Elbasan_range32[] = {
static const URange32 Elymaic_range32[] = {
{ 69600, 69622 },
};
-static const URange16 Ethiopic_range16[] = {
- { 4608, 4680 },
- { 4682, 4685 },
- { 4688, 4694 },
- { 4696, 4696 },
- { 4698, 4701 },
- { 4704, 4744 },
- { 4746, 4749 },
- { 4752, 4784 },
- { 4786, 4789 },
- { 4792, 4798 },
- { 4800, 4800 },
- { 4802, 4805 },
- { 4808, 4822 },
- { 4824, 4880 },
- { 4882, 4885 },
- { 4888, 4954 },
- { 4957, 4988 },
- { 4992, 5017 },
- { 11648, 11670 },
- { 11680, 11686 },
- { 11688, 11694 },
- { 11696, 11702 },
- { 11704, 11710 },
- { 11712, 11718 },
- { 11720, 11726 },
- { 11728, 11734 },
- { 11736, 11742 },
- { 43777, 43782 },
- { 43785, 43790 },
- { 43793, 43798 },
- { 43808, 43814 },
- { 43816, 43822 },
-};
+static const URange16 Ethiopic_range16[] = {
+ { 4608, 4680 },
+ { 4682, 4685 },
+ { 4688, 4694 },
+ { 4696, 4696 },
+ { 4698, 4701 },
+ { 4704, 4744 },
+ { 4746, 4749 },
+ { 4752, 4784 },
+ { 4786, 4789 },
+ { 4792, 4798 },
+ { 4800, 4800 },
+ { 4802, 4805 },
+ { 4808, 4822 },
+ { 4824, 4880 },
+ { 4882, 4885 },
+ { 4888, 4954 },
+ { 4957, 4988 },
+ { 4992, 5017 },
+ { 11648, 11670 },
+ { 11680, 11686 },
+ { 11688, 11694 },
+ { 11696, 11702 },
+ { 11704, 11710 },
+ { 11712, 11718 },
+ { 11720, 11726 },
+ { 11728, 11734 },
+ { 11736, 11742 },
+ { 43777, 43782 },
+ { 43785, 43790 },
+ { 43793, 43798 },
+ { 43808, 43814 },
+ { 43816, 43822 },
+};
static const URange32 Ethiopic_range32[] = {
{ 124896, 124902 },
{ 124904, 124907 },
@@ -5409,20 +5409,20 @@ static const URange16 Georgian_range16[] = {
{ 11520, 11557 },
{ 11559, 11559 },
{ 11565, 11565 },
-};
+};
static const URange16 Glagolitic_range16[] = {
{ 11264, 11359 },
-};
+};
static const URange32 Glagolitic_range32[] = {
{ 122880, 122886 },
{ 122888, 122904 },
{ 122907, 122913 },
{ 122915, 122916 },
{ 122918, 122922 },
-};
+};
static const URange32 Gothic_range32[] = {
{ 66352, 66378 },
-};
+};
static const URange32 Grantha_range32[] = {
{ 70400, 70403 },
{ 70405, 70412 },
@@ -5522,30 +5522,30 @@ static const URange16 Gurmukhi_range16[] = {
{ 2654, 2654 },
{ 2662, 2678 },
};
-static const URange16 Han_range16[] = {
- { 11904, 11929 },
- { 11931, 12019 },
- { 12032, 12245 },
- { 12293, 12293 },
- { 12295, 12295 },
- { 12321, 12329 },
- { 12344, 12347 },
+static const URange16 Han_range16[] = {
+ { 11904, 11929 },
+ { 11931, 12019 },
+ { 12032, 12245 },
+ { 12293, 12293 },
+ { 12295, 12295 },
+ { 12321, 12329 },
+ { 12344, 12347 },
{ 13312, 19903 },
{ 19968, 40959 },
- { 63744, 64109 },
- { 64112, 64217 },
-};
-static const URange32 Han_range32[] = {
+ { 63744, 64109 },
+ { 64112, 64217 },
+};
+static const URange32 Han_range32[] = {
{ 94178, 94179 },
{ 94192, 94193 },
{ 131072, 173791 },
{ 173824, 177976 },
- { 177984, 178205 },
- { 178208, 183969 },
+ { 177984, 178205 },
+ { 178208, 183969 },
{ 183984, 191456 },
- { 194560, 195101 },
+ { 194560, 195101 },
{ 196608, 201546 },
-};
+};
static const URange16 Hangul_range16[] = {
{ 4352, 4607 },
{ 12334, 12335 },
@@ -5561,19 +5561,19 @@ static const URange16 Hangul_range16[] = {
{ 65482, 65487 },
{ 65490, 65495 },
{ 65498, 65500 },
-};
+};
static const URange32 Hanifi_Rohingya_range32[] = {
{ 68864, 68903 },
{ 68912, 68921 },
-};
+};
static const URange16 Hanunoo_range16[] = {
{ 5920, 5940 },
-};
+};
static const URange32 Hatran_range32[] = {
{ 67808, 67826 },
{ 67828, 67829 },
{ 67835, 67839 },
-};
+};
static const URange16 Hebrew_range16[] = {
{ 1425, 1479 },
{ 1488, 1514 },
@@ -5584,20 +5584,20 @@ static const URange16 Hebrew_range16[] = {
{ 64320, 64321 },
{ 64323, 64324 },
{ 64326, 64335 },
-};
-static const URange16 Hiragana_range16[] = {
- { 12353, 12438 },
- { 12445, 12447 },
-};
-static const URange32 Hiragana_range32[] = {
+};
+static const URange16 Hiragana_range16[] = {
+ { 12353, 12438 },
+ { 12445, 12447 },
+};
+static const URange32 Hiragana_range32[] = {
{ 110593, 110879 },
{ 110928, 110930 },
- { 127488, 127488 },
-};
+ { 127488, 127488 },
+};
static const URange32 Imperial_Aramaic_range32[] = {
{ 67648, 67669 },
{ 67671, 67679 },
-};
+};
static const URange16 Inherited_range16[] = {
{ 768, 879 },
{ 1157, 1158 },
@@ -5618,7 +5618,7 @@ static const URange16 Inherited_range16[] = {
{ 12441, 12442 },
{ 65024, 65039 },
{ 65056, 65069 },
-};
+};
static const URange32 Inherited_range32[] = {
{ 66045, 66045 },
{ 66272, 66272 },
@@ -5630,11 +5630,11 @@ static const URange32 Inherited_range32[] = {
{ 119173, 119179 },
{ 119210, 119213 },
{ 917760, 917999 },
-};
+};
static const URange32 Inscriptional_Pahlavi_range32[] = {
{ 68448, 68466 },
{ 68472, 68479 },
-};
+};
static const URange32 Inscriptional_Parthian_range32[] = {
{ 68416, 68437 },
{ 68440, 68447 },
@@ -5680,38 +5680,38 @@ static const URange32 Katakana_range32[] = {
{ 110880, 110882 },
{ 110948, 110951 },
};
-static const URange16 Kayah_Li_range16[] = {
- { 43264, 43309 },
- { 43311, 43311 },
-};
-static const URange32 Kharoshthi_range32[] = {
- { 68096, 68099 },
- { 68101, 68102 },
- { 68108, 68115 },
- { 68117, 68119 },
+static const URange16 Kayah_Li_range16[] = {
+ { 43264, 43309 },
+ { 43311, 43311 },
+};
+static const URange32 Kharoshthi_range32[] = {
+ { 68096, 68099 },
+ { 68101, 68102 },
+ { 68108, 68115 },
+ { 68117, 68119 },
{ 68121, 68149 },
- { 68152, 68154 },
+ { 68152, 68154 },
{ 68159, 68168 },
- { 68176, 68184 },
-};
+ { 68176, 68184 },
+};
static const URange32 Khitan_Small_Script_range32[] = {
{ 94180, 94180 },
{ 101120, 101589 },
-};
+};
static const URange16 Khmer_range16[] = {
{ 6016, 6109 },
{ 6112, 6121 },
{ 6128, 6137 },
{ 6624, 6655 },
-};
+};
static const URange32 Khojki_range32[] = {
{ 70144, 70161 },
{ 70163, 70206 },
-};
+};
static const URange32 Khudawadi_range32[] = {
{ 70320, 70378 },
{ 70384, 70393 },
-};
+};
static const URange16 Lao_range16[] = {
{ 3713, 3714 },
{ 3716, 3716 },
@@ -5725,42 +5725,42 @@ static const URange16 Lao_range16[] = {
{ 3792, 3801 },
{ 3804, 3807 },
};
-static const URange16 Latin_range16[] = {
- { 65, 90 },
- { 97, 122 },
- { 170, 170 },
- { 186, 186 },
- { 192, 214 },
- { 216, 246 },
- { 248, 696 },
- { 736, 740 },
- { 7424, 7461 },
- { 7468, 7516 },
- { 7522, 7525 },
- { 7531, 7543 },
- { 7545, 7614 },
- { 7680, 7935 },
- { 8305, 8305 },
- { 8319, 8319 },
- { 8336, 8348 },
- { 8490, 8491 },
- { 8498, 8498 },
- { 8526, 8526 },
- { 8544, 8584 },
- { 11360, 11391 },
- { 42786, 42887 },
+static const URange16 Latin_range16[] = {
+ { 65, 90 },
+ { 97, 122 },
+ { 170, 170 },
+ { 186, 186 },
+ { 192, 214 },
+ { 216, 246 },
+ { 248, 696 },
+ { 736, 740 },
+ { 7424, 7461 },
+ { 7468, 7516 },
+ { 7522, 7525 },
+ { 7531, 7543 },
+ { 7545, 7614 },
+ { 7680, 7935 },
+ { 8305, 8305 },
+ { 8319, 8319 },
+ { 8336, 8348 },
+ { 8490, 8491 },
+ { 8498, 8498 },
+ { 8526, 8526 },
+ { 8544, 8584 },
+ { 11360, 11391 },
+ { 42786, 42887 },
{ 42891, 42954 },
{ 42960, 42961 },
{ 42963, 42963 },
{ 42965, 42969 },
{ 42994, 43007 },
- { 43824, 43866 },
- { 43868, 43876 },
+ { 43824, 43866 },
+ { 43868, 43876 },
{ 43878, 43881 },
- { 64256, 64262 },
- { 65313, 65338 },
- { 65345, 65370 },
-};
+ { 64256, 64262 },
+ { 65313, 65338 },
+ { 65345, 65370 },
+};
static const URange32 Latin_range32[] = {
{ 67456, 67461 },
{ 67463, 67504 },
@@ -5771,19 +5771,19 @@ static const URange16 Lepcha_range16[] = {
{ 7168, 7223 },
{ 7227, 7241 },
{ 7245, 7247 },
-};
+};
static const URange16 Limbu_range16[] = {
{ 6400, 6430 },
{ 6432, 6443 },
{ 6448, 6459 },
{ 6464, 6464 },
{ 6468, 6479 },
-};
+};
static const URange32 Linear_A_range32[] = {
{ 67072, 67382 },
{ 67392, 67413 },
{ 67424, 67431 },
-};
+};
static const URange32 Linear_B_range32[] = {
{ 65536, 65547 },
{ 65549, 65574 },
@@ -5792,26 +5792,26 @@ static const URange32 Linear_B_range32[] = {
{ 65599, 65613 },
{ 65616, 65629 },
{ 65664, 65786 },
-};
+};
static const URange16 Lisu_range16[] = {
{ 42192, 42239 },
-};
+};
static const URange32 Lisu_range32[] = {
{ 73648, 73648 },
-};
+};
static const URange32 Lycian_range32[] = {
{ 66176, 66204 },
-};
+};
static const URange32 Lydian_range32[] = {
{ 67872, 67897 },
{ 67903, 67903 },
-};
+};
static const URange32 Mahajani_range32[] = {
{ 69968, 70006 },
-};
+};
static const URange32 Makasar_range32[] = {
{ 73440, 73464 },
-};
+};
static const URange16 Malayalam_range16[] = {
{ 3328, 3340 },
{ 3342, 3344 },
@@ -5820,20 +5820,20 @@ static const URange16 Malayalam_range16[] = {
{ 3402, 3407 },
{ 3412, 3427 },
{ 3430, 3455 },
-};
+};
static const URange16 Mandaic_range16[] = {
{ 2112, 2139 },
{ 2142, 2142 },
-};
+};
static const URange32 Manichaean_range32[] = {
{ 68288, 68326 },
{ 68331, 68342 },
-};
+};
static const URange32 Marchen_range32[] = {
{ 72816, 72847 },
{ 72850, 72871 },
{ 72873, 72886 },
-};
+};
static const URange32 Masaram_Gondi_range32[] = {
{ 72960, 72966 },
{ 72968, 72969 },
@@ -5842,78 +5842,78 @@ static const URange32 Masaram_Gondi_range32[] = {
{ 73020, 73021 },
{ 73023, 73031 },
{ 73040, 73049 },
-};
+};
static const URange32 Medefaidrin_range32[] = {
{ 93760, 93850 },
-};
+};
static const URange16 Meetei_Mayek_range16[] = {
{ 43744, 43766 },
{ 43968, 44013 },
{ 44016, 44025 },
};
-static const URange32 Mende_Kikakui_range32[] = {
- { 124928, 125124 },
- { 125127, 125142 },
-};
+static const URange32 Mende_Kikakui_range32[] = {
+ { 124928, 125124 },
+ { 125127, 125142 },
+};
static const URange32 Meroitic_Cursive_range32[] = {
{ 68000, 68023 },
{ 68028, 68047 },
{ 68050, 68095 },
-};
+};
static const URange32 Meroitic_Hieroglyphs_range32[] = {
{ 67968, 67999 },
-};
+};
static const URange32 Miao_range32[] = {
{ 93952, 94026 },
{ 94031, 94087 },
{ 94095, 94111 },
-};
+};
static const URange32 Modi_range32[] = {
{ 71168, 71236 },
{ 71248, 71257 },
-};
+};
static const URange16 Mongolian_range16[] = {
{ 6144, 6145 },
{ 6148, 6148 },
{ 6150, 6169 },
{ 6176, 6264 },
{ 6272, 6314 },
-};
+};
static const URange32 Mongolian_range32[] = {
{ 71264, 71276 },
-};
+};
static const URange32 Mro_range32[] = {
{ 92736, 92766 },
{ 92768, 92777 },
{ 92782, 92783 },
-};
+};
static const URange32 Multani_range32[] = {
{ 70272, 70278 },
{ 70280, 70280 },
{ 70282, 70285 },
{ 70287, 70301 },
{ 70303, 70313 },
-};
+};
static const URange16 Myanmar_range16[] = {
{ 4096, 4255 },
{ 43488, 43518 },
{ 43616, 43647 },
-};
+};
static const URange32 Nabataean_range32[] = {
{ 67712, 67742 },
{ 67751, 67759 },
-};
+};
static const URange32 Nandinagari_range32[] = {
{ 72096, 72103 },
{ 72106, 72151 },
{ 72154, 72164 },
-};
-static const URange16 New_Tai_Lue_range16[] = {
- { 6528, 6571 },
- { 6576, 6601 },
- { 6608, 6618 },
- { 6622, 6623 },
-};
+};
+static const URange16 New_Tai_Lue_range16[] = {
+ { 6528, 6571 },
+ { 6576, 6601 },
+ { 6608, 6618 },
+ { 6622, 6623 },
+};
static const URange32 Newa_range32[] = {
{ 70656, 70747 },
{ 70749, 70753 },
@@ -5935,24 +5935,24 @@ static const URange32 Nyiakeng_Puachue_Hmong_range32[] = {
static const URange16 Ogham_range16[] = {
{ 5760, 5788 },
};
-static const URange16 Ol_Chiki_range16[] = {
- { 7248, 7295 },
-};
+static const URange16 Ol_Chiki_range16[] = {
+ { 7248, 7295 },
+};
static const URange32 Old_Hungarian_range32[] = {
{ 68736, 68786 },
{ 68800, 68850 },
{ 68858, 68863 },
-};
+};
static const URange32 Old_Italic_range32[] = {
{ 66304, 66339 },
{ 66349, 66351 },
-};
+};
static const URange32 Old_North_Arabian_range32[] = {
{ 68224, 68255 },
-};
+};
static const URange32 Old_Permic_range32[] = {
{ 66384, 66426 },
-};
+};
static const URange32 Old_Persian_range32[] = {
{ 66464, 66499 },
{ 66504, 66517 },
@@ -5969,154 +5969,154 @@ static const URange32 Old_Turkic_range32[] = {
static const URange32 Old_Uyghur_range32[] = {
{ 69488, 69513 },
};
-static const URange16 Oriya_range16[] = {
- { 2817, 2819 },
- { 2821, 2828 },
- { 2831, 2832 },
- { 2835, 2856 },
- { 2858, 2864 },
- { 2866, 2867 },
- { 2869, 2873 },
- { 2876, 2884 },
- { 2887, 2888 },
- { 2891, 2893 },
+static const URange16 Oriya_range16[] = {
+ { 2817, 2819 },
+ { 2821, 2828 },
+ { 2831, 2832 },
+ { 2835, 2856 },
+ { 2858, 2864 },
+ { 2866, 2867 },
+ { 2869, 2873 },
+ { 2876, 2884 },
+ { 2887, 2888 },
+ { 2891, 2893 },
{ 2901, 2903 },
- { 2908, 2909 },
- { 2911, 2915 },
- { 2918, 2935 },
-};
+ { 2908, 2909 },
+ { 2911, 2915 },
+ { 2918, 2935 },
+};
static const URange32 Osage_range32[] = {
{ 66736, 66771 },
{ 66776, 66811 },
-};
+};
static const URange32 Osmanya_range32[] = {
{ 66688, 66717 },
{ 66720, 66729 },
-};
+};
static const URange32 Pahawh_Hmong_range32[] = {
{ 92928, 92997 },
{ 93008, 93017 },
{ 93019, 93025 },
{ 93027, 93047 },
{ 93053, 93071 },
-};
+};
static const URange32 Palmyrene_range32[] = {
{ 67680, 67711 },
-};
+};
static const URange32 Pau_Cin_Hau_range32[] = {
{ 72384, 72440 },
-};
+};
static const URange16 Phags_Pa_range16[] = {
{ 43072, 43127 },
-};
-static const URange32 Phoenician_range32[] = {
- { 67840, 67867 },
- { 67871, 67871 },
-};
+};
+static const URange32 Phoenician_range32[] = {
+ { 67840, 67867 },
+ { 67871, 67871 },
+};
static const URange32 Psalter_Pahlavi_range32[] = {
{ 68480, 68497 },
{ 68505, 68508 },
{ 68521, 68527 },
-};
+};
static const URange16 Rejang_range16[] = {
{ 43312, 43347 },
{ 43359, 43359 },
-};
+};
static const URange16 Runic_range16[] = {
{ 5792, 5866 },
{ 5870, 5880 },
-};
+};
static const URange16 Samaritan_range16[] = {
{ 2048, 2093 },
{ 2096, 2110 },
-};
+};
static const URange16 Saurashtra_range16[] = {
{ 43136, 43205 },
{ 43214, 43225 },
-};
+};
static const URange32 Sharada_range32[] = {
{ 70016, 70111 },
-};
+};
static const URange32 Shavian_range32[] = {
{ 66640, 66687 },
-};
+};
static const URange32 Siddham_range32[] = {
{ 71040, 71093 },
{ 71096, 71133 },
-};
+};
static const URange32 SignWriting_range32[] = {
{ 120832, 121483 },
{ 121499, 121503 },
{ 121505, 121519 },
-};
-static const URange16 Sinhala_range16[] = {
+};
+static const URange16 Sinhala_range16[] = {
{ 3457, 3459 },
- { 3461, 3478 },
- { 3482, 3505 },
- { 3507, 3515 },
- { 3517, 3517 },
- { 3520, 3526 },
- { 3530, 3530 },
- { 3535, 3540 },
- { 3542, 3542 },
- { 3544, 3551 },
- { 3558, 3567 },
- { 3570, 3572 },
-};
-static const URange32 Sinhala_range32[] = {
- { 70113, 70132 },
-};
+ { 3461, 3478 },
+ { 3482, 3505 },
+ { 3507, 3515 },
+ { 3517, 3517 },
+ { 3520, 3526 },
+ { 3530, 3530 },
+ { 3535, 3540 },
+ { 3542, 3542 },
+ { 3544, 3551 },
+ { 3558, 3567 },
+ { 3570, 3572 },
+};
+static const URange32 Sinhala_range32[] = {
+ { 70113, 70132 },
+};
static const URange32 Sogdian_range32[] = {
{ 69424, 69465 },
-};
+};
static const URange32 Sora_Sompeng_range32[] = {
{ 69840, 69864 },
{ 69872, 69881 },
-};
+};
static const URange32 Soyombo_range32[] = {
{ 72272, 72354 },
-};
+};
static const URange16 Sundanese_range16[] = {
{ 7040, 7103 },
{ 7360, 7367 },
-};
+};
static const URange16 Syloti_Nagri_range16[] = {
{ 43008, 43052 },
-};
+};
static const URange16 Syriac_range16[] = {
{ 1792, 1805 },
{ 1807, 1866 },
{ 1869, 1871 },
{ 2144, 2154 },
-};
+};
static const URange16 Tagalog_range16[] = {
{ 5888, 5909 },
{ 5919, 5919 },
-};
+};
static const URange16 Tagbanwa_range16[] = {
{ 5984, 5996 },
{ 5998, 6000 },
{ 6002, 6003 },
-};
+};
static const URange16 Tai_Le_range16[] = {
{ 6480, 6509 },
{ 6512, 6516 },
-};
+};
static const URange16 Tai_Tham_range16[] = {
{ 6688, 6750 },
{ 6752, 6780 },
{ 6783, 6793 },
{ 6800, 6809 },
{ 6816, 6829 },
-};
+};
static const URange16 Tai_Viet_range16[] = {
{ 43648, 43714 },
{ 43739, 43743 },
-};
+};
static const URange32 Takri_range32[] = {
{ 71296, 71353 },
{ 71360, 71369 },
-};
+};
static const URange16 Tamil_range16[] = {
{ 2946, 2947 },
{ 2949, 2954 },
@@ -6134,11 +6134,11 @@ static const URange16 Tamil_range16[] = {
{ 3024, 3024 },
{ 3031, 3031 },
{ 3046, 3066 },
-};
+};
static const URange32 Tamil_range32[] = {
{ 73664, 73713 },
{ 73727, 73727 },
-};
+};
static const URange32 Tangsa_range32[] = {
{ 92784, 92862 },
{ 92864, 92873 },
@@ -6148,7 +6148,7 @@ static const URange32 Tangut_range32[] = {
{ 94208, 100343 },
{ 100352, 101119 },
{ 101632, 101640 },
-};
+};
static const URange16 Telugu_range16[] = {
{ 3072, 3084 },
{ 3086, 3088 },
@@ -6163,14 +6163,14 @@ static const URange16 Telugu_range16[] = {
{ 3168, 3171 },
{ 3174, 3183 },
{ 3191, 3199 },
-};
+};
static const URange16 Thaana_range16[] = {
{ 1920, 1969 },
-};
+};
static const URange16 Thai_range16[] = {
{ 3585, 3642 },
{ 3648, 3675 },
-};
+};
static const URange16 Tibetan_range16[] = {
{ 3840, 3911 },
{ 3913, 3948 },
@@ -6179,26 +6179,26 @@ static const URange16 Tibetan_range16[] = {
{ 4030, 4044 },
{ 4046, 4052 },
{ 4057, 4058 },
-};
+};
static const URange16 Tifinagh_range16[] = {
{ 11568, 11623 },
{ 11631, 11632 },
{ 11647, 11647 },
-};
-static const URange32 Tirhuta_range32[] = {
- { 70784, 70855 },
- { 70864, 70873 },
-};
+};
+static const URange32 Tirhuta_range32[] = {
+ { 70784, 70855 },
+ { 70864, 70873 },
+};
static const URange32 Toto_range32[] = {
{ 123536, 123566 },
};
static const URange32 Ugaritic_range32[] = {
{ 66432, 66461 },
{ 66463, 66463 },
-};
+};
static const URange16 Vai_range16[] = {
{ 42240, 42539 },
-};
+};
static const URange32 Vithkuqi_range32[] = {
{ 66928, 66938 },
{ 66940, 66954 },
@@ -6212,226 +6212,226 @@ static const URange32 Vithkuqi_range32[] = {
static const URange32 Wancho_range32[] = {
{ 123584, 123641 },
{ 123647, 123647 },
-};
+};
static const URange32 Warang_Citi_range32[] = {
{ 71840, 71922 },
{ 71935, 71935 },
-};
+};
static const URange32 Yezidi_range32[] = {
{ 69248, 69289 },
{ 69291, 69293 },
{ 69296, 69297 },
-};
+};
static const URange16 Yi_range16[] = {
{ 40960, 42124 },
{ 42128, 42182 },
-};
+};
static const URange32 Zanabazar_Square_range32[] = {
{ 72192, 72263 },
-};
+};
// 4038 16-bit ranges, 1712 32-bit ranges
-const UGroup unicode_groups[] = {
+const UGroup unicode_groups[] = {
{ "Adlam", +1, 0, 0, Adlam_range32, 3 },
- { "Ahom", +1, 0, 0, Ahom_range32, 3 },
- { "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 },
+ { "Ahom", +1, 0, 0, Ahom_range32, 3 },
+ { "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 },
{ "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 },
{ "Armenian", +1, Armenian_range16, 4, 0, 0 },
- { "Avestan", +1, 0, 0, Avestan_range32, 2 },
- { "Balinese", +1, Balinese_range16, 2, 0, 0 },
- { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 },
- { "Bassa_Vah", +1, 0, 0, Bassa_Vah_range32, 2 },
- { "Batak", +1, Batak_range16, 2, 0, 0 },
- { "Bengali", +1, Bengali_range16, 14, 0, 0 },
+ { "Avestan", +1, 0, 0, Avestan_range32, 2 },
+ { "Balinese", +1, Balinese_range16, 2, 0, 0 },
+ { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 },
+ { "Bassa_Vah", +1, 0, 0, Bassa_Vah_range32, 2 },
+ { "Batak", +1, Batak_range16, 2, 0, 0 },
+ { "Bengali", +1, Bengali_range16, 14, 0, 0 },
{ "Bhaiksuki", +1, 0, 0, Bhaiksuki_range32, 4 },
- { "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 },
- { "Brahmi", +1, 0, 0, Brahmi_range32, 3 },
- { "Braille", +1, Braille_range16, 1, 0, 0 },
- { "Buginese", +1, Buginese_range16, 2, 0, 0 },
- { "Buhid", +1, Buhid_range16, 1, 0, 0 },
+ { "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 },
+ { "Brahmi", +1, 0, 0, Brahmi_range32, 3 },
+ { "Braille", +1, Braille_range16, 1, 0, 0 },
+ { "Buginese", +1, Buginese_range16, 2, 0, 0 },
+ { "Buhid", +1, Buhid_range16, 1, 0, 0 },
{ "C", +1, C_range16, 17, C_range32, 9 },
{ "Canadian_Aboriginal", +1, Canadian_Aboriginal_range16, 2, Canadian_Aboriginal_range32, 1 },
- { "Carian", +1, 0, 0, Carian_range32, 1 },
- { "Caucasian_Albanian", +1, 0, 0, Caucasian_Albanian_range32, 2 },
- { "Cc", +1, Cc_range16, 2, 0, 0 },
+ { "Carian", +1, 0, 0, Carian_range32, 1 },
+ { "Caucasian_Albanian", +1, 0, 0, Caucasian_Albanian_range32, 2 },
+ { "Cc", +1, Cc_range16, 2, 0, 0 },
{ "Cf", +1, Cf_range16, 14, Cf_range32, 7 },
- { "Chakma", +1, 0, 0, Chakma_range32, 2 },
- { "Cham", +1, Cham_range16, 4, 0, 0 },
- { "Cherokee", +1, Cherokee_range16, 3, 0, 0 },
+ { "Chakma", +1, 0, 0, Chakma_range32, 2 },
+ { "Cham", +1, Cham_range16, 4, 0, 0 },
+ { "Cherokee", +1, Cherokee_range16, 3, 0, 0 },
{ "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 },
- { "Co", +1, Co_range16, 1, Co_range32, 2 },
+ { "Co", +1, Co_range16, 1, Co_range32, 2 },
{ "Common", +1, Common_range16, 91, Common_range32, 83 },
- { "Coptic", +1, Coptic_range16, 3, 0, 0 },
- { "Cs", +1, Cs_range16, 1, 0, 0 },
- { "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 },
- { "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
+ { "Coptic", +1, Coptic_range16, 3, 0, 0 },
+ { "Cs", +1, Cs_range16, 1, 0, 0 },
+ { "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 },
+ { "Cypriot", +1, 0, 0, Cypriot_range32, 6 },
{ "Cypro_Minoan", +1, 0, 0, Cypro_Minoan_range32, 1 },
{ "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 },
- { "Deseret", +1, 0, 0, Deseret_range32, 1 },
- { "Devanagari", +1, Devanagari_range16, 4, 0, 0 },
+ { "Deseret", +1, 0, 0, Deseret_range32, 1 },
+ { "Devanagari", +1, Devanagari_range16, 4, 0, 0 },
{ "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 },
{ "Dogra", +1, 0, 0, Dogra_range32, 1 },
- { "Duployan", +1, 0, 0, Duployan_range32, 5 },
+ { "Duployan", +1, 0, 0, Duployan_range32, 5 },
{ "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 },
- { "Elbasan", +1, 0, 0, Elbasan_range32, 1 },
+ { "Elbasan", +1, 0, 0, Elbasan_range32, 1 },
{ "Elymaic", +1, 0, 0, Elymaic_range32, 1 },
{ "Ethiopic", +1, Ethiopic_range16, 32, Ethiopic_range32, 4 },
{ "Georgian", +1, Georgian_range16, 10, 0, 0 },
{ "Glagolitic", +1, Glagolitic_range16, 1, Glagolitic_range32, 5 },
- { "Gothic", +1, 0, 0, Gothic_range32, 1 },
- { "Grantha", +1, 0, 0, Grantha_range32, 15 },
- { "Greek", +1, Greek_range16, 33, Greek_range32, 3 },
- { "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
+ { "Gothic", +1, 0, 0, Gothic_range32, 1 },
+ { "Grantha", +1, 0, 0, Grantha_range32, 15 },
+ { "Greek", +1, Greek_range16, 33, Greek_range32, 3 },
+ { "Gujarati", +1, Gujarati_range16, 14, 0, 0 },
{ "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 },
- { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
+ { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 },
{ "Han", +1, Han_range16, 11, Han_range32, 9 },
- { "Hangul", +1, Hangul_range16, 14, 0, 0 },
+ { "Hangul", +1, Hangul_range16, 14, 0, 0 },
{ "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 },
- { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
- { "Hatran", +1, 0, 0, Hatran_range32, 3 },
- { "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
+ { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 },
+ { "Hatran", +1, 0, 0, Hatran_range32, 3 },
+ { "Hebrew", +1, Hebrew_range16, 9, 0, 0 },
{ "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 3 },
- { "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
+ { "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 },
{ "Inherited", +1, Inherited_range16, 19, Inherited_range32, 10 },
- { "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
- { "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 },
- { "Javanese", +1, Javanese_range16, 3, 0, 0 },
+ { "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 },
+ { "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 },
+ { "Javanese", +1, Javanese_range16, 3, 0, 0 },
{ "Kaithi", +1, 0, 0, Kaithi_range32, 2 },
{ "Kannada", +1, Kannada_range16, 13, 0, 0 },
{ "Katakana", +1, Katakana_range16, 7, Katakana_range32, 6 },
- { "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 },
- { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
+ { "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 },
+ { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 },
{ "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 },
- { "Khmer", +1, Khmer_range16, 4, 0, 0 },
- { "Khojki", +1, 0, 0, Khojki_range32, 2 },
- { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
+ { "Khmer", +1, Khmer_range16, 4, 0, 0 },
+ { "Khojki", +1, 0, 0, Khojki_range32, 2 },
+ { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 },
{ "L", +1, L_range16, 380, L_range32, 268 },
{ "Lao", +1, Lao_range16, 11, 0, 0 },
{ "Latin", +1, Latin_range16, 34, Latin_range32, 4 },
- { "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
- { "Limbu", +1, Limbu_range16, 5, 0, 0 },
- { "Linear_A", +1, 0, 0, Linear_A_range32, 3 },
- { "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
+ { "Lepcha", +1, Lepcha_range16, 3, 0, 0 },
+ { "Limbu", +1, Limbu_range16, 5, 0, 0 },
+ { "Linear_A", +1, 0, 0, Linear_A_range32, 3 },
+ { "Linear_B", +1, 0, 0, Linear_B_range32, 7 },
{ "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 },
{ "Ll", +1, Ll_range16, 617, Ll_range32, 40 },
{ "Lm", +1, Lm_range16, 57, Lm_range32, 12 },
{ "Lo", +1, Lo_range16, 290, Lo_range32, 211 },
- { "Lt", +1, Lt_range16, 10, 0, 0 },
+ { "Lt", +1, Lt_range16, 10, 0, 0 },
{ "Lu", +1, Lu_range16, 605, Lu_range32, 41 },
- { "Lycian", +1, 0, 0, Lycian_range32, 1 },
- { "Lydian", +1, 0, 0, Lydian_range32, 2 },
+ { "Lycian", +1, 0, 0, Lycian_range32, 1 },
+ { "Lydian", +1, 0, 0, Lydian_range32, 2 },
{ "M", +1, M_range16, 189, M_range32, 110 },
- { "Mahajani", +1, 0, 0, Mahajani_range32, 1 },
+ { "Mahajani", +1, 0, 0, Mahajani_range32, 1 },
{ "Makasar", +1, 0, 0, Makasar_range32, 1 },
{ "Malayalam", +1, Malayalam_range16, 7, 0, 0 },
- { "Mandaic", +1, Mandaic_range16, 2, 0, 0 },
- { "Manichaean", +1, 0, 0, Manichaean_range32, 2 },
+ { "Mandaic", +1, Mandaic_range16, 2, 0, 0 },
+ { "Manichaean", +1, 0, 0, Manichaean_range32, 2 },
{ "Marchen", +1, 0, 0, Marchen_range32, 3 },
{ "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 },
{ "Mc", +1, Mc_range16, 111, Mc_range32, 66 },
- { "Me", +1, Me_range16, 5, 0, 0 },
+ { "Me", +1, Me_range16, 5, 0, 0 },
{ "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 },
- { "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 },
- { "Mende_Kikakui", +1, 0, 0, Mende_Kikakui_range32, 2 },
- { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 },
- { "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 },
- { "Miao", +1, 0, 0, Miao_range32, 3 },
+ { "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 },
+ { "Mende_Kikakui", +1, 0, 0, Mende_Kikakui_range32, 2 },
+ { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 },
+ { "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 },
+ { "Miao", +1, 0, 0, Miao_range32, 3 },
{ "Mn", +1, Mn_range16, 212, Mn_range32, 124 },
- { "Modi", +1, 0, 0, Modi_range32, 2 },
+ { "Modi", +1, 0, 0, Modi_range32, 2 },
{ "Mongolian", +1, Mongolian_range16, 5, Mongolian_range32, 1 },
- { "Mro", +1, 0, 0, Mro_range32, 3 },
- { "Multani", +1, 0, 0, Multani_range32, 5 },
- { "Myanmar", +1, Myanmar_range16, 3, 0, 0 },
+ { "Mro", +1, 0, 0, Mro_range32, 3 },
+ { "Multani", +1, 0, 0, Multani_range32, 5 },
+ { "Myanmar", +1, Myanmar_range16, 3, 0, 0 },
{ "N", +1, N_range16, 67, N_range32, 67 },
- { "Nabataean", +1, 0, 0, Nabataean_range32, 2 },
+ { "Nabataean", +1, 0, 0, Nabataean_range32, 2 },
{ "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 },
{ "Nd", +1, Nd_range16, 37, Nd_range32, 25 },
- { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
+ { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 },
{ "Newa", +1, 0, 0, Newa_range32, 2 },
{ "Nko", +1, Nko_range16, 2, 0, 0 },
- { "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
+ { "Nl", +1, Nl_range16, 7, Nl_range32, 5 },
{ "No", +1, No_range16, 29, No_range32, 42 },
{ "Nushu", +1, 0, 0, Nushu_range32, 2 },
{ "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 },
- { "Ogham", +1, Ogham_range16, 1, 0, 0 },
- { "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 },
- { "Old_Hungarian", +1, 0, 0, Old_Hungarian_range32, 3 },
+ { "Ogham", +1, Ogham_range16, 1, 0, 0 },
+ { "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 },
+ { "Old_Hungarian", +1, 0, 0, Old_Hungarian_range32, 3 },
{ "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 },
- { "Old_North_Arabian", +1, 0, 0, Old_North_Arabian_range32, 1 },
- { "Old_Permic", +1, 0, 0, Old_Permic_range32, 1 },
- { "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 },
+ { "Old_North_Arabian", +1, 0, 0, Old_North_Arabian_range32, 1 },
+ { "Old_Permic", +1, 0, 0, Old_Permic_range32, 1 },
+ { "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 },
{ "Old_Sogdian", +1, 0, 0, Old_Sogdian_range32, 1 },
- { "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 },
- { "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 },
+ { "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 },
+ { "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 },
{ "Old_Uyghur", +1, 0, 0, Old_Uyghur_range32, 1 },
- { "Oriya", +1, Oriya_range16, 14, 0, 0 },
+ { "Oriya", +1, Oriya_range16, 14, 0, 0 },
{ "Osage", +1, 0, 0, Osage_range32, 2 },
- { "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
+ { "Osmanya", +1, 0, 0, Osmanya_range32, 2 },
{ "P", +1, P_range16, 133, P_range32, 56 },
- { "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 },
- { "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 },
- { "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 },
- { "Pc", +1, Pc_range16, 6, 0, 0 },
+ { "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 },
+ { "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 },
+ { "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 },
+ { "Pc", +1, Pc_range16, 6, 0, 0 },
{ "Pd", +1, Pd_range16, 18, Pd_range32, 1 },
{ "Pe", +1, Pe_range16, 76, 0, 0 },
- { "Pf", +1, Pf_range16, 10, 0, 0 },
- { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 },
- { "Phoenician", +1, 0, 0, Phoenician_range32, 2 },
- { "Pi", +1, Pi_range16, 11, 0, 0 },
+ { "Pf", +1, Pf_range16, 10, 0, 0 },
+ { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 },
+ { "Phoenician", +1, 0, 0, Phoenician_range32, 2 },
+ { "Pi", +1, Pi_range16, 11, 0, 0 },
{ "Po", +1, Po_range16, 130, Po_range32, 55 },
{ "Ps", +1, Ps_range16, 79, 0, 0 },
- { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
- { "Rejang", +1, Rejang_range16, 2, 0, 0 },
- { "Runic", +1, Runic_range16, 2, 0, 0 },
+ { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 },
+ { "Rejang", +1, Rejang_range16, 2, 0, 0 },
+ { "Runic", +1, Runic_range16, 2, 0, 0 },
{ "S", +1, S_range16, 151, S_range32, 83 },
- { "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
- { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
+ { "Samaritan", +1, Samaritan_range16, 2, 0, 0 },
+ { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 },
{ "Sc", +1, Sc_range16, 18, Sc_range32, 3 },
{ "Sharada", +1, 0, 0, Sharada_range32, 1 },
- { "Shavian", +1, 0, 0, Shavian_range32, 1 },
- { "Siddham", +1, 0, 0, Siddham_range32, 2 },
- { "SignWriting", +1, 0, 0, SignWriting_range32, 3 },
- { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
+ { "Shavian", +1, 0, 0, Shavian_range32, 1 },
+ { "Siddham", +1, 0, 0, Siddham_range32, 2 },
+ { "SignWriting", +1, 0, 0, SignWriting_range32, 3 },
+ { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 },
{ "Sk", +1, Sk_range16, 30, Sk_range32, 1 },
- { "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
+ { "Sm", +1, Sm_range16, 53, Sm_range32, 11 },
{ "So", +1, So_range16, 114, So_range32, 72 },
{ "Sogdian", +1, 0, 0, Sogdian_range32, 1 },
- { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
+ { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 },
{ "Soyombo", +1, 0, 0, Soyombo_range32, 1 },
- { "Sundanese", +1, Sundanese_range16, 2, 0, 0 },
- { "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 },
+ { "Sundanese", +1, Sundanese_range16, 2, 0, 0 },
+ { "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 },
{ "Syriac", +1, Syriac_range16, 4, 0, 0 },
- { "Tagalog", +1, Tagalog_range16, 2, 0, 0 },
- { "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 },
- { "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 },
- { "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 },
- { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 },
- { "Takri", +1, 0, 0, Takri_range32, 2 },
+ { "Tagalog", +1, Tagalog_range16, 2, 0, 0 },
+ { "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 },
+ { "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 },
+ { "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 },
+ { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 },
+ { "Takri", +1, 0, 0, Takri_range32, 2 },
{ "Tamil", +1, Tamil_range16, 16, Tamil_range32, 2 },
{ "Tangsa", +1, 0, 0, Tangsa_range32, 2 },
{ "Tangut", +1, 0, 0, Tangut_range32, 4 },
{ "Telugu", +1, Telugu_range16, 13, 0, 0 },
- { "Thaana", +1, Thaana_range16, 1, 0, 0 },
- { "Thai", +1, Thai_range16, 2, 0, 0 },
- { "Tibetan", +1, Tibetan_range16, 7, 0, 0 },
- { "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 },
- { "Tirhuta", +1, 0, 0, Tirhuta_range32, 2 },
+ { "Thaana", +1, Thaana_range16, 1, 0, 0 },
+ { "Thai", +1, Thai_range16, 2, 0, 0 },
+ { "Tibetan", +1, Tibetan_range16, 7, 0, 0 },
+ { "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 },
+ { "Tirhuta", +1, 0, 0, Tirhuta_range32, 2 },
{ "Toto", +1, 0, 0, Toto_range32, 1 },
- { "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 },
- { "Vai", +1, Vai_range16, 1, 0, 0 },
+ { "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 },
+ { "Vai", +1, Vai_range16, 1, 0, 0 },
{ "Vithkuqi", +1, 0, 0, Vithkuqi_range32, 8 },
{ "Wancho", +1, 0, 0, Wancho_range32, 2 },
- { "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 },
+ { "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 },
{ "Yezidi", +1, 0, 0, Yezidi_range32, 3 },
- { "Yi", +1, Yi_range16, 2, 0, 0 },
- { "Z", +1, Z_range16, 8, 0, 0 },
+ { "Yi", +1, Yi_range16, 2, 0, 0 },
+ { "Z", +1, Z_range16, 8, 0, 0 },
{ "Zanabazar_Square", +1, 0, 0, Zanabazar_Square_range32, 1 },
- { "Zl", +1, Zl_range16, 1, 0, 0 },
- { "Zp", +1, Zp_range16, 1, 0, 0 },
- { "Zs", +1, Zs_range16, 7, 0, 0 },
-};
+ { "Zl", +1, Zl_range16, 1, 0, 0 },
+ { "Zp", +1, Zp_range16, 1, 0, 0 },
+ { "Zs", +1, Zs_range16, 7, 0, 0 },
+};
const int num_unicode_groups = 197;
-
-
-} // namespace re2
-
-
+
+
+} // namespace re2
+
+
diff --git a/contrib/libs/re2/re2/unicode_groups.h b/contrib/libs/re2/re2/unicode_groups.h
index 75f55daa61..7e6857b785 100644
--- a/contrib/libs/re2/re2/unicode_groups.h
+++ b/contrib/libs/re2/re2/unicode_groups.h
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_UNICODE_GROUPS_H_
-#define RE2_UNICODE_GROUPS_H_
-
+#ifndef RE2_UNICODE_GROUPS_H_
+#define RE2_UNICODE_GROUPS_H_
+
// Unicode character groups.
// The codes get split into ranges of 16-bit codes
@@ -18,23 +18,23 @@
// to 16.5 kB of data but make the data harder to use;
// we don't bother.
-#include <stdint.h>
+#include <stdint.h>
-#include "util/util.h"
-#include "util/utf.h"
+#include "util/util.h"
+#include "util/utf.h"
namespace re2 {
struct URange16
{
- uint16_t lo;
- uint16_t hi;
+ uint16_t lo;
+ uint16_t hi;
};
struct URange32
{
- Rune lo;
- Rune hi;
+ Rune lo;
+ Rune hi;
};
struct UGroup
@@ -64,4 +64,4 @@ extern const int num_perl_groups;
} // namespace re2
-#endif // RE2_UNICODE_GROUPS_H_
+#endif // RE2_UNICODE_GROUPS_H_
diff --git a/contrib/libs/re2/re2/walker-inl.h b/contrib/libs/re2/re2/walker-inl.h
index 4d064a0970..0527e530e2 100644
--- a/contrib/libs/re2/re2/walker-inl.h
+++ b/contrib/libs/re2/re2/walker-inl.h
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#ifndef RE2_WALKER_INL_H_
-#define RE2_WALKER_INL_H_
-
+#ifndef RE2_WALKER_INL_H_
+#define RE2_WALKER_INL_H_
+
// Helper class for traversing Regexps without recursion.
// Clients should declare their own subclasses that override
// the PreVisit and PostVisit methods, which are called before
@@ -13,9 +13,9 @@
// Not quite the Visitor pattern, because (among other things)
// the Visitor pattern is recursive.
-#include <stack>
+#include <stack>
-#include "util/logging.h"
+#include "util/logging.h"
#include "re2/regexp.h"
namespace re2 {
@@ -95,8 +95,8 @@ template<typename T> class Regexp::Walker {
T WalkInternal(Regexp* re, T top_arg, bool use_copy);
- Walker(const Walker&) = delete;
- Walker& operator=(const Walker&) = delete;
+ Walker(const Walker&) = delete;
+ Walker& operator=(const Walker&) = delete;
};
template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
@@ -190,7 +190,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
s->child_args = &s->child_arg;
else if (re->nsub_ > 1)
s->child_args = new T[re->nsub_];
- FALLTHROUGH_INTENDED;
+ FALLTHROUGH_INTENDED;
}
default: {
if (re->nsub_ > 0) {
@@ -244,4 +244,4 @@ template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
} // namespace re2
-#endif // RE2_WALKER_INL_H_
+#endif // RE2_WALKER_INL_H_
diff --git a/contrib/libs/re2/util/logging.h b/contrib/libs/re2/util/logging.h
index 5b2217f29c..924e2165f6 100644
--- a/contrib/libs/re2/util/logging.h
+++ b/contrib/libs/re2/util/logging.h
@@ -1,109 +1,109 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_LOGGING_H_
-#define UTIL_LOGGING_H_
-
-// Simplified version of Google's logging.
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <ostream>
-#include <sstream>
-
-#include "util/util.h"
-
-// Debug-only checking.
-#define DCHECK(condition) assert(condition)
-#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
-#define DCHECK_NE(val1, val2) assert((val1) != (val2))
-#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
-#define DCHECK_LT(val1, val2) assert((val1) < (val2))
-#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
-#define DCHECK_GT(val1, val2) assert((val1) > (val2))
-
-// Always-on checking
-#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
-#define CHECK_LT(x, y) CHECK((x) < (y))
-#define CHECK_GT(x, y) CHECK((x) > (y))
-#define CHECK_LE(x, y) CHECK((x) <= (y))
-#define CHECK_GE(x, y) CHECK((x) >= (y))
-#define CHECK_EQ(x, y) CHECK((x) == (y))
-#define CHECK_NE(x, y) CHECK((x) != (y))
-
-#define LOG_INFO LogMessage(__FILE__, __LINE__)
-#define LOG_WARNING LogMessage(__FILE__, __LINE__)
-#define LOG_ERROR LogMessage(__FILE__, __LINE__)
-#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
-#define LOG_QFATAL LOG_FATAL
-
-// It seems that one of the Windows header files defines ERROR as 0.
-#ifdef _WIN32
-#define LOG_0 LOG_INFO
-#endif
-
-#ifdef NDEBUG
-#define LOG_DFATAL LOG_ERROR
-#else
-#define LOG_DFATAL LOG_FATAL
-#endif
-
-#define LOG(severity) LOG_ ## severity.stream()
-
-#define VLOG(x) if((x)>0){}else LOG_INFO.stream()
-
-class LogMessage {
- public:
- LogMessage(const char* file, int line)
- : flushed_(false) {
- stream() << file << ":" << line << ": ";
- }
- void Flush() {
- stream() << "\n";
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_LOGGING_H_
+#define UTIL_LOGGING_H_
+
+// Simplified version of Google's logging.
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ostream>
+#include <sstream>
+
+#include "util/util.h"
+
+// Debug-only checking.
+#define DCHECK(condition) assert(condition)
+#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
+#define DCHECK_NE(val1, val2) assert((val1) != (val2))
+#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
+#define DCHECK_LT(val1, val2) assert((val1) < (val2))
+#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
+#define DCHECK_GT(val1, val2) assert((val1) > (val2))
+
+// Always-on checking
+#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
+#define CHECK_LT(x, y) CHECK((x) < (y))
+#define CHECK_GT(x, y) CHECK((x) > (y))
+#define CHECK_LE(x, y) CHECK((x) <= (y))
+#define CHECK_GE(x, y) CHECK((x) >= (y))
+#define CHECK_EQ(x, y) CHECK((x) == (y))
+#define CHECK_NE(x, y) CHECK((x) != (y))
+
+#define LOG_INFO LogMessage(__FILE__, __LINE__)
+#define LOG_WARNING LogMessage(__FILE__, __LINE__)
+#define LOG_ERROR LogMessage(__FILE__, __LINE__)
+#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
+#define LOG_QFATAL LOG_FATAL
+
+// It seems that one of the Windows header files defines ERROR as 0.
+#ifdef _WIN32
+#define LOG_0 LOG_INFO
+#endif
+
+#ifdef NDEBUG
+#define LOG_DFATAL LOG_ERROR
+#else
+#define LOG_DFATAL LOG_FATAL
+#endif
+
+#define LOG(severity) LOG_ ## severity.stream()
+
+#define VLOG(x) if((x)>0){}else LOG_INFO.stream()
+
+class LogMessage {
+ public:
+ LogMessage(const char* file, int line)
+ : flushed_(false) {
+ stream() << file << ":" << line << ": ";
+ }
+ void Flush() {
+ stream() << "\n";
std::string s = str_.str();
- size_t n = s.size();
- if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
- flushed_ = true;
- }
- ~LogMessage() {
- if (!flushed_) {
- Flush();
- }
- }
- std::ostream& stream() { return str_; }
-
- private:
- bool flushed_;
- std::ostringstream str_;
-
- LogMessage(const LogMessage&) = delete;
- LogMessage& operator=(const LogMessage&) = delete;
-};
-
-// Silence "destructor never returns" warning for ~LogMessageFatal().
-// Since this is a header file, push and then pop to limit the scope.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4722)
-#endif
-
-class LogMessageFatal : public LogMessage {
- public:
- LogMessageFatal(const char* file, int line)
- : LogMessage(file, line) {}
+ size_t n = s.size();
+ if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc
+ flushed_ = true;
+ }
+ ~LogMessage() {
+ if (!flushed_) {
+ Flush();
+ }
+ }
+ std::ostream& stream() { return str_; }
+
+ private:
+ bool flushed_;
+ std::ostringstream str_;
+
+ LogMessage(const LogMessage&) = delete;
+ LogMessage& operator=(const LogMessage&) = delete;
+};
+
+// Silence "destructor never returns" warning for ~LogMessageFatal().
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4722)
+#endif
+
+class LogMessageFatal : public LogMessage {
+ public:
+ LogMessageFatal(const char* file, int line)
+ : LogMessage(file, line) {}
ATTRIBUTE_NORETURN ~LogMessageFatal() {
- Flush();
- abort();
- }
- private:
- LogMessageFatal(const LogMessageFatal&) = delete;
- LogMessageFatal& operator=(const LogMessageFatal&) = delete;
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif // UTIL_LOGGING_H_
+ Flush();
+ abort();
+ }
+ private:
+ LogMessageFatal(const LogMessageFatal&) = delete;
+ LogMessageFatal& operator=(const LogMessageFatal&) = delete;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // UTIL_LOGGING_H_
diff --git a/contrib/libs/re2/util/mix.h b/contrib/libs/re2/util/mix.h
index d85c172ab0..4404a27761 100644
--- a/contrib/libs/re2/util/mix.h
+++ b/contrib/libs/re2/util/mix.h
@@ -1,41 +1,41 @@
-// Copyright 2016 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_MIX_H_
-#define UTIL_MIX_H_
-
-#include <stddef.h>
-#include <limits>
-
-namespace re2 {
-
-// Silence "truncation of constant value" warning for kMul in 32-bit mode.
-// Since this is a header file, push and then pop to limit the scope.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4309)
-#endif
-
-class HashMix {
- public:
- HashMix() : hash_(1) {}
- explicit HashMix(size_t val) : hash_(val + 83) {}
- void Mix(size_t val) {
- static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL);
- hash_ *= kMul;
- hash_ = ((hash_ << 19) |
- (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val;
- }
- size_t get() const { return hash_; }
- private:
- size_t hash_;
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-} // namespace re2
-
-#endif // UTIL_MIX_H_
+// Copyright 2016 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MIX_H_
+#define UTIL_MIX_H_
+
+#include <stddef.h>
+#include <limits>
+
+namespace re2 {
+
+// Silence "truncation of constant value" warning for kMul in 32-bit mode.
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4309)
+#endif
+
+class HashMix {
+ public:
+ HashMix() : hash_(1) {}
+ explicit HashMix(size_t val) : hash_(val + 83) {}
+ void Mix(size_t val) {
+ static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL);
+ hash_ *= kMul;
+ hash_ = ((hash_ << 19) |
+ (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val;
+ }
+ size_t get() const { return hash_; }
+ private:
+ size_t hash_;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+} // namespace re2
+
+#endif // UTIL_MIX_H_
diff --git a/contrib/libs/re2/util/mutex.h b/contrib/libs/re2/util/mutex.h
index 158046bb5c..6619daa4c4 100644
--- a/contrib/libs/re2/util/mutex.h
+++ b/contrib/libs/re2/util/mutex.h
@@ -1,15 +1,15 @@
-// Copyright 2007 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_MUTEX_H_
-#define UTIL_MUTEX_H_
-
-/*
- * A simple mutex wrapper, supporting locks and read-write locks.
- * You should assume the locks are *not* re-entrant.
- */
-
+// Copyright 2007 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MUTEX_H_
+#define UTIL_MUTEX_H_
+
+/*
+ * A simple mutex wrapper, supporting locks and read-write locks.
+ * You should assume the locks are *not* re-entrant.
+ */
+
#ifdef _WIN32
// Requires Windows Vista or Windows Server 2008 at minimum.
#include <windows.h>
@@ -17,55 +17,55 @@
#define MUTEX_IS_WIN32_SRWLOCK
#endif
#else
-#ifndef _POSIX_C_SOURCE
-#define _POSIX_C_SOURCE 200809L
-#endif
-#include <unistd.h>
-#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0
-#define MUTEX_IS_PTHREAD_RWLOCK
-#endif
-#endif
-
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
+#include <unistd.h>
+#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0
+#define MUTEX_IS_PTHREAD_RWLOCK
+#endif
+#endif
+
#if defined(MUTEX_IS_WIN32_SRWLOCK)
typedef SRWLOCK MutexType;
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-#include <pthread.h>
-#include <stdlib.h>
-typedef pthread_rwlock_t MutexType;
-#else
-#include <mutex>
-typedef std::mutex MutexType;
-#endif
-
-namespace re2 {
-
-class Mutex {
- public:
- inline Mutex();
- inline ~Mutex();
- inline void Lock(); // Block if needed until free then acquire exclusively
- inline void Unlock(); // Release a lock acquired via Lock()
- // Note that on systems that don't support read-write locks, these may
- // be implemented as synonyms to Lock() and Unlock(). So you can use
- // these for efficiency, but don't use them anyplace where being able
- // to do shared reads is necessary to avoid deadlock.
- inline void ReaderLock(); // Block until free or shared then acquire a share
- inline void ReaderUnlock(); // Release a read share of this Mutex
- inline void WriterLock() { Lock(); } // Acquire an exclusive lock
- inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
-
- private:
- MutexType mutex_;
-
- // Catch the error of writing Mutex when intending MutexLock.
- Mutex(Mutex *ignored);
-
- Mutex(const Mutex&) = delete;
- Mutex& operator=(const Mutex&) = delete;
-};
-
+#include <pthread.h>
+#include <stdlib.h>
+typedef pthread_rwlock_t MutexType;
+#else
+#include <mutex>
+typedef std::mutex MutexType;
+#endif
+
+namespace re2 {
+
+class Mutex {
+ public:
+ inline Mutex();
+ inline ~Mutex();
+ inline void Lock(); // Block if needed until free then acquire exclusively
+ inline void Unlock(); // Release a lock acquired via Lock()
+ // Note that on systems that don't support read-write locks, these may
+ // be implemented as synonyms to Lock() and Unlock(). So you can use
+ // these for efficiency, but don't use them anyplace where being able
+ // to do shared reads is necessary to avoid deadlock.
+ inline void ReaderLock(); // Block until free or shared then acquire a share
+ inline void ReaderUnlock(); // Release a read share of this Mutex
+ inline void WriterLock() { Lock(); } // Acquire an exclusive lock
+ inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
+
+ private:
+ MutexType mutex_;
+
+ // Catch the error of writing Mutex when intending MutexLock.
+ Mutex(Mutex *ignored);
+
+ Mutex(const Mutex&) = delete;
+ Mutex& operator=(const Mutex&) = delete;
+};
+
#if defined(MUTEX_IS_WIN32_SRWLOCK)
-
+
Mutex::Mutex() : mutex_(SRWLOCK_INIT) { }
Mutex::~Mutex() { }
void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
@@ -75,74 +75,74 @@ void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-#define SAFE_PTHREAD(fncall) \
- do { \
- if ((fncall) != 0) abort(); \
- } while (0)
-
-Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
-Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
-void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
-void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
-void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
-void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
-
-#undef SAFE_PTHREAD
-
-#else
-
-Mutex::Mutex() { }
-Mutex::~Mutex() { }
-void Mutex::Lock() { mutex_.lock(); }
-void Mutex::Unlock() { mutex_.unlock(); }
-void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex.
-void Mutex::ReaderUnlock() { Unlock(); }
-
-#endif
-
-// --------------------------------------------------------------------------
-// Some helper classes
-
-// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
-class MutexLock {
- public:
- explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
- ~MutexLock() { mu_->Unlock(); }
- private:
- Mutex * const mu_;
-
- MutexLock(const MutexLock&) = delete;
- MutexLock& operator=(const MutexLock&) = delete;
-};
-
-// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
-class ReaderMutexLock {
- public:
- explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
- ~ReaderMutexLock() { mu_->ReaderUnlock(); }
- private:
- Mutex * const mu_;
-
- ReaderMutexLock(const ReaderMutexLock&) = delete;
- ReaderMutexLock& operator=(const ReaderMutexLock&) = delete;
-};
-
-class WriterMutexLock {
- public:
- explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
- ~WriterMutexLock() { mu_->WriterUnlock(); }
- private:
- Mutex * const mu_;
-
- WriterMutexLock(const WriterMutexLock&) = delete;
- WriterMutexLock& operator=(const WriterMutexLock&) = delete;
-};
-
-// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
-#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name")
-#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name")
-#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name")
-
-} // namespace re2
-
-#endif // UTIL_MUTEX_H_
+#define SAFE_PTHREAD(fncall) \
+ do { \
+ if ((fncall) != 0) abort(); \
+ } while (0)
+
+Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
+Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
+void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
+void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
+void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
+void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
+
+#undef SAFE_PTHREAD
+
+#else
+
+Mutex::Mutex() { }
+Mutex::~Mutex() { }
+void Mutex::Lock() { mutex_.lock(); }
+void Mutex::Unlock() { mutex_.unlock(); }
+void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex.
+void Mutex::ReaderUnlock() { Unlock(); }
+
+#endif
+
+// --------------------------------------------------------------------------
+// Some helper classes
+
+// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
+class MutexLock {
+ public:
+ explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
+ ~MutexLock() { mu_->Unlock(); }
+ private:
+ Mutex * const mu_;
+
+ MutexLock(const MutexLock&) = delete;
+ MutexLock& operator=(const MutexLock&) = delete;
+};
+
+// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
+class ReaderMutexLock {
+ public:
+ explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
+ ~ReaderMutexLock() { mu_->ReaderUnlock(); }
+ private:
+ Mutex * const mu_;
+
+ ReaderMutexLock(const ReaderMutexLock&) = delete;
+ ReaderMutexLock& operator=(const ReaderMutexLock&) = delete;
+};
+
+class WriterMutexLock {
+ public:
+ explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
+ ~WriterMutexLock() { mu_->WriterUnlock(); }
+ private:
+ Mutex * const mu_;
+
+ WriterMutexLock(const WriterMutexLock&) = delete;
+ WriterMutexLock& operator=(const WriterMutexLock&) = delete;
+};
+
+// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
+#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name")
+#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name")
+#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name")
+
+} // namespace re2
+
+#endif // UTIL_MUTEX_H_
diff --git a/contrib/libs/re2/util/rune.cc b/contrib/libs/re2/util/rune.cc
index 4f625ea380..d3066d2789 100644
--- a/contrib/libs/re2/util/rune.cc
+++ b/contrib/libs/re2/util/rune.cc
@@ -11,10 +11,10 @@
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
-
+
#include <stdarg.h>
#include <string.h>
-
+
#include "util/utf.h"
namespace re2 {
@@ -135,7 +135,7 @@ runetochar(char *str, const Rune *rune)
*/
c = *rune;
if(c <= Rune1) {
- str[0] = static_cast<char>(c);
+ str[0] = static_cast<char>(c);
return 1;
}
@@ -144,7 +144,7 @@ runetochar(char *str, const Rune *rune)
* 0080-07FF => T2 Tx
*/
if(c <= Rune2) {
- str[0] = T2 | static_cast<char>(c >> 1*Bitx);
+ str[0] = T2 | static_cast<char>(c >> 1*Bitx);
str[1] = Tx | (c & Maskx);
return 2;
}
@@ -163,9 +163,9 @@ runetochar(char *str, const Rune *rune)
* 0800-FFFF => T3 Tx Tx
*/
if (c <= Rune3) {
- str[0] = T3 | static_cast<char>(c >> 2*Bitx);
+ str[0] = T3 | static_cast<char>(c >> 2*Bitx);
str[1] = Tx | ((c >> 1*Bitx) & Maskx);
- str[2] = Tx | (c & Maskx);
+ str[2] = Tx | (c & Maskx);
return 3;
}
@@ -173,7 +173,7 @@ runetochar(char *str, const Rune *rune)
* four character sequence (21-bit value)
* 10000-1FFFFF => T4 Tx Tx Tx
*/
- str[0] = T4 | static_cast<char>(c >> 3*Bitx);
+ str[0] = T4 | static_cast<char>(c >> 3*Bitx);
str[1] = Tx | ((c >> 2*Bitx) & Maskx);
str[2] = Tx | ((c >> 1*Bitx) & Maskx);
str[3] = Tx | (c & Maskx);
diff --git a/contrib/libs/re2/util/strutil.cc b/contrib/libs/re2/util/strutil.cc
index fb7e6b1b0c..475216a7e6 100644
--- a/contrib/libs/re2/util/strutil.cc
+++ b/contrib/libs/re2/util/strutil.cc
@@ -2,16 +2,16 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include <stdarg.h>
-#include <stdio.h>
-
-#include "util/strutil.h"
-
-#ifdef _WIN32
-#define snprintf _snprintf
-#define vsnprintf _vsnprintf
-#endif
-
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "util/strutil.h"
+
+#ifdef _WIN32
+#define snprintf _snprintf
+#define vsnprintf _vsnprintf
+#endif
+
namespace re2 {
// ----------------------------------------------------------------------
@@ -19,16 +19,16 @@ namespace re2 {
// Copies 'src' to 'dest', escaping dangerous characters using
// C-style escape sequences. 'src' and 'dest' should not overlap.
// Returns the number of bytes written to 'dest' (not including the \0)
-// or (size_t)-1 if there was insufficient space.
+// or (size_t)-1 if there was insufficient space.
// ----------------------------------------------------------------------
-static size_t CEscapeString(const char* src, size_t src_len,
- char* dest, size_t dest_len) {
+static size_t CEscapeString(const char* src, size_t src_len,
+ char* dest, size_t dest_len) {
const char* src_end = src + src_len;
- size_t used = 0;
+ size_t used = 0;
for (; src < src_end; src++) {
- if (dest_len - used < 2) // space for two-character escape
- return (size_t)-1;
+ if (dest_len - used < 2) // space for two-character escape
+ return (size_t)-1;
unsigned char c = *src;
switch (c) {
@@ -43,9 +43,9 @@ static size_t CEscapeString(const char* src, size_t src_len,
// digit then that digit must be escaped too to prevent it being
// interpreted as part of the character code by C.
if (c < ' ' || c > '~') {
- if (dest_len - used < 5) // space for four-character escape + \0
- return (size_t)-1;
- snprintf(dest + used, 5, "\\%03o", c);
+ if (dest_len - used < 5) // space for four-character escape + \0
+ return (size_t)-1;
+ snprintf(dest + used, 5, "\\%03o", c);
used += 4;
} else {
dest[used++] = c; break;
@@ -54,7 +54,7 @@ static size_t CEscapeString(const char* src, size_t src_len,
}
if (dest_len - used < 1) // make sure that there is room for \0
- return (size_t)-1;
+ return (size_t)-1;
dest[used] = '\0'; // doesn't count towards return value though
return used;
@@ -66,10 +66,10 @@ static size_t CEscapeString(const char* src, size_t src_len,
// C-style escape sequences. 'src' and 'dest' should not overlap.
// ----------------------------------------------------------------------
std::string CEscape(const StringPiece& src) {
- const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
- char* dest = new char[dest_len];
- const size_t used = CEscapeString(src.data(), src.size(),
- dest, dest_len);
+ const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
+ char* dest = new char[dest_len];
+ const size_t used = CEscapeString(src.data(), src.size(),
+ dest, dest_len);
std::string s = std::string(dest, used);
delete[] dest;
return s;
@@ -93,57 +93,57 @@ void PrefixSuccessor(std::string* prefix) {
}
static void StringAppendV(std::string* dst, const char* format, va_list ap) {
- // First try with a small fixed size buffer
- char space[1024];
-
- // It's possible for methods that use a va_list to invalidate
- // the data in it upon use. The fix is to make a copy
- // of the structure before using it and use that copy instead.
- va_list backup_ap;
- va_copy(backup_ap, ap);
- int result = vsnprintf(space, sizeof(space), format, backup_ap);
- va_end(backup_ap);
-
- if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
- // It fit
- dst->append(space, result);
- return;
- }
-
- // Repeatedly increase buffer size until it fits
- int length = sizeof(space);
- while (true) {
- if (result < 0) {
- // Older behavior: just try doubling the buffer size
- length *= 2;
- } else {
- // We need exactly "result+1" characters
- length = result+1;
- }
- char* buf = new char[length];
-
- // Restore the va_list before we use it again
- va_copy(backup_ap, ap);
- result = vsnprintf(buf, length, format, backup_ap);
- va_end(backup_ap);
-
- if ((result >= 0) && (result < length)) {
- // It fit
- dst->append(buf, result);
- delete[] buf;
- return;
- }
- delete[] buf;
- }
-}
-
+ // First try with a small fixed size buffer
+ char space[1024];
+
+ // It's possible for methods that use a va_list to invalidate
+ // the data in it upon use. The fix is to make a copy
+ // of the structure before using it and use that copy instead.
+ va_list backup_ap;
+ va_copy(backup_ap, ap);
+ int result = vsnprintf(space, sizeof(space), format, backup_ap);
+ va_end(backup_ap);
+
+ if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
+ // It fit
+ dst->append(space, result);
+ return;
+ }
+
+ // Repeatedly increase buffer size until it fits
+ int length = sizeof(space);
+ while (true) {
+ if (result < 0) {
+ // Older behavior: just try doubling the buffer size
+ length *= 2;
+ } else {
+ // We need exactly "result+1" characters
+ length = result+1;
+ }
+ char* buf = new char[length];
+
+ // Restore the va_list before we use it again
+ va_copy(backup_ap, ap);
+ result = vsnprintf(buf, length, format, backup_ap);
+ va_end(backup_ap);
+
+ if ((result >= 0) && (result < length)) {
+ // It fit
+ dst->append(buf, result);
+ delete[] buf;
+ return;
+ }
+ delete[] buf;
+ }
+}
+
std::string StringPrintf(const char* format, ...) {
- va_list ap;
- va_start(ap, format);
+ va_list ap;
+ va_start(ap, format);
std::string result;
- StringAppendV(&result, format, ap);
- va_end(ap);
- return result;
-}
-
+ StringAppendV(&result, format, ap);
+ va_end(ap);
+ return result;
+}
+
} // namespace re2
diff --git a/contrib/libs/re2/util/strutil.h b/contrib/libs/re2/util/strutil.h
index a69908a0dd..a8109c847f 100644
--- a/contrib/libs/re2/util/strutil.h
+++ b/contrib/libs/re2/util/strutil.h
@@ -1,21 +1,21 @@
-// Copyright 2016 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_STRUTIL_H_
-#define UTIL_STRUTIL_H_
-
-#include <string>
-
-#include "re2/stringpiece.h"
-#include "util/util.h"
-
-namespace re2 {
-
+// Copyright 2016 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_STRUTIL_H_
+#define UTIL_STRUTIL_H_
+
+#include <string>
+
+#include "re2/stringpiece.h"
+#include "util/util.h"
+
+namespace re2 {
+
std::string CEscape(const StringPiece& src);
void PrefixSuccessor(std::string* prefix);
std::string StringPrintf(const char* format, ...);
+
+} // namespace re2
-} // namespace re2
-
-#endif // UTIL_STRUTIL_H_
+#endif // UTIL_STRUTIL_H_
diff --git a/contrib/libs/re2/util/utf.h b/contrib/libs/re2/util/utf.h
index 85b4297239..b61561389f 100644
--- a/contrib/libs/re2/util/utf.h
+++ b/contrib/libs/re2/util/utf.h
@@ -15,9 +15,9 @@
* in name space re2.
*/
-#ifndef UTIL_UTF_H_
-#define UTIL_UTF_H_
-
+#ifndef UTIL_UTF_H_
+#define UTIL_UTF_H_
+
#include <stdint.h>
namespace re2 {
@@ -41,4 +41,4 @@ char* utfrune(const char*, Rune);
} // namespace re2
-#endif // UTIL_UTF_H_
+#endif // UTIL_UTF_H_
diff --git a/contrib/libs/re2/util/util.h b/contrib/libs/re2/util/util.h
index 56e46c1a33..cfc30316bc 100644
--- a/contrib/libs/re2/util/util.h
+++ b/contrib/libs/re2/util/util.h
@@ -1,12 +1,12 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_UTIL_H_
-#define UTIL_UTIL_H_
-
+// Copyright 2009 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_UTIL_H_
+#define UTIL_UTIL_H_
+
#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
-
+
#ifndef ATTRIBUTE_NORETURN
#if defined(__GNUC__)
#define ATTRIBUTE_NORETURN __attribute__((noreturn))
@@ -16,7 +16,7 @@
#define ATTRIBUTE_NORETURN
#endif
#endif
-
+
#ifndef ATTRIBUTE_UNUSED
#if defined(__GNUC__)
#define ATTRIBUTE_UNUSED __attribute__((unused))
@@ -25,18 +25,18 @@
#endif
#endif
-#ifndef FALLTHROUGH_INTENDED
+#ifndef FALLTHROUGH_INTENDED
#if defined(__clang__)
#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
#elif defined(__GNUC__) && __GNUC__ >= 7
#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
#else
#define FALLTHROUGH_INTENDED do {} while (0)
+#endif
#endif
-#endif
-
-#ifndef NO_THREAD_SAFETY_ANALYSIS
-#define NO_THREAD_SAFETY_ANALYSIS
-#endif
-
-#endif // UTIL_UTIL_H_
+
+#ifndef NO_THREAD_SAFETY_ANALYSIS
+#define NO_THREAD_SAFETY_ANALYSIS
+#endif
+
+#endif // UTIL_UTIL_H_
diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make
index 8072de2eb2..47a1c3e62d 100644
--- a/contrib/libs/re2/ya.make
+++ b/contrib/libs/re2/ya.make
@@ -29,24 +29,24 @@ IF (WITH_VALGRIND)
ENDIF()
SRCS(
- re2/bitstate.cc
- re2/compile.cc
- re2/dfa.cc
- re2/filtered_re2.cc
- re2/mimics_pcre.cc
- re2/nfa.cc
- re2/onepass.cc
- re2/parse.cc
- re2/perl_groups.cc
- re2/prefilter.cc
- re2/prefilter_tree.cc
- re2/prog.cc
- re2/re2.cc
- re2/regexp.cc
- re2/set.cc
- re2/simplify.cc
+ re2/bitstate.cc
+ re2/compile.cc
+ re2/dfa.cc
+ re2/filtered_re2.cc
+ re2/mimics_pcre.cc
+ re2/nfa.cc
+ re2/onepass.cc
+ re2/parse.cc
+ re2/perl_groups.cc
+ re2/prefilter.cc
+ re2/prefilter_tree.cc
+ re2/prog.cc
+ re2/re2.cc
+ re2/regexp.cc
+ re2/set.cc
+ re2/simplify.cc
re2/stringpiece.cc
- re2/tostring.cc
+ re2/tostring.cc
re2/unicode_casefold.cc
re2/unicode_groups.cc
util/rune.cc