diff options
author | Andrey Khalyavin <halyavin@gmail.com> | 2022-02-10 16:46:29 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:29 +0300 |
commit | f773626848a7c7456803654292e716b83d69cc12 (patch) | |
tree | db052dfcf9134f492bdbb962cb6c16cea58e1ed3 /contrib/libs/re2 | |
parent | f43ab775d197d300eb67bd4497632b909cd7c2a5 (diff) | |
download | ydb-f773626848a7c7456803654292e716b83d69cc12.tar.gz |
Restoring authorship annotation for Andrey Khalyavin <halyavin@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/re2')
42 files changed, 8386 insertions, 8386 deletions
diff --git a/contrib/libs/re2/re2/bitmap256.h b/contrib/libs/re2/re2/bitmap256.h index 4899379e4d..2a4e47e090 100644 --- a/contrib/libs/re2/re2/bitmap256.h +++ b/contrib/libs/re2/re2/bitmap256.h @@ -1,117 +1,117 @@ -// Copyright 2016 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef RE2_BITMAP256_H_ -#define RE2_BITMAP256_H_ - -#ifdef _MSC_VER -#include <intrin.h> -#endif -#include <stdint.h> -#include <string.h> - -#include "util/util.h" -#include "util/logging.h" - -namespace re2 { - -class Bitmap256 { - public: - Bitmap256() { +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_BITMAP256_H_ +#define RE2_BITMAP256_H_ + +#ifdef _MSC_VER +#include <intrin.h> +#endif +#include <stdint.h> +#include <string.h> + +#include "util/util.h" +#include "util/logging.h" + +namespace re2 { + +class Bitmap256 { + public: + Bitmap256() { Clear(); } // Clears all of the bits. void Clear() { - memset(words_, 0, sizeof words_); - } - - // Tests the bit with index c. - bool Test(int c) const { - DCHECK_GE(c, 0); - DCHECK_LE(c, 255); - + memset(words_, 0, sizeof words_); + } + + // Tests the bit with index c. + bool Test(int c) const { + DCHECK_GE(c, 0); + DCHECK_LE(c, 255); + return (words_[c / 64] & (uint64_t{1} << (c % 64))) != 0; - } - - // Sets the bit with index c. - void Set(int c) { - DCHECK_GE(c, 0); - DCHECK_LE(c, 255); - + } + + // Sets the bit with index c. + void Set(int c) { + DCHECK_GE(c, 0); + DCHECK_LE(c, 255); + words_[c / 64] |= (uint64_t{1} << (c % 64)); - } - - // Finds the next non-zero bit with index >= c. - // Returns -1 if no such bit exists. - int FindNextSetBit(int c) const; - - private: - // Finds the least significant non-zero bit in n. - static int FindLSBSet(uint64_t n) { - DCHECK_NE(n, 0); -#if defined(__GNUC__) - return __builtin_ctzll(n); -#elif defined(_MSC_VER) && defined(_M_X64) - unsigned long c; - _BitScanForward64(&c, n); - return static_cast<int>(c); -#elif defined(_MSC_VER) && defined(_M_IX86) - unsigned long c; - if (static_cast<uint32_t>(n) != 0) { - _BitScanForward(&c, static_cast<uint32_t>(n)); - return static_cast<int>(c); - } else { - _BitScanForward(&c, static_cast<uint32_t>(n >> 32)); - return static_cast<int>(c) + 32; - } -#else - int c = 63; - for (int shift = 1 << 5; shift != 0; shift >>= 1) { - uint64_t word = n << shift; - if (word != 0) { - n = word; - c -= shift; - } - } - return c; -#endif - } - - uint64_t words_[4]; -}; - -int Bitmap256::FindNextSetBit(int c) const { - DCHECK_GE(c, 0); - DCHECK_LE(c, 255); - - // Check the word that contains the bit. Mask out any lower bits. - int i = c / 64; + } + + // Finds the next non-zero bit with index >= c. + // Returns -1 if no such bit exists. + int FindNextSetBit(int c) const; + + private: + // Finds the least significant non-zero bit in n. + static int FindLSBSet(uint64_t n) { + DCHECK_NE(n, 0); +#if defined(__GNUC__) + return __builtin_ctzll(n); +#elif defined(_MSC_VER) && defined(_M_X64) + unsigned long c; + _BitScanForward64(&c, n); + return static_cast<int>(c); +#elif defined(_MSC_VER) && defined(_M_IX86) + unsigned long c; + if (static_cast<uint32_t>(n) != 0) { + _BitScanForward(&c, static_cast<uint32_t>(n)); + return static_cast<int>(c); + } else { + _BitScanForward(&c, static_cast<uint32_t>(n >> 32)); + return static_cast<int>(c) + 32; + } +#else + int c = 63; + for (int shift = 1 << 5; shift != 0; shift >>= 1) { + uint64_t word = n << shift; + if (word != 0) { + n = word; + c -= shift; + } + } + return c; +#endif + } + + uint64_t words_[4]; +}; + +int Bitmap256::FindNextSetBit(int c) const { + DCHECK_GE(c, 0); + DCHECK_LE(c, 255); + + // Check the word that contains the bit. Mask out any lower bits. + int i = c / 64; uint64_t word = words_[i] & (~uint64_t{0} << (c % 64)); - if (word != 0) - return (i * 64) + FindLSBSet(word); - - // Check any following words. - i++; - switch (i) { - case 1: - if (words_[1] != 0) - return (1 * 64) + FindLSBSet(words_[1]); - FALLTHROUGH_INTENDED; - case 2: - if (words_[2] != 0) - return (2 * 64) + FindLSBSet(words_[2]); - FALLTHROUGH_INTENDED; - case 3: - if (words_[3] != 0) - return (3 * 64) + FindLSBSet(words_[3]); - FALLTHROUGH_INTENDED; - default: - return -1; - } -} - -} // namespace re2 - -#endif // RE2_BITMAP256_H_ + if (word != 0) + return (i * 64) + FindLSBSet(word); + + // Check any following words. + i++; + switch (i) { + case 1: + if (words_[1] != 0) + return (1 * 64) + FindLSBSet(words_[1]); + FALLTHROUGH_INTENDED; + case 2: + if (words_[2] != 0) + return (2 * 64) + FindLSBSet(words_[2]); + FALLTHROUGH_INTENDED; + case 3: + if (words_[3] != 0) + return (3 * 64) + FindLSBSet(words_[3]); + FALLTHROUGH_INTENDED; + default: + return -1; + } +} + +} // namespace re2 + +#endif // RE2_BITMAP256_H_ diff --git a/contrib/libs/re2/re2/bitstate.cc b/contrib/libs/re2/re2/bitstate.cc index 877e548234..f1020e635d 100644 --- a/contrib/libs/re2/re2/bitstate.cc +++ b/contrib/libs/re2/re2/bitstate.cc @@ -17,13 +17,13 @@ // SearchBitState is a fast replacement for the NFA code on small // regexps and texts when SearchOnePass cannot be used. -#include <stddef.h> -#include <stdint.h> -#include <string.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> #include <limits> #include <utility> - -#include "util/logging.h" + +#include "util/logging.h" #include "re2/pod_array.h" #include "re2/prog.h" #include "re2/regexp.h" @@ -155,7 +155,7 @@ bool BitState::TrySearch(int id0, const char* p0) { cap_[prog_->inst(-id)->cap()] = p; continue; } - + if (rle > 0) { p += rle; // Revivify job on stack. @@ -171,10 +171,10 @@ bool BitState::TrySearch(int id0, const char* p0) { LOG(DFATAL) << "Unexpected opcode: " << ip->opcode(); return false; - case kInstFail: + case kInstFail: break; - - case kInstAltMatch: + + case kInstAltMatch: if (ip->greedy(prog_)) { // out1 is the Match instruction. id = ip->out1(); @@ -194,40 +194,40 @@ bool BitState::TrySearch(int id0, const char* p0) { int c = -1; if (p < end) c = *p & 0xFF; - if (!ip->Matches(c)) - goto Next; - + if (!ip->Matches(c)) + goto Next; + if (ip->hint() != 0) Push(id+ip->hint(), p); // try the next when we're done - id = ip->out(); - p++; - goto CheckAndLoop; + id = ip->out(); + p++; + goto CheckAndLoop; } case kInstCapture: if (!ip->last()) Push(id+1, p); // try the next when we're done - + if (0 <= ip->cap() && ip->cap() < cap_.size()) { // Capture p to register, but save old value first. Push(-id, cap_[ip->cap()]); // undo when we're done cap_[ip->cap()] = p; } - + id = ip->out(); goto CheckAndLoop; - + case kInstEmptyWidth: if (ip->empty() & ~Prog::EmptyFlags(context_, p)) - goto Next; - - if (!ip->last()) + goto Next; + + if (!ip->last()) Push(id+1, p); // try the next when we're done id = ip->out(); goto CheckAndLoop; case kInstNop: - if (!ip->last()) + if (!ip->last()) Push(id+1, p); // try the next when we're done id = ip->out(); @@ -241,7 +241,7 @@ bool BitState::TrySearch(int id0, const char* p0) { case kInstMatch: { if (endmatch_ && p != end) - goto Next; + goto Next; // We found a match. If the caller doesn't care // where the match is, no point going further. @@ -256,9 +256,9 @@ bool BitState::TrySearch(int id0, const char* p0) { if (submatch_[0].data() == NULL || (longest_ && p > submatch_[0].data() + submatch_[0].size())) { for (int i = 0; i < nsubmatch_; i++) - submatch_[i] = - StringPiece(cap_[2 * i], - static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i])); + submatch_[i] = + StringPiece(cap_[2 * i], + static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i])); } // If going for first match, we're done. @@ -303,7 +303,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, submatch_ = submatch; nsubmatch_ = nsubmatch; for (int i = 0; i < nsubmatch_; i++) - submatch_[i] = StringPiece(); + submatch_[i] = StringPiece(); // Allocate scratch space. int nvisited = prog_->list_count() * static_cast<int>(text.size()+1); @@ -337,10 +337,10 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context, // Try to use prefix accel (e.g. memchr) to skip ahead. if (p < etext && prog_->can_prefix_accel()) { p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p)); - if (p == NULL) + if (p == NULL) p = etext; - } - + } + cap_[0] = p; if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. return true; diff --git a/contrib/libs/re2/re2/compile.cc b/contrib/libs/re2/re2/compile.cc index 61d801a630..0baa344430 100644 --- a/contrib/libs/re2/re2/compile.cc +++ b/contrib/libs/re2/re2/compile.cc @@ -8,16 +8,16 @@ // This file's external interface is just Regexp::CompileToProg. // The Compiler class defined in this file is private. -#include <stdint.h> -#include <string.h> -#include <unordered_map> -#include <utility> - -#include "util/logging.h" -#include "util/utf.h" +#include <stdint.h> +#include <string.h> +#include <unordered_map> +#include <utility> + +#include "util/logging.h" +#include "util/utf.h" #include "re2/pod_array.h" #include "re2/prog.h" -#include "re2/re2.h" +#include "re2/re2.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -77,7 +77,7 @@ static const PatchList kNullPatchList = {0, 0}; // Compiled program fragment. struct Frag { - uint32_t begin; + uint32_t begin; PatchList end; bool nullable; @@ -89,7 +89,7 @@ struct Frag { // Input encodings. enum Encoding { kEncodingUTF8 = 1, // UTF-8 (0-10FFFF) - kEncodingLatin1, // Latin-1 (0-FF) + kEncodingLatin1, // Latin-1 (0-FF) }; class Compiler : public Regexp::Walker<Frag> { @@ -101,7 +101,7 @@ class Compiler : public Regexp::Walker<Frag> { // Caller is responsible for deleting Prog when finished with it. // If reversed is true, compiles for walking over the input // string backward (reverses all concatenations). - static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem); + static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem); // Compiles alternation of all the re to a new Prog. // Each re has a match with an id equal to its index in the vector. @@ -137,7 +137,7 @@ class Compiler : public Regexp::Walker<Frag> { Frag NoMatch(); // Returns a fragment that matches the empty string. - Frag Match(int32_t id); + Frag Match(int32_t id); // Returns a no-op fragment. Frag Nop(); @@ -165,28 +165,28 @@ class Compiler : public Regexp::Walker<Frag> { void Add_80_10ffff(); // New suffix that matches the byte range lo-hi, then goes to next. - int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); - int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); - - // Returns true iff the suffix is cached. - bool IsCachedRuneByteSuffix(int id); + int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); + int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next); + // Returns true iff the suffix is cached. + bool IsCachedRuneByteSuffix(int id); + // Adds a suffix to alternation. void AddSuffix(int id); - // Adds a suffix to the trie starting from the given root node. - // Returns zero iff allocating an instruction fails. Otherwise, returns - // the current root node, which might be different from what was given. - int AddSuffixRecursive(int root, int id); - - // Finds the trie node for the given suffix. Returns a Frag in order to + // Adds a suffix to the trie starting from the given root node. + // Returns zero iff allocating an instruction fails. Otherwise, returns + // the current root node, which might be different from what was given. + int AddSuffixRecursive(int root, int id); + + // Finds the trie node for the given suffix. Returns a Frag in order to // distinguish between pointing at the root node directly (end.head == 0) // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively). - Frag FindByteRange(int root, int id); - - // Compares two ByteRanges and returns true iff they are equal. - bool ByteRangeEqual(int id1, int id2); - + Frag FindByteRange(int root, int id); + + // Compares two ByteRanges and returns true iff they are equal. + bool ByteRangeEqual(int id1, int id2); + // Returns the alternation of all the added suffixes. Frag EndRange(); @@ -209,15 +209,15 @@ class Compiler : public Regexp::Walker<Frag> { int ninst_; // Number of instructions used. int max_ninst_; // Maximum number of instructions. - int64_t max_mem_; // Total memory budget. + int64_t max_mem_; // Total memory budget. - std::unordered_map<uint64_t, int> rune_cache_; + std::unordered_map<uint64_t, int> rune_cache_; Frag rune_range_; RE2::Anchor anchor_; // anchor mode for RE2::Set - Compiler(const Compiler&) = delete; - Compiler& operator=(const Compiler&) = delete; + Compiler(const Compiler&) = delete; + Compiler& operator=(const Compiler&) = delete; }; Compiler::Compiler() { @@ -365,8 +365,8 @@ Frag Compiler::Star(Frag a, bool nongreedy) { // Given a fragment for a, returns a fragment for a? or a?? (if nongreedy) Frag Compiler::Quest(Frag a, bool nongreedy) { - if (IsNoMatch(a)) - return Nop(); + if (IsNoMatch(a)) + return Nop(); int id = AllocInst(1); if (id < 0) return NoMatch(); @@ -400,7 +400,7 @@ Frag Compiler::Nop() { } // Returns a fragment that signals a match. -Frag Compiler::Match(int32_t match_id) { +Frag Compiler::Match(int32_t match_id) { int id = AllocInst(1); if (id < 0) return NoMatch(); @@ -419,8 +419,8 @@ Frag Compiler::EmptyWidth(EmptyOp empty) { // Given a fragment a, returns a fragment with capturing parens around a. Frag Compiler::Capture(Frag a, int n) { - if (IsNoMatch(a)) - return NoMatch(); + if (IsNoMatch(a)) + return NoMatch(); int id = AllocInst(2); if (id < 0) return NoMatch(); @@ -434,7 +434,7 @@ Frag Compiler::Capture(Frag a, int n) { // A Rune is a name for a Unicode code point. // Returns maximum rune encoded by UTF-8 sequence of length len. static int MaxRune(int len) { - int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax) + int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax) if (len == 1) b = 7; else @@ -456,7 +456,7 @@ void Compiler::BeginRange() { rune_range_.end = kNullPatchList; } -int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, +int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next) { Frag f = ByteRange(lo, hi, foldcase); if (next != 0) { @@ -467,18 +467,18 @@ int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, return f.begin; } -static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, - int next) { - return (uint64_t)next << 17 | - (uint64_t)lo << 9 | - (uint64_t)hi << 1 | - (uint64_t)foldcase; -} - -int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, - int next) { - uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); - std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key); +static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, + int next) { + return (uint64_t)next << 17 | + (uint64_t)lo << 9 | + (uint64_t)hi << 1 | + (uint64_t)foldcase; +} + +int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, + int next) { + uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); + std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key); if (it != rune_cache_.end()) return it->second; int id = UncachedRuneByteSuffix(lo, hi, foldcase, next); @@ -486,31 +486,31 @@ int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, return id; } -bool Compiler::IsCachedRuneByteSuffix(int id) { - uint8_t lo = inst_[id].lo_; - uint8_t hi = inst_[id].hi_; - bool foldcase = inst_[id].foldcase() != 0; - int next = inst_[id].out(); - - uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); - return rune_cache_.find(key) != rune_cache_.end(); -} - +bool Compiler::IsCachedRuneByteSuffix(int id) { + uint8_t lo = inst_[id].lo_; + uint8_t hi = inst_[id].hi_; + bool foldcase = inst_[id].foldcase() != 0; + int next = inst_[id].out(); + + uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next); + return rune_cache_.find(key) != rune_cache_.end(); +} + void Compiler::AddSuffix(int id) { - if (failed_) - return; - + if (failed_) + return; + if (rune_range_.begin == 0) { rune_range_.begin = id; return; } - if (encoding_ == kEncodingUTF8) { - // Build a trie in order to reduce fanout. - rune_range_.begin = AddSuffixRecursive(rune_range_.begin, id); - return; - } - + if (encoding_ == kEncodingUTF8) { + // Build a trie in order to reduce fanout. + rune_range_.begin = AddSuffixRecursive(rune_range_.begin, id); + return; + } + int alt = AllocInst(1); if (alt < 0) { rune_range_.begin = 0; @@ -520,102 +520,102 @@ void Compiler::AddSuffix(int id) { rune_range_.begin = alt; } -int Compiler::AddSuffixRecursive(int root, int id) { - DCHECK(inst_[root].opcode() == kInstAlt || - inst_[root].opcode() == kInstByteRange); - - Frag f = FindByteRange(root, id); - if (IsNoMatch(f)) { - int alt = AllocInst(1); - if (alt < 0) - return 0; - inst_[alt].InitAlt(root, id); - return alt; - } - - int br; +int Compiler::AddSuffixRecursive(int root, int id) { + DCHECK(inst_[root].opcode() == kInstAlt || + inst_[root].opcode() == kInstByteRange); + + Frag f = FindByteRange(root, id); + if (IsNoMatch(f)) { + int alt = AllocInst(1); + if (alt < 0) + return 0; + inst_[alt].InitAlt(root, id); + return alt; + } + + int br; if (f.end.head == 0) - br = root; + br = root; else if (f.end.head&1) - br = inst_[f.begin].out1(); - else - br = inst_[f.begin].out(); - - if (IsCachedRuneByteSuffix(br)) { - // We can't fiddle with cached suffixes, so make a clone of the head. - int byterange = AllocInst(1); - if (byterange < 0) - return 0; - inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(), - inst_[br].foldcase(), inst_[br].out()); - - // Ensure that the parent points to the clone, not to the original. - // Note that this could leave the head unreachable except via the cache. - br = byterange; + br = inst_[f.begin].out1(); + else + br = inst_[f.begin].out(); + + if (IsCachedRuneByteSuffix(br)) { + // We can't fiddle with cached suffixes, so make a clone of the head. + int byterange = AllocInst(1); + if (byterange < 0) + return 0; + inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(), + inst_[br].foldcase(), inst_[br].out()); + + // Ensure that the parent points to the clone, not to the original. + // Note that this could leave the head unreachable except via the cache. + br = byterange; if (f.end.head == 0) - root = br; + root = br; else if (f.end.head&1) - inst_[f.begin].out1_ = br; - else - inst_[f.begin].set_out(br); - } - - int out = inst_[id].out(); - if (!IsCachedRuneByteSuffix(id)) { - // The head should be the instruction most recently allocated, so free it - // instead of leaving it unreachable. + inst_[f.begin].out1_ = br; + else + inst_[f.begin].set_out(br); + } + + int out = inst_[id].out(); + if (!IsCachedRuneByteSuffix(id)) { + // The head should be the instruction most recently allocated, so free it + // instead of leaving it unreachable. DCHECK_EQ(id, ninst_-1); - inst_[id].out_opcode_ = 0; - inst_[id].out1_ = 0; + inst_[id].out_opcode_ = 0; + inst_[id].out1_ = 0; ninst_--; - } - - out = AddSuffixRecursive(inst_[br].out(), out); - if (out == 0) - return 0; - - inst_[br].set_out(out); - return root; -} - -bool Compiler::ByteRangeEqual(int id1, int id2) { - return inst_[id1].lo() == inst_[id2].lo() && - inst_[id1].hi() == inst_[id2].hi() && - inst_[id1].foldcase() == inst_[id2].foldcase(); -} - -Frag Compiler::FindByteRange(int root, int id) { - if (inst_[root].opcode() == kInstByteRange) { - if (ByteRangeEqual(root, id)) + } + + out = AddSuffixRecursive(inst_[br].out(), out); + if (out == 0) + return 0; + + inst_[br].set_out(out); + return root; +} + +bool Compiler::ByteRangeEqual(int id1, int id2) { + return inst_[id1].lo() == inst_[id2].lo() && + inst_[id1].hi() == inst_[id2].hi() && + inst_[id1].foldcase() == inst_[id2].foldcase(); +} + +Frag Compiler::FindByteRange(int root, int id) { + if (inst_[root].opcode() == kInstByteRange) { + if (ByteRangeEqual(root, id)) return Frag(root, kNullPatchList, false); - else - return NoMatch(); - } - - while (inst_[root].opcode() == kInstAlt) { - int out1 = inst_[root].out1(); - if (ByteRangeEqual(out1, id)) + else + return NoMatch(); + } + + while (inst_[root].opcode() == kInstAlt) { + int out1 = inst_[root].out1(); + if (ByteRangeEqual(out1, id)) return Frag(root, PatchList::Mk((root << 1) | 1), false); - - // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't - // what we're looking for, then we can stop immediately. Unfortunately, we - // can't short-circuit the search in reverse mode. - if (!reversed_) - return NoMatch(); - - int out = inst_[root].out(); - if (inst_[out].opcode() == kInstAlt) - root = out; - else if (ByteRangeEqual(out, id)) + + // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't + // what we're looking for, then we can stop immediately. Unfortunately, we + // can't short-circuit the search in reverse mode. + if (!reversed_) + return NoMatch(); + + int out = inst_[root].out(); + if (inst_[out].opcode() == kInstAlt) + root = out; + else if (ByteRangeEqual(out, id)) return Frag(root, PatchList::Mk(root << 1), false); - else - return NoMatch(); - } - - LOG(DFATAL) << "should never happen"; - return NoMatch(); -} - + else + return NoMatch(); + } + + LOG(DFATAL) << "should never happen"; + return NoMatch(); +} + Frag Compiler::EndRange() { return rune_range_; } @@ -639,13 +639,13 @@ void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) { } void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) { - // Latin-1 is easy: runes *are* bytes. + // Latin-1 is easy: runes *are* bytes. if (lo > hi || lo > 0xFF) return; if (hi > 0xFF) hi = 0xFF; - AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), - static_cast<uint8_t>(hi), foldcase, 0)); + AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), + static_cast<uint8_t>(hi), foldcase, 0)); } void Compiler::Add_80_10ffff() { @@ -710,14 +710,14 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { // ASCII range is always a special case. if (hi < Runeself) { - AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), - static_cast<uint8_t>(hi), foldcase, 0)); + AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo), + static_cast<uint8_t>(hi), foldcase, 0)); return; } // Split range into sections that agree on leading bytes. for (int i = 1; i < UTFmax; i++) { - uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence + uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence if ((lo & ~m) != (hi & ~m)) { if ((lo & m) != 0) { AddRuneRangeUTF8(lo, lo|m, foldcase); @@ -733,55 +733,55 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { } // Finally. Generate byte matching equivalent for lo-hi. - uint8_t ulo[UTFmax], uhi[UTFmax]; + uint8_t ulo[UTFmax], uhi[UTFmax]; int n = runetochar(reinterpret_cast<char*>(ulo), &lo); int m = runetochar(reinterpret_cast<char*>(uhi), &hi); (void)m; // USED(m) DCHECK_EQ(n, m); - // The logic below encodes this thinking: - // - // 1. When we have built the whole suffix, we know that it cannot - // possibly be a suffix of anything longer: in forward mode, nothing - // else can occur before the leading byte; in reverse mode, nothing - // else can occur after the last continuation byte or else the leading - // byte would have to change. Thus, there is no benefit to caching - // the first byte of the suffix whereas there is a cost involved in - // cloning it if it begins a common prefix, which is fairly likely. - // - // 2. Conversely, the last byte of the suffix cannot possibly be a - // prefix of anything because next == 0, so we will never want to - // clone it, but it is fairly likely to be a common suffix. Perhaps - // more so in reverse mode than in forward mode because the former is - // "converging" towards lower entropy, but caching is still worthwhile - // for the latter in cases such as 80-BF. - // - // 3. Handling the bytes between the first and the last is less - // straightforward and, again, the approach depends on whether we are - // "converging" towards lower entropy: in forward mode, a single byte - // is unlikely to be part of a common suffix whereas a byte range - // is more likely so; in reverse mode, a byte range is unlikely to - // be part of a common suffix whereas a single byte is more likely - // so. The same benefit versus cost argument applies here. + // The logic below encodes this thinking: + // + // 1. When we have built the whole suffix, we know that it cannot + // possibly be a suffix of anything longer: in forward mode, nothing + // else can occur before the leading byte; in reverse mode, nothing + // else can occur after the last continuation byte or else the leading + // byte would have to change. Thus, there is no benefit to caching + // the first byte of the suffix whereas there is a cost involved in + // cloning it if it begins a common prefix, which is fairly likely. + // + // 2. Conversely, the last byte of the suffix cannot possibly be a + // prefix of anything because next == 0, so we will never want to + // clone it, but it is fairly likely to be a common suffix. Perhaps + // more so in reverse mode than in forward mode because the former is + // "converging" towards lower entropy, but caching is still worthwhile + // for the latter in cases such as 80-BF. + // + // 3. Handling the bytes between the first and the last is less + // straightforward and, again, the approach depends on whether we are + // "converging" towards lower entropy: in forward mode, a single byte + // is unlikely to be part of a common suffix whereas a byte range + // is more likely so; in reverse mode, a byte range is unlikely to + // be part of a common suffix whereas a single byte is more likely + // so. The same benefit versus cost argument applies here. int id = 0; if (reversed_) { - for (int i = 0; i < n; i++) { - // In reverse UTF-8 mode: cache the leading byte; don't cache the last - // continuation byte; cache anything else iff it's a single byte (XX-XX). - if (i == 0 || (ulo[i] == uhi[i] && i != n-1)) - id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id); - else - id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id); - } + for (int i = 0; i < n; i++) { + // In reverse UTF-8 mode: cache the leading byte; don't cache the last + // continuation byte; cache anything else iff it's a single byte (XX-XX). + if (i == 0 || (ulo[i] == uhi[i] && i != n-1)) + id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id); + else + id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id); + } } else { - for (int i = n-1; i >= 0; i--) { - // In forward UTF-8 mode: don't cache the leading byte; cache the last - // continuation byte; cache anything else iff it's a byte range (XX-YY). - if (i == n-1 || (ulo[i] < uhi[i] && i != 0)) - id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id); - else - id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id); - } + for (int i = n-1; i >= 0; i--) { + // In forward UTF-8 mode: don't cache the leading byte; cache the last + // continuation byte; cache anything else iff it's a byte range (XX-YY). + if (i == n-1 || (ulo[i] < uhi[i] && i != 0)) + id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id); + else + id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id); + } } AddSuffix(id); } @@ -807,13 +807,13 @@ Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) { if (failed_) *stop = true; - return Frag(); // not used by caller + return Frag(); // not used by caller } Frag Compiler::Literal(Rune r, bool foldcase) { switch (encoding_) { default: - return Frag(); + return Frag(); case kEncodingLatin1: return ByteRange(r, r, foldcase); @@ -821,11 +821,11 @@ Frag Compiler::Literal(Rune r, bool foldcase) { case kEncodingUTF8: { if (r < Runeself) // Make common case fast. return ByteRange(r, r, foldcase); - uint8_t buf[UTFmax]; + uint8_t buf[UTFmax]; int n = runetochar(reinterpret_cast<char*>(buf), &r); - Frag f = ByteRange((uint8_t)buf[0], buf[0], false); + Frag f = ByteRange((uint8_t)buf[0], buf[0], false); for (int i = 1; i < n; i++) - f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false)); + f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false)); return f; } } @@ -877,16 +877,16 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, } case kRegexpStar: - return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); + return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); case kRegexpPlus: - return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); + return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); case kRegexpQuest: - return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); + return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0); case kRegexpLiteral: - return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0); + return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0); case kRegexpLiteralString: { // Concatenation of literals. @@ -894,8 +894,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, return Nop(); Frag f; for (int i = 0; i < re->nrunes(); i++) { - Frag f1 = Literal(re->runes()[i], - (re->parse_flags()&Regexp::FoldCase) != 0); + Frag f1 = Literal(re->runes()[i], + (re->parse_flags()&Regexp::FoldCase) != 0); if (i == 0) f = f1; else @@ -940,8 +940,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, // If this range contains all of A-Za-z or none of it, // the fold flag is unnecessary; don't bother. bool fold = foldascii; - if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo || - ('Z' < i->lo && i->hi < 'a')) + if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo || + ('Z' < i->lo && i->hi < 'a')) fold = false; AddRuneRange(i->lo, i->hi, fold); @@ -982,109 +982,109 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, // Is this regexp required to start at the beginning of the text? // Only approximate; can return false for complicated regexps like (\Aa|\Ab), // but handles (\A(a|b)). Could use the Walker to write a more exact one. -static bool IsAnchorStart(Regexp** pre, int depth) { - Regexp* re = *pre; - Regexp* sub; - // The depth limit makes sure that we don't overflow - // the stack on a deeply nested regexp. As the comment - // above says, IsAnchorStart is conservative, so returning - // a false negative is okay. The exact limit is somewhat arbitrary. - if (re == NULL || depth >= 4) - return false; - switch (re->op()) { - default: - break; - case kRegexpConcat: - if (re->nsub() > 0) { - sub = re->sub()[0]->Incref(); - if (IsAnchorStart(&sub, depth+1)) { +static bool IsAnchorStart(Regexp** pre, int depth) { + Regexp* re = *pre; + Regexp* sub; + // The depth limit makes sure that we don't overflow + // the stack on a deeply nested regexp. As the comment + // above says, IsAnchorStart is conservative, so returning + // a false negative is okay. The exact limit is somewhat arbitrary. + if (re == NULL || depth >= 4) + return false; + switch (re->op()) { + default: + break; + case kRegexpConcat: + if (re->nsub() > 0) { + sub = re->sub()[0]->Incref(); + if (IsAnchorStart(&sub, depth+1)) { PODArray<Regexp*> subcopy(re->nsub()); - subcopy[0] = sub; // already have reference - for (int i = 1; i < re->nsub(); i++) - subcopy[i] = re->sub()[i]->Incref(); + subcopy[0] = sub; // already have reference + for (int i = 1; i < re->nsub(); i++) + subcopy[i] = re->sub()[i]->Incref(); *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags()); - re->Decref(); - return true; + re->Decref(); + return true; } - sub->Decref(); - } - break; - case kRegexpCapture: - sub = re->sub()[0]->Incref(); - if (IsAnchorStart(&sub, depth+1)) { - *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); + sub->Decref(); + } + break; + case kRegexpCapture: + sub = re->sub()[0]->Incref(); + if (IsAnchorStart(&sub, depth+1)) { + *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); re->Decref(); return true; - } - sub->Decref(); - break; - case kRegexpBeginText: - *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); - re->Decref(); - return true; + } + sub->Decref(); + break; + case kRegexpBeginText: + *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); + re->Decref(); + return true; } - return false; + return false; } // Is this regexp required to start at the end of the text? // Only approximate; can return false for complicated regexps like (a\z|b\z), // but handles ((a|b)\z). Could use the Walker to write a more exact one. -static bool IsAnchorEnd(Regexp** pre, int depth) { - Regexp* re = *pre; - Regexp* sub; - // The depth limit makes sure that we don't overflow - // the stack on a deeply nested regexp. As the comment - // above says, IsAnchorEnd is conservative, so returning - // a false negative is okay. The exact limit is somewhat arbitrary. - if (re == NULL || depth >= 4) - return false; - switch (re->op()) { - default: - break; - case kRegexpConcat: - if (re->nsub() > 0) { - sub = re->sub()[re->nsub() - 1]->Incref(); - if (IsAnchorEnd(&sub, depth+1)) { +static bool IsAnchorEnd(Regexp** pre, int depth) { + Regexp* re = *pre; + Regexp* sub; + // The depth limit makes sure that we don't overflow + // the stack on a deeply nested regexp. As the comment + // above says, IsAnchorEnd is conservative, so returning + // a false negative is okay. The exact limit is somewhat arbitrary. + if (re == NULL || depth >= 4) + return false; + switch (re->op()) { + default: + break; + case kRegexpConcat: + if (re->nsub() > 0) { + sub = re->sub()[re->nsub() - 1]->Incref(); + if (IsAnchorEnd(&sub, depth+1)) { PODArray<Regexp*> subcopy(re->nsub()); - subcopy[re->nsub() - 1] = sub; // already have reference - for (int i = 0; i < re->nsub() - 1; i++) - subcopy[i] = re->sub()[i]->Incref(); + subcopy[re->nsub() - 1] = sub; // already have reference + for (int i = 0; i < re->nsub() - 1; i++) + subcopy[i] = re->sub()[i]->Incref(); *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags()); - re->Decref(); - return true; + re->Decref(); + return true; } - sub->Decref(); - } - break; - case kRegexpCapture: - sub = re->sub()[0]->Incref(); - if (IsAnchorEnd(&sub, depth+1)) { - *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); + sub->Decref(); + } + break; + case kRegexpCapture: + sub = re->sub()[0]->Incref(); + if (IsAnchorEnd(&sub, depth+1)) { + *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); re->Decref(); return true; - } - sub->Decref(); - break; - case kRegexpEndText: - *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); - re->Decref(); - return true; + } + sub->Decref(); + break; + case kRegexpEndText: + *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); + re->Decref(); + return true; } - return false; + return false; } -void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem, +void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor) { if (flags & Regexp::Latin1) encoding_ = kEncodingLatin1; max_mem_ = max_mem; if (max_mem <= 0) { max_ninst_ = 100000; // more than enough - } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) { + } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) { // No room for anything. max_ninst_ = 0; } else { - int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); + int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); // Limit instruction count so that inst->id() fits nicely in an int. // SparseArray also assumes that the indices (inst->id()) are ints. // The call to WalkExponential uses 2*max_ninst_ below, @@ -1108,7 +1108,7 @@ void Compiler::Setup(Regexp::ParseFlags flags, int64_t max_mem, // If reversed is true, compiles a program that expects // to run over the input string backward (reverses all concatenations). // The reversed flag is also recorded in the returned program. -Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) { +Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) { Compiler c; c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */); c.reversed_ = reversed; @@ -1121,8 +1121,8 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) { // Record whether prog is anchored, removing the anchors. // (They get in the way of other optimizations.) - bool is_anchor_start = IsAnchorStart(&sre, 0); - bool is_anchor_end = IsAnchorEnd(&sre, 0); + bool is_anchor_start = IsAnchorStart(&sre, 0); + bool is_anchor_end = IsAnchorEnd(&sre, 0); // Generate fragment for entire regexp. Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_); @@ -1165,12 +1165,12 @@ Prog* Compiler::Finish(Regexp* re) { ninst_ = 1; } - // Hand off the array to Prog. + // Hand off the array to Prog. prog_->inst_ = std::move(inst_); prog_->size_ = ninst_; - prog_->Optimize(); - prog_->Flatten(); + prog_->Optimize(); + prog_->Flatten(); prog_->ComputeByteMap(); if (!prog_->reversed()) { @@ -1199,11 +1199,11 @@ Prog* Compiler::Finish(Regexp* re) { } // Converts Regexp to Prog. -Prog* Regexp::CompileToProg(int64_t max_mem) { +Prog* Regexp::CompileToProg(int64_t max_mem) { return Compiler::Compile(this, false, max_mem); } -Prog* Regexp::CompileToReverseProg(int64_t max_mem) { +Prog* Regexp::CompileToReverseProg(int64_t max_mem) { return Compiler::Compile(this, true, max_mem); } @@ -1242,11 +1242,11 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) { // Make sure DFA has enough memory to operate, // since we're not going to fall back to the NFA. - bool dfa_failed = false; + bool dfa_failed = false; StringPiece sp = "hello, world"; prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch, - NULL, &dfa_failed, NULL); - if (dfa_failed) { + NULL, &dfa_failed, NULL); + if (dfa_failed) { delete prog; return NULL; } diff --git a/contrib/libs/re2/re2/dfa.cc b/contrib/libs/re2/re2/dfa.cc index d47c7d50a7..f36855a273 100644 --- a/contrib/libs/re2/re2/dfa.cc +++ b/contrib/libs/re2/re2/dfa.cc @@ -21,48 +21,48 @@ // // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent. -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> -#include <algorithm> -#include <atomic> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <algorithm> +#include <atomic> #include <deque> -#include <mutex> -#include <new> -#include <string> +#include <mutex> +#include <new> +#include <string> #include <unordered_map> -#include <unordered_set> -#include <utility> -#include <vector> - -#include "util/logging.h" -#include "util/mix.h" -#include "util/mutex.h" -#include "util/strutil.h" +#include <unordered_set> +#include <utility> +#include <vector> + +#include "util/logging.h" +#include "util/mix.h" +#include "util/mutex.h" +#include "util/strutil.h" #include "re2/pod_array.h" #include "re2/prog.h" #include "re2/re2.h" #include "re2/sparse_set.h" -#include "re2/stringpiece.h" +#include "re2/stringpiece.h" -// Silence "zero-sized array in struct/union" warning for DFA::State::next_. -#ifdef _MSC_VER -#pragma warning(disable: 4200) -#endif +// Silence "zero-sized array in struct/union" warning for DFA::State::next_. +#ifdef _MSC_VER +#pragma warning(disable: 4200) +#endif namespace re2 { -// Controls whether the DFA should bail out early if the NFA would be faster. -static bool dfa_should_bail_when_slow = true; - +// Controls whether the DFA should bail out early if the NFA would be faster. +static bool dfa_should_bail_when_slow = true; + void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) { dfa_should_bail_when_slow = b; } // Changing this to true compiles in prints that trace execution of the DFA. // Generates a lot of output -- only useful for debugging. -static const bool ExtraDebug = false; +static const bool ExtraDebug = false; // A DFA implementation of a regular expression program. // Since this is entirely a forward declaration mandated by C++, @@ -70,7 +70,7 @@ static const bool ExtraDebug = false; // the comments in the sections that follow the DFA definition. class DFA { public: - DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem); + DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem); ~DFA(); bool ok() const { return !init_failed_; } Prog::MatchKind kind() { return kind_; } @@ -106,28 +106,28 @@ class DFA { // difficult to mark them as such. class RWLocker; class StateSaver; - class Workq; + class Workq; // A single DFA state. The DFA is represented as a graph of these // States, linked by the next_ pointers. If in state s and reading // byte c, the next state should be s->next_[c]. struct State { - inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; } + inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; } int* inst_; // Instruction pointers in the state. int ninst_; // # of inst_ pointers. - uint32_t flag_; // Empty string bitfield flags in effect on the way + uint32_t flag_; // Empty string bitfield flags in effect on the way // into this state, along with kFlagMatch if this // is a matching state. - + // Work around the bug affecting flexible array members in GCC 6.x (for x >= 1). -// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932) -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1 - std::atomic<State*> next_[0]; // Outgoing arrows from State, -#else - std::atomic<State*> next_[]; // Outgoing arrows from State, -#endif - +// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932) +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1 + std::atomic<State*> next_[0]; // Outgoing arrows from State, +#else + std::atomic<State*> next_[]; // Outgoing arrows from State, +#endif + // one per input byte class }; @@ -140,35 +140,35 @@ class DFA { kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left }; - struct StateHash { - size_t operator()(const State* a) const { - DCHECK(a != NULL); - HashMix mix(a->flag_); - for (int i = 0; i < a->ninst_; i++) - mix.Mix(a->inst_[i]); - mix.Mix(0); - return mix.get(); - } - }; - + struct StateHash { + size_t operator()(const State* a) const { + DCHECK(a != NULL); + HashMix mix(a->flag_); + for (int i = 0; i < a->ninst_; i++) + mix.Mix(a->inst_[i]); + mix.Mix(0); + return mix.get(); + } + }; + struct StateEqual { bool operator()(const State* a, const State* b) const { - DCHECK(a != NULL); - DCHECK(b != NULL); + DCHECK(a != NULL); + DCHECK(b != NULL); if (a == b) return true; - if (a->flag_ != b->flag_) + if (a->flag_ != b->flag_) return false; if (a->ninst_ != b->ninst_) return false; for (int i = 0; i < a->ninst_; i++) if (a->inst_[i] != b->inst_[i]) return false; - return true; + return true; } }; - typedef std::unordered_set<State*, StateHash, StateEqual> StateSet; + typedef std::unordered_set<State*, StateHash, StateEqual> StateSet; private: // Make it easier to swap in a scalable reader-writer mutex. @@ -200,7 +200,7 @@ class DFA { // Looks up and returns a State matching the inst, ninst, and flag. // L >= mutex_ - State* CachedState(int* inst, int ninst, uint32_t flag); + State* CachedState(int* inst, int ninst, uint32_t flag); // Clear the cache entirely. // Must hold cache_mutex_.w or be in destructor. @@ -208,7 +208,7 @@ class DFA { // Converts a State into a Workq: the opposite of WorkqToCachedState. // L >= mutex_ - void StateToWorkq(State* s, Workq* q); + void StateToWorkq(State* s, Workq* q); // Runs a State on a given byte, returning the next state. State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ @@ -223,12 +223,12 @@ class DFA { // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. // L >= mutex_ - void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint32_t flag); + void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint32_t flag); // Adds the instruction id to the Workq, following empty arrows // according to flag. // L >= mutex_ - void AddToQueue(Workq* q, int id, uint32_t flag); + void AddToQueue(Workq* q, int id, uint32_t flag); // For debugging, returns a text representation of State. static std::string DumpState(State* state); @@ -265,8 +265,8 @@ class DFA { SparseSet* matches; private: - SearchParams(const SearchParams&) = delete; - SearchParams& operator=(const SearchParams&) = delete; + SearchParams(const SearchParams&) = delete; + SearchParams& operator=(const SearchParams&) = delete; }; // Before each search, the parameters to Search are analyzed by @@ -281,8 +281,8 @@ class DFA { // false on failure. // cache_mutex_.r <= L < mutex_ bool AnalyzeSearch(SearchParams* params); - bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, - uint32_t flags); + bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, + uint32_t flags); // The generic search loop, inlined to create specialized versions. // cache_mutex_.r <= L < mutex_ @@ -339,8 +339,8 @@ class DFA { // readers. Any State* pointers are only valid while cache_mutex_ // is held. CacheMutex cache_mutex_; - int64_t mem_budget_; // Total memory budget for all States. - int64_t state_budget_; // Amount of memory remaining for new States. + int64_t mem_budget_; // Total memory budget for all States. + int64_t state_budget_; // Amount of memory remaining for new States. StateSet state_cache_; // All States computed so far. StartInfo start_[kMaxStart]; @@ -348,9 +348,9 @@ class DFA { DFA& operator=(const DFA&) = delete; }; -// Shorthand for casting to uint8_t*. -static inline const uint8_t* BytePtr(const void* v) { - return reinterpret_cast<const uint8_t*>(v); +// Shorthand for casting to uint8_t*. +static inline const uint8_t* BytePtr(const void* v) { + return reinterpret_cast<const uint8_t*>(v); } // Work queues @@ -415,24 +415,24 @@ class DFA::Workq : public SparseSet { int maxmark_; // maximum number of marks int nextmark_; // id of next mark bool last_was_mark_; // last inserted was mark - - Workq(const Workq&) = delete; - Workq& operator=(const Workq&) = delete; + + Workq(const Workq&) = delete; + Workq& operator=(const Workq&) = delete; }; -DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) +DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) : prog_(prog), kind_(kind), init_failed_(false), q0_(NULL), q1_(NULL), - mem_budget_(max_mem) { - if (ExtraDebug) + mem_budget_(max_mem) { + if (ExtraDebug) fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str()); int nmark = 0; - if (kind_ == Prog::kLongestMatch) - nmark = prog_->size(); - // See DFA::AddToQueue() for why this is so. + if (kind_ == Prog::kLongestMatch) + nmark = prog_->size(); + // See DFA::AddToQueue() for why this is so. int nstack = prog_->inst_count(kInstCapture) + prog_->inst_count(kInstEmptyWidth) + prog_->inst_count(kInstNop) + @@ -454,18 +454,18 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem) // At minimum, the search requires room for two states in order // to limp along, restarting frequently. We'll get better performance // if there is room for a larger number of states, say 20. - // Note that a state stores list heads only, so we use the program - // list count for the upper bound, not the program size. - int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot - int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) + - (prog_->list_count()+nmark)*sizeof(int); + // Note that a state stores list heads only, so we use the program + // list count for the upper bound, not the program size. + int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot + int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) + + (prog_->list_count()+nmark)*sizeof(int); if (state_budget_ < 20*one_state) { init_failed_ = true; return; } - q0_ = new Workq(prog_->size(), nmark); - q1_ = new Workq(prog_->size(), nmark); + q0_ = new Workq(prog_->size(), nmark); + q1_ = new Workq(prog_->size(), nmark); stack_ = PODArray<int>(nstack); } @@ -593,7 +593,7 @@ std::string DFA::DumpState(State* state) { // If mq is not null, MatchSep and the match IDs in mq will be appended // to the State. DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { - //mutex_.AssertHeld(); + //mutex_.AssertHeld(); // Construct array of instruction ids for the new state. // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: @@ -601,10 +601,10 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { // RunWorkqOnEmptyString or RunWorkqOnByte. PODArray<int> inst(q->size()); int n = 0; - uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions - bool sawmatch = false; // whether queue contains guaranteed kInstMatch - bool sawmark = false; // whether queue contains a Mark - if (ExtraDebug) + uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions + bool sawmatch = false; // whether queue contains guaranteed kInstMatch + bool sawmark = false; // whether queue contains a Mark + if (ExtraDebug) fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); for (Workq::iterator it = q->begin(); it != q->end(); ++it) { int id = *it; @@ -627,18 +627,18 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { if (kind_ != Prog::kManyMatch && (kind_ != Prog::kFirstMatch || (it == q->begin() && ip->greedy(prog_))) && - (kind_ != Prog::kLongestMatch || !sawmark) && - (flag & kFlagMatch)) { - if (ExtraDebug) + (kind_ != Prog::kLongestMatch || !sawmark) && + (flag & kFlagMatch)) { + if (ExtraDebug) fprintf(stderr, " -> FullMatchState\n"); return FullMatchState; } - FALLTHROUGH_INTENDED; - default: - // Record iff id is the head of its list, which must - // be the case if id-1 is the last of *its* list. :) - if (prog_->inst(id-1)->last()) - inst[n++] = *it; + FALLTHROUGH_INTENDED; + default: + // Record iff id is the head of its list, which must + // be the case if id-1 is the last of *its* list. :) + if (prog_->inst(id-1)->last()) + inst[n++] = *it; if (ip->opcode() == kInstEmptyWidth) needflags |= ip->empty(); if (ip->opcode() == kInstMatch && !prog_->anchor_end()) @@ -675,7 +675,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { // the execution loop can stop early. This is only okay // if the state is *not* a matching state. if (n == 0 && flag == 0) { - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, " -> DeadState\n"); return DeadState; } @@ -690,7 +690,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { int* markp = ip; while (markp < ep && *markp != Mark) markp++; - std::sort(ip, markp); + std::sort(ip, markp); if (markp < ep) markp++; ip = markp; @@ -727,50 +727,50 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) { // Looks in the State cache for a State matching inst, ninst, flag. // If one is found, returns it. If one is not found, allocates one, // inserts it in the cache, and returns it. -DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { - //mutex_.AssertHeld(); +DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { + //mutex_.AssertHeld(); // Look in the cache for a pre-existing state. - // We have to initialise the struct like this because otherwise - // MSVC will complain about the flexible array member. :( - State state; - state.inst_ = inst; - state.ninst_ = ninst; - state.flag_ = flag; + // We have to initialise the struct like this because otherwise + // MSVC will complain about the flexible array member. :( + State state; + state.inst_ = inst; + state.ninst_ = ninst; + state.flag_ = flag; StateSet::iterator it = state_cache_.find(&state); if (it != state_cache_.end()) { - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); return *it; } // Must have enough memory for new state. // In addition to what we're going to allocate, - // the state cache hash table seems to incur about 40 bytes per + // the state cache hash table seems to incur about 40 bytes per // State*, empirically. - const int kStateCacheOverhead = 40; + const int kStateCacheOverhead = 40; int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot - int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) + - ninst*sizeof(int); + int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) + + ninst*sizeof(int); if (mem_budget_ < mem + kStateCacheOverhead) { mem_budget_ = -1; return NULL; } mem_budget_ -= mem + kStateCacheOverhead; - // Allocate new state along with room for next_ and inst_. + // Allocate new state along with room for next_ and inst_. char* space = std::allocator<char>().allocate(mem); - State* s = new (space) State; - (void) new (s->next_) std::atomic<State*>[nnext]; - // Work around a unfortunate bug in older versions of libstdc++. - // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658) - for (int i = 0; i < nnext; i++) - (void) new (s->next_ + i) std::atomic<State*>(NULL); - s->inst_ = new (s->next_ + nnext) int[ninst]; + State* s = new (space) State; + (void) new (s->next_) std::atomic<State*>[nnext]; + // Work around a unfortunate bug in older versions of libstdc++. + // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658) + for (int i = 0; i < nnext; i++) + (void) new (s->next_ + i) std::atomic<State*>(NULL); + s->inst_ = new (s->next_ + nnext) int[ninst]; memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); s->ninst_ = ninst; s->flag_ = flag; - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, " -> %s\n", DumpState(s).c_str()); // Put state in cache and return it. @@ -780,19 +780,19 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) { // Clear the cache. Must hold cache_mutex_.w or be in destructor. void DFA::ClearCache() { - StateSet::iterator begin = state_cache_.begin(); - StateSet::iterator end = state_cache_.end(); - while (begin != end) { - StateSet::iterator tmp = begin; - ++begin; - // Deallocate the blob of memory that we allocated in DFA::CachedState(). + StateSet::iterator begin = state_cache_.begin(); + StateSet::iterator end = state_cache_.end(); + while (begin != end) { + StateSet::iterator tmp = begin; + ++begin; + // Deallocate the blob of memory that we allocated in DFA::CachedState(). // We recompute mem in order to benefit from sized delete where possible. int ninst = (*tmp)->ninst_; int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) + ninst*sizeof(int); std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem); - } + } state_cache_.clear(); } @@ -806,24 +806,24 @@ void DFA::StateToWorkq(State* s, Workq* q) { // Nothing after this is an instruction! break; } else { - // Explore from the head of the list. - AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask); + // Explore from the head of the list. + AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask); } } } -// Adds ip to the work queue, following empty arrows according to flag. -void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { +// Adds ip to the work queue, following empty arrows according to flag. +void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { // Use stack_ to hold our stack of instructions yet to process. - // It was preallocated as follows: - // one entry per Capture; - // one entry per EmptyWidth; and - // one entry per Nop. - // This reflects the maximum number of stack pushes that each can - // perform. (Each instruction can be processed at most once.) - // When using marks, we also added nmark == prog_->size(). - // (Otherwise, nmark == 0.) + // It was preallocated as follows: + // one entry per Capture; + // one entry per EmptyWidth; and + // one entry per Nop. + // This reflects the maximum number of stack pushes that each can + // perform. (Each instruction can be processed at most once.) + // When using marks, we also added nmark == prog_->size(). + // (Otherwise, nmark == 0.) int* stk = stack_.data(); int nstk = 0; @@ -832,7 +832,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { DCHECK_LE(nstk, stack_.size()); id = stk[--nstk]; - Loop: + Loop: if (id == Mark) { q->mark(); continue; @@ -842,8 +842,8 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { continue; // If ip is already on the queue, nothing to do. - // Otherwise add it. We don't actually keep all the - // ones that get added, but adding all of them here + // Otherwise add it. We don't actually keep all the + // ones that get added, but adding all of them here // increases the likelihood of q->contains(id), // reducing the amount of duplicated work. if (q->contains(id)) @@ -853,46 +853,46 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { // Process instruction. Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); + default: + LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); break; case kInstByteRange: // just save these on the queue case kInstMatch: - if (ip->last()) - break; - id = id+1; - goto Loop; + if (ip->last()) + break; + id = id+1; + goto Loop; case kInstCapture: // DFA treats captures as no-ops. case kInstNop: - if (!ip->last()) - stk[nstk++] = id+1; - - // If this instruction is the [00-FF]* loop at the beginning of - // a leftmost-longest unanchored search, separate with a Mark so - // that future threads (which will start farther to the right in - // the input string) are lower priority than current threads. - if (ip->opcode() == kInstNop && q->maxmark() > 0 && + if (!ip->last()) + stk[nstk++] = id+1; + + // If this instruction is the [00-FF]* loop at the beginning of + // a leftmost-longest unanchored search, separate with a Mark so + // that future threads (which will start farther to the right in + // the input string) are lower priority than current threads. + if (ip->opcode() == kInstNop && q->maxmark() > 0 && id == prog_->start_unanchored() && id != prog_->start()) stk[nstk++] = Mark; - id = ip->out(); - goto Loop; - - case kInstAltMatch: - DCHECK(!ip->last()); - id = id+1; - goto Loop; - + id = ip->out(); + goto Loop; + + case kInstAltMatch: + DCHECK(!ip->last()); + id = id+1; + goto Loop; + case kInstEmptyWidth: - if (!ip->last()) - stk[nstk++] = id+1; - - // Continue on if we have all the right flag bits. - if (ip->empty() & ~flag) - break; - id = ip->out(); - goto Loop; + if (!ip->last()) + stk[nstk++] = id+1; + + // Continue on if we have all the right flag bits. + if (ip->empty() & ~flag) + break; + id = ip->out(); + goto Loop; } } } @@ -913,7 +913,7 @@ void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { // and then processing only $. Doing the two-step sequence won't match // ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior // exhibited by existing implementations). -void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) { +void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) { newq->clear(); for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { if (oldq->is_mark(*i)) @@ -929,7 +929,7 @@ void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) { // regular expression program has been reached (the regexp has matched). void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, int c, uint32_t flag, bool* ismatch) { - //mutex_.AssertHeld(); + //mutex_.AssertHeld(); newq->clear(); for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { @@ -942,10 +942,10 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, int id = *i; Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); - break; - + default: + LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); + break; + case kInstFail: // never succeeds case kInstCapture: // already followed case kInstNop: // already followed @@ -984,7 +984,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, } } - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch); } @@ -1000,8 +1000,8 @@ DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) { // Processes input byte c in state, returning new state. DFA::State* DFA::RunStateOnByte(State* state, int c) { - //mutex_.AssertHeld(); - + //mutex_.AssertHeld(); + if (state <= SpecialStateMax) { if (state == FullMatchState) { // It is convenient for routines like PossibleMatchRange @@ -1023,9 +1023,9 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { } // If someone else already computed this, return it. - State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed); - if (ns != NULL) - return ns; + State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed); + if (ns != NULL) + return ns; // Convert state into Workq. StateToWorkq(state, q0_); @@ -1034,10 +1034,10 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // around this byte. Before the byte we have the flags recorded // in the State structure itself. After the byte we have // nothing yet (but that will change: read on). - uint32_t needflag = state->flag_ >> kFlagNeedShift; - uint32_t beforeflag = state->flag_ & kFlagEmptyMask; - uint32_t oldbeforeflag = beforeflag; - uint32_t afterflag = 0; + uint32_t needflag = state->flag_ >> kFlagNeedShift; + uint32_t beforeflag = state->flag_ & kFlagEmptyMask; + uint32_t oldbeforeflag = beforeflag; + uint32_t afterflag = 0; if (c == '\n') { // Insert implicit $ and ^ around \n @@ -1053,8 +1053,8 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // The state flag kFlagLastWord says whether the last // byte processed was a word character. Use that info to // insert empty-width (non-)word boundaries. - bool islastword = (state->flag_ & kFlagLastWord) != 0; - bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c)); + bool islastword = (state->flag_ & kFlagLastWord) != 0; + bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c)); if (isword == islastword) beforeflag |= kEmptyNonWordBoundary; else @@ -1064,7 +1064,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // Only useful to rerun on empty string if there are new, useful flags. if (beforeflag & ~oldbeforeflag & needflag) { RunWorkqOnEmptyString(q0_, q1_, beforeflag); - using std::swap; + using std::swap; swap(q0_, q1_); } bool ismatch = false; @@ -1073,7 +1073,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { swap(q0_, q1_); // Save afterflag along with ismatch and isword in new state. - uint32_t flag = afterflag; + uint32_t flag = afterflag; if (ismatch) flag |= kFlagMatch; if (isword) @@ -1084,11 +1084,11 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { else ns = WorkqToCachedState(q0_, NULL, flag); - // Flush ns before linking to it. + // Flush ns before linking to it. // Write barrier before updating state->next_ so that the // main search loop can proceed without any locking, for speed. // (Otherwise it would need one mutex operation per input byte.) - state->next_[ByteMap(c)].store(ns, std::memory_order_release); + state->next_[ByteMap(c)].store(ns, std::memory_order_release); return ns; } @@ -1126,8 +1126,8 @@ class DFA::RWLocker { CacheMutex* mu_; bool writing_; - RWLocker(const RWLocker&) = delete; - RWLocker& operator=(const RWLocker&) = delete; + RWLocker(const RWLocker&) = delete; + RWLocker& operator=(const RWLocker&) = delete; }; DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) { @@ -1139,15 +1139,15 @@ DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) { void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { if (!writing_) { mu_->ReaderUnlock(); - mu_->WriterLock(); + mu_->WriterLock(); writing_ = true; } } DFA::RWLocker::~RWLocker() { - if (!writing_) - mu_->ReaderUnlock(); - else + if (!writing_) + mu_->ReaderUnlock(); + else mu_->WriterUnlock(); } @@ -1212,12 +1212,12 @@ class DFA::StateSaver { DFA* dfa_; // the DFA to use int* inst_; // saved info from State int ninst_; - uint32_t flag_; + uint32_t flag_; bool is_special_; // whether original state was special State* special_; // if is_special_, the original state - StateSaver(const StateSaver&) = delete; - StateSaver& operator=(const StateSaver&) = delete; + StateSaver(const StateSaver&) = delete; + StateSaver& operator=(const StateSaver&) = delete; }; DFA::StateSaver::StateSaver(DFA* dfa, State* state) { @@ -1331,13 +1331,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { const uint8_t* ep = BytePtr(params->text.data() + params->text.size()); // end of text const uint8_t* resetp = NULL; // p at last cache reset - if (!run_forward) { - using std::swap; + if (!run_forward) { + using std::swap; swap(p, ep); - } + } - const uint8_t* bytemap = prog_->bytemap(); - const uint8_t* lastmatch = NULL; // most recent matching position in text + const uint8_t* bytemap = prog_->bytemap(); + const uint8_t* lastmatch = NULL; // most recent matching position in text bool matched = false; State* s = start; @@ -1364,7 +1364,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { } while (p != ep) { - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str()); if (can_prefix_accel && s == start) { @@ -1402,7 +1402,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { // Okay to use bytemap[] not ByteMap() here, because // c is known to be an actual byte and not kByteEndText. - State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire); + State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire); if (ns == NULL) { ns = RunStateOnByteUnlocked(s, c); if (ns == NULL) { @@ -1415,7 +1415,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { // of 10 bytes per state computation, fail so that RE2 can // fall back to the NFA. However, RE2::Set cannot fall back, // so we just have to keep on keeping on in that case. - if (dfa_should_bail_when_slow && resetp != NULL && + if (dfa_should_bail_when_slow && resetp != NULL && static_cast<size_t>(p - resetp) < 10*state_cache_.size() && kind_ != Prog::kManyMatch) { params->failed = true; @@ -1464,7 +1464,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { lastmatch = p - 1; else lastmatch = p + 1; - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str()); if (params->matches != NULL && kind_ == Prog::kManyMatch) { for (int i = s->ninst_ - 1; i >= 0; i--) { @@ -1499,7 +1499,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { lastbyte = BeginPtr(params->text)[-1] & 0xFF; } - State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire); + State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire); if (ns == NULL) { ns = RunStateOnByteUnlocked(s, lastbyte); if (ns == NULL) { @@ -1539,8 +1539,8 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) { if (id == MatchSep) break; params->matches->insert(id); - } - } + } + } } params->ep = reinterpret_cast<const char*>(lastmatch); @@ -1628,14 +1628,14 @@ bool DFA::AnalyzeSearch(SearchParams* params) { // Sanity check: make sure that text lies within context. if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) { - LOG(DFATAL) << "context does not contain text"; + LOG(DFATAL) << "context does not contain text"; params->start = DeadState; return true; } // Determine correct search type. int start; - uint32_t flags; + uint32_t flags; if (params->run_forward) { if (BeginPtr(text) == BeginPtr(context)) { start = kStartBeginText; @@ -1693,7 +1693,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) { params->start->flag_ >> kFlagNeedShift == 0) params->can_prefix_accel = true; - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n", params->anchored, params->run_forward, flags, DumpState(params->start).c_str(), params->can_prefix_accel); @@ -1703,8 +1703,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) { // Fills in info if needed. Returns true on success, false on failure. bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, - uint32_t flags) { - // Quick check. + uint32_t flags) { + // Quick check. State* start = info->start.load(std::memory_order_acquire); if (start != NULL) return true; @@ -1722,7 +1722,7 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, if (start == NULL) return false; - // Synchronize with "quick check" above. + // Synchronize with "quick check" above. info->start.store(start, std::memory_order_release); return true; } @@ -1743,7 +1743,7 @@ bool DFA::Search(const StringPiece& text, } *failed = false; - if (ExtraDebug) { + if (ExtraDebug) { fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_); @@ -1761,7 +1761,7 @@ bool DFA::Search(const StringPiece& text, return false; } if (params.start == DeadState) - return false; + return false; if (params.start == FullMatchState) { if (run_forward == want_earliest_match) *epp = text.data(); @@ -1769,7 +1769,7 @@ bool DFA::Search(const StringPiece& text, *epp = text.data() + text.size(); return true; } - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "start %s\n", DumpState(params.start).c_str()); bool ret = FastSearchLoop(¶ms); if (params.failed) { @@ -1782,35 +1782,35 @@ bool DFA::Search(const StringPiece& text, DFA* Prog::GetDFA(MatchKind kind) { // For a forward DFA, half the memory goes to each DFA. - // However, if it is a "many match" DFA, then there is - // no counterpart with which the memory must be shared. - // + // However, if it is a "many match" DFA, then there is + // no counterpart with which the memory must be shared. + // // For a reverse DFA, all the memory goes to the // "longest match" DFA, because RE2 never does reverse // "first match" searches. - if (kind == kFirstMatch) { - std::call_once(dfa_first_once_, [](Prog* prog) { - prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2); - }, this); - return dfa_first_; - } else if (kind == kManyMatch) { - std::call_once(dfa_first_once_, [](Prog* prog) { - prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_); - }, this); - return dfa_first_; - } else { - std::call_once(dfa_longest_once_, [](Prog* prog) { - if (!prog->reversed_) - prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2); - else - prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_); - }, this); - return dfa_longest_; + if (kind == kFirstMatch) { + std::call_once(dfa_first_once_, [](Prog* prog) { + prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2); + }, this); + return dfa_first_; + } else if (kind == kManyMatch) { + std::call_once(dfa_first_once_, [](Prog* prog) { + prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_); + }, this); + return dfa_first_; + } else { + std::call_once(dfa_longest_once_, [](Prog* prog) { + if (!prog->reversed_) + prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2); + else + prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_); + }, this); + return dfa_longest_; } -} +} -void Prog::DeleteDFA(DFA* dfa) { - delete dfa; +void Prog::DeleteDFA(DFA* dfa) { + delete dfa; } // Executes the regexp program to search in text, @@ -1824,7 +1824,7 @@ void Prog::DeleteDFA(DFA* dfa) { // This is the only external interface (class DFA only exists in this file). // bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, - Anchor anchor, MatchKind kind, StringPiece* match0, + Anchor anchor, MatchKind kind, StringPiece* match0, bool* failed, SparseSet* matches) { *failed = false; @@ -1891,7 +1891,7 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, *match0 = StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep)); else - *match0 = + *match0 = StringPiece(text.data(), static_cast<size_t>(ep - text.data())); } return true; @@ -1905,7 +1905,7 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) { // Pick out start state for unanchored search // at beginning of text. RWLocker l(&cache_mutex_); - SearchParams params(StringPiece(), StringPiece(), &l); + SearchParams params(StringPiece(), StringPiece(), &l); params.anchored = false; if (!AnalyzeSearch(¶ms) || params.start == NULL || @@ -1993,7 +1993,7 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // Pick out start state for anchored search at beginning of text. RWLocker l(&cache_mutex_); - SearchParams params(StringPiece(), StringPiece(), &l); + SearchParams params(StringPiece(), StringPiece(), &l); params.anchored = true; if (!AnalyzeSearch(¶ms)) return false; @@ -2033,14 +2033,14 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // Build minimum prefix. State* s = params.start; min->clear(); - MutexLock lock(&mutex_); + MutexLock lock(&mutex_); for (int i = 0; i < maxlen; i++) { - if (previously_visited_states[s] > kMaxEltRepetitions) + if (previously_visited_states[s] > kMaxEltRepetitions) break; previously_visited_states[s]++; // Stop if min is a match. - State* ns = RunStateOnByte(s, kByteEndText); + State* ns = RunStateOnByte(s, kByteEndText); if (ns == NULL) // DFA out of memory return false; if (ns != DeadState && (ns == FullMatchState || ns->IsMatch())) @@ -2049,13 +2049,13 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // Try to extend the string with low bytes. bool extended = false; for (int j = 0; j < 256; j++) { - ns = RunStateOnByte(s, j); + ns = RunStateOnByte(s, j); if (ns == NULL) // DFA out of memory return false; if (ns == FullMatchState || (ns > SpecialStateMax && ns->ninst_ > 0)) { extended = true; - min->append(1, static_cast<char>(j)); + min->append(1, static_cast<char>(j)); s = ns; break; } @@ -2069,20 +2069,20 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { s = params.start; max->clear(); for (int i = 0; i < maxlen; i++) { - if (previously_visited_states[s] > kMaxEltRepetitions) + if (previously_visited_states[s] > kMaxEltRepetitions) break; previously_visited_states[s] += 1; // Try to extend the string with high bytes. bool extended = false; for (int j = 255; j >= 0; j--) { - State* ns = RunStateOnByte(s, j); + State* ns = RunStateOnByte(s, j); if (ns == NULL) return false; if (ns == FullMatchState || (ns > SpecialStateMax && ns->ninst_ > 0)) { extended = true; - max->append(1, static_cast<char>(j)); + max->append(1, static_cast<char>(j)); s = ns; break; } @@ -2110,9 +2110,9 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { // PossibleMatchRange for a Prog. bool Prog::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { - // Have to use dfa_longest_ to get all strings for full matches. - // For example, (a|aa) never matches aa in first-match mode. - return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen); + // Have to use dfa_longest_ to get all strings for full matches. + // For example, (a|aa) never matches aa in first-match mode. + return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen); } } // namespace re2 diff --git a/contrib/libs/re2/re2/filtered_re2.cc b/contrib/libs/re2/re2/filtered_re2.cc index 5df97456e2..4a4a190889 100644 --- a/contrib/libs/re2/re2/filtered_re2.cc +++ b/contrib/libs/re2/re2/filtered_re2.cc @@ -3,13 +3,13 @@ // license that can be found in the LICENSE file. #include "re2/filtered_re2.h" - -#include <stddef.h> -#include <string> + +#include <stddef.h> +#include <string> #include <utility> - -#include "util/util.h" -#include "util/logging.h" + +#include "util/util.h" +#include "util/logging.h" #include "re2/prefilter.h" #include "re2/prefilter_tree.h" @@ -20,13 +20,13 @@ FilteredRE2::FilteredRE2() prefilter_tree_(new PrefilterTree()) { } -FilteredRE2::FilteredRE2(int min_atom_len) - : compiled_(false), - prefilter_tree_(new PrefilterTree(min_atom_len)) { -} - +FilteredRE2::FilteredRE2(int min_atom_len) + : compiled_(false), + prefilter_tree_(new PrefilterTree(min_atom_len)) { +} + FilteredRE2::~FilteredRE2() { - for (size_t i = 0; i < re2_vec_.size(); i++) + for (size_t i = 0; i < re2_vec_.size(); i++) delete re2_vec_[i]; } @@ -52,13 +52,13 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, RE2::ErrorCode code = re->error_code(); if (!re->ok()) { - if (options.log_errors()) { - LOG(ERROR) << "Couldn't compile regular expression, skipping: " + if (options.log_errors()) { + LOG(ERROR) << "Couldn't compile regular expression, skipping: " << pattern << " due to error " << re->error(); - } + } delete re; } else { - *id = static_cast<int>(re2_vec_.size()); + *id = static_cast<int>(re2_vec_.size()); re2_vec_.push_back(re); } @@ -66,17 +66,17 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, } void FilteredRE2::Compile(std::vector<std::string>* atoms) { - if (compiled_) { - LOG(ERROR) << "Compile called already."; + if (compiled_) { + LOG(ERROR) << "Compile called already."; return; } - if (re2_vec_.empty()) { - LOG(ERROR) << "Compile called before Add."; - return; - } - - for (size_t i = 0; i < re2_vec_.size(); i++) { + if (re2_vec_.empty()) { + LOG(ERROR) << "Compile called before Add."; + return; + } + + for (size_t i = 0; i < re2_vec_.size(); i++) { Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); prefilter_tree_->Add(prefilter); } @@ -86,21 +86,21 @@ void FilteredRE2::Compile(std::vector<std::string>* atoms) { } int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { - for (size_t i = 0; i < re2_vec_.size(); i++) + for (size_t i = 0; i < re2_vec_.size(); i++) if (RE2::PartialMatch(text, *re2_vec_[i])) - return static_cast<int>(i); + return static_cast<int>(i); return -1; } int FilteredRE2::FirstMatch(const StringPiece& text, - const std::vector<int>& atoms) const { + const std::vector<int>& atoms) const { if (!compiled_) { - LOG(DFATAL) << "FirstMatch called before Compile."; + LOG(DFATAL) << "FirstMatch called before Compile."; return -1; } - std::vector<int> regexps; + std::vector<int> regexps; prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); - for (size_t i = 0; i < regexps.size(); i++) + for (size_t i = 0; i < regexps.size(); i++) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) return regexps[i]; return -1; @@ -108,25 +108,25 @@ int FilteredRE2::FirstMatch(const StringPiece& text, bool FilteredRE2::AllMatches( const StringPiece& text, - const std::vector<int>& atoms, - std::vector<int>* matching_regexps) const { + const std::vector<int>& atoms, + std::vector<int>* matching_regexps) const { matching_regexps->clear(); - std::vector<int> regexps; + std::vector<int> regexps; prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); - for (size_t i = 0; i < regexps.size(); i++) + for (size_t i = 0; i < regexps.size(); i++) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) matching_regexps->push_back(regexps[i]); return !matching_regexps->empty(); } -void FilteredRE2::AllPotentials( - const std::vector<int>& atoms, - std::vector<int>* potential_regexps) const { - prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps); -} - -void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms, - std::vector<int>* passed_regexps) { +void FilteredRE2::AllPotentials( + const std::vector<int>& atoms, + std::vector<int>* potential_regexps) const { + prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps); +} + +void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms, + std::vector<int>* passed_regexps) { prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); } diff --git a/contrib/libs/re2/re2/filtered_re2.h b/contrib/libs/re2/re2/filtered_re2.h index dd618c70e8..d07822dad3 100644 --- a/contrib/libs/re2/re2/filtered_re2.h +++ b/contrib/libs/re2/re2/filtered_re2.h @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_FILTERED_RE2_H_ -#define RE2_FILTERED_RE2_H_ - +#ifndef RE2_FILTERED_RE2_H_ +#define RE2_FILTERED_RE2_H_ + // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. // It provides a prefilter mechanism that helps in cutting down the // number of regexps that need to be actually searched. @@ -22,10 +22,10 @@ // in the text to get the actual regexp matches. #include <memory> -#include <string> -#include <vector> +#include <string> +#include <vector> -#include "re2/re2.h" +#include "re2/re2.h" namespace re2 { @@ -34,7 +34,7 @@ class PrefilterTree; class FilteredRE2 { public: FilteredRE2(); - explicit FilteredRE2(int min_atom_len); + explicit FilteredRE2(int min_atom_len); ~FilteredRE2(); // Not copyable. @@ -69,24 +69,24 @@ class FilteredRE2 { // Returns -1 on no match. Compile has to be called before // calling this. int FirstMatch(const StringPiece& text, - const std::vector<int>& atoms) const; + const std::vector<int>& atoms) const; // Returns the indices of all matching regexps, after first clearing // matched_regexps. bool AllMatches(const StringPiece& text, - const std::vector<int>& atoms, - std::vector<int>* matching_regexps) const; - - // Returns the indices of all potentially matching regexps after first - // clearing potential_regexps. - // A regexp is potentially matching if it passes the filter. - // If a regexp passes the filter it may still not match. - // A regexp that does not pass the filter is guaranteed to not match. - void AllPotentials(const std::vector<int>& atoms, - std::vector<int>* potential_regexps) const; - + const std::vector<int>& atoms, + std::vector<int>* matching_regexps) const; + + // Returns the indices of all potentially matching regexps after first + // clearing potential_regexps. + // A regexp is potentially matching if it passes the filter. + // If a regexp passes the filter it may still not match. + // A regexp that does not pass the filter is guaranteed to not match. + void AllPotentials(const std::vector<int>& atoms, + std::vector<int>* potential_regexps) const; + // The number of regexps added. - int NumRegexps() const { return static_cast<int>(re2_vec_.size()); } + int NumRegexps() const { return static_cast<int>(re2_vec_.size()); } // Get the individual RE2 objects. const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; } @@ -96,11 +96,11 @@ class FilteredRE2 { void PrintPrefilter(int regexpid); // Useful for testing and debugging. - void RegexpsGivenStrings(const std::vector<int>& matched_atoms, - std::vector<int>* passed_regexps); + void RegexpsGivenStrings(const std::vector<int>& matched_atoms, + std::vector<int>* passed_regexps); // All the regexps in the FilteredRE2. - std::vector<RE2*> re2_vec_; + std::vector<RE2*> re2_vec_; // Has the FilteredRE2 been compiled using Compile() bool compiled_; diff --git a/contrib/libs/re2/re2/mimics_pcre.cc b/contrib/libs/re2/re2/mimics_pcre.cc index b1d6a51228..23e6b43f37 100644 --- a/contrib/libs/re2/re2/mimics_pcre.cc +++ b/contrib/libs/re2/re2/mimics_pcre.cc @@ -22,8 +22,8 @@ // // Regexp::MimicsPCRE checks for any of these conditions. -#include "util/util.h" -#include "util/logging.h" +#include "util/util.h" +#include "util/logging.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -135,8 +135,8 @@ class EmptyStringWalker : public Regexp::Walker<bool> { } private: - EmptyStringWalker(const EmptyStringWalker&) = delete; - EmptyStringWalker& operator=(const EmptyStringWalker&) = delete; + EmptyStringWalker(const EmptyStringWalker&) = delete; + EmptyStringWalker& operator=(const EmptyStringWalker&) = delete; }; // Called after visiting re's children. child_args contains the return diff --git a/contrib/libs/re2/re2/nfa.cc b/contrib/libs/re2/re2/nfa.cc index c7339f8ffd..9767d45d65 100644 --- a/contrib/libs/re2/re2/nfa.cc +++ b/contrib/libs/re2/re2/nfa.cc @@ -24,14 +24,14 @@ // Like Thompson's original machine and like the DFA implementation, this // implementation notices a match only once it is one byte past it. -#include <stdio.h> -#include <string.h> -#include <algorithm> +#include <stdio.h> +#include <string.h> +#include <algorithm> #include <deque> -#include <string> -#include <utility> -#include <vector> - +#include <string> +#include <utility> +#include <vector> + #include "util/logging.h" #include "util/strutil.h" #include "re2/pod_array.h" @@ -42,8 +42,8 @@ namespace re2 { -static const bool ExtraDebug = false; - +static const bool ExtraDebug = false; + class NFA { public: NFA(Prog* prog); @@ -67,7 +67,7 @@ class NFA { private: struct Thread { union { - int ref; + int ref; Thread* next; // when on free list }; const char** capture; @@ -75,8 +75,8 @@ class NFA { // State for explicit stack in AddToThreadq. struct AddState { - int id; // Inst to process - Thread* t; // if not null, set t0 = t before processing id + int id; // Inst to process + Thread* t; // if not null, set t0 = t before processing id }; // Threadq is a list of threads. The list is sorted by the order @@ -85,23 +85,23 @@ class NFA { typedef SparseArray<Thread*> Threadq; inline Thread* AllocThread(); - inline Thread* Incref(Thread* t); - inline void Decref(Thread* t); + inline Thread* Incref(Thread* t); + inline void Decref(Thread* t); - // Follows all empty arrows from id0 and enqueues all the states reached. - // Enqueues only the ByteRange instructions that match byte c. + // Follows all empty arrows from id0 and enqueues all the states reached. + // Enqueues only the ByteRange instructions that match byte c. // context is used (with p) for evaluating empty-width specials. - // p is the current input position, and t0 is the current thread. + // p is the current input position, and t0 is the current thread. void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, - const char* p, Thread* t0); + const char* p, Thread* t0); // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. // context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. - // Frees all the threads on runq. - // If there is a shortcut to the end, returns that shortcut. + // Frees all the threads on runq. + // If there is a shortcut to the end, returns that shortcut. int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, const char* p); @@ -126,13 +126,13 @@ class NFA { const char** match_; // best match so far bool matched_; // any match so far? - NFA(const NFA&) = delete; - NFA& operator=(const NFA&) = delete; + NFA(const NFA&) = delete; + NFA& operator=(const NFA&) = delete; }; NFA::NFA(Prog* prog) { prog_ = prog; - start_ = prog_->start(); + start_ = prog_->start(); ncapture_ = 0; longest_ = false; endmatch_ = false; @@ -140,7 +140,7 @@ NFA::NFA(Prog* prog) { etext_ = NULL; q0_.resize(prog_->size()); q1_.resize(prog_->size()); - // See NFA::AddToThreadq() for why this is so. + // See NFA::AddToThreadq() for why this is so. int nstack = 2*prog_->inst_count(kInstCapture) + prog_->inst_count(kInstEmptyWidth) + prog_->inst_count(kInstNop) + 1; // + 1 for start inst @@ -160,78 +160,78 @@ NFA::Thread* NFA::AllocThread() { Thread* t = freelist_; if (t != NULL) { freelist_ = t->next; - t->ref = 1; + t->ref = 1; // We don't need to touch t->capture because // the caller will immediately overwrite it. return t; } arena_.emplace_back(); t = &arena_.back(); - t->ref = 1; + t->ref = 1; t->capture = new const char*[ncapture_]; return t; } -NFA::Thread* NFA::Incref(Thread* t) { +NFA::Thread* NFA::Incref(Thread* t) { + DCHECK(t != NULL); + t->ref++; + return t; +} + +void NFA::Decref(Thread* t) { DCHECK(t != NULL); - t->ref++; - return t; -} - -void NFA::Decref(Thread* t) { - DCHECK(t != NULL); - t->ref--; - if (t->ref > 0) - return; - DCHECK_EQ(t->ref, 0); + t->ref--; + if (t->ref > 0) + return; + DCHECK_EQ(t->ref, 0); t->next = freelist_; freelist_ = t; -} - -// Follows all empty arrows from id0 and enqueues all the states reached. -// Enqueues only the ByteRange instructions that match byte c. +} + +// Follows all empty arrows from id0 and enqueues all the states reached. +// Enqueues only the ByteRange instructions that match byte c. // context is used (with p) for evaluating empty-width specials. -// p is the current input position, and t0 is the current thread. +// p is the current input position, and t0 is the current thread. void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, - const char* p, Thread* t0) { + const char* p, Thread* t0) { if (id0 == 0) return; // Use stack_ to hold our stack of instructions yet to process. - // It was preallocated as follows: - // two entries per Capture; - // one entry per EmptyWidth; and - // one entry per Nop. - // This reflects the maximum number of stack pushes that each can - // perform. (Each instruction can be processed at most once.) + // It was preallocated as follows: + // two entries per Capture; + // one entry per EmptyWidth; and + // one entry per Nop. + // This reflects the maximum number of stack pushes that each can + // perform. (Each instruction can be processed at most once.) AddState* stk = stack_.data(); - int nstk = 0; + int nstk = 0; stk[nstk++] = {id0, NULL}; while (nstk > 0) { DCHECK_LE(nstk, stack_.size()); - AddState a = stk[--nstk]; - - Loop: - if (a.t != NULL) { - // t0 was a thread that we allocated and copied in order to - // record the capture, so we must now decref it. - Decref(t0); - t0 = a.t; - } - + AddState a = stk[--nstk]; + + Loop: + if (a.t != NULL) { + // t0 was a thread that we allocated and copied in order to + // record the capture, so we must now decref it. + Decref(t0); + t0 = a.t; + } + int id = a.id; if (id == 0) continue; if (q->has_index(id)) { - if (ExtraDebug) - fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str()); + if (ExtraDebug) + fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str()); continue; } // Create entry in q no matter what. We might fill it in below, // or we might not. Even if not, it is necessary to have it, - // so that we don't revisit id0 during the recursion. + // so that we don't revisit id0 during the recursion. q->set_new(id, NULL); Thread** tp = &q->get_existing(id); int j; @@ -247,48 +247,48 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, case kInstAltMatch: // Save state; will pick up at next byte. - t = Incref(t0); + t = Incref(t0); *tp = t; - DCHECK(!ip->last()); + DCHECK(!ip->last()); a = {id+1, NULL}; - goto Loop; + goto Loop; case kInstNop: - if (!ip->last()) + if (!ip->last()) stk[nstk++] = {id+1, NULL}; - + // Continue on. a = {ip->out(), NULL}; - goto Loop; + goto Loop; case kInstCapture: - if (!ip->last()) + if (!ip->last()) stk[nstk++] = {id+1, NULL}; - + if ((j=ip->cap()) < ncapture_) { - // Push a dummy whose only job is to restore t0 + // Push a dummy whose only job is to restore t0 // once we finish exploring this possibility. stk[nstk++] = {0, t0}; // Record capture. - t = AllocThread(); - CopyCapture(t->capture, t0->capture); - t->capture[j] = p; - t0 = t; + t = AllocThread(); + CopyCapture(t->capture, t0->capture); + t->capture[j] = p; + t0 = t; } a = {ip->out(), NULL}; - goto Loop; - - case kInstByteRange: - if (!ip->Matches(c)) - goto Next; + goto Loop; + case kInstByteRange: + if (!ip->Matches(c)) + goto Next; + // Save state; will pick up at next byte. - t = Incref(t0); + t = Incref(t0); *tp = t; - if (ExtraDebug) - fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str()); + if (ExtraDebug) + fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str()); if (ip->hint() == 0) break; @@ -302,27 +302,27 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, if (ExtraDebug) fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str()); - Next: - if (ip->last()) - break; + Next: + if (ip->last()) + break; a = {id+1, NULL}; - goto Loop; - + goto Loop; + case kInstEmptyWidth: - if (!ip->last()) + if (!ip->last()) stk[nstk++] = {id+1, NULL}; - + // Continue on if we have all the right flag bits. if (ip->empty() & ~Prog::EmptyFlags(context, p)) break; a = {ip->out(), NULL}; - goto Loop; + goto Loop; } } } // Run runq on byte c, appending new states to nextq. -// Updates matched_ and match_ as new, better matches are found. +// Updates matched_ and match_ as new, better matches are found. // context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. @@ -340,12 +340,12 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, if (longest_) { // Can skip any threads started after our current best match. if (matched_ && match_[0] < t->capture[0]) { - Decref(t); + Decref(t); continue; } } - int id = i->index(); + int id = i->index(); Prog::Inst* ip = prog_->inst(id); switch (ip->opcode()) { @@ -363,10 +363,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, break; // The match is ours if we want it. if (ip->greedy(prog_) || longest_) { - CopyCapture(match_, t->capture); - matched_ = true; - - Decref(t); + CopyCapture(match_, t->capture); + matched_ = true; + + Decref(t); for (++i; i != runq->end(); ++i) { if (i->value() != NULL) Decref(i->value()); @@ -398,21 +398,21 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, // point but longer than an existing match. if (!matched_ || t->capture[0] < match_[0] || (t->capture[0] == match_[0] && p-1 > match_[1])) { - CopyCapture(match_, t->capture); + CopyCapture(match_, t->capture); match_[1] = p-1; - matched_ = true; - } + matched_ = true; + } } else { // Leftmost-biased mode: this match is by definition // better than what we've already found (see next line). - CopyCapture(match_, t->capture); + CopyCapture(match_, t->capture); match_[1] = p-1; - matched_ = true; + matched_ = true; // Cut off the threads that can only find matches // worse than the one we just found: don't run the // rest of the current Threadq. - Decref(t); + Decref(t); for (++i; i != runq->end(); ++i) { if (i->value() != NULL) Decref(i->value()); @@ -423,7 +423,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, break; } } - Decref(t); + Decref(t); } runq->clear(); return 0; @@ -455,9 +455,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (context.data() == NULL) context = text; - // Sanity check: make sure that text lies within context. + // Sanity check: make sure that text lies within context. if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) { - LOG(DFATAL) << "context does not contain text"; + LOG(DFATAL) << "context does not contain text"; return false; } @@ -496,7 +496,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // For convenience. etext_ = text.data() + text.size(); - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", std::string(text).c_str(), std::string(context).c_str(), anchored, longest); @@ -508,29 +508,29 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // Loop over the text, stepping the machine. for (const char* p = text.data();; p++) { - if (ExtraDebug) { - int c = 0; + if (ExtraDebug) { + int c = 0; if (p == btext_) - c = '^'; + c = '^'; else if (p > etext_) - c = '$'; + c = '$'; else if (p < etext_) - c = p[0] & 0xFF; - + c = p[0] & 0xFF; + fprintf(stderr, "%c:", c); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { Thread* t = i->value(); if (t == NULL) continue; - fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str()); + fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str()); } fprintf(stderr, "\n"); } - // This is a no-op the first time around the loop because runq is empty. + // This is a no-op the first time around the loop because runq is empty. int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p); DCHECK_EQ(runq->size(), 0); - using std::swap; + using std::swap; swap(nextq, runq); nextq->clear(); if (id != 0) { @@ -544,8 +544,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, break; case kInstCapture: - if (ip->cap() < ncapture_) - match_[ip->cap()] = p; + if (ip->cap() < ncapture_) + match_[ip->cap()] = p; id = ip->out(); continue; @@ -574,24 +574,24 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, // Try to use prefix accel (e.g. memchr) to skip ahead. // The search must be unanchored and there must be zero // possible matches already. - if (!anchored && runq->size() == 0 && + if (!anchored && runq->size() == 0 && p < etext_ && prog_->can_prefix_accel()) { p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p)); if (p == NULL) p = etext_; } - Thread* t = AllocThread(); - CopyCapture(t->capture, match_); - t->capture[0] = p; + Thread* t = AllocThread(); + CopyCapture(t->capture, match_); + t->capture[0] = p; AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p, t); - Decref(t); + Decref(t); } // If all the threads have died, stop early. if (runq->size() == 0) { - if (ExtraDebug) + if (ExtraDebug) fprintf(stderr, "dead\n"); break; } @@ -616,11 +616,11 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, if (matched_) { for (int i = 0; i < nsubmatch; i++) - submatch[i] = - StringPiece(match_[2 * i], - static_cast<size_t>(match_[2 * i + 1] - match_[2 * i])); - if (ExtraDebug) - fprintf(stderr, "match (%td,%td)\n", + submatch[i] = + StringPiece(match_[2 * i], + static_cast<size_t>(match_[2 * i + 1] - match_[2 * i])); + if (ExtraDebug) + fprintf(stderr, "match (%td,%td)\n", match_[0] - btext_, match_[1] - btext_); return true; @@ -632,7 +632,7 @@ bool Prog::SearchNFA(const StringPiece& text, const StringPiece& context, Anchor anchor, MatchKind kind, StringPiece* match, int nmatch) { - if (ExtraDebug) + if (ExtraDebug) Dump(); NFA nfa(this); @@ -651,63 +651,63 @@ Prog::SearchNFA(const StringPiece& text, const StringPiece& context, return true; } -// For each instruction i in the program reachable from the start, compute the -// number of instructions reachable from i by following only empty transitions -// and record that count as fanout[i]. -// -// fanout holds the results and is also the work queue for the outer iteration. -// reachable holds the reached nodes for the inner iteration. -void Prog::Fanout(SparseArray<int>* fanout) { - DCHECK_EQ(fanout->max_size(), size()); - SparseSet reachable(size()); - fanout->clear(); - fanout->set_new(start(), 0); - for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) { +// For each instruction i in the program reachable from the start, compute the +// number of instructions reachable from i by following only empty transitions +// and record that count as fanout[i]. +// +// fanout holds the results and is also the work queue for the outer iteration. +// reachable holds the reached nodes for the inner iteration. +void Prog::Fanout(SparseArray<int>* fanout) { + DCHECK_EQ(fanout->max_size(), size()); + SparseSet reachable(size()); + fanout->clear(); + fanout->set_new(start(), 0); + for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) { int* count = &i->value(); - reachable.clear(); - reachable.insert(i->index()); - for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) { - int id = *j; - Prog::Inst* ip = inst(id); - switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()"; - break; - - case kInstByteRange: - if (!ip->last()) - reachable.insert(id+1); - - (*count)++; - if (!fanout->has_index(ip->out())) { - fanout->set_new(ip->out(), 0); - } - break; - - case kInstAltMatch: - DCHECK(!ip->last()); - reachable.insert(id+1); - break; - - case kInstCapture: - case kInstEmptyWidth: - case kInstNop: - if (!ip->last()) - reachable.insert(id+1); - - reachable.insert(ip->out()); - break; - - case kInstMatch: - if (!ip->last()) - reachable.insert(id+1); - break; - - case kInstFail: - break; - } - } - } -} - + reachable.clear(); + reachable.insert(i->index()); + for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) { + int id = *j; + Prog::Inst* ip = inst(id); + switch (ip->opcode()) { + default: + LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()"; + break; + + case kInstByteRange: + if (!ip->last()) + reachable.insert(id+1); + + (*count)++; + if (!fanout->has_index(ip->out())) { + fanout->set_new(ip->out(), 0); + } + break; + + case kInstAltMatch: + DCHECK(!ip->last()); + reachable.insert(id+1); + break; + + case kInstCapture: + case kInstEmptyWidth: + case kInstNop: + if (!ip->last()) + reachable.insert(id+1); + + reachable.insert(ip->out()); + break; + + case kInstMatch: + if (!ip->last()) + reachable.insert(id+1); + break; + + case kInstFail: + break; + } + } + } +} + } // namespace re2 diff --git a/contrib/libs/re2/re2/onepass.cc b/contrib/libs/re2/re2/onepass.cc index 263974654d..2789dbb206 100644 --- a/contrib/libs/re2/re2/onepass.cc +++ b/contrib/libs/re2/re2/onepass.cc @@ -50,30 +50,30 @@ // See also Anne Brüggemann-Klein and Derick Wood, // "One-unambiguous regular languages", Information and Computation 142(2). -#include <stdint.h> +#include <stdint.h> #include <string.h> -#include <algorithm> +#include <algorithm> #include <map> -#include <string> -#include <vector> - -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "util/utf.h" +#include <string> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/pod_array.h" #include "re2/prog.h" #include "re2/sparse_set.h" -#include "re2/stringpiece.h" - -// Silence "zero-sized array in struct/union" warning for OneState::action. -#ifdef _MSC_VER -#pragma warning(disable: 4200) -#endif +#include "re2/stringpiece.h" +// Silence "zero-sized array in struct/union" warning for OneState::action. +#ifdef _MSC_VER +#pragma warning(disable: 4200) +#endif + namespace re2 { -static const bool ExtraDebug = false; +static const bool ExtraDebug = false; // The key insight behind this implementation is that the // non-determinism in an NFA for a one-pass regular expression @@ -144,11 +144,11 @@ static const bool ExtraDebug = false; // maps next input bytes into equivalence classes, to reduce // the memory footprint.) struct OneState { - uint32_t matchcond; // conditions to match right now. - uint32_t action[]; + uint32_t matchcond; // conditions to match right now. + uint32_t action[]; }; -// The uint32_t conditions in the action are a combination of +// The uint32_t conditions in the action are a combination of // condition and capture bits and the next state. The bottom 16 bits // are the condition and capture bits, and the top 16 are the index of // the next state. @@ -165,8 +165,8 @@ struct OneState { // and kEmptyNonWordBoundary, so we can use that as a sentinel // instead of needing an extra bit. -static const int kIndexShift = 16; // number of bits below index -static const int kEmptyShift = 6; // number of empty flags in prog.h +static const int kIndexShift = 16; // number of bits below index +static const int kEmptyShift = 6; // number of empty flags in prog.h static const int kRealCapShift = kEmptyShift + 1; static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; @@ -174,23 +174,23 @@ static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2; static const int kCapShift = kRealCapShift - 2; static const int kMaxCap = kRealMaxCap + 2; -static const uint32_t kMatchWins = 1 << kEmptyShift; -static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift; +static const uint32_t kMatchWins = 1 << kEmptyShift; +static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift; -static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary; +static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary; // Check, at compile time, that prog.h agrees with math above. // This function is never called. void OnePass_Checks() { - static_assert((1<<kEmptyShift)-1 == kEmptyAllFlags, - "kEmptyShift disagrees with kEmptyAllFlags"); + static_assert((1<<kEmptyShift)-1 == kEmptyAllFlags, + "kEmptyShift disagrees with kEmptyAllFlags"); // kMaxCap counts pointers, kMaxOnePassCapture counts pairs. - static_assert(kMaxCap == Prog::kMaxOnePassCapture*2, - "kMaxCap disagrees with kMaxOnePassCapture"); + static_assert(kMaxCap == Prog::kMaxOnePassCapture*2, + "kMaxCap disagrees with kMaxOnePassCapture"); } -static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) { - uint32_t satisfied = Prog::EmptyFlags(context, p); +static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) { + uint32_t satisfied = Prog::EmptyFlags(context, p); if (cond & kEmptyAllFlags & ~satisfied) return false; return true; @@ -198,17 +198,17 @@ static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) { // Apply the capture bits in cond, saving p to the appropriate // locations in cap[]. -static void ApplyCaptures(uint32_t cond, const char* p, +static void ApplyCaptures(uint32_t cond, const char* p, const char** cap, int ncap) { for (int i = 2; i < ncap; i++) if (cond & (1 << kCapShift << i)) cap[i] = p; } -// Computes the OneState* for the given nodeindex. -static inline OneState* IndexToNode(uint8_t* nodes, int statesize, +// Computes the OneState* for the given nodeindex. +static inline OneState* IndexToNode(uint8_t* nodes, int statesize, int nodeindex) { - return reinterpret_cast<OneState*>(nodes + statesize*nodeindex); + return reinterpret_cast<OneState*>(nodes + statesize*nodeindex); } bool Prog::SearchOnePass(const StringPiece& text, @@ -245,26 +245,26 @@ bool Prog::SearchOnePass(const StringPiece& text, kind = kFullMatch; uint8_t* nodes = onepass_nodes_.data(); - int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); - // start() is always mapped to the zeroth OneState. - OneState* state = IndexToNode(nodes, statesize, 0); - uint8_t* bytemap = bytemap_; + int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); + // start() is always mapped to the zeroth OneState. + OneState* state = IndexToNode(nodes, statesize, 0); + uint8_t* bytemap = bytemap_; const char* bp = text.data(); const char* ep = text.data() + text.size(); const char* p; bool matched = false; matchcap[0] = bp; cap[0] = bp; - uint32_t nextmatchcond = state->matchcond; + uint32_t nextmatchcond = state->matchcond; for (p = bp; p < ep; p++) { int c = bytemap[*p & 0xFF]; - uint32_t matchcond = nextmatchcond; - uint32_t cond = state->action[c]; + uint32_t matchcond = nextmatchcond; + uint32_t cond = state->action[c]; // Determine whether we can reach act->next. // If so, advance state and nextmatchcond. if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) { - uint32_t nextindex = cond >> kIndexShift; + uint32_t nextindex = cond >> kIndexShift; state = IndexToNode(nodes, statesize, nextindex); nextmatchcond = state->matchcond; } else { @@ -323,7 +323,7 @@ bool Prog::SearchOnePass(const StringPiece& text, // Look for match at end of input. { - uint32_t matchcond = state->matchcond; + uint32_t matchcond = state->matchcond; if (matchcond != kImpossible && ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) { if (nmatch > 1 && (matchcond & kCapMask)) @@ -339,9 +339,9 @@ done: if (!matched) return false; for (int i = 0; i < nmatch; i++) - match[i] = - StringPiece(matchcap[2 * i], - static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i])); + match[i] = + StringPiece(matchcap[2 * i], + static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i])); return true; } @@ -363,7 +363,7 @@ static bool AddQ(Instq *q, int id) { struct InstCond { int id; - uint32_t cond; + uint32_t cond; }; // Returns whether this is a one-pass program; that is, @@ -393,37 +393,37 @@ bool Prog::IsOnePass() { // Willing to use at most 1/4 of the DFA budget (heuristic). // Limit max node count to 65000 as a conservative estimate to // avoid overflowing 16-bit node index in encoding. - int maxnodes = 2 + inst_count(kInstByteRange); - int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); + int maxnodes = 2 + inst_count(kInstByteRange); + int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32_t); if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes) return false; // Flood the graph starting at the start state, and check // that in each reachable state, each possible byte leads // to a unique next state. - int stacksize = inst_count(kInstCapture) + - inst_count(kInstEmptyWidth) + - inst_count(kInstNop) + 1; // + 1 for start inst + int stacksize = inst_count(kInstCapture) + + inst_count(kInstEmptyWidth) + + inst_count(kInstNop) + 1; // + 1 for start inst PODArray<InstCond> stack(stacksize); - + int size = this->size(); PODArray<int> nodebyid(size); // indexed by ip memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]); - // Originally, nodes was a uint8_t[maxnodes*statesize], but that was - // unnecessarily optimistic: why allocate a large amount of memory - // upfront for a large program when it is unlikely to be one-pass? - std::vector<uint8_t> nodes; + // Originally, nodes was a uint8_t[maxnodes*statesize], but that was + // unnecessarily optimistic: why allocate a large amount of memory + // upfront for a large program when it is unlikely to be one-pass? + std::vector<uint8_t> nodes; Instq tovisit(size), workq(size); AddQ(&tovisit, start()); nodebyid[start()] = 0; int nalloc = 1; - nodes.insert(nodes.end(), statesize, 0); + nodes.insert(nodes.end(), statesize, 0); for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) { int id = *it; int nodeindex = nodebyid[id]; - OneState* node = IndexToNode(nodes.data(), statesize, nodeindex); + OneState* node = IndexToNode(nodes.data(), statesize, nodeindex); // Flood graph using manual stack, filling in actions as found. // Default is none. @@ -438,107 +438,107 @@ bool Prog::IsOnePass() { stack[nstack++].cond = 0; while (nstack > 0) { int id = stack[--nstack].id; - uint32_t cond = stack[nstack].cond; - - Loop: + uint32_t cond = stack[nstack].cond; + + Loop: Prog::Inst* ip = inst(id); switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); - break; - + default: + LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); + break; + case kInstAltMatch: // TODO(rsc): Ignoring kInstAltMatch optimization. // Should implement it in this engine, but it's subtle. - DCHECK(!ip->last()); + DCHECK(!ip->last()); // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, id+1)) + if (!AddQ(&workq, id+1)) goto fail; - id = id+1; - goto Loop; + id = id+1; + goto Loop; case kInstByteRange: { int nextindex = nodebyid[ip->out()]; if (nextindex == -1) { if (nalloc >= maxnodes) { - if (ExtraDebug) - LOG(ERROR) << StringPrintf( - "Not OnePass: hit node limit %d >= %d", nalloc, maxnodes); + if (ExtraDebug) + LOG(ERROR) << StringPrintf( + "Not OnePass: hit node limit %d >= %d", nalloc, maxnodes); goto fail; } nextindex = nalloc; - AddQ(&tovisit, ip->out()); - nodebyid[ip->out()] = nalloc; + AddQ(&tovisit, ip->out()); + nodebyid[ip->out()] = nalloc; nalloc++; - nodes.insert(nodes.end(), statesize, 0); - // Update node because it might have been invalidated. - node = IndexToNode(nodes.data(), statesize, nodeindex); + nodes.insert(nodes.end(), statesize, 0); + // Update node because it might have been invalidated. + node = IndexToNode(nodes.data(), statesize, nodeindex); } for (int c = ip->lo(); c <= ip->hi(); c++) { int b = bytemap_[c]; - // Skip any bytes immediately after c that are also in b. - while (c < 256-1 && bytemap_[c+1] == b) - c++; - uint32_t act = node->action[b]; - uint32_t newact = (nextindex << kIndexShift) | cond; - if (matched) - newact |= kMatchWins; + // Skip any bytes immediately after c that are also in b. + while (c < 256-1 && bytemap_[c+1] == b) + c++; + uint32_t act = node->action[b]; + uint32_t newact = (nextindex << kIndexShift) | cond; + if (matched) + newact |= kMatchWins; if ((act & kImpossible) == kImpossible) { node->action[b] = newact; } else if (act != newact) { - if (ExtraDebug) - LOG(ERROR) << StringPrintf( - "Not OnePass: conflict on byte %#x at state %d", c, *it); + if (ExtraDebug) + LOG(ERROR) << StringPrintf( + "Not OnePass: conflict on byte %#x at state %d", c, *it); goto fail; } } if (ip->foldcase()) { - Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a'; - Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a'; + Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a'; + Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a'; for (int c = lo; c <= hi; c++) { int b = bytemap_[c]; - // Skip any bytes immediately after c that are also in b. - while (c < 256-1 && bytemap_[c+1] == b) - c++; - uint32_t act = node->action[b]; - uint32_t newact = (nextindex << kIndexShift) | cond; - if (matched) - newact |= kMatchWins; + // Skip any bytes immediately after c that are also in b. + while (c < 256-1 && bytemap_[c+1] == b) + c++; + uint32_t act = node->action[b]; + uint32_t newact = (nextindex << kIndexShift) | cond; + if (matched) + newact |= kMatchWins; if ((act & kImpossible) == kImpossible) { node->action[b] = newact; } else if (act != newact) { - if (ExtraDebug) - LOG(ERROR) << StringPrintf( - "Not OnePass: conflict on byte %#x at state %d", c, *it); + if (ExtraDebug) + LOG(ERROR) << StringPrintf( + "Not OnePass: conflict on byte %#x at state %d", c, *it); goto fail; } } } - - if (ip->last()) - break; - // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, id+1)) - goto fail; - id = id+1; - goto Loop; + + if (ip->last()) + break; + // If already on work queue, (1) is violated: bail out. + if (!AddQ(&workq, id+1)) + goto fail; + id = id+1; + goto Loop; } case kInstCapture: - case kInstEmptyWidth: - case kInstNop: - if (!ip->last()) { - // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, id+1)) - goto fail; - stack[nstack].id = id+1; - stack[nstack++].cond = cond; - } - - if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap) + case kInstEmptyWidth: + case kInstNop: + if (!ip->last()) { + // If already on work queue, (1) is violated: bail out. + if (!AddQ(&workq, id+1)) + goto fail; + stack[nstack].id = id+1; + stack[nstack++].cond = cond; + } + + if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap) cond |= (1 << kCapShift) << ip->cap(); - if (ip->opcode() == kInstEmptyWidth) - cond |= ip->empty(); + if (ip->opcode() == kInstEmptyWidth) + cond |= ip->empty(); // kInstCapture and kInstNop always proceed to ip->out(). // kInstEmptyWidth only sometimes proceeds to ip->out(), @@ -548,44 +548,44 @@ bool Prog::IsOnePass() { // If already on work queue, (1) is violated: bail out. if (!AddQ(&workq, ip->out())) { - if (ExtraDebug) - LOG(ERROR) << StringPrintf( + if (ExtraDebug) + LOG(ERROR) << StringPrintf( "Not OnePass: multiple paths %d -> %d", *it, ip->out()); goto fail; } - id = ip->out(); - goto Loop; + id = ip->out(); + goto Loop; case kInstMatch: if (matched) { // (3) is violated - if (ExtraDebug) - LOG(ERROR) << StringPrintf( + if (ExtraDebug) + LOG(ERROR) << StringPrintf( "Not OnePass: multiple matches from %d", *it); goto fail; } matched = true; node->matchcond = cond; - if (ip->last()) - break; - // If already on work queue, (1) is violated: bail out. - if (!AddQ(&workq, id+1)) - goto fail; - id = id+1; - goto Loop; - + if (ip->last()) + break; + // If already on work queue, (1) is violated: bail out. + if (!AddQ(&workq, id+1)) + goto fail; + id = id+1; + goto Loop; + case kInstFail: break; } } } - if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR). - LOG(ERROR) << "bytemap:\n" << DumpByteMap(); - LOG(ERROR) << "prog:\n" << Dump(); - - std::map<int, int> idmap; + if (ExtraDebug) { // For debugging, dump one-pass NFA to LOG(ERROR). + LOG(ERROR) << "bytemap:\n" << DumpByteMap(); + LOG(ERROR) << "prog:\n" << Dump(); + + std::map<int, int> idmap; for (int i = 0; i < size; i++) if (nodebyid[i] != -1) idmap[nodebyid[i]] = i; @@ -595,8 +595,8 @@ bool Prog::IsOnePass() { int id = *it; int nodeindex = nodebyid[id]; if (nodeindex == -1) - continue; - OneState* node = IndexToNode(nodes.data(), statesize, nodeindex); + continue; + OneState* node = IndexToNode(nodes.data(), statesize, nodeindex); dump += StringPrintf("node %d id=%d: matchcond=%#x\n", nodeindex, id, node->matchcond); for (int i = 0; i < bytemap_range_; i++) { @@ -608,7 +608,7 @@ bool Prog::IsOnePass() { idmap[node->action[i] >> kIndexShift]); } } - LOG(ERROR) << "nodes:\n" << dump; + LOG(ERROR) << "nodes:\n" << dump; } dfa_mem_ -= nalloc*statesize; diff --git a/contrib/libs/re2/re2/parse.cc b/contrib/libs/re2/re2/parse.cc index 85f16f060b..632d69ae27 100644 --- a/contrib/libs/re2/re2/parse.cc +++ b/contrib/libs/re2/re2/parse.cc @@ -16,37 +16,37 @@ // and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W. // See regexp.h for rationale. -#include <ctype.h> -#include <stddef.h> -#include <stdint.h> -#include <string.h> -#include <algorithm> -#include <map> -#include <string> +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <map> +#include <string> #include <vector> - -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "util/utf.h" + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/pod_array.h" #include "re2/regexp.h" -#include "re2/stringpiece.h" +#include "re2/stringpiece.h" #include "re2/unicode_casefold.h" #include "re2/unicode_groups.h" -#include "re2/walker-inl.h" - -#if defined(RE2_USE_ICU) -#include "unicode/uniset.h" -#include "unicode/unistr.h" -#include "unicode/utypes.h" -#endif - +#include "re2/walker-inl.h" + +#if defined(RE2_USE_ICU) +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" +#endif + namespace re2 { // Controls the maximum repeat count permitted by the parser. static int maximum_repeat_count = 1000; - + void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) { maximum_repeat_count = i; } @@ -183,8 +183,8 @@ private: int ncap_; // number of capturing parens seen int rune_max_; // maximum char value for this encoding - ParseState(const ParseState&) = delete; - ParseState& operator=(const ParseState&) = delete; + ParseState(const ParseState&) = delete; + ParseState& operator=(const ParseState&) = delete; }; // Pseudo-operators - only on parse stack. @@ -242,8 +242,8 @@ bool Regexp::ParseState::PushRegexp(Regexp* re) { // single characters (e.g., [.] instead of \.), and some // analysis does better with fewer character classes. // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding. - if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) { - re->ccb_->RemoveAbove(rune_max_); + if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) { + re->ccb_->RemoveAbove(rune_max_); if (re->ccb_->size() == 1) { Rune r = re->ccb_->begin()->lo; re->Decref(); @@ -269,12 +269,12 @@ bool Regexp::ParseState::PushRegexp(Regexp* re) { // Searches the case folding tables and returns the CaseFold* that contains r. // If there isn't one, returns the CaseFold* with smallest f->lo bigger than r. // If there isn't one, returns NULL. -const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) { - const CaseFold* ef = f + n; +const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) { + const CaseFold* ef = f + n; // Binary search for entry containing r. while (n > 0) { - int m = n/2; + int m = n/2; if (f[m].lo <= r && r <= f[m].hi) return &f[m]; if (r < f[m].lo) { @@ -286,10 +286,10 @@ const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) { } // There is no entry that contains r, but f points - // where it would have been. Unless f points at + // where it would have been. Unless f points at // the end of the array, it points at the next entry // after r. - if (f < ef) + if (f < ef) return f; // No entry contains r; no entry contains runes > r. @@ -297,24 +297,24 @@ const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) { } // Returns the result of applying the fold f to the rune r. -Rune ApplyFold(const CaseFold *f, Rune r) { +Rune ApplyFold(const CaseFold *f, Rune r) { switch (f->delta) { default: return r + f->delta; - case EvenOddSkip: // even <-> odd but only applies to every other - if ((r - f->lo) % 2) - return r; - FALLTHROUGH_INTENDED; + case EvenOddSkip: // even <-> odd but only applies to every other + if ((r - f->lo) % 2) + return r; + FALLTHROUGH_INTENDED; case EvenOdd: // even <-> odd if (r%2 == 0) return r + 1; return r - 1; - case OddEvenSkip: // odd <-> even but only applies to every other - if ((r - f->lo) % 2) - return r; - FALLTHROUGH_INTENDED; + case OddEvenSkip: // odd <-> even but only applies to every other + if ((r - f->lo) % 2) + return r; + FALLTHROUGH_INTENDED; case OddEven: // odd <-> even if (r%2 == 1) return r + 1; @@ -333,7 +333,7 @@ Rune ApplyFold(const CaseFold *f, Rune r) { // // CycleFoldRune('?') = '?' Rune CycleFoldRune(Rune r) { - const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r); + const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r); if (f == NULL || r < f->lo) return r; return ApplyFold(f, r); @@ -356,7 +356,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { return; while (lo <= hi) { - const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo); + const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo); if (f == NULL) // lo has no fold, nor does anything above lo break; if (lo < f->lo) { // lo has no fold; next rune with a fold is f->lo @@ -367,7 +367,7 @@ static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { // Add in the result of folding the range lo - f->hi // and that range's fold, recursively. Rune lo1 = lo; - Rune hi1 = std::min<Rune>(hi, f->hi); + Rune hi1 = std::min<Rune>(hi, f->hi); switch (f->delta) { default: lo1 += f->delta; @@ -482,23 +482,23 @@ bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s, Regexp::ParseFlags fl = flags_; if (nongreedy) fl = fl ^ NonGreedy; - - // Squash **, ++ and ??. Regexp::Star() et al. handle this too, but - // they're mostly for use during simplification, not during parsing. - if (op == stacktop_->op() && fl == stacktop_->parse_flags()) - return true; - - // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because - // op is a repeat, we just have to check that stacktop_->op() is too, - // then adjust stacktop_. - if ((stacktop_->op() == kRegexpStar || - stacktop_->op() == kRegexpPlus || - stacktop_->op() == kRegexpQuest) && - fl == stacktop_->parse_flags()) { - stacktop_->op_ = kRegexpStar; - return true; - } - + + // Squash **, ++ and ??. Regexp::Star() et al. handle this too, but + // they're mostly for use during simplification, not during parsing. + if (op == stacktop_->op() && fl == stacktop_->parse_flags()) + return true; + + // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because + // op is a repeat, we just have to check that stacktop_->op() is too, + // then adjust stacktop_. + if ((stacktop_->op() == kRegexpStar || + stacktop_->op() == kRegexpPlus || + stacktop_->op() == kRegexpQuest) && + fl == stacktop_->parse_flags()) { + stacktop_->op_ = kRegexpStar; + return true; + } + Regexp* re = new Regexp(op, fl); re->AllocSub(1); re->down_ = stacktop_->down_; @@ -508,61 +508,61 @@ bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s, return true; } -// RepetitionWalker reports whether the repetition regexp is valid. -// Valid means that the combination of the top-level repetition -// and any inner repetitions does not exceed n copies of the -// innermost thing. -// This rewalks the regexp tree and is called for every repetition, -// so we have to worry about inducing quadratic behavior in the parser. -// We avoid this by only using RepetitionWalker when min or max >= 2. -// In that case the depth of any >= 2 nesting can only get to 9 without -// triggering a parse error, so each subtree can only be rewalked 9 times. -class RepetitionWalker : public Regexp::Walker<int> { - public: - RepetitionWalker() {} - virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); - virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, - int* child_args, int nchild_args); - virtual int ShortVisit(Regexp* re, int parent_arg); - - private: - RepetitionWalker(const RepetitionWalker&) = delete; - RepetitionWalker& operator=(const RepetitionWalker&) = delete; -}; - -int RepetitionWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { - int arg = parent_arg; - if (re->op() == kRegexpRepeat) { - int m = re->max(); - if (m < 0) { - m = re->min(); - } - if (m > 0) { - arg /= m; - } - } - return arg; -} - -int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, - int* child_args, int nchild_args) { - int arg = pre_arg; - for (int i = 0; i < nchild_args; i++) { - if (child_args[i] < arg) { - arg = child_args[i]; - } - } - return arg; -} - -int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) { +// RepetitionWalker reports whether the repetition regexp is valid. +// Valid means that the combination of the top-level repetition +// and any inner repetitions does not exceed n copies of the +// innermost thing. +// This rewalks the regexp tree and is called for every repetition, +// so we have to worry about inducing quadratic behavior in the parser. +// We avoid this by only using RepetitionWalker when min or max >= 2. +// In that case the depth of any >= 2 nesting can only get to 9 without +// triggering a parse error, so each subtree can only be rewalked 9 times. +class RepetitionWalker : public Regexp::Walker<int> { + public: + RepetitionWalker() {} + virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); + virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args); + virtual int ShortVisit(Regexp* re, int parent_arg); + + private: + RepetitionWalker(const RepetitionWalker&) = delete; + RepetitionWalker& operator=(const RepetitionWalker&) = delete; +}; + +int RepetitionWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { + int arg = parent_arg; + if (re->op() == kRegexpRepeat) { + int m = re->max(); + if (m < 0) { + m = re->min(); + } + if (m > 0) { + arg /= m; + } + } + return arg; +} + +int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args) { + int arg = pre_arg; + for (int i = 0; i < nchild_args; i++) { + if (child_args[i] < arg) { + arg = child_args[i]; + } + } + return arg; +} + +int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) { // Should never be called: we use Walk(), not WalkExponential(). #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "RepetitionWalker::ShortVisit called"; + LOG(DFATAL) << "RepetitionWalker::ShortVisit called"; #endif - return 0; -} - + return 0; +} + // Pushes a repetition regexp onto the stack. // A valid argument for the operator must already be on the stack. bool Regexp::ParseState::PushRepetition(int min, int max, @@ -591,14 +591,14 @@ bool Regexp::ParseState::PushRepetition(int min, int max, re->sub()[0] = FinishRegexp(stacktop_); re->simple_ = re->ComputeSimple(); stacktop_ = re; - if (min >= 2 || max >= 2) { - RepetitionWalker w; + if (min >= 2 || max >= 2) { + RepetitionWalker w; if (w.Walk(stacktop_, maximum_repeat_count) == 0) { - status_->set_code(kRegexpRepeatSize); - status_->set_error_arg(s); - return false; - } - } + status_->set_code(kRegexpRepeatSize); + status_->set_error_arg(s); + return false; + } + } return true; } @@ -637,33 +637,33 @@ bool Regexp::ParseState::DoVerticalBar() { Regexp* r1; Regexp* r2; if ((r1 = stacktop_) != NULL && - (r2 = r1->down_) != NULL && + (r2 = r1->down_) != NULL && r2->op() == kVerticalBar) { Regexp* r3; - if ((r3 = r2->down_) != NULL && - (r1->op() == kRegexpAnyChar || r3->op() == kRegexpAnyChar)) { - // AnyChar is above or below the vertical bar. Let it subsume - // the other when the other is Literal, CharClass or AnyChar. - if (r3->op() == kRegexpAnyChar && - (r1->op() == kRegexpLiteral || - r1->op() == kRegexpCharClass || - r1->op() == kRegexpAnyChar)) { - // Discard r1. - stacktop_ = r2; - r1->Decref(); - return true; - } - if (r1->op() == kRegexpAnyChar && - (r3->op() == kRegexpLiteral || - r3->op() == kRegexpCharClass || - r3->op() == kRegexpAnyChar)) { - // Rearrange the stack and discard r3. - r1->down_ = r3->down_; - r2->down_ = r1; - stacktop_ = r2; - r3->Decref(); - return true; + if ((r3 = r2->down_) != NULL && + (r1->op() == kRegexpAnyChar || r3->op() == kRegexpAnyChar)) { + // AnyChar is above or below the vertical bar. Let it subsume + // the other when the other is Literal, CharClass or AnyChar. + if (r3->op() == kRegexpAnyChar && + (r1->op() == kRegexpLiteral || + r1->op() == kRegexpCharClass || + r1->op() == kRegexpAnyChar)) { + // Discard r1. + stacktop_ = r2; + r1->Decref(); + return true; } + if (r1->op() == kRegexpAnyChar && + (r3->op() == kRegexpLiteral || + r3->op() == kRegexpCharClass || + r3->op() == kRegexpAnyChar)) { + // Rearrange the stack and discard r3. + r1->down_ = r3->down_; + r2->down_ = r1; + stacktop_ = r2; + r3->Decref(); + return true; + } } // Swap r1 below vertical bar (r2). r1->down_ = r2->down_; @@ -1083,14 +1083,14 @@ void FactorAlternationImpl::Round1(Regexp** sub, int nsub, void FactorAlternationImpl::Round2(Regexp** sub, int nsub, Regexp::ParseFlags flags, std::vector<Splice>* splices) { - // Round 2: Factor out common simple prefixes, - // just the first piece of each concatenation. - // This will be good enough a lot of the time. - // - // Complex subexpressions (e.g. involving quantifiers) - // are not safe to factor because that collapses their - // distinct paths through the automaton, which affects - // correctness in some cases. + // Round 2: Factor out common simple prefixes, + // just the first piece of each concatenation. + // This will be good enough a lot of the time. + // + // Complex subexpressions (e.g. involving quantifiers) + // are not safe to factor because that collapses their + // distinct paths through the automaton, which affects + // correctness in some cases. int start = 0; Regexp* first = NULL; for (int i = 0; i <= nsub; i++) { @@ -1100,23 +1100,23 @@ void FactorAlternationImpl::Round2(Regexp** sub, int nsub, if (i < nsub) { first_i = Regexp::LeadingRegexp(sub[i]); if (first != NULL && - // first must be an empty-width op - // OR a char class, any char or any byte - // OR a fixed repeat of a literal, char class, any char or any byte. - (first->op() == kRegexpBeginLine || - first->op() == kRegexpEndLine || - first->op() == kRegexpWordBoundary || - first->op() == kRegexpNoWordBoundary || - first->op() == kRegexpBeginText || - first->op() == kRegexpEndText || - first->op() == kRegexpCharClass || - first->op() == kRegexpAnyChar || - first->op() == kRegexpAnyByte || - (first->op() == kRegexpRepeat && - first->min() == first->max() && - (first->sub()[0]->op() == kRegexpLiteral || - first->sub()[0]->op() == kRegexpCharClass || - first->sub()[0]->op() == kRegexpAnyChar || + // first must be an empty-width op + // OR a char class, any char or any byte + // OR a fixed repeat of a literal, char class, any char or any byte. + (first->op() == kRegexpBeginLine || + first->op() == kRegexpEndLine || + first->op() == kRegexpWordBoundary || + first->op() == kRegexpNoWordBoundary || + first->op() == kRegexpBeginText || + first->op() == kRegexpEndText || + first->op() == kRegexpCharClass || + first->op() == kRegexpAnyChar || + first->op() == kRegexpAnyByte || + (first->op() == kRegexpRepeat && + first->min() == first->max() && + (first->sub()[0]->op() == kRegexpLiteral || + first->sub()[0]->op() == kRegexpCharClass || + first->sub()[0]->op() == kRegexpAnyChar || first->sub()[0]->op() == kRegexpAnyByte))) && Regexp::Equal(first, first_i)) continue; @@ -1312,7 +1312,7 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { if (r >= 0) { re1->op_ = kRegexpLiteral; re1->rune_ = r; - re1->parse_flags_ = static_cast<uint16_t>(flags); + re1->parse_flags_ = static_cast<uint16_t>(flags); return true; } @@ -1391,18 +1391,18 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { // Argument order is backwards from usual Google style // but consistent with chartorune. static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { - // fullrune() takes int, not size_t. However, it just looks - // at the leading byte and treats any length >= 4 the same. + // fullrune() takes int, not size_t. However, it just looks + // at the leading byte and treats any length >= 4 the same. if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) { - int n = chartorune(r, sp->data()); - // Some copies of chartorune have a bug that accepts - // encodings of values in (10FFFF, 1FFFFF] as valid. - // Those values break the character class algorithm, - // which assumes Runemax is the largest rune. - if (*r > Runemax) { - n = 1; - *r = Runeerror; - } + int n = chartorune(r, sp->data()); + // Some copies of chartorune have a bug that accepts + // encodings of values in (10FFFF, 1FFFFF] as valid. + // Those values break the character class algorithm, + // which assumes Runemax is the largest rune. + if (*r > Runemax) { + n = 1; + *r = Runeerror; + } if (!(n == 1 && *r == Runeerror)) { // no decoding error sp->remove_prefix(n); return n; @@ -1456,12 +1456,12 @@ static bool ParseEscape(StringPiece* s, Rune* rp, if (s->empty() || (*s)[0] != '\\') { // Should not happen - caller always checks. status->set_code(kRegexpInternalError); - status->set_error_arg(StringPiece()); + status->set_error_arg(StringPiece()); return false; } if (s->size() == 1) { status->set_code(kRegexpTrailingBackslash); - status->set_error_arg(StringPiece()); + status->set_error_arg(StringPiece()); return false; } Rune c, c1; @@ -1492,7 +1492,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, // Single non-zero octal digit is a backreference; not supported. if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7') goto BadEscape; - FALLTHROUGH_INTENDED; + FALLTHROUGH_INTENDED; case '0': // consume up to three octal digits; already have one. code = c - '0'; @@ -1507,8 +1507,8 @@ static bool ParseEscape(StringPiece* s, Rune* rp, } } } - if (code > rune_max) - goto BadEscape; + if (code > rune_max) + goto BadEscape; *rp = code; return true; @@ -1582,7 +1582,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, // in Perl, \b means word-boundary but [\b] // means backspace. We don't support that: // if you want a backspace embed a literal - // backspace character or use \x08. + // backspace character or use \x08. // // case 'b': // *rp = '\b'; @@ -1594,7 +1594,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp, BadEscape: // Unrecognized escape sequence. status->set_code(kRegexpBadEscape); - status->set_error_arg( + status->set_error_arg( StringPiece(begin, static_cast<size_t>(s->data() - begin))); return false; } @@ -1623,8 +1623,8 @@ void CharClassBuilder::AddRangeFlags( } // Look for a group with the given name. -static const UGroup* LookupGroup(const StringPiece& name, - const UGroup *groups, int ngroups) { +static const UGroup* LookupGroup(const StringPiece& name, + const UGroup *groups, int ngroups) { // Simple name lookup. for (int i = 0; i < ngroups; i++) if (StringPiece(groups[i].name) == name) @@ -1641,12 +1641,12 @@ static const UGroup* LookupPerlGroup(const StringPiece& name) { return LookupGroup(name, perl_groups, num_perl_groups); } -#if !defined(RE2_USE_ICU) -// Fake UGroup containing all Runes -static URange16 any16[] = { { 0, 65535 } }; -static URange32 any32[] = { { 65536, Runemax } }; -static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; - +#if !defined(RE2_USE_ICU) +// Fake UGroup containing all Runes +static URange16 any16[] = { { 0, 65535 } }; +static URange32 any32[] = { { 65536, Runemax } }; +static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; + // Look for a Unicode group with the given name (e.g., "Han") static const UGroup* LookupUnicodeGroup(const StringPiece& name) { // Special case: "Any" means any. @@ -1654,11 +1654,11 @@ static const UGroup* LookupUnicodeGroup(const StringPiece& name) { return &anygroup; return LookupGroup(name, unicode_groups, num_unicode_groups); } -#endif +#endif // Add a UGroup or its negation to the character class. -static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, - Regexp::ParseFlags parse_flags) { +static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, + Regexp::ParseFlags parse_flags) { if (sign == +1) { for (int i = 0; i < g->nr16; i++) { cc->AddRangeFlags(g->r16[i].lo, g->r16[i].hi, parse_flags); @@ -1675,13 +1675,13 @@ static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, // to what's already missing. Too hard, so do in two steps. CharClassBuilder ccb1; AddUGroup(&ccb1, g, +1, parse_flags); - // If the flags say to take out \n, put it in, so that negating will take it out. - // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags. - bool cutnl = !(parse_flags & Regexp::ClassNL) || - (parse_flags & Regexp::NeverNL); - if (cutnl) { - ccb1.AddRange('\n', '\n'); - } + // If the flags say to take out \n, put it in, so that negating will take it out. + // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags. + bool cutnl = !(parse_flags & Regexp::ClassNL) || + (parse_flags & Regexp::NeverNL); + if (cutnl) { + ccb1.AddRange('\n', '\n'); + } ccb1.Negate(); cc->AddCharClass(&ccb1); return; @@ -1746,7 +1746,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, // Committed to parse. Results: int sign = +1; // -1 = negated char class if (c == 'P') - sign = -sign; + sign = -sign; StringPiece seq = *s; // \p{Han} or \pL StringPiece name; // Han or L s->remove_prefix(2); // '\\', 'p' @@ -1759,8 +1759,8 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, name = StringPiece(p, static_cast<size_t>(s->data() - p)); } else { // Name is in braces. Look for closing } - size_t end = s->find('}', 0); - if (end == StringPiece::npos) { + size_t end = s->find('}', 0); + if (end == StringPiece::npos) { if (!IsValidUTF8(seq, status)) return kParseError; status->set_code(kRegexpBadCharRange); @@ -1780,9 +1780,9 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, sign = -sign; name.remove_prefix(1); // '^' } - -#if !defined(RE2_USE_ICU) - // Look up the group in the RE2 Unicode data. + +#if !defined(RE2_USE_ICU) + // Look up the group in the RE2 Unicode data. const UGroup *g = LookupUnicodeGroup(name); if (g == NULL) { status->set_code(kRegexpBadCharRange); @@ -1791,30 +1791,30 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, } AddUGroup(cc, g, sign, parse_flags); -#else - // Look up the group in the ICU Unicode data. Because ICU provides full - // Unicode properties support, this could be more than a lookup by name. - ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8( +#else + // Look up the group in the ICU Unicode data. Because ICU provides full + // Unicode properties support, this could be more than a lookup by name. + ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8( std::string("\\p{") + std::string(name) + std::string("}")); - UErrorCode uerr = U_ZERO_ERROR; - ::icu::UnicodeSet uset(ustr, uerr); - if (U_FAILURE(uerr)) { - status->set_code(kRegexpBadCharRange); - status->set_error_arg(seq); - return kParseError; - } - - // Convert the UnicodeSet to a URange32 and UGroup that we can add. - int nr = uset.getRangeCount(); + UErrorCode uerr = U_ZERO_ERROR; + ::icu::UnicodeSet uset(ustr, uerr); + if (U_FAILURE(uerr)) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(seq); + return kParseError; + } + + // Convert the UnicodeSet to a URange32 and UGroup that we can add. + int nr = uset.getRangeCount(); PODArray<URange32> r(nr); - for (int i = 0; i < nr; i++) { - r[i].lo = uset.getRangeStart(i); - r[i].hi = uset.getRangeEnd(i); - } + for (int i = 0; i < nr; i++) { + r[i].lo = uset.getRangeStart(i); + r[i].hi = uset.getRangeEnd(i); + } UGroup g = {"", +1, 0, 0, r.data(), nr}; - AddUGroup(cc, &g, sign, parse_flags); -#endif - + AddUGroup(cc, &g, sign, parse_flags); +#endif + return kParseOk; } @@ -1841,7 +1841,7 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, // Got it. Check that it's valid. q += 2; - StringPiece name(p, static_cast<size_t>(q - p)); + StringPiece name(p, static_cast<size_t>(q - p)); const UGroup *g = LookupPosixGroup(name); if (g == NULL) { @@ -1895,8 +1895,8 @@ bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr, return false; if (rr->hi < rr->lo) { status->set_code(kRegexpBadCharRange); - status->set_error_arg( - StringPiece(os.data(), static_cast<size_t>(s->data() - os.data()))); + status->set_error_arg( + StringPiece(os.data(), static_cast<size_t>(s->data() - os.data()))); return false; } } else { @@ -1915,7 +1915,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s, if (s->empty() || (*s)[0] != '[') { // Caller checked this. status->set_code(kRegexpInternalError); - status->set_error_arg(StringPiece()); + status->set_error_arg(StringPiece()); return false; } bool negated = false; @@ -2083,8 +2083,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { // so that's the one we implement. One is enough. if (t.size() > 2 && t[0] == 'P' && t[1] == '<') { // Pull out name. - size_t end = t.find('>', 2); - if (end == StringPiece::npos) { + size_t end = t.find('>', 2); + if (end == StringPiece::npos) { if (!IsValidUTF8(*s, status_)) return false; status_->set_code(kRegexpBadNamedCapture); @@ -2192,7 +2192,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { BadPerlOp: status_->set_code(kRegexpBadPerlOp); - status_->set_error_arg( + status_->set_error_arg( StringPiece(s->data(), static_cast<size_t>(t.data() - s->data()))); return false; } @@ -2205,7 +2205,7 @@ void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) { char buf[UTFmax]; utf->clear(); - for (size_t i = 0; i < latin1.size(); i++) { + for (size_t i = 0; i < latin1.size(); i++) { Rune r = latin1[i] & 0xFF; int n = runetochar(buf, &r); utf->append(buf, n); @@ -2246,9 +2246,9 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, return ps.DoFinish(); } - StringPiece lastunary = StringPiece(); + StringPiece lastunary = StringPiece(); while (!t.empty()) { - StringPiece isunary = StringPiece(); + StringPiece isunary = StringPiece(); switch (t[0]) { default: { Rune r; @@ -2267,13 +2267,13 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, return NULL; break; } - if (ps.flags() & NeverCapture) { - if (!ps.DoLeftParenNoCapture()) - return NULL; - } else { - if (!ps.DoLeftParen(StringPiece())) - return NULL; - } + if (ps.flags() & NeverCapture) { + if (!ps.DoLeftParenNoCapture()) + return NULL; + } else { + if (!ps.DoLeftParen(StringPiece())) + return NULL; + } t.remove_prefix(1); // '(' break; @@ -2340,14 +2340,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, // a** is a syntax error, not a double-star. // (and a++ means something else entirely, which we don't support!) status->set_code(kRegexpRepeatOp); - status->set_error_arg(StringPiece( + status->set_error_arg(StringPiece( lastunary.data(), static_cast<size_t>(t.data() - lastunary.data()))); return NULL; } } - opstr = StringPiece(opstr.data(), - static_cast<size_t>(t.data() - opstr.data())); + opstr = StringPiece(opstr.data(), + static_cast<size_t>(t.data() - opstr.data())); if (!ps.PushRepeatOp(op, opstr, nongreedy)) return NULL; isunary = opstr; @@ -2373,14 +2373,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, if (!lastunary.empty()) { // Not allowed to stack repetition operators. status->set_code(kRegexpRepeatOp); - status->set_error_arg(StringPiece( + status->set_error_arg(StringPiece( lastunary.data(), static_cast<size_t>(t.data() - lastunary.data()))); return NULL; } } - opstr = StringPiece(opstr.data(), - static_cast<size_t>(t.data() - opstr.data())); + opstr = StringPiece(opstr.data(), + static_cast<size_t>(t.data() - opstr.data())); if (!ps.PushRepetition(lo, hi, opstr, nongreedy)) return NULL; isunary = opstr; diff --git a/contrib/libs/re2/re2/prefilter.cc b/contrib/libs/re2/re2/prefilter.cc index a47b3120fb..0a13823200 100644 --- a/contrib/libs/re2/re2/prefilter.cc +++ b/contrib/libs/re2/re2/prefilter.cc @@ -3,23 +3,23 @@ // license that can be found in the LICENSE file. #include "re2/prefilter.h" - -#include <stddef.h> -#include <stdint.h> -#include <string> -#include <vector> - -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "util/utf.h" -#include "re2/re2.h" -#include "re2/unicode_casefold.h" + +#include <stddef.h> +#include <stdint.h> +#include <string> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" +#include "re2/re2.h" +#include "re2/unicode_casefold.h" #include "re2/walker-inl.h" namespace re2 { -static const bool ExtraDebug = false; +static const bool ExtraDebug = false; typedef std::set<std::string>::iterator SSIter; typedef std::set<std::string>::const_iterator ConstSSIter; @@ -29,13 +29,13 @@ Prefilter::Prefilter(Op op) { op_ = op; subs_ = NULL; if (op_ == AND || op_ == OR) - subs_ = new std::vector<Prefilter*>; + subs_ = new std::vector<Prefilter*>; } // Destroys a Prefilter. Prefilter::~Prefilter() { if (subs_) { - for (size_t i = 0; i < subs_->size(); i++) + for (size_t i = 0; i < subs_->size(); i++) delete (*subs_)[i]; delete subs_; subs_ = NULL; @@ -49,7 +49,7 @@ Prefilter* Prefilter::Simplify() { } // Nothing left in the AND/OR. - if (subs_->empty()) { + if (subs_->empty()) { if (op_ == AND) op_ = ALL; // AND of nothing is true else @@ -104,7 +104,7 @@ Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { // If a and b match op, merge their contents. if (a->op() == op && b->op() == op) { - for (size_t i = 0; i < b->subs()->size(); i++) { + for (size_t i = 0; i < b->subs()->size(); i++) { Prefilter* bb = (*b->subs())[i]; a->subs()->push_back(bb); } @@ -172,28 +172,28 @@ Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) { return or_prefilter; } -static Rune ToLowerRune(Rune r) { - if (r < Runeself) { - if ('A' <= r && r <= 'Z') - r += 'a' - 'A'; - return r; - } - - const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); - if (f == NULL || r < f->lo) - return r; - return ApplyFold(f, r); -} - -static Rune ToLowerRuneLatin1(Rune r) { - if ('A' <= r && r <= 'Z') - r += 'a' - 'A'; - return r; -} - +static Rune ToLowerRune(Rune r) { + if (r < Runeself) { + if ('A' <= r && r <= 'Z') + r += 'a' - 'A'; + return r; + } + + const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); + if (f == NULL || r < f->lo) + return r; + return ApplyFold(f, r); +} + +static Rune ToLowerRuneLatin1(Rune r) { + if ('A' <= r && r <= 'Z') + r += 'a' - 'A'; + return r; +} + Prefilter* Prefilter::FromString(const std::string& str) { Prefilter* m = new Prefilter(Prefilter::ATOM); - m->atom_ = str; + m->atom_ = str; return m; } @@ -215,9 +215,9 @@ class Prefilter::Info { static Info* EmptyString(); static Info* NoMatch(); static Info* AnyCharOrAnyByte(); - static Info* CClass(CharClass* cc, bool latin1); + static Info* CClass(CharClass* cc, bool latin1); static Info* Literal(Rune r); - static Info* LiteralLatin1(Rune r); + static Info* LiteralLatin1(Rune r); static Info* AnyMatch(); // Format Info as a string. @@ -279,7 +279,7 @@ std::string Prefilter::Info::ToString() { } return s; } - + if (match_) return match_->DebugString(); @@ -395,26 +395,26 @@ static std::string RuneToString(Rune r) { } static std::string RuneToStringLatin1(Rune r) { - char c = r & 0xff; + char c = r & 0xff; return std::string(&c, 1); -} - +} + // Constructs Info for literal rune. Prefilter::Info* Prefilter::Info::Literal(Rune r) { Info* info = new Info(); - info->exact_.insert(RuneToString(ToLowerRune(r))); - info->is_exact_ = true; - return info; -} - -// Constructs Info for literal rune for Latin1 encoded string. -Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { - Info* info = new Info(); - info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); + info->exact_.insert(RuneToString(ToLowerRune(r))); info->is_exact_ = true; return info; } +// Constructs Info for literal rune for Latin1 encoded string. +Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { + Info* info = new Info(); + info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); + info->is_exact_ = true; + return info; +} + // Constructs Info for dot (any character) or \C (any byte). Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() { Prefilter::Info* info = new Prefilter::Info(); @@ -449,12 +449,12 @@ Prefilter::Info* Prefilter::Info::EmptyString() { // Constructs Prefilter::Info for a character class. typedef CharClass::iterator CCIter; -Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, - bool latin1) { - if (ExtraDebug) { - LOG(ERROR) << "CharClassInfo:"; +Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, + bool latin1) { + if (ExtraDebug) { + LOG(ERROR) << "CharClassInfo:"; for (CCIter i = cc->begin(); i != cc->end(); ++i) - LOG(ERROR) << " " << i->lo << "-" << i->hi; + LOG(ERROR) << " " << i->lo << "-" << i->hi; } // If the class is too large, it's okay to overestimate. @@ -463,26 +463,26 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, Prefilter::Info *a = new Prefilter::Info(); for (CCIter i = cc->begin(); i != cc->end(); ++i) - for (Rune r = i->lo; r <= i->hi; r++) { - if (latin1) { - a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); - } else { - a->exact_.insert(RuneToString(ToLowerRune(r))); - } - } - - + for (Rune r = i->lo; r <= i->hi; r++) { + if (latin1) { + a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); + } else { + a->exact_.insert(RuneToString(ToLowerRune(r))); + } + } + + a->is_exact_ = true; - if (ExtraDebug) - LOG(ERROR) << " = " << a->ToString(); + if (ExtraDebug) + LOG(ERROR) << " = " << a->ToString(); return a; } class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { public: - Walker(bool latin1) : latin1_(latin1) {} + Walker(bool latin1) : latin1_(latin1) {} virtual Info* PostVisit( Regexp* re, Info* parent_arg, @@ -493,20 +493,20 @@ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { Regexp* re, Info* parent_arg); - bool latin1() { return latin1_; } + bool latin1() { return latin1_; } private: - bool latin1_; - - Walker(const Walker&) = delete; - Walker& operator=(const Walker&) = delete; + bool latin1_; + + Walker(const Walker&) = delete; + Walker& operator=(const Walker&) = delete; }; Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { - if (ExtraDebug) - LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString(); - - bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; - Prefilter::Info::Walker w(latin1); + if (ExtraDebug) + LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString(); + + bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0; + Prefilter::Info::Walker w(latin1); Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); if (w.stopped_early()) { @@ -552,12 +552,12 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; case kRegexpLiteral: - if (latin1()) { - info = LiteralLatin1(re->rune()); - } - else { - info = Literal(re->rune()); - } + if (latin1()) { + info = LiteralLatin1(re->rune()); + } + else { + info = Literal(re->rune()); + } break; case kRegexpLiteralString: @@ -565,17 +565,17 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( info = NoMatch(); break; } - if (latin1()) { - info = LiteralLatin1(re->runes()[0]); - for (int i = 1; i < re->nrunes(); i++) { - info = Concat(info, LiteralLatin1(re->runes()[i])); - } - } else { - info = Literal(re->runes()[0]); - for (int i = 1; i < re->nrunes(); i++) { - info = Concat(info, Literal(re->runes()[i])); - } - } + if (latin1()) { + info = LiteralLatin1(re->runes()[0]); + for (int i = 1; i < re->nrunes(); i++) { + info = Concat(info, LiteralLatin1(re->runes()[i])); + } + } else { + info = Literal(re->runes()[0]); + for (int i = 1; i < re->nrunes(); i++) { + info = Concat(info, Literal(re->runes()[i])); + } + } break; case kRegexpConcat: { @@ -626,7 +626,7 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; case kRegexpCharClass: - info = CClass(re->cc(), latin1()); + info = CClass(re->cc(), latin1()); break; case kRegexpCapture: @@ -635,9 +635,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; } - if (ExtraDebug) - LOG(ERROR) << "BuildInfo " << re->ToString() - << ": " << (info ? info->ToString() : ""); + if (ExtraDebug) + LOG(ERROR) << "BuildInfo " << re->ToString() + << ": " << (info ? info->ToString() : ""); return info; } @@ -674,21 +674,21 @@ std::string Prefilter::DebugString() const { return ""; case AND: { std::string s = ""; - for (size_t i = 0; i < subs_->size(); i++) { + for (size_t i = 0; i < subs_->size(); i++) { if (i > 0) s += " "; - Prefilter* sub = (*subs_)[i]; - s += sub ? sub->DebugString() : "<nil>"; + Prefilter* sub = (*subs_)[i]; + s += sub ? sub->DebugString() : "<nil>"; } return s; } case OR: { std::string s = "("; - for (size_t i = 0; i < subs_->size(); i++) { + for (size_t i = 0; i < subs_->size(); i++) { if (i > 0) s += "|"; - Prefilter* sub = (*subs_)[i]; - s += sub ? sub->DebugString() : "<nil>"; + Prefilter* sub = (*subs_)[i]; + s += sub ? sub->DebugString() : "<nil>"; } s += ")"; return s; diff --git a/contrib/libs/re2/re2/prefilter.h b/contrib/libs/re2/re2/prefilter.h index 4fedeb4a7c..8390aa8892 100644 --- a/contrib/libs/re2/re2/prefilter.h +++ b/contrib/libs/re2/re2/prefilter.h @@ -2,19 +2,19 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_PREFILTER_H_ -#define RE2_PREFILTER_H_ - +#ifndef RE2_PREFILTER_H_ +#define RE2_PREFILTER_H_ + // Prefilter is the class used to extract string guards from regexps. // Rather than using Prefilter class directly, use FilteredRE2. // See filtered_re2.h -#include <set> -#include <string> -#include <vector> +#include <set> +#include <string> +#include <vector> -#include "util/util.h" -#include "util/logging.h" +#include "util/util.h" +#include "util/logging.h" namespace re2 { @@ -42,14 +42,14 @@ class Prefilter { int unique_id() const { return unique_id_; } // The children of the Prefilter node. - std::vector<Prefilter*>* subs() { - DCHECK(op_ == AND || op_ == OR); + std::vector<Prefilter*>* subs() { + DCHECK(op_ == AND || op_ == OR); return subs_; } // Set the children vector. Prefilter takes ownership of subs and // subs_ will be deleted when Prefilter is deleted. - void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; } + void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; } // Given a RE2, return a Prefilter. The caller takes ownership of // the Prefilter and should deallocate it. Returns NULL if Prefilter @@ -87,7 +87,7 @@ class Prefilter { Op op_; // Sub-matches for AND or OR Prefilter. - std::vector<Prefilter*>* subs_; + std::vector<Prefilter*>* subs_; // Actual string to match in leaf node. std::string atom_; @@ -99,8 +99,8 @@ class Prefilter { // and -1 for duplicate nodes. int unique_id_; - Prefilter(const Prefilter&) = delete; - Prefilter& operator=(const Prefilter&) = delete; + Prefilter(const Prefilter&) = delete; + Prefilter& operator=(const Prefilter&) = delete; }; } // namespace re2 diff --git a/contrib/libs/re2/re2/prefilter_tree.cc b/contrib/libs/re2/re2/prefilter_tree.cc index fdf4e083c9..688b2751aa 100644 --- a/contrib/libs/re2/re2/prefilter_tree.cc +++ b/contrib/libs/re2/re2/prefilter_tree.cc @@ -2,61 +2,61 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "re2/prefilter_tree.h" - -#include <stddef.h> -#include <algorithm> -#include <map> -#include <memory> -#include <set> -#include <string> -#include <utility> -#include <vector> - -#include "util/util.h" -#include "util/logging.h" +#include "re2/prefilter_tree.h" + +#include <stddef.h> +#include <algorithm> +#include <map> +#include <memory> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" #include "util/strutil.h" #include "re2/prefilter.h" -#include "re2/re2.h" +#include "re2/re2.h" namespace re2 { -static const bool ExtraDebug = false; - +static const bool ExtraDebug = false; + PrefilterTree::PrefilterTree() - : compiled_(false), - min_atom_len_(3) { -} - -PrefilterTree::PrefilterTree(int min_atom_len) - : compiled_(false), - min_atom_len_(min_atom_len) { + : compiled_(false), + min_atom_len_(3) { } +PrefilterTree::PrefilterTree(int min_atom_len) + : compiled_(false), + min_atom_len_(min_atom_len) { +} + PrefilterTree::~PrefilterTree() { - for (size_t i = 0; i < prefilter_vec_.size(); i++) + for (size_t i = 0; i < prefilter_vec_.size(); i++) delete prefilter_vec_[i]; - for (size_t i = 0; i < entries_.size(); i++) + for (size_t i = 0; i < entries_.size(); i++) delete entries_[i].parents; } -void PrefilterTree::Add(Prefilter* prefilter) { +void PrefilterTree::Add(Prefilter* prefilter) { if (compiled_) { - LOG(DFATAL) << "Add called after Compile."; + LOG(DFATAL) << "Add called after Compile."; return; } - if (prefilter != NULL && !KeepNode(prefilter)) { - delete prefilter; - prefilter = NULL; + if (prefilter != NULL && !KeepNode(prefilter)) { + delete prefilter; + prefilter = NULL; } - prefilter_vec_.push_back(prefilter); + prefilter_vec_.push_back(prefilter); } void PrefilterTree::Compile(std::vector<std::string>* atom_vec) { if (compiled_) { - LOG(DFATAL) << "Compile called already."; + LOG(DFATAL) << "Compile called already."; return; } @@ -77,31 +77,31 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) { // no longer necessary for their parent to trigger; that is, we do // not miss out on any regexps triggering by getting rid of a // prefilter node. - for (size_t i = 0; i < entries_.size(); i++) { - StdIntMap* parents = entries_[i].parents; + for (size_t i = 0; i < entries_.size(); i++) { + StdIntMap* parents = entries_[i].parents; if (parents->size() > 8) { // This one triggers too many things. If all the parents are AND // nodes and have other things guarding them, then get rid of // this trigger. TODO(vsri): Adjust the threshold appropriately, // make it a function of total number of nodes? bool have_other_guard = true; - for (StdIntMap::iterator it = parents->begin(); - it != parents->end(); ++it) { + for (StdIntMap::iterator it = parents->begin(); + it != parents->end(); ++it) { have_other_guard = have_other_guard && - (entries_[it->first].propagate_up_at_count > 1); - } + (entries_[it->first].propagate_up_at_count > 1); + } if (have_other_guard) { - for (StdIntMap::iterator it = parents->begin(); + for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) - entries_[it->first].propagate_up_at_count -= 1; + entries_[it->first].propagate_up_at_count -= 1; parents->clear(); // Forget the parents } } } - if (ExtraDebug) + if (ExtraDebug) PrintDebugInfo(&nodes); } @@ -119,7 +119,7 @@ std::string PrefilterTree::NodeString(Prefilter* node) const { if (node->op() == Prefilter::ATOM) { s += node->atom(); } else { - for (size_t i = 0; i < node->subs()->size(); i++) { + for (size_t i = 0; i < node->subs()->size(); i++) { if (i > 0) s += ','; s += StringPrintf("%d", (*node->subs())[i]->unique_id()); @@ -128,56 +128,56 @@ std::string PrefilterTree::NodeString(Prefilter* node) const { return s; } -bool PrefilterTree::KeepNode(Prefilter* node) const { - if (node == NULL) - return false; - - switch (node->op()) { - default: - LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op(); - return false; - - case Prefilter::ALL: +bool PrefilterTree::KeepNode(Prefilter* node) const { + if (node == NULL) + return false; + + switch (node->op()) { + default: + LOG(DFATAL) << "Unexpected op in KeepNode: " << node->op(); + return false; + + case Prefilter::ALL: case Prefilter::NONE: - return false; - - case Prefilter::ATOM: - return node->atom().size() >= static_cast<size_t>(min_atom_len_); - - case Prefilter::AND: { - int j = 0; - std::vector<Prefilter*>* subs = node->subs(); - for (size_t i = 0; i < subs->size(); i++) - if (KeepNode((*subs)[i])) - (*subs)[j++] = (*subs)[i]; - else - delete (*subs)[i]; - - subs->resize(j); - return j > 0; - } - - case Prefilter::OR: - for (size_t i = 0; i < node->subs()->size(); i++) - if (!KeepNode((*node->subs())[i])) - return false; - return true; - } -} - + return false; + + case Prefilter::ATOM: + return node->atom().size() >= static_cast<size_t>(min_atom_len_); + + case Prefilter::AND: { + int j = 0; + std::vector<Prefilter*>* subs = node->subs(); + for (size_t i = 0; i < subs->size(); i++) + if (KeepNode((*subs)[i])) + (*subs)[j++] = (*subs)[i]; + else + delete (*subs)[i]; + + subs->resize(j); + return j > 0; + } + + case Prefilter::OR: + for (size_t i = 0; i < node->subs()->size(); i++) + if (!KeepNode((*node->subs())[i])) + return false; + return true; + } +} + void PrefilterTree::AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec) { atom_vec->clear(); // Build vector of all filter nodes, sorted topologically // from top to bottom in v. - std::vector<Prefilter*> v; + std::vector<Prefilter*> v; // Add the top level nodes of each regexp prefilter. - for (size_t i = 0; i < prefilter_vec_.size(); i++) { + for (size_t i = 0; i < prefilter_vec_.size(); i++) { Prefilter* f = prefilter_vec_[i]; if (f == NULL) - unfiltered_.push_back(static_cast<int>(i)); + unfiltered_.push_back(static_cast<int>(i)); // We push NULL also on to v, so that we maintain the // mapping of index==regexpid for level=0 prefilter nodes. @@ -185,20 +185,20 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, } // Now add all the descendant nodes. - for (size_t i = 0; i < v.size(); i++) { + for (size_t i = 0; i < v.size(); i++) { Prefilter* f = v[i]; if (f == NULL) continue; if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) { - const std::vector<Prefilter*>& subs = *f->subs(); - for (size_t j = 0; j < subs.size(); j++) + const std::vector<Prefilter*>& subs = *f->subs(); + for (size_t j = 0; j < subs.size(); j++) v.push_back(subs[j]); } } // Identify unique nodes. int unique_id = 0; - for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { + for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { Prefilter *node = v[i]; if (node == NULL) continue; @@ -219,8 +219,8 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, } entries_.resize(nodes->size()); - // Create parent StdIntMap for the entries. - for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { + // Create parent StdIntMap for the entries. + for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { Prefilter* prefilter = v[i]; if (prefilter == NULL) continue; @@ -229,11 +229,11 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, continue; Entry* entry = &entries_[prefilter->unique_id()]; - entry->parents = new StdIntMap(); + entry->parents = new StdIntMap(); } // Fill the entries. - for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { + for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) { Prefilter* prefilter = v[i]; if (prefilter == NULL) continue; @@ -255,8 +255,8 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, case Prefilter::OR: case Prefilter::AND: { - std::set<int> uniq_child; - for (size_t j = 0; j < prefilter->subs()->size(); j++) { + std::set<int> uniq_child; + for (size_t j = 0; j < prefilter->subs()->size(); j++) { Prefilter* child = (*prefilter->subs())[j]; Prefilter* canonical = CanonicalNode(nodes, child); if (canonical == NULL) { @@ -264,17 +264,17 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, return; } int child_id = canonical->unique_id(); - uniq_child.insert(child_id); + uniq_child.insert(child_id); // To the child, we want to add to parent indices. Entry* child_entry = &entries_[child_id]; - if (child_entry->parents->find(prefilter->unique_id()) == - child_entry->parents->end()) { - (*child_entry->parents)[prefilter->unique_id()] = 1; - } + if (child_entry->parents->find(prefilter->unique_id()) == + child_entry->parents->end()) { + (*child_entry->parents)[prefilter->unique_id()] = 1; + } } - entry->propagate_up_at_count = prefilter->op() == Prefilter::AND - ? static_cast<int>(uniq_child.size()) - : 1; + entry->propagate_up_at_count = prefilter->op() == Prefilter::AND + ? static_cast<int>(uniq_child.size()) + : 1; break; } @@ -282,20 +282,20 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, } // For top level nodes, populate regexp id. - for (size_t i = 0; i < prefilter_vec_.size(); i++) { + for (size_t i = 0; i < prefilter_vec_.size(); i++) { if (prefilter_vec_[i] == NULL) continue; int id = CanonicalNode(nodes, prefilter_vec_[i])->unique_id(); DCHECK_LE(0, id); Entry* entry = &entries_[id]; - entry->regexps.push_back(static_cast<int>(i)); + entry->regexps.push_back(static_cast<int>(i)); } } // Functions for triggering during search. void PrefilterTree::RegexpsGivenStrings( - const std::vector<int>& matched_atoms, - std::vector<int>* regexps) const { + const std::vector<int>& matched_atoms, + std::vector<int>* regexps) const { regexps->clear(); if (!compiled_) { // Some legacy users of PrefilterTree call Compile() before @@ -304,9 +304,9 @@ void PrefilterTree::RegexpsGivenStrings( if (prefilter_vec_.empty()) return; - LOG(ERROR) << "RegexpsGivenStrings called before Compile."; + LOG(ERROR) << "RegexpsGivenStrings called before Compile."; for (size_t i = 0; i < prefilter_vec_.size(); i++) - regexps->push_back(static_cast<int>(i)); + regexps->push_back(static_cast<int>(i)); } else { IntMap regexps_map(static_cast<int>(prefilter_vec_.size())); std::vector<int> matched_atom_ids; @@ -320,26 +320,26 @@ void PrefilterTree::RegexpsGivenStrings( regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end()); } - std::sort(regexps->begin(), regexps->end()); + std::sort(regexps->begin(), regexps->end()); } -void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, +void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, IntMap* regexps) const { - IntMap count(static_cast<int>(entries_.size())); - IntMap work(static_cast<int>(entries_.size())); - for (size_t i = 0; i < atom_ids.size(); i++) + IntMap count(static_cast<int>(entries_.size())); + IntMap work(static_cast<int>(entries_.size())); + for (size_t i = 0; i < atom_ids.size(); i++) work.set(atom_ids[i], 1); for (IntMap::iterator it = work.begin(); it != work.end(); ++it) { const Entry& entry = entries_[it->index()]; // Record regexps triggered. - for (size_t i = 0; i < entry.regexps.size(); i++) + for (size_t i = 0; i < entry.regexps.size(); i++) regexps->set(entry.regexps[i], 1); int c; // Pass trigger up to parents. - for (StdIntMap::iterator it = entry.parents->begin(); + for (StdIntMap::iterator it = entry.parents->begin(); it != entry.parents->end(); ++it) { - int j = it->first; + int j = it->first; const Entry& parent = entries_[j]; // Delay until all the children have succeeded. if (parent.propagate_up_at_count > 1) { @@ -361,26 +361,26 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids, // Debugging help. void PrefilterTree::PrintPrefilter(int regexpid) { - LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]); + LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]); } void PrefilterTree::PrintDebugInfo(NodeMap* nodes) { - LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size(); - LOG(ERROR) << "#Unique Nodes: " << entries_.size(); + LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size(); + LOG(ERROR) << "#Unique Nodes: " << entries_.size(); for (size_t i = 0; i < entries_.size(); i++) { - StdIntMap* parents = entries_[i].parents; - const std::vector<int>& regexps = entries_[i].regexps; - LOG(ERROR) << "EntryId: " << i - << " N: " << parents->size() << " R: " << regexps.size(); - for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) - LOG(ERROR) << it->first; + StdIntMap* parents = entries_[i].parents; + const std::vector<int>& regexps = entries_[i].regexps; + LOG(ERROR) << "EntryId: " << i + << " N: " << parents->size() << " R: " << regexps.size(); + for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) + LOG(ERROR) << it->first; } - LOG(ERROR) << "Map:"; + LOG(ERROR) << "Map:"; for (NodeMap::const_iterator iter = nodes->begin(); iter != nodes->end(); ++iter) - LOG(ERROR) << "NodeId: " << (*iter).second->unique_id() - << " Str: " << (*iter).first; + LOG(ERROR) << "NodeId: " << (*iter).second->unique_id() + << " Str: " << (*iter).first; } std::string PrefilterTree::DebugNodeString(Prefilter* node) const { @@ -392,7 +392,7 @@ std::string PrefilterTree::DebugNodeString(Prefilter* node) const { // Adding the operation disambiguates AND and OR nodes. node_string += node->op() == Prefilter::AND ? "AND" : "OR"; node_string += "("; - for (size_t i = 0; i < node->subs()->size(); i++) { + for (size_t i = 0; i < node->subs()->size(); i++) { if (i > 0) node_string += ','; node_string += StringPrintf("%d", (*node->subs())[i]->unique_id()); diff --git a/contrib/libs/re2/re2/prefilter_tree.h b/contrib/libs/re2/re2/prefilter_tree.h index 5d73074d97..780f34a540 100644 --- a/contrib/libs/re2/re2/prefilter_tree.h +++ b/contrib/libs/re2/re2/prefilter_tree.h @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_PREFILTER_TREE_H_ -#define RE2_PREFILTER_TREE_H_ - +#ifndef RE2_PREFILTER_TREE_H_ +#define RE2_PREFILTER_TREE_H_ + // The PrefilterTree class is used to form an AND-OR tree of strings // that would trigger each regexp. The 'prefilter' of each regexp is // added to PrefilterTree, and then PrefilterTree is used to find all @@ -16,11 +16,11 @@ // atoms) that the user of this class should use to do the string // matching. -#include <map> -#include <string> -#include <vector> - -#include "util/util.h" +#include <map> +#include <string> +#include <vector> + +#include "util/util.h" #include "re2/prefilter.h" #include "re2/sparse_array.h" @@ -29,7 +29,7 @@ namespace re2 { class PrefilterTree { public: PrefilterTree(); - explicit PrefilterTree(int min_atom_len); + explicit PrefilterTree(int min_atom_len); ~PrefilterTree(); // Adds the prefilter for the next regexp. Note that we assume that @@ -50,8 +50,8 @@ class PrefilterTree { // contain all the ids of string atoms that were found to match the // content. The caller can use any string match engine to perform // this function. This function is thread safe. - void RegexpsGivenStrings(const std::vector<int>& matched_atoms, - std::vector<int>* regexps) const; + void RegexpsGivenStrings(const std::vector<int>& matched_atoms, + std::vector<int>* regexps) const; // Print debug prefilter. Also prints unique ids associated with // nodes of the prefilter of the regexp. @@ -77,23 +77,23 @@ class PrefilterTree { // are two different nodes, but they share the atom 'def'. So when // 'def' matches, it triggers two parents, corresponding to the two // different OR nodes. - StdIntMap* parents; + StdIntMap* parents; // When this node is ready to trigger the parent, what are the // regexps that are triggered. - std::vector<int> regexps; + std::vector<int> regexps; }; - // Returns true if the prefilter node should be kept. - bool KeepNode(Prefilter* node) const; - + // Returns true if the prefilter node should be kept. + bool KeepNode(Prefilter* node) const; + // This function assigns unique ids to various parts of the // prefilter, by looking at if these nodes are already in the // PrefilterTree. void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec); // Given the matching atoms, find the regexps to be triggered. - void PropagateMatch(const std::vector<int>& atom_ids, + void PropagateMatch(const std::vector<int>& atom_ids, IntMap* regexps) const; // Returns the prefilter node that has the same NodeString as this @@ -112,28 +112,28 @@ class PrefilterTree { // These are all the nodes formed by Compile. Essentially, there is // one node for each unique atom and each unique AND/OR node. - std::vector<Entry> entries_; + std::vector<Entry> entries_; // indices of regexps that always pass through the filter (since we // found no required literals in these regexps). - std::vector<int> unfiltered_; + std::vector<int> unfiltered_; // vector of Prefilter for all regexps. - std::vector<Prefilter*> prefilter_vec_; + std::vector<Prefilter*> prefilter_vec_; // Atom index in returned strings to entry id mapping. - std::vector<int> atom_index_to_id_; + std::vector<int> atom_index_to_id_; // Has the prefilter tree been compiled. bool compiled_; - // Strings less than this length are not stored as atoms. - const int min_atom_len_; - - PrefilterTree(const PrefilterTree&) = delete; - PrefilterTree& operator=(const PrefilterTree&) = delete; + // Strings less than this length are not stored as atoms. + const int min_atom_len_; + + PrefilterTree(const PrefilterTree&) = delete; + PrefilterTree& operator=(const PrefilterTree&) = delete; }; -} // namespace +} // namespace #endif // RE2_PREFILTER_TREE_H_ diff --git a/contrib/libs/re2/re2/prog.cc b/contrib/libs/re2/re2/prog.cc index a700d35de3..0092562c26 100644 --- a/contrib/libs/re2/re2/prog.cc +++ b/contrib/libs/re2/re2/prog.cc @@ -13,29 +13,29 @@ #include <intrin.h> #endif #endif -#include <stdint.h> -#include <string.h> -#include <algorithm> -#include <memory> -#include <utility> - -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "re2/bitmap256.h" -#include "re2/stringpiece.h" - +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <memory> +#include <utility> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "re2/bitmap256.h" +#include "re2/stringpiece.h" + namespace re2 { // Constructors per Inst opcode -void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) { +void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstAlt); out1_ = out1; } -void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { +void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstByteRange); lo_ = lo & 0xFF; @@ -43,25 +43,25 @@ void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { hint_foldcase_ = foldcase&1; } -void Prog::Inst::InitCapture(int cap, uint32_t out) { +void Prog::Inst::InitCapture(int cap, uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstCapture); cap_ = cap; } -void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) { +void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_out_opcode(out, kInstEmptyWidth); empty_ = empty; } -void Prog::Inst::InitMatch(int32_t id) { +void Prog::Inst::InitMatch(int32_t id) { DCHECK_EQ(out_opcode_, 0); set_opcode(kInstMatch); match_id_ = id; } -void Prog::Inst::InitNop(uint32_t out) { +void Prog::Inst::InitNop(uint32_t out) { DCHECK_EQ(out_opcode_, 0); set_opcode(kInstNop); } @@ -109,7 +109,7 @@ Prog::Prog() : anchor_start_(false), anchor_end_(false), reversed_(false), - did_flatten_(false), + did_flatten_(false), did_onepass_(false), start_(0), start_unanchored_(0), @@ -117,16 +117,16 @@ Prog::Prog() bytemap_range_(0), prefix_foldcase_(false), prefix_size_(0), - list_count_(0), + list_count_(0), bit_state_text_max_size_(0), - dfa_mem_(0), + dfa_mem_(0), dfa_first_(NULL), - dfa_longest_(NULL) { + dfa_longest_(NULL) { } Prog::~Prog() { - DeleteDFA(dfa_longest_); - DeleteDFA(dfa_first_); + DeleteDFA(dfa_longest_); + DeleteDFA(dfa_first_); if (prefix_foldcase_) delete[] prefix_dfa_; } @@ -153,29 +153,29 @@ static std::string ProgToString(Prog* prog, Workq* q) { static std::string FlattenedProgToString(Prog* prog, int start) { std::string s; - for (int id = start; id < prog->size(); id++) { - Prog::Inst* ip = prog->inst(id); - if (ip->last()) + for (int id = start; id < prog->size(); id++) { + Prog::Inst* ip = prog->inst(id); + if (ip->last()) s += StringPrintf("%d. %s\n", id, ip->Dump().c_str()); - else + else s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str()); } - return s; -} + return s; +} std::string Prog::Dump() { - if (did_flatten_) - return FlattenedProgToString(this, start_); - + if (did_flatten_) + return FlattenedProgToString(this, start_); + Workq q(size_); AddToQueue(&q, start_); - return ProgToString(this, &q); + return ProgToString(this, &q); } std::string Prog::DumpUnanchored() { - if (did_flatten_) - return FlattenedProgToString(this, start_unanchored_); - + if (did_flatten_) + return FlattenedProgToString(this, start_unanchored_); + Workq q(size_); AddToQueue(&q, start_unanchored_); return ProgToString(this, &q); @@ -183,17 +183,17 @@ std::string Prog::DumpUnanchored() { std::string Prog::DumpByteMap() { std::string map; - for (int c = 0; c < 256; c++) { - int b = bytemap_[c]; - int lo = c; - while (c < 256-1 && bytemap_[c+1] == b) - c++; - int hi = c; + for (int c = 0; c < 256; c++) { + int b = bytemap_[c]; + int lo = c; + while (c < 256-1 && bytemap_[c+1] == b) + c++; + int hi = c; map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b); - } - return map; -} - + } + return map; +} + // Is ip a guaranteed match at end of text, perhaps after some capturing? static bool IsMatch(Prog* prog, Prog::Inst* ip) { for (;;) { @@ -218,8 +218,8 @@ static bool IsMatch(Prog* prog, Prog::Inst* ip) { return true; } } -} - +} + // Peep-hole optimizer. void Prog::Optimize() { Workq q(size_); @@ -284,7 +284,7 @@ void Prog::Optimize() { } } -uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { +uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { int flags = 0; // ^ and \A @@ -318,320 +318,320 @@ uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { return flags; } -// ByteMapBuilder implements a coloring algorithm. -// -// The first phase is a series of "mark and merge" batches: we mark one or more -// [lo-hi] ranges, then merge them into our internal state. Batching is not for -// performance; rather, it means that the ranges are treated indistinguishably. -// -// Internally, the ranges are represented using a bitmap that stores the splits -// and a vector that stores the colors; both of them are indexed by the ranges' -// last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at -// hi (if not already split), then recolor each range in between. The color map -// (i.e. from the old color to the new color) is maintained for the lifetime of -// the batch and so underpins this somewhat obscure approach to set operations. -// -// The second phase builds the bytemap from our internal state: we recolor each -// range, then store the new color (which is now the byte class) in each of the -// corresponding array elements. Finally, we output the number of byte classes. -class ByteMapBuilder { - public: - ByteMapBuilder() { - // Initial state: the [0-255] range has color 256. - // This will avoid problems during the second phase, - // in which we assign byte classes numbered from 0. - splits_.Set(255); - colors_[255] = 256; - nextcolor_ = 257; - } - - void Mark(int lo, int hi); - void Merge(); - void Build(uint8_t* bytemap, int* bytemap_range); - - private: - int Recolor(int oldcolor); - - Bitmap256 splits_; +// ByteMapBuilder implements a coloring algorithm. +// +// The first phase is a series of "mark and merge" batches: we mark one or more +// [lo-hi] ranges, then merge them into our internal state. Batching is not for +// performance; rather, it means that the ranges are treated indistinguishably. +// +// Internally, the ranges are represented using a bitmap that stores the splits +// and a vector that stores the colors; both of them are indexed by the ranges' +// last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at +// hi (if not already split), then recolor each range in between. The color map +// (i.e. from the old color to the new color) is maintained for the lifetime of +// the batch and so underpins this somewhat obscure approach to set operations. +// +// The second phase builds the bytemap from our internal state: we recolor each +// range, then store the new color (which is now the byte class) in each of the +// corresponding array elements. Finally, we output the number of byte classes. +class ByteMapBuilder { + public: + ByteMapBuilder() { + // Initial state: the [0-255] range has color 256. + // This will avoid problems during the second phase, + // in which we assign byte classes numbered from 0. + splits_.Set(255); + colors_[255] = 256; + nextcolor_ = 257; + } + + void Mark(int lo, int hi); + void Merge(); + void Build(uint8_t* bytemap, int* bytemap_range); + + private: + int Recolor(int oldcolor); + + Bitmap256 splits_; int colors_[256]; - int nextcolor_; - std::vector<std::pair<int, int>> colormap_; - std::vector<std::pair<int, int>> ranges_; - - ByteMapBuilder(const ByteMapBuilder&) = delete; - ByteMapBuilder& operator=(const ByteMapBuilder&) = delete; -}; - -void ByteMapBuilder::Mark(int lo, int hi) { - DCHECK_GE(lo, 0); - DCHECK_GE(hi, 0); - DCHECK_LE(lo, 255); - DCHECK_LE(hi, 255); - DCHECK_LE(lo, hi); - - // Ignore any [0-255] ranges. They cause us to recolor every range, which - // has no effect on the eventual result and is therefore a waste of time. - if (lo == 0 && hi == 255) - return; - - ranges_.emplace_back(lo, hi); -} - -void ByteMapBuilder::Merge() { - for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin(); - it != ranges_.end(); - ++it) { - int lo = it->first-1; - int hi = it->second; - - if (0 <= lo && !splits_.Test(lo)) { - splits_.Set(lo); - int next = splits_.FindNextSetBit(lo+1); - colors_[lo] = colors_[next]; - } - if (!splits_.Test(hi)) { - splits_.Set(hi); - int next = splits_.FindNextSetBit(hi+1); - colors_[hi] = colors_[next]; - } - - int c = lo+1; - while (c < 256) { - int next = splits_.FindNextSetBit(c); - colors_[next] = Recolor(colors_[next]); - if (next == hi) - break; - c = next+1; - } - } - colormap_.clear(); - ranges_.clear(); -} - -void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) { - // Assign byte classes numbered from 0. - nextcolor_ = 0; - - int c = 0; - while (c < 256) { - int next = splits_.FindNextSetBit(c); - uint8_t b = static_cast<uint8_t>(Recolor(colors_[next])); - while (c <= next) { - bytemap[c] = b; - c++; - } - } - - *bytemap_range = nextcolor_; -} - -int ByteMapBuilder::Recolor(int oldcolor) { - // Yes, this is a linear search. There can be at most 256 - // colors and there will typically be far fewer than that. - // Also, we need to consider keys *and* values in order to - // avoid recoloring a given range more than once per batch. - std::vector<std::pair<int, int>>::const_iterator it = - std::find_if(colormap_.begin(), colormap_.end(), - [=](const std::pair<int, int>& kv) -> bool { - return kv.first == oldcolor || kv.second == oldcolor; - }); - if (it != colormap_.end()) - return it->second; - int newcolor = nextcolor_; - nextcolor_++; - colormap_.emplace_back(oldcolor, newcolor); - return newcolor; + int nextcolor_; + std::vector<std::pair<int, int>> colormap_; + std::vector<std::pair<int, int>> ranges_; + + ByteMapBuilder(const ByteMapBuilder&) = delete; + ByteMapBuilder& operator=(const ByteMapBuilder&) = delete; +}; + +void ByteMapBuilder::Mark(int lo, int hi) { + DCHECK_GE(lo, 0); + DCHECK_GE(hi, 0); + DCHECK_LE(lo, 255); + DCHECK_LE(hi, 255); + DCHECK_LE(lo, hi); + + // Ignore any [0-255] ranges. They cause us to recolor every range, which + // has no effect on the eventual result and is therefore a waste of time. + if (lo == 0 && hi == 255) + return; + + ranges_.emplace_back(lo, hi); } +void ByteMapBuilder::Merge() { + for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin(); + it != ranges_.end(); + ++it) { + int lo = it->first-1; + int hi = it->second; + + if (0 <= lo && !splits_.Test(lo)) { + splits_.Set(lo); + int next = splits_.FindNextSetBit(lo+1); + colors_[lo] = colors_[next]; + } + if (!splits_.Test(hi)) { + splits_.Set(hi); + int next = splits_.FindNextSetBit(hi+1); + colors_[hi] = colors_[next]; + } + + int c = lo+1; + while (c < 256) { + int next = splits_.FindNextSetBit(c); + colors_[next] = Recolor(colors_[next]); + if (next == hi) + break; + c = next+1; + } + } + colormap_.clear(); + ranges_.clear(); +} + +void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) { + // Assign byte classes numbered from 0. + nextcolor_ = 0; + + int c = 0; + while (c < 256) { + int next = splits_.FindNextSetBit(c); + uint8_t b = static_cast<uint8_t>(Recolor(colors_[next])); + while (c <= next) { + bytemap[c] = b; + c++; + } + } + + *bytemap_range = nextcolor_; +} + +int ByteMapBuilder::Recolor(int oldcolor) { + // Yes, this is a linear search. There can be at most 256 + // colors and there will typically be far fewer than that. + // Also, we need to consider keys *and* values in order to + // avoid recoloring a given range more than once per batch. + std::vector<std::pair<int, int>>::const_iterator it = + std::find_if(colormap_.begin(), colormap_.end(), + [=](const std::pair<int, int>& kv) -> bool { + return kv.first == oldcolor || kv.second == oldcolor; + }); + if (it != colormap_.end()) + return it->second; + int newcolor = nextcolor_; + nextcolor_++; + colormap_.emplace_back(oldcolor, newcolor); + return newcolor; +} + void Prog::ComputeByteMap() { - // Fill in bytemap with byte classes for the program. - // Ranges of bytes that are treated indistinguishably - // will be mapped to a single byte class. - ByteMapBuilder builder; - - // Don't repeat the work for ^ and $. - bool marked_line_boundaries = false; - // Don't repeat the work for \b and \B. - bool marked_word_boundaries = false; - - for (int id = 0; id < size(); id++) { - Inst* ip = inst(id); - if (ip->opcode() == kInstByteRange) { - int lo = ip->lo(); - int hi = ip->hi(); - builder.Mark(lo, hi); - if (ip->foldcase() && lo <= 'z' && hi >= 'a') { - int foldlo = lo; - int foldhi = hi; - if (foldlo < 'a') - foldlo = 'a'; - if (foldhi > 'z') - foldhi = 'z'; + // Fill in bytemap with byte classes for the program. + // Ranges of bytes that are treated indistinguishably + // will be mapped to a single byte class. + ByteMapBuilder builder; + + // Don't repeat the work for ^ and $. + bool marked_line_boundaries = false; + // Don't repeat the work for \b and \B. + bool marked_word_boundaries = false; + + for (int id = 0; id < size(); id++) { + Inst* ip = inst(id); + if (ip->opcode() == kInstByteRange) { + int lo = ip->lo(); + int hi = ip->hi(); + builder.Mark(lo, hi); + if (ip->foldcase() && lo <= 'z' && hi >= 'a') { + int foldlo = lo; + int foldhi = hi; + if (foldlo < 'a') + foldlo = 'a'; + if (foldhi > 'z') + foldhi = 'z'; if (foldlo <= foldhi) { foldlo += 'A' - 'a'; foldhi += 'A' - 'a'; builder.Mark(foldlo, foldhi); } - } - // If this Inst is not the last Inst in its list AND the next Inst is - // also a ByteRange AND the Insts have the same out, defer the merge. - if (!ip->last() && - inst(id+1)->opcode() == kInstByteRange && - ip->out() == inst(id+1)->out()) - continue; - builder.Merge(); - } else if (ip->opcode() == kInstEmptyWidth) { - if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) && - !marked_line_boundaries) { - builder.Mark('\n', '\n'); - builder.Merge(); - marked_line_boundaries = true; - } - if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) && - !marked_word_boundaries) { - // We require two batches here: the first for ranges that are word - // characters, the second for ranges that are not word characters. - for (bool isword : {true, false}) { - int j; - for (int i = 0; i < 256; i = j) { - for (j = i + 1; j < 256 && - Prog::IsWordChar(static_cast<uint8_t>(i)) == - Prog::IsWordChar(static_cast<uint8_t>(j)); - j++) - ; - if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword) - builder.Mark(i, j - 1); - } - builder.Merge(); - } - marked_word_boundaries = true; - } - } + } + // If this Inst is not the last Inst in its list AND the next Inst is + // also a ByteRange AND the Insts have the same out, defer the merge. + if (!ip->last() && + inst(id+1)->opcode() == kInstByteRange && + ip->out() == inst(id+1)->out()) + continue; + builder.Merge(); + } else if (ip->opcode() == kInstEmptyWidth) { + if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) && + !marked_line_boundaries) { + builder.Mark('\n', '\n'); + builder.Merge(); + marked_line_boundaries = true; + } + if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) && + !marked_word_boundaries) { + // We require two batches here: the first for ranges that are word + // characters, the second for ranges that are not word characters. + for (bool isword : {true, false}) { + int j; + for (int i = 0; i < 256; i = j) { + for (j = i + 1; j < 256 && + Prog::IsWordChar(static_cast<uint8_t>(i)) == + Prog::IsWordChar(static_cast<uint8_t>(j)); + j++) + ; + if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword) + builder.Mark(i, j - 1); + } + builder.Merge(); + } + marked_word_boundaries = true; + } + } } - builder.Build(bytemap_, &bytemap_range_); - - if (0) { // For debugging, use trivial bytemap. - LOG(ERROR) << "Using trivial bytemap."; - for (int i = 0; i < 256; i++) - bytemap_[i] = static_cast<uint8_t>(i); + builder.Build(bytemap_, &bytemap_range_); + + if (0) { // For debugging, use trivial bytemap. + LOG(ERROR) << "Using trivial bytemap."; + for (int i = 0; i < 256; i++) + bytemap_[i] = static_cast<uint8_t>(i); bytemap_range_ = 256; } } -// Prog::Flatten() implements a graph rewriting algorithm. -// -// The overall process is similar to epsilon removal, but retains some epsilon -// transitions: those from Capture and EmptyWidth instructions; and those from -// nullable subexpressions. (The latter avoids quadratic blowup in transitions -// in the worst case.) It might be best thought of as Alt instruction elision. -// -// In conceptual terms, it divides the Prog into "trees" of instructions, then -// traverses the "trees" in order to produce "lists" of instructions. A "tree" -// is one or more instructions that grow from one "root" instruction to one or -// more "leaf" instructions; if a "tree" has exactly one instruction, then the -// "root" is also the "leaf". In most cases, a "root" is the successor of some -// "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction) -// and is considered a "successor root". A "leaf" can be a ByteRange, Capture, -// EmptyWidth or Match instruction. However, this is insufficient for handling -// nested nullable subexpressions correctly, so in some cases, a "root" is the -// dominator of the instructions reachable from some "successor root" (i.e. it -// has an unreachable predecessor) and is considered a "dominator root". Since -// only Alt instructions can be "dominator roots" (other instructions would be +// Prog::Flatten() implements a graph rewriting algorithm. +// +// The overall process is similar to epsilon removal, but retains some epsilon +// transitions: those from Capture and EmptyWidth instructions; and those from +// nullable subexpressions. (The latter avoids quadratic blowup in transitions +// in the worst case.) It might be best thought of as Alt instruction elision. +// +// In conceptual terms, it divides the Prog into "trees" of instructions, then +// traverses the "trees" in order to produce "lists" of instructions. A "tree" +// is one or more instructions that grow from one "root" instruction to one or +// more "leaf" instructions; if a "tree" has exactly one instruction, then the +// "root" is also the "leaf". In most cases, a "root" is the successor of some +// "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction) +// and is considered a "successor root". A "leaf" can be a ByteRange, Capture, +// EmptyWidth or Match instruction. However, this is insufficient for handling +// nested nullable subexpressions correctly, so in some cases, a "root" is the +// dominator of the instructions reachable from some "successor root" (i.e. it +// has an unreachable predecessor) and is considered a "dominator root". Since +// only Alt instructions can be "dominator roots" (other instructions would be // "leaves"), only Alt instructions are required to be marked as predecessors. -// -// Dividing the Prog into "trees" comprises two passes: marking the "successor -// roots" and the predecessors; and marking the "dominator roots". Sorting the -// "successor roots" by their bytecode offsets enables iteration in order from -// greatest to least during the second pass; by working backwards in this case -// and flooding the graph no further than "leaves" and already marked "roots", -// it becomes possible to mark "dominator roots" without doing excessive work. -// -// Traversing the "trees" is just iterating over the "roots" in order of their -// marking and flooding the graph no further than "leaves" and "roots". When a -// "leaf" is reached, the instruction is copied with its successor remapped to -// its "root" number. When a "root" is reached, a Nop instruction is generated -// with its successor remapped similarly. As each "list" is produced, its last -// instruction is marked as such. After all of the "lists" have been produced, -// a pass over their instructions remaps their successors to bytecode offsets. -void Prog::Flatten() { - if (did_flatten_) - return; - did_flatten_ = true; - - // Scratch structures. It's important that these are reused by functions - // that we call in loops because they would thrash the heap otherwise. - SparseSet reachable(size()); - std::vector<int> stk; - stk.reserve(size()); - - // First pass: Marks "successor roots" and predecessors. - // Builds the mapping from inst-ids to root-ids. - SparseArray<int> rootmap(size()); - SparseArray<int> predmap(size()); - std::vector<std::vector<int>> predvec; - MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk); - - // Second pass: Marks "dominator roots". - SparseArray<int> sorted(rootmap); - std::sort(sorted.begin(), sorted.end(), sorted.less); - for (SparseArray<int>::const_iterator i = sorted.end() - 1; - i != sorted.begin(); - --i) { - if (i->index() != start_unanchored() && i->index() != start()) - MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk); - } - - // Third pass: Emits "lists". Remaps outs to root-ids. - // Builds the mapping from root-ids to flat-ids. - std::vector<int> flatmap(rootmap.size()); - std::vector<Inst> flat; - flat.reserve(size()); - for (SparseArray<int>::const_iterator i = rootmap.begin(); - i != rootmap.end(); - ++i) { - flatmap[i->value()] = static_cast<int>(flat.size()); - EmitList(i->index(), &rootmap, &flat, &reachable, &stk); - flat.back().set_last(); +// +// Dividing the Prog into "trees" comprises two passes: marking the "successor +// roots" and the predecessors; and marking the "dominator roots". Sorting the +// "successor roots" by their bytecode offsets enables iteration in order from +// greatest to least during the second pass; by working backwards in this case +// and flooding the graph no further than "leaves" and already marked "roots", +// it becomes possible to mark "dominator roots" without doing excessive work. +// +// Traversing the "trees" is just iterating over the "roots" in order of their +// marking and flooding the graph no further than "leaves" and "roots". When a +// "leaf" is reached, the instruction is copied with its successor remapped to +// its "root" number. When a "root" is reached, a Nop instruction is generated +// with its successor remapped similarly. As each "list" is produced, its last +// instruction is marked as such. After all of the "lists" have been produced, +// a pass over their instructions remaps their successors to bytecode offsets. +void Prog::Flatten() { + if (did_flatten_) + return; + did_flatten_ = true; + + // Scratch structures. It's important that these are reused by functions + // that we call in loops because they would thrash the heap otherwise. + SparseSet reachable(size()); + std::vector<int> stk; + stk.reserve(size()); + + // First pass: Marks "successor roots" and predecessors. + // Builds the mapping from inst-ids to root-ids. + SparseArray<int> rootmap(size()); + SparseArray<int> predmap(size()); + std::vector<std::vector<int>> predvec; + MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk); + + // Second pass: Marks "dominator roots". + SparseArray<int> sorted(rootmap); + std::sort(sorted.begin(), sorted.end(), sorted.less); + for (SparseArray<int>::const_iterator i = sorted.end() - 1; + i != sorted.begin(); + --i) { + if (i->index() != start_unanchored() && i->index() != start()) + MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk); + } + + // Third pass: Emits "lists". Remaps outs to root-ids. + // Builds the mapping from root-ids to flat-ids. + std::vector<int> flatmap(rootmap.size()); + std::vector<Inst> flat; + flat.reserve(size()); + for (SparseArray<int>::const_iterator i = rootmap.begin(); + i != rootmap.end(); + ++i) { + flatmap[i->value()] = static_cast<int>(flat.size()); + EmitList(i->index(), &rootmap, &flat, &reachable, &stk); + flat.back().set_last(); // We have the bounds of the "list", so this is the // most convenient point at which to compute hints. ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size())); - } - - list_count_ = static_cast<int>(flatmap.size()); - for (int i = 0; i < kNumInst; i++) - inst_count_[i] = 0; - - // Fourth pass: Remaps outs to flat-ids. - // Counts instructions by opcode. - for (int id = 0; id < static_cast<int>(flat.size()); id++) { - Inst* ip = &flat[id]; - if (ip->opcode() != kInstAltMatch) // handled in EmitList() - ip->set_out(flatmap[ip->out()]); - inst_count_[ip->opcode()]++; - } - + } + + list_count_ = static_cast<int>(flatmap.size()); + for (int i = 0; i < kNumInst; i++) + inst_count_[i] = 0; + + // Fourth pass: Remaps outs to flat-ids. + // Counts instructions by opcode. + for (int id = 0; id < static_cast<int>(flat.size()); id++) { + Inst* ip = &flat[id]; + if (ip->opcode() != kInstAltMatch) // handled in EmitList() + ip->set_out(flatmap[ip->out()]); + inst_count_[ip->opcode()]++; + } + #if !defined(NDEBUG) // Address a `-Wunused-but-set-variable' warning from Clang 13.x. size_t total = 0; - for (int i = 0; i < kNumInst; i++) - total += inst_count_[i]; + for (int i = 0; i < kNumInst; i++) + total += inst_count_[i]; CHECK_EQ(total, flat.size()); #endif - - // Remap start_unanchored and start. - if (start_unanchored() == 0) { - DCHECK_EQ(start(), 0); - } else if (start_unanchored() == start()) { - set_start_unanchored(flatmap[1]); - set_start(flatmap[1]); - } else { - set_start_unanchored(flatmap[1]); - set_start(flatmap[2]); - } - - // Finally, replace the old instructions with the new instructions. - size_ = static_cast<int>(flat.size()); + + // Remap start_unanchored and start. + if (start_unanchored() == 0) { + DCHECK_EQ(start(), 0); + } else if (start_unanchored() == start()) { + set_start_unanchored(flatmap[1]); + set_start(flatmap[1]); + } else { + set_start_unanchored(flatmap[1]); + set_start(flatmap[2]); + } + + // Finally, replace the old instructions with the new instructions. + size_ = static_cast<int>(flat.size()); inst_ = PODArray<Inst>(size_); memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]); @@ -649,198 +649,198 @@ void Prog::Flatten() { // for tracking pairs of possibilities that it has already explored. const size_t kBitStateBitmapMaxSize = 256*1024; // max size in bits bit_state_text_max_size_ = kBitStateBitmapMaxSize / list_count_ - 1; -} - -void Prog::MarkSuccessors(SparseArray<int>* rootmap, - SparseArray<int>* predmap, - std::vector<std::vector<int>>* predvec, - SparseSet* reachable, std::vector<int>* stk) { - // Mark the kInstFail instruction. - rootmap->set_new(0, rootmap->size()); - - // Mark the start_unanchored and start instructions. - if (!rootmap->has_index(start_unanchored())) - rootmap->set_new(start_unanchored(), rootmap->size()); - if (!rootmap->has_index(start())) - rootmap->set_new(start(), rootmap->size()); - - reachable->clear(); - stk->clear(); - stk->push_back(start_unanchored()); - while (!stk->empty()) { - int id = stk->back(); - stk->pop_back(); - Loop: - if (reachable->contains(id)) - continue; - reachable->insert_new(id); - - Inst* ip = inst(id); - switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); - break; - - case kInstAltMatch: - case kInstAlt: - // Mark this instruction as a predecessor of each out. - for (int out : {ip->out(), ip->out1()}) { - if (!predmap->has_index(out)) { - predmap->set_new(out, static_cast<int>(predvec->size())); - predvec->emplace_back(); - } - (*predvec)[predmap->get_existing(out)].emplace_back(id); - } - stk->push_back(ip->out1()); - id = ip->out(); - goto Loop; - - case kInstByteRange: - case kInstCapture: - case kInstEmptyWidth: - // Mark the out of this instruction as a "root". - if (!rootmap->has_index(ip->out())) - rootmap->set_new(ip->out(), rootmap->size()); - id = ip->out(); - goto Loop; - - case kInstNop: - id = ip->out(); - goto Loop; - - case kInstMatch: - case kInstFail: - break; - } - } -} - -void Prog::MarkDominator(int root, SparseArray<int>* rootmap, - SparseArray<int>* predmap, - std::vector<std::vector<int>>* predvec, - SparseSet* reachable, std::vector<int>* stk) { - reachable->clear(); - stk->clear(); - stk->push_back(root); - while (!stk->empty()) { - int id = stk->back(); - stk->pop_back(); - Loop: - if (reachable->contains(id)) - continue; - reachable->insert_new(id); - - if (id != root && rootmap->has_index(id)) { - // We reached another "tree" via epsilon transition. - continue; - } - - Inst* ip = inst(id); - switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); - break; - - case kInstAltMatch: - case kInstAlt: - stk->push_back(ip->out1()); - id = ip->out(); - goto Loop; - - case kInstByteRange: - case kInstCapture: - case kInstEmptyWidth: - break; - - case kInstNop: - id = ip->out(); - goto Loop; - - case kInstMatch: - case kInstFail: - break; - } - } - - for (SparseSet::const_iterator i = reachable->begin(); - i != reachable->end(); - ++i) { - int id = *i; - if (predmap->has_index(id)) { - for (int pred : (*predvec)[predmap->get_existing(id)]) { - if (!reachable->contains(pred)) { - // id has a predecessor that cannot be reached from root! - // Therefore, id must be a "root" too - mark it as such. - if (!rootmap->has_index(id)) - rootmap->set_new(id, rootmap->size()); - } - } - } - } -} - -void Prog::EmitList(int root, SparseArray<int>* rootmap, - std::vector<Inst>* flat, - SparseSet* reachable, std::vector<int>* stk) { - reachable->clear(); - stk->clear(); - stk->push_back(root); - while (!stk->empty()) { - int id = stk->back(); - stk->pop_back(); - Loop: - if (reachable->contains(id)) - continue; - reachable->insert_new(id); - - if (id != root && rootmap->has_index(id)) { - // We reached another "tree" via epsilon transition. Emit a kInstNop - // instruction so that the Prog does not become quadratically larger. - flat->emplace_back(); - flat->back().set_opcode(kInstNop); - flat->back().set_out(rootmap->get_existing(id)); - continue; - } - - Inst* ip = inst(id); - switch (ip->opcode()) { - default: - LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); - break; - - case kInstAltMatch: - flat->emplace_back(); - flat->back().set_opcode(kInstAltMatch); - flat->back().set_out(static_cast<int>(flat->size())); - flat->back().out1_ = static_cast<uint32_t>(flat->size())+1; - FALLTHROUGH_INTENDED; - - case kInstAlt: - stk->push_back(ip->out1()); - id = ip->out(); - goto Loop; - - case kInstByteRange: - case kInstCapture: - case kInstEmptyWidth: - flat->emplace_back(); - memmove(&flat->back(), ip, sizeof *ip); - flat->back().set_out(rootmap->get_existing(ip->out())); - break; - - case kInstNop: - id = ip->out(); - goto Loop; - - case kInstMatch: - case kInstFail: - flat->emplace_back(); - memmove(&flat->back(), ip, sizeof *ip); - break; - } - } -} - +} + +void Prog::MarkSuccessors(SparseArray<int>* rootmap, + SparseArray<int>* predmap, + std::vector<std::vector<int>>* predvec, + SparseSet* reachable, std::vector<int>* stk) { + // Mark the kInstFail instruction. + rootmap->set_new(0, rootmap->size()); + + // Mark the start_unanchored and start instructions. + if (!rootmap->has_index(start_unanchored())) + rootmap->set_new(start_unanchored(), rootmap->size()); + if (!rootmap->has_index(start())) + rootmap->set_new(start(), rootmap->size()); + + reachable->clear(); + stk->clear(); + stk->push_back(start_unanchored()); + while (!stk->empty()) { + int id = stk->back(); + stk->pop_back(); + Loop: + if (reachable->contains(id)) + continue; + reachable->insert_new(id); + + Inst* ip = inst(id); + switch (ip->opcode()) { + default: + LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); + break; + + case kInstAltMatch: + case kInstAlt: + // Mark this instruction as a predecessor of each out. + for (int out : {ip->out(), ip->out1()}) { + if (!predmap->has_index(out)) { + predmap->set_new(out, static_cast<int>(predvec->size())); + predvec->emplace_back(); + } + (*predvec)[predmap->get_existing(out)].emplace_back(id); + } + stk->push_back(ip->out1()); + id = ip->out(); + goto Loop; + + case kInstByteRange: + case kInstCapture: + case kInstEmptyWidth: + // Mark the out of this instruction as a "root". + if (!rootmap->has_index(ip->out())) + rootmap->set_new(ip->out(), rootmap->size()); + id = ip->out(); + goto Loop; + + case kInstNop: + id = ip->out(); + goto Loop; + + case kInstMatch: + case kInstFail: + break; + } + } +} + +void Prog::MarkDominator(int root, SparseArray<int>* rootmap, + SparseArray<int>* predmap, + std::vector<std::vector<int>>* predvec, + SparseSet* reachable, std::vector<int>* stk) { + reachable->clear(); + stk->clear(); + stk->push_back(root); + while (!stk->empty()) { + int id = stk->back(); + stk->pop_back(); + Loop: + if (reachable->contains(id)) + continue; + reachable->insert_new(id); + + if (id != root && rootmap->has_index(id)) { + // We reached another "tree" via epsilon transition. + continue; + } + + Inst* ip = inst(id); + switch (ip->opcode()) { + default: + LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); + break; + + case kInstAltMatch: + case kInstAlt: + stk->push_back(ip->out1()); + id = ip->out(); + goto Loop; + + case kInstByteRange: + case kInstCapture: + case kInstEmptyWidth: + break; + + case kInstNop: + id = ip->out(); + goto Loop; + + case kInstMatch: + case kInstFail: + break; + } + } + + for (SparseSet::const_iterator i = reachable->begin(); + i != reachable->end(); + ++i) { + int id = *i; + if (predmap->has_index(id)) { + for (int pred : (*predvec)[predmap->get_existing(id)]) { + if (!reachable->contains(pred)) { + // id has a predecessor that cannot be reached from root! + // Therefore, id must be a "root" too - mark it as such. + if (!rootmap->has_index(id)) + rootmap->set_new(id, rootmap->size()); + } + } + } + } +} + +void Prog::EmitList(int root, SparseArray<int>* rootmap, + std::vector<Inst>* flat, + SparseSet* reachable, std::vector<int>* stk) { + reachable->clear(); + stk->clear(); + stk->push_back(root); + while (!stk->empty()) { + int id = stk->back(); + stk->pop_back(); + Loop: + if (reachable->contains(id)) + continue; + reachable->insert_new(id); + + if (id != root && rootmap->has_index(id)) { + // We reached another "tree" via epsilon transition. Emit a kInstNop + // instruction so that the Prog does not become quadratically larger. + flat->emplace_back(); + flat->back().set_opcode(kInstNop); + flat->back().set_out(rootmap->get_existing(id)); + continue; + } + + Inst* ip = inst(id); + switch (ip->opcode()) { + default: + LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); + break; + + case kInstAltMatch: + flat->emplace_back(); + flat->back().set_opcode(kInstAltMatch); + flat->back().set_out(static_cast<int>(flat->size())); + flat->back().out1_ = static_cast<uint32_t>(flat->size())+1; + FALLTHROUGH_INTENDED; + + case kInstAlt: + stk->push_back(ip->out1()); + id = ip->out(); + goto Loop; + + case kInstByteRange: + case kInstCapture: + case kInstEmptyWidth: + flat->emplace_back(); + memmove(&flat->back(), ip, sizeof *ip); + flat->back().set_out(rootmap->get_existing(ip->out())); + break; + + case kInstNop: + id = ip->out(); + goto Loop; + + case kInstMatch: + case kInstFail: + flat->emplace_back(); + memmove(&flat->back(), ip, sizeof *ip); + break; + } + } +} + // For each ByteRange instruction in [begin, end), computes a hint to execution // engines: the delta to the next instruction (in flat) worth exploring iff the // current instruction matched. diff --git a/contrib/libs/re2/re2/prog.h b/contrib/libs/re2/re2/prog.h index 4af012ab6f..5ac0e67c29 100644 --- a/contrib/libs/re2/re2/prog.h +++ b/contrib/libs/re2/re2/prog.h @@ -2,24 +2,24 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_PROG_H_ -#define RE2_PROG_H_ - +#ifndef RE2_PROG_H_ +#define RE2_PROG_H_ + // Compiled representation of regular expressions. // See regexp.h for the Regexp class, which represents a regular // expression symbolically. -#include <stdint.h> +#include <stdint.h> #include <functional> -#include <mutex> -#include <string> -#include <vector> +#include <mutex> +#include <string> +#include <vector> #include <type_traits> -#include "util/util.h" -#include "util/logging.h" +#include "util/util.h" +#include "util/logging.h" #include "re2/pod_array.h" -#include "re2/re2.h" +#include "re2/re2.h" #include "re2/sparse_array.h" #include "re2/sparse_set.h" @@ -35,7 +35,7 @@ enum InstOp { kInstMatch, // found a match! kInstNop, // no-op; occasionally unavoidable kInstFail, // never match; occasionally unavoidable - kNumInst, + kNumInst, }; // Bit flags for empty-width specials @@ -49,7 +49,7 @@ enum EmptyOp { kEmptyAllFlags = (1<<6)-1, }; -class DFA; +class DFA; class Regexp; // Compiled form of regexp program. @@ -64,25 +64,25 @@ class Prog { // See the assertion below for why this is so. Inst() = default; - // Copyable. - Inst(const Inst&) = default; - Inst& operator=(const Inst&) = default; - + // Copyable. + Inst(const Inst&) = default; + Inst& operator=(const Inst&) = default; + // Constructors per opcode - void InitAlt(uint32_t out, uint32_t out1); - void InitByteRange(int lo, int hi, int foldcase, uint32_t out); - void InitCapture(int cap, uint32_t out); - void InitEmptyWidth(EmptyOp empty, uint32_t out); + void InitAlt(uint32_t out, uint32_t out1); + void InitByteRange(int lo, int hi, int foldcase, uint32_t out); + void InitCapture(int cap, uint32_t out); + void InitEmptyWidth(EmptyOp empty, uint32_t out); void InitMatch(int id); - void InitNop(uint32_t out); + void InitNop(uint32_t out); void InitFail(); // Getters int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); } InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); } - int last() { return (out_opcode_>>3)&1; } - int out() { return out_opcode_>>4; } - int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; } + int last() { return (out_opcode_>>3)&1; } + int out() { return out_opcode_>>4; } + int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; } int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; } int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; } int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; } @@ -90,12 +90,12 @@ class Prog { int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; } int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; } EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; } - - bool greedy(Prog* p) { + + bool greedy(Prog* p) { DCHECK_EQ(opcode(), kInstAltMatch); - return p->inst(out())->opcode() == kInstByteRange || - (p->inst(out())->opcode() == kInstNop && - p->inst(p->inst(out())->out())->opcode() == kInstByteRange); + return p->inst(out())->opcode() == kInstByteRange || + (p->inst(out())->opcode() == kInstNop && + p->inst(p->inst(out())->out())->opcode() == kInstByteRange); } // Does this inst (an kInstByteRange) match c? @@ -110,24 +110,24 @@ class Prog { std::string Dump(); // Maximum instruction id. - // (Must fit in out_opcode_. PatchList/last steal another bit.) + // (Must fit in out_opcode_. PatchList/last steal another bit.) static const int kMaxInst = (1<<28) - 1; private: void set_opcode(InstOp opcode) { - out_opcode_ = (out()<<4) | (last()<<3) | opcode; - } - - void set_last() { - out_opcode_ = (out()<<4) | (1<<3) | opcode(); + out_opcode_ = (out()<<4) | (last()<<3) | opcode; } + void set_last() { + out_opcode_ = (out()<<4) | (1<<3) | opcode(); + } + void set_out(int out) { - out_opcode_ = (out<<4) | (last()<<3) | opcode(); + out_opcode_ = (out<<4) | (last()<<3) | opcode(); } void set_out_opcode(int out, InstOp opcode) { - out_opcode_ = (out<<4) | (last()<<3) | opcode; + out_opcode_ = (out<<4) | (last()<<3) | opcode; } uint32_t out_opcode_; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode @@ -157,8 +157,8 @@ class Prog { // foldcase: A-Z -> a-z before checking range. }; - EmptyOp empty_; // opcode == kInstEmptyWidth - // empty_ is bitwise OR of kEmpty* flags above. + EmptyOp empty_; // opcode == kInstEmptyWidth + // empty_ is bitwise OR of kEmpty* flags above. }; friend class Compiler; @@ -201,21 +201,21 @@ class Prog { void set_start(int start) { start_ = start; } int start_unanchored() { return start_unanchored_; } void set_start_unanchored(int start) { start_unanchored_ = start; } - int size() { return size_; } + int size() { return size_; } bool reversed() { return reversed_; } void set_reversed(bool reversed) { reversed_ = reversed; } - int list_count() { return list_count_; } - int inst_count(InstOp op) { return inst_count_[op]; } + int list_count() { return list_count_; } + int inst_count(InstOp op) { return inst_count_[op]; } uint16_t* list_heads() { return list_heads_.data(); } size_t bit_state_text_max_size() { return bit_state_text_max_size_; } int64_t dfa_mem() { return dfa_mem_; } - void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; } + void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; } bool anchor_start() { return anchor_start_; } void set_anchor_start(bool b) { anchor_start_ = b; } bool anchor_end() { return anchor_end_; } void set_anchor_end(bool b) { anchor_end_ = b; } int bytemap_range() { return bytemap_range_; } - const uint8_t* bytemap() { return bytemap_; } + const uint8_t* bytemap() { return bytemap_; } bool can_prefix_accel() { return prefix_size_ != 0; } // Accelerates to the first likely occurrence of the prefix. @@ -230,7 +230,7 @@ class Prog { return memchr(data, prefix_front_, size); } } - + // Configures prefix accel using the analysis performed during compilation. void ConfigurePrefixAccel(const std::string& prefix, bool prefix_foldcase); @@ -249,7 +249,7 @@ class Prog { // Returns the set of kEmpty flags that are in effect at // position p within context. - static uint32_t EmptyFlags(const StringPiece& context, const char* p); + static uint32_t EmptyFlags(const StringPiece& context, const char* p); // Returns whether byte c is a word character: ASCII only. // Used by the implementation of \b and \B. @@ -258,7 +258,7 @@ class Prog { // (the DFA has only one-byte lookahead). // - even if the lookahead were possible, the Progs would be huge. // This crude approximation is the same one PCRE uses. - static bool IsWordChar(uint8_t c) { + static bool IsWordChar(uint8_t c) { return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || @@ -291,7 +291,7 @@ class Prog { // If matches != NULL and kind == kManyMatch and there is a match, // SearchDFA fills matches with the match IDs of the final matching state. bool SearchDFA(const StringPiece& text, const StringPiece& context, - Anchor anchor, MatchKind kind, StringPiece* match0, + Anchor anchor, MatchKind kind, StringPiece* match0, bool* failed, SparseSet* matches); // The callback issued after building each DFA state with BuildEntireDFA(). @@ -311,7 +311,7 @@ class Prog { // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY. int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb); - // Compute bytemap. + // Compute bytemap. void ComputeByteMap(); // Run peep-hole optimizer on program. @@ -361,41 +361,41 @@ class Prog { // Returns true on success, false on error. bool PossibleMatchRange(std::string* min, std::string* max, int maxlen); - // EXPERIMENTAL! SUBJECT TO CHANGE! - // Outputs the program fanout into the given sparse array. - void Fanout(SparseArray<int>* fanout); - + // EXPERIMENTAL! SUBJECT TO CHANGE! + // Outputs the program fanout into the given sparse array. + void Fanout(SparseArray<int>* fanout); + // Compiles a collection of regexps to Prog. Each regexp will have // its own Match instruction recording the index in the output vector. static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem); - // Flattens the Prog from "tree" form to "list" form. This is an in-place - // operation in the sense that the old instructions are lost. - void Flatten(); - - // Walks the Prog; the "successor roots" or predecessors of the reachable - // instructions are marked in rootmap or predmap/predvec, respectively. - // reachable and stk are preallocated scratch structures. - void MarkSuccessors(SparseArray<int>* rootmap, - SparseArray<int>* predmap, - std::vector<std::vector<int>>* predvec, - SparseSet* reachable, std::vector<int>* stk); - - // Walks the Prog from the given "root" instruction; the "dominator root" - // of the reachable instructions (if such exists) is marked in rootmap. - // reachable and stk are preallocated scratch structures. - void MarkDominator(int root, SparseArray<int>* rootmap, - SparseArray<int>* predmap, - std::vector<std::vector<int>>* predvec, - SparseSet* reachable, std::vector<int>* stk); - - // Walks the Prog from the given "root" instruction; the reachable - // instructions are emitted in "list" form and appended to flat. - // reachable and stk are preallocated scratch structures. - void EmitList(int root, SparseArray<int>* rootmap, - std::vector<Inst>* flat, - SparseSet* reachable, std::vector<int>* stk); - + // Flattens the Prog from "tree" form to "list" form. This is an in-place + // operation in the sense that the old instructions are lost. + void Flatten(); + + // Walks the Prog; the "successor roots" or predecessors of the reachable + // instructions are marked in rootmap or predmap/predvec, respectively. + // reachable and stk are preallocated scratch structures. + void MarkSuccessors(SparseArray<int>* rootmap, + SparseArray<int>* predmap, + std::vector<std::vector<int>>* predvec, + SparseSet* reachable, std::vector<int>* stk); + + // Walks the Prog from the given "root" instruction; the "dominator root" + // of the reachable instructions (if such exists) is marked in rootmap. + // reachable and stk are preallocated scratch structures. + void MarkDominator(int root, SparseArray<int>* rootmap, + SparseArray<int>* predmap, + std::vector<std::vector<int>>* predvec, + SparseSet* reachable, std::vector<int>* stk); + + // Walks the Prog from the given "root" instruction; the reachable + // instructions are emitted in "list" form and appended to flat. + // reachable and stk are preallocated scratch structures. + void EmitList(int root, SparseArray<int>* rootmap, + std::vector<Inst>* flat, + SparseSet* reachable, std::vector<int>* stk); + // Computes hints for ByteRange instructions in [begin, end). void ComputeHints(std::vector<Inst>* flat, int begin, int end); @@ -407,12 +407,12 @@ class Prog { friend class Compiler; DFA* GetDFA(MatchKind kind); - void DeleteDFA(DFA* dfa); + void DeleteDFA(DFA* dfa); bool anchor_start_; // regexp has explicit start anchor bool anchor_end_; // regexp has explicit end anchor bool reversed_; // whether program runs backward over input - bool did_flatten_; // has Flatten been called? + bool did_flatten_; // has Flatten been called? bool did_onepass_; // has IsOnePass been called? int start_; // entry point for program @@ -435,21 +435,21 @@ class Prog { PODArray<uint16_t> list_heads_; // sparse array enumerating list heads // not populated if size_ is overly large size_t bit_state_text_max_size_; // upper bound (inclusive) on text.size() - + PODArray<Inst> inst_; // pointer to instruction array PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes - int64_t dfa_mem_; // Maximum memory for DFAs. - DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch - DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch + int64_t dfa_mem_; // Maximum memory for DFAs. + DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch + DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch - uint8_t bytemap_[256]; // map from input bytes to byte classes + uint8_t bytemap_[256]; // map from input bytes to byte classes - std::once_flag dfa_first_once_; - std::once_flag dfa_longest_once_; + std::once_flag dfa_first_once_; + std::once_flag dfa_longest_once_; - Prog(const Prog&) = delete; - Prog& operator=(const Prog&) = delete; + Prog(const Prog&) = delete; + Prog& operator=(const Prog&) = delete; }; // std::string_view in MSVC has iterators that aren't just pointers and @@ -465,4 +465,4 @@ static inline const char* EndPtr(const StringPiece& s) { } // namespace re2 -#endif // RE2_PROG_H_ +#endif // RE2_PROG_H_ diff --git a/contrib/libs/re2/re2/re2.cc b/contrib/libs/re2/re2/re2.cc index 47fb385e4e..a9679c634e 100644 --- a/contrib/libs/re2/re2/re2.cc +++ b/contrib/libs/re2/re2/re2.cc @@ -7,29 +7,29 @@ // Originally the PCRE C++ wrapper, but adapted to use // the new automata-based regular expression engines. -#include "re2/re2.h" +#include "re2/re2.h" -#include <assert.h> -#include <ctype.h> -#include <errno.h> +#include <assert.h> +#include <ctype.h> +#include <errno.h> #ifdef _MSC_VER #include <intrin.h> #endif -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <algorithm> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <algorithm> #include <atomic> -#include <iterator> -#include <mutex> +#include <iterator> +#include <mutex> #include <string> -#include <utility> -#include <vector> - -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "util/utf.h" +#include <utility> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/prog.h" #include "re2/regexp.h" #include "re2/sparse_array.h" @@ -40,26 +40,26 @@ namespace re2 { static const int kMaxArgs = 16; static const int kVecSize = 1+kMaxArgs; -const int RE2::Options::kDefaultMaxMem; // initialized in re2.h - -RE2::Options::Options(RE2::CannedOptions opt) - : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), - posix_syntax_(opt == RE2::POSIX), - longest_match_(opt == RE2::POSIX), - log_errors_(opt != RE2::Quiet), - max_mem_(kDefaultMaxMem), - literal_(false), - never_nl_(false), - dot_nl_(false), - never_capture_(false), - case_sensitive_(true), - perl_classes_(false), - word_boundary_(false), - one_line_(false) { -} - -// static empty objects for use as const references. -// To avoid global constructors, allocated in RE2::Init(). +const int RE2::Options::kDefaultMaxMem; // initialized in re2.h + +RE2::Options::Options(RE2::CannedOptions opt) + : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), + posix_syntax_(opt == RE2::POSIX), + longest_match_(opt == RE2::POSIX), + log_errors_(opt != RE2::Quiet), + max_mem_(kDefaultMaxMem), + literal_(false), + never_nl_(false), + dot_nl_(false), + never_capture_(false), + case_sensitive_(true), + perl_classes_(false), + word_boundary_(false), + one_line_(false) { +} + +// static empty objects for use as const references. +// To avoid global constructors, allocated in RE2::Init(). static const std::string* empty_string; static const std::map<std::string, int>* empty_named_groups; static const std::map<int, std::string>* empty_group_names; @@ -67,37 +67,37 @@ static const std::map<int, std::string>* empty_group_names; // Converts from Regexp error code to RE2 error code. // Maybe some day they will diverge. In any event, this // hides the existence of Regexp from RE2 users. -static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { +static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { switch (code) { - case re2::kRegexpSuccess: + case re2::kRegexpSuccess: return RE2::NoError; - case re2::kRegexpInternalError: + case re2::kRegexpInternalError: return RE2::ErrorInternal; - case re2::kRegexpBadEscape: + case re2::kRegexpBadEscape: return RE2::ErrorBadEscape; - case re2::kRegexpBadCharClass: + case re2::kRegexpBadCharClass: return RE2::ErrorBadCharClass; - case re2::kRegexpBadCharRange: + case re2::kRegexpBadCharRange: return RE2::ErrorBadCharRange; - case re2::kRegexpMissingBracket: + case re2::kRegexpMissingBracket: return RE2::ErrorMissingBracket; - case re2::kRegexpMissingParen: + case re2::kRegexpMissingParen: return RE2::ErrorMissingParen; case re2::kRegexpUnexpectedParen: return RE2::ErrorUnexpectedParen; - case re2::kRegexpTrailingBackslash: + case re2::kRegexpTrailingBackslash: return RE2::ErrorTrailingBackslash; - case re2::kRegexpRepeatArgument: + case re2::kRegexpRepeatArgument: return RE2::ErrorRepeatArgument; - case re2::kRegexpRepeatSize: + case re2::kRegexpRepeatSize: return RE2::ErrorRepeatSize; - case re2::kRegexpRepeatOp: + case re2::kRegexpRepeatOp: return RE2::ErrorRepeatOp; - case re2::kRegexpBadPerlOp: + case re2::kRegexpBadPerlOp: return RE2::ErrorBadPerlOp; - case re2::kRegexpBadUTF8: + case re2::kRegexpBadUTF8: return RE2::ErrorBadUTF8; - case re2::kRegexpBadNamedCapture: + case re2::kRegexpBadNamedCapture: return RE2::ErrorBadNamedCapture; } return RE2::ErrorInternal; @@ -130,8 +130,8 @@ int RE2::Options::ParseFlags() const { int flags = Regexp::ClassNL; switch (encoding()) { default: - if (log_errors()) - LOG(ERROR) << "Unknown encoding " << encoding(); + if (log_errors()) + LOG(ERROR) << "Unknown encoding " << encoding(); break; case RE2::Options::EncodingUTF8: break; @@ -149,12 +149,12 @@ int RE2::Options::ParseFlags() const { if (never_nl()) flags |= Regexp::NeverNL; - if (dot_nl()) - flags |= Regexp::DotNL; - - if (never_capture()) - flags |= Regexp::NeverCapture; - + if (dot_nl()) + flags |= Regexp::DotNL; + + if (never_capture()) + flags |= Regexp::NeverCapture; + if (!case_sensitive()) flags |= Regexp::FoldCase; @@ -171,16 +171,16 @@ int RE2::Options::ParseFlags() const { } void RE2::Init(const StringPiece& pattern, const Options& options) { - static std::once_flag empty_once; - std::call_once(empty_once, []() { + static std::once_flag empty_once; + std::call_once(empty_once, []() { empty_string = new std::string; empty_named_groups = new std::map<std::string, int>; empty_group_names = new std::map<int, std::string>; - }); - + }); + pattern_.assign(pattern.data(), pattern.size()); options_.Copy(options); - entire_regexp_ = NULL; + entire_regexp_ = NULL; error_ = empty_string; error_code_ = NoError; error_arg_.clear(); @@ -211,7 +211,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { return; } - re2::Regexp* suffix; + re2::Regexp* suffix; if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix)) suffix_regexp_ = suffix; else @@ -243,20 +243,20 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { } // Returns rprog_, computing it if needed. -re2::Prog* RE2::ReverseProg() const { - std::call_once(rprog_once_, [](const RE2* re) { - re->rprog_ = - re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3); - if (re->rprog_ == NULL) { - if (re->options_.log_errors()) - LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'"; +re2::Prog* RE2::ReverseProg() const { + std::call_once(rprog_once_, [](const RE2* re) { + re->rprog_ = + re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3); + if (re->rprog_ == NULL) { + if (re->options_.log_errors()) + LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'"; // We no longer touch error_ and error_code_ because failing to compile // the reverse Prog is not a showstopper: falling back to NFA execution // is fine. More importantly, an RE2 object is supposed to be logically // immutable: whatever ok() would have returned after Init() completed, // it should continue to return that no matter what ReverseProg() does. } - }, this); + }, this); return rprog_; } @@ -267,11 +267,11 @@ RE2::~RE2() { entire_regexp_->Decref(); delete prog_; delete rprog_; - if (error_ != empty_string) + if (error_ != empty_string) delete error_; - if (named_groups_ != NULL && named_groups_ != empty_named_groups) + if (named_groups_ != NULL && named_groups_ != empty_named_groups) delete named_groups_; - if (group_names_ != NULL && group_names_ != empty_group_names) + if (group_names_ != NULL && group_names_ != empty_group_names) delete group_names_; } @@ -282,8 +282,8 @@ int RE2::ProgramSize() const { } int RE2::ReverseProgramSize() const { - if (prog_ == NULL) - return -1; + if (prog_ == NULL) + return -1; Prog* prog = ReverseProg(); if (prog == NULL) return -1; @@ -306,12 +306,12 @@ static int FindMSBSet(uint32_t n) { if (word != 0) { n = word; c += shift; - } - } + } + } return c; #endif -} - +} + static int Fanout(Prog* prog, std::vector<int>* histogram) { SparseArray<int> fanout(prog->size()); prog->Fanout(&fanout); @@ -329,8 +329,8 @@ static int Fanout(Prog* prog, std::vector<int>* histogram) { if (histogram != NULL) histogram->assign(data, data+size); return size-1; -} - +} + int RE2::ProgramFanout(std::vector<int>* histogram) const { if (prog_ == NULL) return -1; @@ -348,23 +348,23 @@ int RE2::ReverseProgramFanout(std::vector<int>* histogram) const { // Returns named_groups_, computing it if needed. const std::map<std::string, int>& RE2::NamedCapturingGroups() const { - std::call_once(named_groups_once_, [](const RE2* re) { - if (re->suffix_regexp_ != NULL) - re->named_groups_ = re->suffix_regexp_->NamedCaptures(); - if (re->named_groups_ == NULL) - re->named_groups_ = empty_named_groups; - }, this); + std::call_once(named_groups_once_, [](const RE2* re) { + if (re->suffix_regexp_ != NULL) + re->named_groups_ = re->suffix_regexp_->NamedCaptures(); + if (re->named_groups_ == NULL) + re->named_groups_ = empty_named_groups; + }, this); return *named_groups_; } // Returns group_names_, computing it if needed. const std::map<int, std::string>& RE2::CapturingGroupNames() const { - std::call_once(group_names_once_, [](const RE2* re) { - if (re->suffix_regexp_ != NULL) - re->group_names_ = re->suffix_regexp_->CaptureNames(); - if (re->group_names_ == NULL) - re->group_names_ = empty_group_names; - }, this); + std::call_once(group_names_once_, [](const RE2* re) { + if (re->suffix_regexp_ != NULL) + re->group_names_ = re->suffix_regexp_->CaptureNames(); + if (re->group_names_ == NULL) + re->group_names_ = empty_group_names; + }, this); return *group_names_; } @@ -382,7 +382,7 @@ bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, bool RE2::ConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n) { - size_t consumed; + size_t consumed; if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { input->remove_prefix(consumed); return true; @@ -393,7 +393,7 @@ bool RE2::ConsumeN(StringPiece* input, const RE2& re, bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n) { - size_t consumed; + size_t consumed; if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) { input->remove_prefix(consumed); return true; @@ -411,7 +411,7 @@ bool RE2::Replace(std::string* str, return false; if (nvec > static_cast<int>(arraysize(vec))) return false; - if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) + if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) return false; std::string s; @@ -439,43 +439,43 @@ int RE2::GlobalReplace(std::string* str, const char* lastend = NULL; std::string out; int count = 0; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - // Iterate just once when fuzzing. Otherwise, we easily get bogged down - // and coverage is unlikely to improve despite significant expense. - while (p == str->data()) { -#else +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Iterate just once when fuzzing. Otherwise, we easily get bogged down + // and coverage is unlikely to improve despite significant expense. + while (p == str->data()) { +#else while (p <= ep) { -#endif - if (!re.Match(*str, static_cast<size_t>(p - str->data()), - str->size(), UNANCHORED, vec, nvec)) +#endif + if (!re.Match(*str, static_cast<size_t>(p - str->data()), + str->size(), UNANCHORED, vec, nvec)) break; if (p < vec[0].data()) out.append(p, vec[0].data() - p); if (vec[0].data() == lastend && vec[0].empty()) { // Disallow empty match at end of last match: skip ahead. - // + // // fullrune() takes int, not ptrdiff_t. However, it just looks - // at the leading byte and treats any length >= 4 the same. - if (re.options().encoding() == RE2::Options::EncodingUTF8 && + // at the leading byte and treats any length >= 4 the same. + if (re.options().encoding() == RE2::Options::EncodingUTF8 && fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) { - // re is in UTF-8 mode and there is enough left of str - // to allow us to advance by up to UTFmax bytes. - Rune r; - int n = chartorune(&r, p); - // Some copies of chartorune have a bug that accepts - // encodings of values in (10FFFF, 1FFFFF] as valid. - if (r > Runemax) { - n = 1; - r = Runeerror; - } - if (!(n == 1 && r == Runeerror)) { // no decoding error - out.append(p, n); - p += n; - continue; - } - } - // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode, - // we fell through from above and the GIGO principle applies. + // re is in UTF-8 mode and there is enough left of str + // to allow us to advance by up to UTFmax bytes. + Rune r; + int n = chartorune(&r, p); + // Some copies of chartorune have a bug that accepts + // encodings of values in (10FFFF, 1FFFFF] as valid. + if (r > Runemax) { + n = 1; + r = Runeerror; + } + if (!(n == 1 && r == Runeerror)) { // no decoding error + out.append(p, n); + p += n; + continue; + } + } + // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode, + // we fell through from above and the GIGO principle applies. if (p < ep) out.append(p, 1); p++; @@ -492,7 +492,7 @@ int RE2::GlobalReplace(std::string* str, if (p < ep) out.append(p, ep - p); - using std::swap; + using std::swap; swap(out, *str); return count; } @@ -507,7 +507,7 @@ bool RE2::Extract(const StringPiece& text, return false; if (nvec > static_cast<int>(arraysize(vec))) return false; - if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec)) + if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec)) return false; out->clear(); @@ -525,7 +525,7 @@ std::string RE2::QuoteMeta(const StringPiece& unquoted) { // that. (This also makes it identical to the perl function of the // same name except for the null-character special case; // see `perldoc -f quotemeta`.) - for (size_t ii = 0; ii < unquoted.size(); ++ii) { + for (size_t ii = 0; ii < unquoted.size(); ++ii) { // Note that using 'isalnum' here raises the benchmark time from // 32ns to 58ns: if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && @@ -557,7 +557,7 @@ bool RE2::PossibleMatchRange(std::string* min, std::string* max, if (prog_ == NULL) return false; - int n = static_cast<int>(prefix_.size()); + int n = static_cast<int>(prefix_.size()); if (n > maxlen) n = maxlen; @@ -596,12 +596,12 @@ bool RE2::PossibleMatchRange(std::string* min, std::string* max, // Avoid possible locale nonsense in standard strcasecmp. // The string a is known to be all lowercase. -static int ascii_strcasecmp(const char* a, const char* b, size_t len) { +static int ascii_strcasecmp(const char* a, const char* b, size_t len) { const char* ae = a + len; for (; a < ae; a++, b++) { - uint8_t x = *a; - uint8_t y = *b; + uint8_t x = *a; + uint8_t y = *b; if ('A' <= y && y <= 'Z') y += 'a' - 'A'; if (x != y) @@ -614,8 +614,8 @@ static int ascii_strcasecmp(const char* a, const char* b, size_t len) { /***** Actual matching and rewriting code *****/ bool RE2::Match(const StringPiece& text, - size_t startpos, - size_t endpos, + size_t startpos, + size_t endpos, Anchor re_anchor, StringPiece* submatch, int nsubmatch) const { @@ -625,18 +625,18 @@ bool RE2::Match(const StringPiece& text, return false; } - if (startpos > endpos || endpos > text.size()) { - if (options_.log_errors()) - LOG(ERROR) << "RE2: invalid startpos, endpos pair. [" - << "startpos: " << startpos << ", " - << "endpos: " << endpos << ", " - << "text size: " << text.size() << "]"; - return false; - } - + if (startpos > endpos || endpos > text.size()) { + if (options_.log_errors()) + LOG(ERROR) << "RE2: invalid startpos, endpos pair. [" + << "startpos: " << startpos << ", " + << "endpos: " << endpos << ", " + << "text size: " << text.size() << "]"; + return false; + } + StringPiece subtext = text; subtext.remove_prefix(startpos); - subtext.remove_suffix(text.size() - endpos); + subtext.remove_suffix(text.size() - endpos); // Use DFAs to find exact location of match, filter out non-matches. @@ -651,13 +651,13 @@ bool RE2::Match(const StringPiece& text, if (ncap > nsubmatch) ncap = nsubmatch; - // If the regexp is anchored explicitly, must not be in middle of text. - if (prog_->anchor_start() && startpos != 0) - return false; + // If the regexp is anchored explicitly, must not be in middle of text. + if (prog_->anchor_start() && startpos != 0) + return false; if (prog_->anchor_end() && endpos != text.size()) return false; - - // If the regexp is anchored explicitly, update re_anchor + + // If the regexp is anchored explicitly, update re_anchor // so that we can potentially fall into a faster case below. if (prog_->anchor_start() && prog_->anchor_end()) re_anchor = ANCHOR_BOTH; @@ -665,10 +665,10 @@ bool RE2::Match(const StringPiece& text, re_anchor = ANCHOR_START; // Check for the required prefix, if any. - size_t prefixlen = 0; + size_t prefixlen = 0; if (!prefix_.empty()) { - if (startpos != 0) - return false; + if (startpos != 0) + return false; prefixlen = prefix_.size(); if (prefixlen > subtext.size()) return false; @@ -738,7 +738,7 @@ bool RE2::Match(const StringPiece& text, if (!prog_->SearchDFA(subtext, text, anchor, kind, matchp, &dfa_failed, NULL)) { if (dfa_failed) { - if (options_.log_errors()) + if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "pattern length " << pattern_.size() << ", " << "program size " << prog_->size() << ", " @@ -764,7 +764,7 @@ bool RE2::Match(const StringPiece& text, if (!prog->SearchDFA(match, text, Prog::kAnchored, Prog::kLongestMatch, &match, &dfa_failed, NULL)) { if (dfa_failed) { - if (options_.log_errors()) + if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "pattern length " << pattern_.size() << ", " << "program size " << prog->size() << ", " @@ -774,8 +774,8 @@ bool RE2::Match(const StringPiece& text, skipped_test = true; break; } - if (options_.log_errors()) - LOG(ERROR) << "SearchDFA inconsistency"; + if (options_.log_errors()) + LOG(ERROR) << "SearchDFA inconsistency"; return false; } break; @@ -807,13 +807,13 @@ bool RE2::Match(const StringPiece& text, if (!prog_->SearchDFA(subtext, text, anchor, kind, &match, &dfa_failed, NULL)) { if (dfa_failed) { - if (options_.log_errors()) + if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "pattern length " << pattern_.size() << ", " << "program size " << prog_->size() << ", " << "list count " << prog_->list_count() << ", " << "bytemap range " << prog_->bytemap_range(); - // Fall back to NFA below. + // Fall back to NFA below. skipped_test = true; break; } @@ -843,20 +843,20 @@ bool RE2::Match(const StringPiece& text, if (can_one_pass && anchor != Prog::kUnanchored) { if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { - if (!skipped_test && options_.log_errors()) + if (!skipped_test && options_.log_errors()) LOG(ERROR) << "SearchOnePass inconsistency"; return false; } } else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) { if (!prog_->SearchBitState(subtext1, text, anchor, kind, submatch, ncap)) { - if (!skipped_test && options_.log_errors()) + if (!skipped_test && options_.log_errors()) LOG(ERROR) << "SearchBitState inconsistency"; return false; } } else { if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { - if (!skipped_test && options_.log_errors()) + if (!skipped_test && options_.log_errors()) LOG(ERROR) << "SearchNFA inconsistency"; return false; } @@ -865,19 +865,19 @@ bool RE2::Match(const StringPiece& text, // Adjust overall match for required prefix that we stripped off. if (prefixlen > 0 && nsubmatch > 0) - submatch[0] = StringPiece(submatch[0].data() - prefixlen, + submatch[0] = StringPiece(submatch[0].data() - prefixlen, submatch[0].size() + prefixlen); // Zero submatches that don't exist in the regexp. for (int i = ncap; i < nsubmatch; i++) - submatch[i] = StringPiece(); + submatch[i] = StringPiece(); return true; } // Internal matcher - like Match() but takes Args not StringPieces. bool RE2::DoMatch(const StringPiece& text, Anchor re_anchor, - size_t* consumed, + size_t* consumed, const Arg* const* args, int n) const { if (!ok()) { @@ -914,7 +914,7 @@ bool RE2::DoMatch(const StringPiece& text, return false; } - if (consumed != NULL) + if (consumed != NULL) *consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text)); if (n == 0 || args == NULL) { @@ -1061,11 +1061,11 @@ bool Parse(const char* str, size_t n, TString* dest) { template <> bool Parse(const char* str, size_t n, StringPiece* dest) { - if (dest == NULL) return true; + if (dest == NULL) return true; *dest = StringPiece(str, n); - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, char* dest) { if (n != 1) return false; @@ -1079,13 +1079,13 @@ bool Parse(const char* str, size_t n, signed char* dest) { if (n != 1) return false; if (dest == NULL) return true; *dest = str[0]; - return true; -} - + return true; +} + template <> bool Parse(const char* str, size_t n, unsigned char* dest) { - if (n != 1) return false; - if (dest == NULL) return true; + if (n != 1) return false; + if (dest == NULL) return true; *dest = str[0]; return true; } @@ -1093,61 +1093,61 @@ bool Parse(const char* str, size_t n, unsigned char* dest) { // Largest number spec that we are willing to parse static const int kMaxNumberLength = 32; -// REQUIRES "buf" must have length at least nbuf. -// Copies "str" into "buf" and null-terminates. -// Overwrites *np with the new length. -static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, - size_t* np, bool accept_spaces) { - size_t n = *np; - if (n == 0) return ""; - if (n > 0 && isspace(*str)) { +// REQUIRES "buf" must have length at least nbuf. +// Copies "str" into "buf" and null-terminates. +// Overwrites *np with the new length. +static const char* TerminateNumber(char* buf, size_t nbuf, const char* str, + size_t* np, bool accept_spaces) { + size_t n = *np; + if (n == 0) return ""; + if (n > 0 && isspace(*str)) { // We are less forgiving than the strtoxxx() routines and do not - // allow leading spaces. We do allow leading spaces for floats. - if (!accept_spaces) { - return ""; - } - while (n > 0 && isspace(*str)) { - n--; - str++; - } - } - - // Although buf has a fixed maximum size, we can still handle - // arbitrarily large integers correctly by omitting leading zeros. - // (Numbers that are still too long will be out of range.) - // Before deciding whether str is too long, - // remove leading zeros with s/000+/00/. - // Leaving the leading two zeros in place means that - // we don't change 0000x123 (invalid) into 0x123 (valid). - // Skip over leading - before replacing. - bool neg = false; - if (n >= 1 && str[0] == '-') { - neg = true; - n--; - str++; - } - - if (n >= 3 && str[0] == '0' && str[1] == '0') { - while (n >= 3 && str[2] == '0') { - n--; - str++; - } - } - - if (neg) { // make room in buf for - - n++; - str--; + // allow leading spaces. We do allow leading spaces for floats. + if (!accept_spaces) { + return ""; + } + while (n > 0 && isspace(*str)) { + n--; + str++; + } } - if (n > nbuf-1) return ""; - - memmove(buf, str, n); - if (neg) { - buf[0] = '-'; + // Although buf has a fixed maximum size, we can still handle + // arbitrarily large integers correctly by omitting leading zeros. + // (Numbers that are still too long will be out of range.) + // Before deciding whether str is too long, + // remove leading zeros with s/000+/00/. + // Leaving the leading two zeros in place means that + // we don't change 0000x123 (invalid) into 0x123 (valid). + // Skip over leading - before replacing. + bool neg = false; + if (n >= 1 && str[0] == '-') { + neg = true; + n--; + str++; } - buf[n] = '\0'; - *np = n; - return buf; + + if (n >= 3 && str[0] == '0' && str[1] == '0') { + while (n >= 3 && str[2] == '0') { + n--; + str++; + } + } + + if (neg) { // make room in buf for - + n++; + str--; + } + + if (n > nbuf-1) return ""; + + memmove(buf, str, n); + if (neg) { + buf[0] = '-'; + } + buf[n] = '\0'; + *np = n; + return buf; } template <> @@ -1186,7 +1186,7 @@ template <> bool Parse(const char* str, size_t n, long* dest, int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, sizeof buf, str, &n, false); + str = TerminateNumber(buf, sizeof buf, str, &n, false); char* end; errno = 0; long r = strtol(str, &end, radix); @@ -1201,11 +1201,11 @@ template <> bool Parse(const char* str, size_t n, unsigned long* dest, int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, sizeof buf, str, &n, false); + str = TerminateNumber(buf, sizeof buf, str, &n, false); if (str[0] == '-') { - // strtoul() will silently accept negative numbers and parse - // them. This module is more strict and treats them as errors. - return false; + // strtoul() will silently accept negative numbers and parse + // them. This module is more strict and treats them as errors. + return false; } char* end; @@ -1262,10 +1262,10 @@ template <> bool Parse(const char* str, size_t n, long long* dest, int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, sizeof buf, str, &n, false); + str = TerminateNumber(buf, sizeof buf, str, &n, false); char* end; errno = 0; - long long r = strtoll(str, &end, radix); + long long r = strtoll(str, &end, radix); if (end != str + n) return false; // Leftover junk if (errno) return false; if (dest == NULL) return true; @@ -1277,7 +1277,7 @@ template <> bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) { if (n == 0) return false; char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, sizeof buf, str, &n, false); + str = TerminateNumber(buf, sizeof buf, str, &n, false); if (str[0] == '-') { // strtoull() will silently accept negative numbers and parse // them. This module is more strict and treats them as errors. @@ -1285,7 +1285,7 @@ bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) { } char* end; errno = 0; - unsigned long long r = strtoull(str, &end, radix); + unsigned long long r = strtoull(str, &end, radix); if (end != str + n) return false; // Leftover junk if (errno) return false; if (dest == NULL) return true; diff --git a/contrib/libs/re2/re2/re2.h b/contrib/libs/re2/re2/re2.h index f8f8043daf..90cdf87880 100644 --- a/contrib/libs/re2/re2/re2.h +++ b/contrib/libs/re2/re2/re2.h @@ -1,35 +1,35 @@ -// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef RE2_RE2_H_ -#define RE2_RE2_H_ - -// C++ interface to the re2 regular-expression library. -// RE2 supports Perl-style regular expressions (with extensions like -// \d, \w, \s, ...). -// -// ----------------------------------------------------------------------- -// REGEXP SYNTAX: -// -// This module uses the re2 library and hence supports -// its syntax for regular expressions, which is similar to Perl's with -// some of the more complicated things thrown away. In particular, -// backreferences and generalized assertions are not available, nor is \Z. -// -// See https://github.com/google/re2/wiki/Syntax for the syntax -// supported by RE2, and a comparison with PCRE and PERL regexps. -// -// For those not familiar with Perl's regular expressions, -// here are some examples of the most commonly used extensions: -// -// "hello (\\w+) world" -- \w matches a "word" character -// "version (\\d+)" -- \d matches a digit -// "hello\\s+world" -- \s matches any whitespace character -// "\\b(\\w+)\\b" -- \b matches non-empty string at word boundary -// "(?i)hello" -- (?i) turns on case-insensitive matching -// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible -// +// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_RE2_H_ +#define RE2_RE2_H_ + +// C++ interface to the re2 regular-expression library. +// RE2 supports Perl-style regular expressions (with extensions like +// \d, \w, \s, ...). +// +// ----------------------------------------------------------------------- +// REGEXP SYNTAX: +// +// This module uses the re2 library and hence supports +// its syntax for regular expressions, which is similar to Perl's with +// some of the more complicated things thrown away. In particular, +// backreferences and generalized assertions are not available, nor is \Z. +// +// See https://github.com/google/re2/wiki/Syntax for the syntax +// supported by RE2, and a comparison with PCRE and PERL regexps. +// +// For those not familiar with Perl's regular expressions, +// here are some examples of the most commonly used extensions: +// +// "hello (\\w+) world" -- \w matches a "word" character +// "version (\\d+)" -- \d matches a digit +// "hello\\s+world" -- \s matches any whitespace character +// "\\b(\\w+)\\b" -- \b matches non-empty string at word boundary +// "(?i)hello" -- (?i) turns on case-insensitive matching +// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible +// // The double backslashes are needed when writing C++ string literals. // However, they should NOT be used when writing C++11 raw string literals: // @@ -43,31 +43,31 @@ // When using UTF-8 encoding, case-insensitive matching will perform // simple case folding, not full case folding. // -// ----------------------------------------------------------------------- -// MATCHING INTERFACE: -// -// The "FullMatch" operation checks that supplied text matches a -// supplied pattern exactly. -// -// Example: successful match -// CHECK(RE2::FullMatch("hello", "h.*o")); -// -// Example: unsuccessful match (requires full match): -// CHECK(!RE2::FullMatch("hello", "e")); -// -// ----------------------------------------------------------------------- -// UTF-8 AND THE MATCHING INTERFACE: -// -// By default, the pattern and input text are interpreted as UTF-8. -// The RE2::Latin1 option causes them to be interpreted as Latin-1. -// -// Example: -// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern))); -// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1))); -// -// ----------------------------------------------------------------------- +// ----------------------------------------------------------------------- +// MATCHING INTERFACE: +// +// The "FullMatch" operation checks that supplied text matches a +// supplied pattern exactly. +// +// Example: successful match +// CHECK(RE2::FullMatch("hello", "h.*o")); +// +// Example: unsuccessful match (requires full match): +// CHECK(!RE2::FullMatch("hello", "e")); +// +// ----------------------------------------------------------------------- +// UTF-8 AND THE MATCHING INTERFACE: +// +// By default, the pattern and input text are interpreted as UTF-8. +// The RE2::Latin1 option causes them to be interpreted as Latin-1. +// +// Example: +// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern))); +// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1))); +// +// ----------------------------------------------------------------------- // MATCHING WITH SUBSTRING EXTRACTION: -// +// // You can supply extra pointer arguments to extract matched substrings. // On match failure, none of the pointees will have been modified. // On match success, the substrings will be converted (as necessary) and @@ -79,252 +79,252 @@ // that do not inspect the substring contents. Hence, in the common case // where all of the pointees are of such types, failure is always due to // match failure and thus none of the pointees will have been modified. -// -// Example: extracts "ruby" into "s" and 1234 into "i" -// int i; +// +// Example: extracts "ruby" into "s" and 1234 into "i" +// int i; // std::string s; -// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); -// -// Example: fails because string cannot be stored in integer -// CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); -// +// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); +// +// Example: fails because string cannot be stored in integer +// CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); +// // Example: fails because there aren't enough sub-patterns -// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s)); -// -// Example: does not try to extract any extra sub-patterns -// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); -// -// Example: does not try to extract into NULL -// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); -// -// Example: integer overflow causes failure -// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); -// -// NOTE(rsc): Asking for substrings slows successful matches quite a bit. -// This may get a little faster in the future, but right now is slower -// than PCRE. On the other hand, failed matches run *very* fast (faster -// than PCRE), as do matches without substring extraction. -// -// ----------------------------------------------------------------------- -// PARTIAL MATCHES -// -// You can use the "PartialMatch" operation when you want the pattern -// to match any substring of the text. -// -// Example: simple search for a string: -// CHECK(RE2::PartialMatch("hello", "ell")); -// -// Example: find first number in a string -// int number; -// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number)); -// CHECK_EQ(number, 100); -// -// ----------------------------------------------------------------------- -// PRE-COMPILED REGULAR EXPRESSIONS -// -// RE2 makes it easy to use any string as a regular expression, without -// requiring a separate compilation step. -// -// If speed is of the essence, you can create a pre-compiled "RE2" -// object from the pattern and use it multiple times. If you do so, -// you can typically parse text faster than with sscanf. -// -// Example: precompile pattern for faster matching: -// RE2 pattern("h.*o"); -// while (ReadLine(&str)) { -// if (RE2::FullMatch(str, pattern)) ...; -// } -// -// ----------------------------------------------------------------------- -// SCANNING TEXT INCREMENTALLY -// -// The "Consume" operation may be useful if you want to repeatedly -// match regular expressions at the front of a string and skip over -// them as they match. This requires use of the "StringPiece" type, -// which represents a sub-range of a real string. -// -// Example: read lines of the form "var = value" from a string. +// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s)); +// +// Example: does not try to extract any extra sub-patterns +// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); +// +// Example: does not try to extract into NULL +// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); +// +// Example: integer overflow causes failure +// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); +// +// NOTE(rsc): Asking for substrings slows successful matches quite a bit. +// This may get a little faster in the future, but right now is slower +// than PCRE. On the other hand, failed matches run *very* fast (faster +// than PCRE), as do matches without substring extraction. +// +// ----------------------------------------------------------------------- +// PARTIAL MATCHES +// +// You can use the "PartialMatch" operation when you want the pattern +// to match any substring of the text. +// +// Example: simple search for a string: +// CHECK(RE2::PartialMatch("hello", "ell")); +// +// Example: find first number in a string +// int number; +// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number)); +// CHECK_EQ(number, 100); +// +// ----------------------------------------------------------------------- +// PRE-COMPILED REGULAR EXPRESSIONS +// +// RE2 makes it easy to use any string as a regular expression, without +// requiring a separate compilation step. +// +// If speed is of the essence, you can create a pre-compiled "RE2" +// object from the pattern and use it multiple times. If you do so, +// you can typically parse text faster than with sscanf. +// +// Example: precompile pattern for faster matching: +// RE2 pattern("h.*o"); +// while (ReadLine(&str)) { +// if (RE2::FullMatch(str, pattern)) ...; +// } +// +// ----------------------------------------------------------------------- +// SCANNING TEXT INCREMENTALLY +// +// The "Consume" operation may be useful if you want to repeatedly +// match regular expressions at the front of a string and skip over +// them as they match. This requires use of the "StringPiece" type, +// which represents a sub-range of a real string. +// +// Example: read lines of the form "var = value" from a string. // std::string contents = ...; // Fill string somehow -// StringPiece input(contents); // Wrap a StringPiece around it -// +// StringPiece input(contents); // Wrap a StringPiece around it +// // std::string var; -// int value; -// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { -// ...; -// } -// -// Each successful call to "Consume" will set "var/value", and also -// advance "input" so it points past the matched text. Note that if the -// regular expression matches an empty string, input will advance -// by 0 bytes. If the regular expression being used might match -// an empty string, the loop body must check for this case and either -// advance the string or break out of the loop. -// -// The "FindAndConsume" operation is similar to "Consume" but does not -// anchor your match at the beginning of the string. For example, you -// could extract all words from a string by repeatedly calling -// RE2::FindAndConsume(&input, "(\\w+)", &word) -// -// ----------------------------------------------------------------------- -// USING VARIABLE NUMBER OF ARGUMENTS -// -// The above operations require you to know the number of arguments -// when you write the code. This is not always possible or easy (for -// example, the regular expression may be calculated at run time). -// You can use the "N" version of the operations when the number of -// match arguments are determined at run time. -// -// Example: -// const RE2::Arg* args[10]; -// int n; -// // ... populate args with pointers to RE2::Arg values ... -// // ... set n to the number of RE2::Arg objects ... -// bool match = RE2::FullMatchN(input, pattern, args, n); -// -// The last statement is equivalent to -// -// bool match = RE2::FullMatch(input, pattern, -// *args[0], *args[1], ..., *args[n - 1]); -// -// ----------------------------------------------------------------------- -// PARSING HEX/OCTAL/C-RADIX NUMBERS -// -// By default, if you pass a pointer to a numeric value, the -// corresponding text is interpreted as a base-10 number. You can -// instead wrap the pointer with a call to one of the operators Hex(), -// Octal(), or CRadix() to interpret the text in another base. The -// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) -// prefixes, but defaults to base-10. -// -// Example: -// int a, b, c, d; -// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", -// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)); -// will leave 64 in a, b, c, and d. - -#include <stddef.h> -#include <stdint.h> -#include <algorithm> -#include <map> -#include <mutex> -#include <string> +// int value; +// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { +// ...; +// } +// +// Each successful call to "Consume" will set "var/value", and also +// advance "input" so it points past the matched text. Note that if the +// regular expression matches an empty string, input will advance +// by 0 bytes. If the regular expression being used might match +// an empty string, the loop body must check for this case and either +// advance the string or break out of the loop. +// +// The "FindAndConsume" operation is similar to "Consume" but does not +// anchor your match at the beginning of the string. For example, you +// could extract all words from a string by repeatedly calling +// RE2::FindAndConsume(&input, "(\\w+)", &word) +// +// ----------------------------------------------------------------------- +// USING VARIABLE NUMBER OF ARGUMENTS +// +// The above operations require you to know the number of arguments +// when you write the code. This is not always possible or easy (for +// example, the regular expression may be calculated at run time). +// You can use the "N" version of the operations when the number of +// match arguments are determined at run time. +// +// Example: +// const RE2::Arg* args[10]; +// int n; +// // ... populate args with pointers to RE2::Arg values ... +// // ... set n to the number of RE2::Arg objects ... +// bool match = RE2::FullMatchN(input, pattern, args, n); +// +// The last statement is equivalent to +// +// bool match = RE2::FullMatch(input, pattern, +// *args[0], *args[1], ..., *args[n - 1]); +// +// ----------------------------------------------------------------------- +// PARSING HEX/OCTAL/C-RADIX NUMBERS +// +// By default, if you pass a pointer to a numeric value, the +// corresponding text is interpreted as a base-10 number. You can +// instead wrap the pointer with a call to one of the operators Hex(), +// Octal(), or CRadix() to interpret the text in another base. The +// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) +// prefixes, but defaults to base-10. +// +// Example: +// int a, b, c, d; +// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", +// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)); +// will leave 64 in a, b, c, and d. + +#include <stddef.h> +#include <stdint.h> +#include <algorithm> +#include <map> +#include <mutex> +#include <string> #include <type_traits> #include <vector> #if defined(ARCADIA_ROOT) -#include <util/generic/string.h> +#include <util/generic/string.h> #endif - + #if defined(__APPLE__) #include <TargetConditionals.h> #endif -#include "re2/stringpiece.h" - -namespace re2 { -class Prog; -class Regexp; -} // namespace re2 - -namespace re2 { - -// Interface for regular expression matching. Also corresponds to a -// pre-compiled regular expression. An "RE2" object is safe for -// concurrent use by multiple threads. -class RE2 { - public: - // We convert user-passed pointers into special Arg objects - class Arg; - class Options; - - // Defined in set.h. - class Set; - - enum ErrorCode { - NoError = 0, - - // Unexpected error - ErrorInternal, - - // Parse errors - ErrorBadEscape, // bad escape sequence - ErrorBadCharClass, // bad character class - ErrorBadCharRange, // bad character class range - ErrorMissingBracket, // missing closing ] - ErrorMissingParen, // missing closing ) +#include "re2/stringpiece.h" + +namespace re2 { +class Prog; +class Regexp; +} // namespace re2 + +namespace re2 { + +// Interface for regular expression matching. Also corresponds to a +// pre-compiled regular expression. An "RE2" object is safe for +// concurrent use by multiple threads. +class RE2 { + public: + // We convert user-passed pointers into special Arg objects + class Arg; + class Options; + + // Defined in set.h. + class Set; + + enum ErrorCode { + NoError = 0, + + // Unexpected error + ErrorInternal, + + // Parse errors + ErrorBadEscape, // bad escape sequence + ErrorBadCharClass, // bad character class + ErrorBadCharRange, // bad character class range + ErrorMissingBracket, // missing closing ] + ErrorMissingParen, // missing closing ) ErrorUnexpectedParen, // unexpected closing ) - ErrorTrailingBackslash, // trailing \ at end of regexp - ErrorRepeatArgument, // repeat argument missing, e.g. "*" - ErrorRepeatSize, // bad repetition argument - ErrorRepeatOp, // bad repetition operator - ErrorBadPerlOp, // bad perl operator - ErrorBadUTF8, // invalid UTF-8 in regexp - ErrorBadNamedCapture, // bad named capture group - ErrorPatternTooLarge // pattern too large (compile failed) - }; - - // Predefined common options. - // If you need more complicated things, instantiate - // an Option class, possibly passing one of these to - // the Option constructor, change the settings, and pass that - // Option class to the RE2 constructor. - enum CannedOptions { - DefaultOptions = 0, - Latin1, // treat input as Latin-1 (default UTF-8) - POSIX, // POSIX syntax, leftmost-longest match - Quiet // do not log about regexp parse errors - }; - + ErrorTrailingBackslash, // trailing \ at end of regexp + ErrorRepeatArgument, // repeat argument missing, e.g. "*" + ErrorRepeatSize, // bad repetition argument + ErrorRepeatOp, // bad repetition operator + ErrorBadPerlOp, // bad perl operator + ErrorBadUTF8, // invalid UTF-8 in regexp + ErrorBadNamedCapture, // bad named capture group + ErrorPatternTooLarge // pattern too large (compile failed) + }; + + // Predefined common options. + // If you need more complicated things, instantiate + // an Option class, possibly passing one of these to + // the Option constructor, change the settings, and pass that + // Option class to the RE2 constructor. + enum CannedOptions { + DefaultOptions = 0, + Latin1, // treat input as Latin-1 (default UTF-8) + POSIX, // POSIX syntax, leftmost-longest match + Quiet // do not log about regexp parse errors + }; + // Need to have the const char* and const std::string& forms for implicit - // conversions when passing string literals to FullMatch and PartialMatch. - // Otherwise the StringPiece form would be sufficient. -#ifndef SWIG - RE2(const char* pattern); + // conversions when passing string literals to FullMatch and PartialMatch. + // Otherwise the StringPiece form would be sufficient. +#ifndef SWIG + RE2(const char* pattern); RE2(const std::string& pattern); -#endif - RE2(const StringPiece& pattern); - RE2(const StringPiece& pattern, const Options& options); +#endif + RE2(const StringPiece& pattern); + RE2(const StringPiece& pattern, const Options& options); #if defined(ARCADIA_ROOT) - // ambiguity resolution. - RE2(const TString& pattern) : RE2(StringPiece(pattern)) {} + // ambiguity resolution. + RE2(const TString& pattern) : RE2(StringPiece(pattern)) {} #endif - ~RE2(); - - // Returns whether RE2 was created properly. - bool ok() const { return error_code() == NoError; } - - // The string specification for this RE2. E.g. - // RE2 re("ab*c?d+"); - // re.pattern(); // "ab*c?d+" + ~RE2(); + + // Returns whether RE2 was created properly. + bool ok() const { return error_code() == NoError; } + + // The string specification for this RE2. E.g. + // RE2 re("ab*c?d+"); + // re.pattern(); // "ab*c?d+" const std::string& pattern() const { return pattern_; } - - // If RE2 could not be created properly, returns an error string. - // Else returns the empty string. + + // If RE2 could not be created properly, returns an error string. + // Else returns the empty string. const std::string& error() const { return *error_; } - - // If RE2 could not be created properly, returns an error code. - // Else returns RE2::NoError (== 0). - ErrorCode error_code() const { return error_code_; } - - // If RE2 could not be created properly, returns the offending - // portion of the regexp. + + // If RE2 could not be created properly, returns an error code. + // Else returns RE2::NoError (== 0). + ErrorCode error_code() const { return error_code_; } + + // If RE2 could not be created properly, returns the offending + // portion of the regexp. const std::string& error_arg() const { return error_arg_; } - - // Returns the program size, a very approximate measure of a regexp's "cost". - // Larger numbers are more expensive than smaller numbers. - int ProgramSize() const; + + // Returns the program size, a very approximate measure of a regexp's "cost". + // Larger numbers are more expensive than smaller numbers. + int ProgramSize() const; int ReverseProgramSize() const; - + // If histogram is not null, outputs the program fanout // as a histogram bucketed by powers of 2. - // Returns the number of the largest non-empty bucket. + // Returns the number of the largest non-empty bucket. int ProgramFanout(std::vector<int>* histogram) const; int ReverseProgramFanout(std::vector<int>* histogram) const; - - // Returns the underlying Regexp; not for general use. - // Returns entire_regexp_ so that callers don't need - // to know about prefix_ and prefix_foldcase_. - re2::Regexp* Regexp() const { return entire_regexp_; } - + + // Returns the underlying Regexp; not for general use. + // Returns entire_regexp_ so that callers don't need + // to know about prefix_ and prefix_foldcase_. + re2::Regexp* Regexp() const { return entire_regexp_; } + /***** The array-based matching interface ******/ // The functions here have names ending in 'N' and are used to implement @@ -360,41 +360,41 @@ class RE2 { // The first layer constructs the temporary Arg objects. The second layer // (above) constructs the array of pointers to the temporary Arg objects. - /***** The useful part: the matching interface *****/ - - // Matches "text" against "re". If pointer arguments are - // supplied, copies matched sub-patterns into them. - // + /***** The useful part: the matching interface *****/ + + // Matches "text" against "re". If pointer arguments are + // supplied, copies matched sub-patterns into them. + // // You can pass in a "const char*" or a "std::string" for "text". // You can pass in a "const char*" or a "std::string" or a "RE2" for "re". - // - // The provided pointer arguments can be pointers to any scalar numeric - // type, or one of: + // + // The provided pointer arguments can be pointers to any scalar numeric + // type, or one of: // std::string (matched piece is copied to string) - // StringPiece (StringPiece is mutated to point to matched piece) - // T (where "bool T::ParseFrom(const char*, size_t)" exists) - // (void*)NULL (the corresponding matched sub-pattern is not copied) - // - // Returns true iff all of the following conditions are satisfied: + // StringPiece (StringPiece is mutated to point to matched piece) + // T (where "bool T::ParseFrom(const char*, size_t)" exists) + // (void*)NULL (the corresponding matched sub-pattern is not copied) + // + // Returns true iff all of the following conditions are satisfied: // a. "text" matches "re" fully - from the beginning to the end of "text". // b. The number of matched sub-patterns is >= number of supplied pointers. - // c. The "i"th argument has a suitable type for holding the - // string captured as the "i"th sub-pattern. If you pass in - // NULL for the "i"th argument, or pass fewer arguments than + // c. The "i"th argument has a suitable type for holding the + // string captured as the "i"th sub-pattern. If you pass in + // NULL for the "i"th argument, or pass fewer arguments than // number of sub-patterns, the "i"th captured sub-pattern is - // ignored. - // - // CAVEAT: An optional sub-pattern that does not exist in the - // matched string is assigned the empty string. Therefore, the - // following will return false (because the empty string is not a - // valid number): - // int number; - // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); - template <typename... A> - static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) { - return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...); - } - + // ignored. + // + // CAVEAT: An optional sub-pattern that does not exist in the + // matched string is assigned the empty string. Therefore, the + // following will return false (because the empty string is not a + // valid number): + // int number; + // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); + template <typename... A> + static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) { + return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...); + } + // Like FullMatch(), except that "re" is allowed to match a substring // of "text". // @@ -406,11 +406,11 @@ class RE2 { // NULL for the "i"th argument, or pass fewer arguments than // number of sub-patterns, the "i"th captured sub-pattern is // ignored. - template <typename... A> - static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) { - return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...); - } - + template <typename... A> + static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) { + return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...); + } + // Like FullMatch() and PartialMatch(), except that "re" has to match // a prefix of the text, and "input" is advanced past the matched // text. Note: "input" is modified iff this routine returns true @@ -424,11 +424,11 @@ class RE2 { // NULL for the "i"th argument, or pass fewer arguments than // number of sub-patterns, the "i"th captured sub-pattern is // ignored. - template <typename... A> - static bool Consume(StringPiece* input, const RE2& re, A&&... a) { - return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...); - } - + template <typename... A> + static bool Consume(StringPiece* input, const RE2& re, A&&... a) { + return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...); + } + // Like Consume(), but does not anchor the match at the beginning of // the text. That is, "re" need not start its match at the beginning // of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds @@ -442,374 +442,374 @@ class RE2 { // NULL for the "i"th argument, or pass fewer arguments than // number of sub-patterns, the "i"th captured sub-pattern is // ignored. - template <typename... A> - static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) { - return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...); - } -#endif - + template <typename... A> + static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) { + return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...); + } +#endif + // Replace the first match of "re" in "str" with "rewrite". - // Within "rewrite", backslash-escaped digits (\1 to \9) can be - // used to insert text matching corresponding parenthesized group - // from the pattern. \0 in "rewrite" refers to the entire matching - // text. E.g., - // + // Within "rewrite", backslash-escaped digits (\1 to \9) can be + // used to insert text matching corresponding parenthesized group + // from the pattern. \0 in "rewrite" refers to the entire matching + // text. E.g., + // // std::string s = "yabba dabba doo"; - // CHECK(RE2::Replace(&s, "b+", "d")); - // - // will leave "s" containing "yada dabba doo" - // - // Returns true if the pattern matches and a replacement occurs, - // false otherwise. + // CHECK(RE2::Replace(&s, "b+", "d")); + // + // will leave "s" containing "yada dabba doo" + // + // Returns true if the pattern matches and a replacement occurs, + // false otherwise. static bool Replace(std::string* str, const RE2& re, - const StringPiece& rewrite); + const StringPiece& rewrite); #if defined(ARCADIA_ROOT) - static bool Replace(TString *str, - const RE2& pattern, - const StringPiece& rewrite) { + static bool Replace(TString *str, + const RE2& pattern, + const StringPiece& rewrite) { std::string tmp(*str); - bool res = Replace(&tmp, pattern, rewrite); - *str = tmp; - return res; - } + bool res = Replace(&tmp, pattern, rewrite); + *str = tmp; + return res; + } #endif - - // Like Replace(), except replaces successive non-overlapping occurrences - // of the pattern in the string with the rewrite. E.g. - // + + // Like Replace(), except replaces successive non-overlapping occurrences + // of the pattern in the string with the rewrite. E.g. + // // std::string s = "yabba dabba doo"; - // CHECK(RE2::GlobalReplace(&s, "b+", "d")); - // - // will leave "s" containing "yada dada doo" - // Replacements are not subject to re-matching. - // - // Because GlobalReplace only replaces non-overlapping matches, - // replacing "ana" within "banana" makes only one replacement, not two. - // - // Returns the number of replacements made. + // CHECK(RE2::GlobalReplace(&s, "b+", "d")); + // + // will leave "s" containing "yada dada doo" + // Replacements are not subject to re-matching. + // + // Because GlobalReplace only replaces non-overlapping matches, + // replacing "ana" within "banana" makes only one replacement, not two. + // + // Returns the number of replacements made. static int GlobalReplace(std::string* str, const RE2& re, - const StringPiece& rewrite); - + const StringPiece& rewrite); + #if defined(ARCADIA_ROOT) static int GlobalReplace(TString* str, const RE2& pattern, const StringPiece& rewrite) { std::string tmp(*str); - int res = GlobalReplace(&tmp, pattern, rewrite); - *str = tmp; - return res; - } + int res = GlobalReplace(&tmp, pattern, rewrite); + *str = tmp; + return res; + } #endif - - // Like Replace, except that if the pattern matches, "rewrite" - // is copied into "out" with substitutions. The non-matching - // portions of "text" are ignored. - // - // Returns true iff a match occurred and the extraction happened - // successfully; if no match occurs, the string is left unaffected. - // - // REQUIRES: "text" must not alias any part of "*out". + + // Like Replace, except that if the pattern matches, "rewrite" + // is copied into "out" with substitutions. The non-matching + // portions of "text" are ignored. + // + // Returns true iff a match occurred and the extraction happened + // successfully; if no match occurs, the string is left unaffected. + // + // REQUIRES: "text" must not alias any part of "*out". static bool Extract(const StringPiece& text, const RE2& re, const StringPiece& rewrite, std::string* out); - + #if defined(ARCADIA_ROOT) static bool Extract(const StringPiece& text, - const RE2& pattern, + const RE2& pattern, const StringPiece& rewrite, - TString *out) { - std::string tmp; - bool res = Extract(text, pattern, rewrite, &tmp); - *out = tmp; - return res; - } + TString *out) { + std::string tmp; + bool res = Extract(text, pattern, rewrite, &tmp); + *out = tmp; + return res; + } #endif - - // Escapes all potentially meaningful regexp characters in - // 'unquoted'. The returned string, used as a regular expression, + + // Escapes all potentially meaningful regexp characters in + // 'unquoted'. The returned string, used as a regular expression, // will match exactly the original string. For example, - // 1.5-2.0? - // may become: - // 1\.5\-2\.0\? + // 1.5-2.0? + // may become: + // 1\.5\-2\.0\? static std::string QuoteMeta(const StringPiece& unquoted); - - // Computes range for any strings matching regexp. The min and max can in - // some cases be arbitrarily precise, so the caller gets to specify the - // maximum desired length of string returned. - // - // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any - // string s that is an anchored match for this regexp satisfies - // min <= s && s <= max. - // - // Note that PossibleMatchRange() will only consider the first copy of an - // infinitely repeated element (i.e., any regexp element followed by a '*' or - // '+' operator). Regexps with "{N}" constructions are not affected, as those - // do not compile down to infinite repetitions. - // - // Returns true on success, false on error. + + // Computes range for any strings matching regexp. The min and max can in + // some cases be arbitrarily precise, so the caller gets to specify the + // maximum desired length of string returned. + // + // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any + // string s that is an anchored match for this regexp satisfies + // min <= s && s <= max. + // + // Note that PossibleMatchRange() will only consider the first copy of an + // infinitely repeated element (i.e., any regexp element followed by a '*' or + // '+' operator). Regexps with "{N}" constructions are not affected, as those + // do not compile down to infinite repetitions. + // + // Returns true on success, false on error. bool PossibleMatchRange(std::string* min, std::string* max, int maxlen) const; - - // Generic matching interface - - // Type of match. - enum Anchor { - UNANCHORED, // No anchoring - ANCHOR_START, // Anchor at start only - ANCHOR_BOTH // Anchor at start and end - }; - - // Return the number of capturing subpatterns, or -1 if the - // regexp wasn't valid on construction. The overall match ($0) - // does not count: if the regexp is "(a)(b)", returns 2. + + // Generic matching interface + + // Type of match. + enum Anchor { + UNANCHORED, // No anchoring + ANCHOR_START, // Anchor at start only + ANCHOR_BOTH // Anchor at start and end + }; + + // Return the number of capturing subpatterns, or -1 if the + // regexp wasn't valid on construction. The overall match ($0) + // does not count: if the regexp is "(a)(b)", returns 2. int NumberOfCapturingGroups() const { return num_captures_; } - - // Return a map from names to capturing indices. - // The map records the index of the leftmost group - // with the given name. - // Only valid until the re is deleted. + + // Return a map from names to capturing indices. + // The map records the index of the leftmost group + // with the given name. + // Only valid until the re is deleted. const std::map<std::string, int>& NamedCapturingGroups() const; - - // Return a map from capturing indices to names. - // The map has no entries for unnamed groups. - // Only valid until the re is deleted. + + // Return a map from capturing indices to names. + // The map has no entries for unnamed groups. + // Only valid until the re is deleted. const std::map<int, std::string>& CapturingGroupNames() const; - - // General matching routine. - // Match against text starting at offset startpos - // and stopping the search at offset endpos. - // Returns true if match found, false if not. + + // General matching routine. + // Match against text starting at offset startpos + // and stopping the search at offset endpos. + // Returns true if match found, false if not. // On a successful match, fills in submatch[] (up to nsubmatch entries) - // with information about submatches. + // with information about submatches. // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar", // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL. // Caveat: submatch[] may be clobbered even on match failure. - // - // Don't ask for more match information than you will use: + // + // Don't ask for more match information than you will use: // runs much faster with nsubmatch == 1 than nsubmatch > 1, and // runs even faster if nsubmatch == 0. // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(), - // but will be handled correctly. - // - // Passing text == StringPiece(NULL, 0) will be handled like any other - // empty string, but note that on return, it will not be possible to tell - // whether submatch i matched the empty string or did not match: + // but will be handled correctly. + // + // Passing text == StringPiece(NULL, 0) will be handled like any other + // empty string, but note that on return, it will not be possible to tell + // whether submatch i matched the empty string or did not match: // either way, submatch[i].data() == NULL. - bool Match(const StringPiece& text, - size_t startpos, - size_t endpos, + bool Match(const StringPiece& text, + size_t startpos, + size_t endpos, Anchor re_anchor, StringPiece* submatch, int nsubmatch) const; - - // Check that the given rewrite string is suitable for use with this - // regular expression. It checks that: - // * The regular expression has enough parenthesized subexpressions - // to satisfy all of the \N tokens in rewrite - // * The rewrite string doesn't have any syntax errors. E.g., - // '\' followed by anything other than a digit or '\'. - // A true return value guarantees that Replace() and Extract() won't - // fail because of a bad rewrite string. + + // Check that the given rewrite string is suitable for use with this + // regular expression. It checks that: + // * The regular expression has enough parenthesized subexpressions + // to satisfy all of the \N tokens in rewrite + // * The rewrite string doesn't have any syntax errors. E.g., + // '\' followed by anything other than a digit or '\'. + // A true return value guarantees that Replace() and Extract() won't + // fail because of a bad rewrite string. bool CheckRewriteString(const StringPiece& rewrite, std::string* error) const; - + bool CheckRewriteString(const StringPiece& rewrite, std::nullptr_t error) const { return CheckRewriteString(rewrite, static_cast<std::string*>(error)); - } - + } + #if defined(ARCADIA_ROOT) - bool CheckRewriteString(const StringPiece& rewrite, TString* error) const { - if (error) { - std::string tmp; - bool res = CheckRewriteString(rewrite, &tmp); - error->assign(tmp.data(), tmp.size()); - return res; - } else { - return CheckRewriteString(rewrite, nullptr); - } - } + bool CheckRewriteString(const StringPiece& rewrite, TString* error) const { + if (error) { + std::string tmp; + bool res = CheckRewriteString(rewrite, &tmp); + error->assign(tmp.data(), tmp.size()); + return res; + } else { + return CheckRewriteString(rewrite, nullptr); + } + } #endif - - // Returns the maximum submatch needed for the rewrite to be done by - // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. - static int MaxSubmatch(const StringPiece& rewrite); - - // Append the "rewrite" string, with backslash subsitutions from "vec", - // to string "out". - // Returns true on success. This method can fail because of a malformed - // rewrite string. CheckRewriteString guarantees that the rewrite will - // be sucessful. + + // Returns the maximum submatch needed for the rewrite to be done by + // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. + static int MaxSubmatch(const StringPiece& rewrite); + + // Append the "rewrite" string, with backslash subsitutions from "vec", + // to string "out". + // Returns true on success. This method can fail because of a malformed + // rewrite string. CheckRewriteString guarantees that the rewrite will + // be sucessful. bool Rewrite(std::string* out, const StringPiece& rewrite, - const StringPiece* vec, - int veclen) const; - - // Constructor options - class Options { - public: - // The options are (defaults in parentheses): - // - // utf8 (true) text and pattern are UTF-8; otherwise Latin-1 - // posix_syntax (false) restrict regexps to POSIX egrep syntax - // longest_match (false) search for longest match, not first match - // log_errors (true) log syntax and execution errors to ERROR - // max_mem (see below) approx. max memory footprint of RE2 - // literal (false) interpret string as literal, not regexp - // never_nl (false) never match \n, even if it is in regexp - // dot_nl (false) dot matches everything including new line - // never_capture (false) parse all parens as non-capturing - // case_sensitive (true) match is case-sensitive (regexp can override - // with (?i) unless in posix_syntax mode) - // - // The following options are only consulted when posix_syntax == true. + const StringPiece* vec, + int veclen) const; + + // Constructor options + class Options { + public: + // The options are (defaults in parentheses): + // + // utf8 (true) text and pattern are UTF-8; otherwise Latin-1 + // posix_syntax (false) restrict regexps to POSIX egrep syntax + // longest_match (false) search for longest match, not first match + // log_errors (true) log syntax and execution errors to ERROR + // max_mem (see below) approx. max memory footprint of RE2 + // literal (false) interpret string as literal, not regexp + // never_nl (false) never match \n, even if it is in regexp + // dot_nl (false) dot matches everything including new line + // never_capture (false) parse all parens as non-capturing + // case_sensitive (true) match is case-sensitive (regexp can override + // with (?i) unless in posix_syntax mode) + // + // The following options are only consulted when posix_syntax == true. // When posix_syntax == false, these features are always enabled and // cannot be turned off; to perform multi-line matching in that case, // begin the regexp with (?m). - // perl_classes (false) allow Perl's \d \s \w \D \S \W - // word_boundary (false) allow Perl's \b \B (word boundary and not) - // one_line (false) ^ and $ only match beginning and end of text - // - // The max_mem option controls how much memory can be used - // to hold the compiled form of the regexp (the Prog) and - // its cached DFA graphs. Code Search placed limits on the number - // of Prog instructions and DFA states: 10,000 for both. - // In RE2, those limits would translate to about 240 KB per Prog - // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a - // better job of keeping them small than Code Search did). - // Each RE2 has two Progs (one forward, one reverse), and each Prog - // can have two DFAs (one first match, one longest match). - // That makes 4 DFAs: - // + // perl_classes (false) allow Perl's \d \s \w \D \S \W + // word_boundary (false) allow Perl's \b \B (word boundary and not) + // one_line (false) ^ and $ only match beginning and end of text + // + // The max_mem option controls how much memory can be used + // to hold the compiled form of the regexp (the Prog) and + // its cached DFA graphs. Code Search placed limits on the number + // of Prog instructions and DFA states: 10,000 for both. + // In RE2, those limits would translate to about 240 KB per Prog + // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a + // better job of keeping them small than Code Search did). + // Each RE2 has two Progs (one forward, one reverse), and each Prog + // can have two DFAs (one first match, one longest match). + // That makes 4 DFAs: + // // forward, first-match - used for UNANCHORED or ANCHOR_START searches - // if opt.longest_match() == false - // forward, longest-match - used for all ANCHOR_BOTH searches, - // and the other two kinds if - // opt.longest_match() == true - // reverse, first-match - never used - // reverse, longest-match - used as second phase for unanchored searches - // - // The RE2 memory budget is statically divided between the two - // Progs and then the DFAs: two thirds to the forward Prog - // and one third to the reverse Prog. The forward Prog gives half - // of what it has left over to each of its DFAs. The reverse Prog - // gives it all to its longest-match DFA. - // - // Once a DFA fills its budget, it flushes its cache and starts over. - // If this happens too often, RE2 falls back on the NFA implementation. - - // For now, make the default budget something close to Code Search. - static const int kDefaultMaxMem = 8<<20; - - enum Encoding { - EncodingUTF8 = 1, - EncodingLatin1 - }; - - Options() : - encoding_(EncodingUTF8), - posix_syntax_(false), - longest_match_(false), - log_errors_(true), - max_mem_(kDefaultMaxMem), - literal_(false), - never_nl_(false), - dot_nl_(false), - never_capture_(false), - case_sensitive_(true), - perl_classes_(false), - word_boundary_(false), - one_line_(false) { - } - - /*implicit*/ Options(CannedOptions); - - Encoding encoding() const { return encoding_; } - void set_encoding(Encoding encoding) { encoding_ = encoding; } - - bool posix_syntax() const { return posix_syntax_; } - void set_posix_syntax(bool b) { posix_syntax_ = b; } - - bool longest_match() const { return longest_match_; } - void set_longest_match(bool b) { longest_match_ = b; } - - bool log_errors() const { return log_errors_; } - void set_log_errors(bool b) { log_errors_ = b; } - - int64_t max_mem() const { return max_mem_; } - void set_max_mem(int64_t m) { max_mem_ = m; } - - bool literal() const { return literal_; } - void set_literal(bool b) { literal_ = b; } - - bool never_nl() const { return never_nl_; } - void set_never_nl(bool b) { never_nl_ = b; } - - bool dot_nl() const { return dot_nl_; } - void set_dot_nl(bool b) { dot_nl_ = b; } - - bool never_capture() const { return never_capture_; } - void set_never_capture(bool b) { never_capture_ = b; } - - bool case_sensitive() const { return case_sensitive_; } - void set_case_sensitive(bool b) { case_sensitive_ = b; } - - bool perl_classes() const { return perl_classes_; } - void set_perl_classes(bool b) { perl_classes_ = b; } - - bool word_boundary() const { return word_boundary_; } - void set_word_boundary(bool b) { word_boundary_ = b; } - - bool one_line() const { return one_line_; } - void set_one_line(bool b) { one_line_ = b; } - - void Copy(const Options& src) { - *this = src; - } - - int ParseFlags() const; - - private: - Encoding encoding_; - bool posix_syntax_; - bool longest_match_; - bool log_errors_; - int64_t max_mem_; - bool literal_; - bool never_nl_; - bool dot_nl_; - bool never_capture_; - bool case_sensitive_; - bool perl_classes_; - bool word_boundary_; - bool one_line_; - }; - - // Returns the options set in the constructor. + // if opt.longest_match() == false + // forward, longest-match - used for all ANCHOR_BOTH searches, + // and the other two kinds if + // opt.longest_match() == true + // reverse, first-match - never used + // reverse, longest-match - used as second phase for unanchored searches + // + // The RE2 memory budget is statically divided between the two + // Progs and then the DFAs: two thirds to the forward Prog + // and one third to the reverse Prog. The forward Prog gives half + // of what it has left over to each of its DFAs. The reverse Prog + // gives it all to its longest-match DFA. + // + // Once a DFA fills its budget, it flushes its cache and starts over. + // If this happens too often, RE2 falls back on the NFA implementation. + + // For now, make the default budget something close to Code Search. + static const int kDefaultMaxMem = 8<<20; + + enum Encoding { + EncodingUTF8 = 1, + EncodingLatin1 + }; + + Options() : + encoding_(EncodingUTF8), + posix_syntax_(false), + longest_match_(false), + log_errors_(true), + max_mem_(kDefaultMaxMem), + literal_(false), + never_nl_(false), + dot_nl_(false), + never_capture_(false), + case_sensitive_(true), + perl_classes_(false), + word_boundary_(false), + one_line_(false) { + } + + /*implicit*/ Options(CannedOptions); + + Encoding encoding() const { return encoding_; } + void set_encoding(Encoding encoding) { encoding_ = encoding; } + + bool posix_syntax() const { return posix_syntax_; } + void set_posix_syntax(bool b) { posix_syntax_ = b; } + + bool longest_match() const { return longest_match_; } + void set_longest_match(bool b) { longest_match_ = b; } + + bool log_errors() const { return log_errors_; } + void set_log_errors(bool b) { log_errors_ = b; } + + int64_t max_mem() const { return max_mem_; } + void set_max_mem(int64_t m) { max_mem_ = m; } + + bool literal() const { return literal_; } + void set_literal(bool b) { literal_ = b; } + + bool never_nl() const { return never_nl_; } + void set_never_nl(bool b) { never_nl_ = b; } + + bool dot_nl() const { return dot_nl_; } + void set_dot_nl(bool b) { dot_nl_ = b; } + + bool never_capture() const { return never_capture_; } + void set_never_capture(bool b) { never_capture_ = b; } + + bool case_sensitive() const { return case_sensitive_; } + void set_case_sensitive(bool b) { case_sensitive_ = b; } + + bool perl_classes() const { return perl_classes_; } + void set_perl_classes(bool b) { perl_classes_ = b; } + + bool word_boundary() const { return word_boundary_; } + void set_word_boundary(bool b) { word_boundary_ = b; } + + bool one_line() const { return one_line_; } + void set_one_line(bool b) { one_line_ = b; } + + void Copy(const Options& src) { + *this = src; + } + + int ParseFlags() const; + + private: + Encoding encoding_; + bool posix_syntax_; + bool longest_match_; + bool log_errors_; + int64_t max_mem_; + bool literal_; + bool never_nl_; + bool dot_nl_; + bool never_capture_; + bool case_sensitive_; + bool perl_classes_; + bool word_boundary_; + bool one_line_; + }; + + // Returns the options set in the constructor. const Options& options() const { return options_; } - - // Argument converters; see below. + + // Argument converters; see below. template <typename T> static Arg CRadix(T* ptr); template <typename T> static Arg Hex(T* ptr); template <typename T> static Arg Octal(T* ptr); - - private: - void Init(const StringPiece& pattern, const Options& options); - - bool DoMatch(const StringPiece& text, + + private: + void Init(const StringPiece& pattern, const Options& options); + + bool DoMatch(const StringPiece& text, Anchor re_anchor, - size_t* consumed, - const Arg* const args[], - int n) const; - - re2::Prog* ReverseProg() const; - + size_t* consumed, + const Arg* const args[], + int n) const; + + re2::Prog* ReverseProg() const; + std::string pattern_; // string regular expression Options options_; // option flags re2::Regexp* entire_regexp_; // parsed regular expression @@ -822,26 +822,26 @@ class RE2 { re2::Prog* prog_; // compiled program for regexp int num_captures_; // number of capturing groups bool is_one_pass_; // can use prog_->SearchOnePass? - + // Reverse Prog for DFA execution only mutable re2::Prog* rprog_; - // Map from capture names to indices + // Map from capture names to indices mutable const std::map<std::string, int>* named_groups_; - // Map from capture indices to names + // Map from capture indices to names mutable const std::map<int, std::string>* group_names_; - - mutable std::once_flag rprog_once_; - mutable std::once_flag named_groups_once_; - mutable std::once_flag group_names_once_; - - RE2(const RE2&) = delete; - RE2& operator=(const RE2&) = delete; -}; - -/***** Implementation details *****/ - + + mutable std::once_flag rprog_once_; + mutable std::once_flag named_groups_once_; + mutable std::once_flag group_names_once_; + + RE2(const RE2&) = delete; + RE2& operator=(const RE2&) = delete; +}; + +/***** Implementation details *****/ + namespace re2_internal { - + // Types for which the 3-ary Parse() function template has specializations. template <typename T> struct Parse3ary : public std::false_type {}; template <> struct Parse3ary<void> : public std::true_type {}; @@ -855,7 +855,7 @@ template <> struct Parse3ary<signed char> : public std::true_type {}; template <> struct Parse3ary<unsigned char> : public std::true_type {}; template <> struct Parse3ary<float> : public std::true_type {}; template <> struct Parse3ary<double> : public std::true_type {}; - + template <typename T> bool Parse(const char* str, size_t n, T* dest); @@ -875,18 +875,18 @@ bool Parse(const char* str, size_t n, T* dest, int radix); } // namespace re2_internal -class RE2::Arg { +class RE2::Arg { private: template <typename T> using CanParse3ary = typename std::enable_if< re2_internal::Parse3ary<T>::value, int>::type; - + template <typename T> using CanParse4ary = typename std::enable_if< re2_internal::Parse4ary<T>::value, int>::type; - + #if !defined(_MSC_VER) template <typename T> using CanParseFrom = typename std::enable_if< @@ -895,46 +895,46 @@ class RE2::Arg { &T::ParseFrom))>::value, int>::type; #endif - + public: Arg() : Arg(nullptr) {} Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {} - + template <typename T, CanParse3ary<T> = 0> Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {} - + template <typename T, CanParse4ary<T> = 0> Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {} - + #if !defined(_MSC_VER) template <typename T, CanParseFrom<T> = 0> Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {} #endif - + typedef bool (*Parser)(const char* str, size_t n, void* dest); - + template <typename T> Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {} - + bool Parse(const char* str, size_t n) const { return (*parser_)(str, n, arg_); } - private: + private: static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) { return true; } - + template <typename T> static bool DoParse3ary(const char* str, size_t n, void* dest) { return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest)); } - + template <typename T> static bool DoParse4ary(const char* str, size_t n, void* dest) { return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10); } - + #if !defined(_MSC_VER) template <typename T> static bool DoParseFrom(const char* str, size_t n, void* dest) { @@ -942,85 +942,85 @@ class RE2::Arg { return reinterpret_cast<T*>(dest)->ParseFrom(str, n); } #endif - + void* arg_; Parser parser_; -}; - +}; + template <typename T> inline RE2::Arg RE2::CRadix(T* ptr) { return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool { return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 0); }); } - + template <typename T> inline RE2::Arg RE2::Hex(T* ptr) { return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool { return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 16); }); -} - +} + template <typename T> inline RE2::Arg RE2::Octal(T* ptr) { return RE2::Arg(ptr, [](const char* str, size_t n, void* dest) -> bool { return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 8); }); } - -#ifndef SWIG -// Silence warnings about missing initializers for members of LazyRE2. + +#ifndef SWIG +// Silence warnings about missing initializers for members of LazyRE2. #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6 -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +// Helper for writing global or static RE2s safely. +// Write +// static LazyRE2 re = {".*"}; +// and then use *re instead of writing +// static RE2 re(".*"); +// The former is more careful about multithreaded +// situations than the latter. +// +// N.B. This class never deletes the RE2 object that +// it constructs: that's a feature, so that it can be used +// for global and function static variables. +class LazyRE2 { + private: + struct NoArg {}; + + public: + typedef RE2 element_type; // support std::pointer_traits + + // Constructor omitted to preserve braced initialization in C++98. + + // Pretend to be a pointer to Type (never NULL due to on-demand creation): + RE2& operator*() const { return *get(); } + RE2* operator->() const { return get(); } + + // Named accessor/initializer: + RE2* get() const { + std::call_once(once_, &LazyRE2::Init, this); + return ptr_; + } + + // All data fields must be public to support {"foo"} initialization. + const char* pattern_; + RE2::CannedOptions options_; + NoArg barrier_against_excess_initializers_; + + mutable RE2* ptr_; + mutable std::once_flag once_; + + private: + static void Init(const LazyRE2* lazy_re2) { + lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_); + } + + void operator=(const LazyRE2&); // disallowed +}; #endif - -// Helper for writing global or static RE2s safely. -// Write -// static LazyRE2 re = {".*"}; -// and then use *re instead of writing -// static RE2 re(".*"); -// The former is more careful about multithreaded -// situations than the latter. -// -// N.B. This class never deletes the RE2 object that -// it constructs: that's a feature, so that it can be used -// for global and function static variables. -class LazyRE2 { - private: - struct NoArg {}; - - public: - typedef RE2 element_type; // support std::pointer_traits - - // Constructor omitted to preserve braced initialization in C++98. - - // Pretend to be a pointer to Type (never NULL due to on-demand creation): - RE2& operator*() const { return *get(); } - RE2* operator->() const { return get(); } - - // Named accessor/initializer: - RE2* get() const { - std::call_once(once_, &LazyRE2::Init, this); - return ptr_; - } - - // All data fields must be public to support {"foo"} initialization. - const char* pattern_; - RE2::CannedOptions options_; - NoArg barrier_against_excess_initializers_; - - mutable RE2* ptr_; - mutable std::once_flag once_; - - private: - static void Init(const LazyRE2* lazy_re2) { - lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_); - } - - void operator=(const LazyRE2&); // disallowed -}; -#endif - + namespace hooks { // Most platforms support thread_local. Older versions of iOS don't support @@ -1069,9 +1069,9 @@ DECLARE_HOOK(DFASearchFailure) } // namespace hooks -} // namespace re2 - -using re2::RE2; -using re2::LazyRE2; - -#endif // RE2_RE2_H_ +} // namespace re2 + +using re2::RE2; +using re2::LazyRE2; + +#endif // RE2_RE2_H_ diff --git a/contrib/libs/re2/re2/regexp.cc b/contrib/libs/re2/re2/regexp.cc index ca1318b43d..c583f3e593 100644 --- a/contrib/libs/re2/re2/regexp.cc +++ b/contrib/libs/re2/re2/regexp.cc @@ -6,31 +6,31 @@ // Tested by parse_test.cc #include "re2/regexp.h" - -#include <stddef.h> -#include <stdint.h> -#include <string.h> -#include <algorithm> -#include <map> -#include <mutex> -#include <string> -#include <vector> - -#include "util/util.h" -#include "util/logging.h" -#include "util/mutex.h" -#include "util/utf.h" + +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <algorithm> +#include <map> +#include <mutex> +#include <string> +#include <vector> + +#include "util/util.h" +#include "util/logging.h" +#include "util/mutex.h" +#include "util/utf.h" #include "re2/pod_array.h" -#include "re2/stringpiece.h" +#include "re2/stringpiece.h" #include "re2/walker-inl.h" namespace re2 { // Constructor. Allocates vectors as appropriate for operator. Regexp::Regexp(RegexpOp op, ParseFlags parse_flags) - : op_(static_cast<uint8_t>(op)), + : op_(static_cast<uint8_t>(op)), simple_(false), - parse_flags_(static_cast<uint16_t>(parse_flags)), + parse_flags_(static_cast<uint16_t>(parse_flags)), ref_(1), nsub_(0), down_(NULL) { @@ -57,8 +57,8 @@ Regexp::~Regexp() { delete[] runes_; break; case kRegexpCharClass: - if (cc_) - cc_->Delete(); + if (cc_) + cc_->Delete(); delete ccb_; break; } @@ -74,36 +74,36 @@ bool Regexp::QuickDestroy() { return false; } -// Lazily allocated. -static Mutex* ref_mutex; -static std::map<Regexp*, int>* ref_map; +// Lazily allocated. +static Mutex* ref_mutex; +static std::map<Regexp*, int>* ref_map; int Regexp::Ref() { if (ref_ < kMaxRef) return ref_; - MutexLock l(ref_mutex); - return (*ref_map)[this]; + MutexLock l(ref_mutex); + return (*ref_map)[this]; } // Increments reference count, returns object as convenience. Regexp* Regexp::Incref() { if (ref_ >= kMaxRef-1) { - static std::once_flag ref_once; - std::call_once(ref_once, []() { - ref_mutex = new Mutex; - ref_map = new std::map<Regexp*, int>; - }); - + static std::once_flag ref_once; + std::call_once(ref_once, []() { + ref_mutex = new Mutex; + ref_map = new std::map<Regexp*, int>; + }); + // Store ref count in overflow map. - MutexLock l(ref_mutex); - if (ref_ == kMaxRef) { - // already overflowed - (*ref_map)[this]++; - } else { - // overflowing now - (*ref_map)[this] = kMaxRef; - ref_ = kMaxRef; + MutexLock l(ref_mutex); + if (ref_ == kMaxRef) { + // already overflowed + (*ref_map)[this]++; + } else { + // overflowing now + (*ref_map)[this] = kMaxRef; + ref_ = kMaxRef; } return this; } @@ -116,13 +116,13 @@ Regexp* Regexp::Incref() { void Regexp::Decref() { if (ref_ == kMaxRef) { // Ref count is stored in overflow map. - MutexLock l(ref_mutex); - int r = (*ref_map)[this] - 1; + MutexLock l(ref_mutex); + int r = (*ref_map)[this] - 1; if (r < kMaxRef) { - ref_ = static_cast<uint16_t>(r); - ref_map->erase(this); + ref_ = static_cast<uint16_t>(r); + ref_map->erase(this); } else { - (*ref_map)[this] = r; + (*ref_map)[this] = r; } return; } @@ -191,45 +191,45 @@ Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) { return re; } -Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) { - // Squash **, ++ and ??. - if (op == sub->op() && flags == sub->parse_flags()) +Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) { + // Squash **, ++ and ??. + if (op == sub->op() && flags == sub->parse_flags()) return sub; - - // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because - // op is Star/Plus/Quest, we just have to check that sub->op() is too. - if ((sub->op() == kRegexpStar || - sub->op() == kRegexpPlus || - sub->op() == kRegexpQuest) && - flags == sub->parse_flags()) { - // If sub is Star, no need to rewrite it. - if (sub->op() == kRegexpStar) - return sub; - - // Rewrite sub to Star. - Regexp* re = new Regexp(kRegexpStar, flags); - re->AllocSub(1); - re->sub()[0] = sub->sub()[0]->Incref(); - sub->Decref(); // We didn't consume the reference after all. - return re; - } - - Regexp* re = new Regexp(op, flags); + + // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because + // op is Star/Plus/Quest, we just have to check that sub->op() is too. + if ((sub->op() == kRegexpStar || + sub->op() == kRegexpPlus || + sub->op() == kRegexpQuest) && + flags == sub->parse_flags()) { + // If sub is Star, no need to rewrite it. + if (sub->op() == kRegexpStar) + return sub; + + // Rewrite sub to Star. + Regexp* re = new Regexp(kRegexpStar, flags); + re->AllocSub(1); + re->sub()[0] = sub->sub()[0]->Incref(); + sub->Decref(); // We didn't consume the reference after all. + return re; + } + + Regexp* re = new Regexp(op, flags); re->AllocSub(1); re->sub()[0] = sub; return re; } -Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) { - return StarPlusOrQuest(kRegexpPlus, sub, flags); -} - +Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) { + return StarPlusOrQuest(kRegexpPlus, sub, flags); +} + Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) { - return StarPlusOrQuest(kRegexpStar, sub, flags); + return StarPlusOrQuest(kRegexpStar, sub, flags); } Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) { - return StarPlusOrQuest(kRegexpQuest, sub, flags); + return StarPlusOrQuest(kRegexpQuest, sub, flags); } Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub, @@ -237,13 +237,13 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub, if (nsub == 1) return sub[0]; - if (nsub == 0) { - if (op == kRegexpAlternate) - return new Regexp(kRegexpNoMatch, flags); - else - return new Regexp(kRegexpEmptyMatch, flags); - } - + if (nsub == 0) { + if (op == kRegexpAlternate) + return new Regexp(kRegexpNoMatch, flags); + else + return new Regexp(kRegexpEmptyMatch, flags); + } + PODArray<Regexp*> subcopy; if (op == kRegexpAlternate && can_factor) { // Going to edit sub; make a copy so we don't step on caller. @@ -436,7 +436,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) { // The stack (vector) has pairs of regexps waiting to // be compared. The regexps are only equal if // all the pairs end up being equal. - std::vector<Regexp*> stk; + std::vector<Regexp*> stk; for (;;) { // Invariant: TopEqual(a, b) == true. @@ -476,11 +476,11 @@ bool Regexp::Equal(Regexp* a, Regexp* b) { continue; } - size_t n = stk.size(); + size_t n = stk.size(); if (n == 0) break; - DCHECK_GE(n, 2); + DCHECK_GE(n, 2); a = stk[n-2]; b = stk[n-1]; stk.resize(n-2); @@ -490,7 +490,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) { } // Keep in sync with enum RegexpStatusCode in regexp.h -static const char *kErrorStrings[] = { +static const char *kErrorStrings[] = { "no error", "unexpected error", "invalid escape sequence", @@ -553,9 +553,9 @@ class NumCapturesWalker : public Regexp::Walker<Ignored> { private: int ncapture_; - - NumCapturesWalker(const NumCapturesWalker&) = delete; - NumCapturesWalker& operator=(const NumCapturesWalker&) = delete; + + NumCapturesWalker(const NumCapturesWalker&) = delete; + NumCapturesWalker& operator=(const NumCapturesWalker&) = delete; }; int Regexp::NumCaptures() { @@ -600,9 +600,9 @@ class NamedCapturesWalker : public Regexp::Walker<Ignored> { private: std::map<std::string, int>* map_; - - NamedCapturesWalker(const NamedCapturesWalker&) = delete; - NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete; + + NamedCapturesWalker(const NamedCapturesWalker&) = delete; + NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete; }; std::map<std::string, int>* Regexp::NamedCaptures() { @@ -644,9 +644,9 @@ class CaptureNamesWalker : public Regexp::Walker<Ignored> { private: std::map<int, std::string>* map_; - - CaptureNamesWalker(const CaptureNamesWalker&) = delete; - CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete; + + CaptureNamesWalker(const CaptureNamesWalker&) = delete; + CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete; }; std::map<int, std::string>* Regexp::CaptureNames() { @@ -690,13 +690,13 @@ bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase, int i = 0; while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText) i++; - if (i == 0 || i >= nsub_) + if (i == 0 || i >= nsub_) return false; Regexp* re = sub()[i]; if (re->op_ != kRegexpLiteral && re->op_ != kRegexpLiteralString) return false; - i++; + i++; if (i < nsub_) { for (int j = i; j < nsub_; j++) sub()[j]->Incref(); @@ -761,13 +761,13 @@ bool CharClassBuilder::AddRange(Rune lo, Rune hi) { if (lo <= 'z' && hi >= 'A') { // Overlaps some alpha, maybe not all. // Update bitmaps telling which ASCII letters are in the set. - Rune lo1 = std::max<Rune>(lo, 'A'); - Rune hi1 = std::min<Rune>(hi, 'Z'); + Rune lo1 = std::max<Rune>(lo, 'A'); + Rune hi1 = std::min<Rune>(hi, 'Z'); if (lo1 <= hi1) upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A'); - lo1 = std::max<Rune>(lo, 'a'); - hi1 = std::min<Rune>(hi, 'z'); + lo1 = std::max<Rune>(lo, 'a'); + hi1 = std::min<Rune>(hi, 'z'); if (lo1 <= hi1) lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a'); } @@ -883,7 +883,7 @@ void CharClassBuilder::RemoveAbove(Rune r) { void CharClassBuilder::Negate() { // Build up negation and then copy in. // Could edit ranges in place, but C++ won't let me. - std::vector<RuneRange> v; + std::vector<RuneRange> v; v.reserve(ranges_.size() + 1); // In negation, first range begins at 0, unless @@ -906,7 +906,7 @@ void CharClassBuilder::Negate() { } ranges_.clear(); - for (size_t i = 0; i < v.size(); i++) + for (size_t i = 0; i < v.size(); i++) ranges_.insert(v[i]); upper_ = AlphaMask & ~upper_; @@ -920,7 +920,7 @@ void CharClassBuilder::Negate() { CharClass* CharClass::New(size_t maxranges) { CharClass* cc; - uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]]; + uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]]; cc = reinterpret_cast<CharClass*>(data); cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc); cc->nranges_ = 0; @@ -930,7 +930,7 @@ CharClass* CharClass::New(size_t maxranges) { } void CharClass::Delete() { - uint8_t* data = reinterpret_cast<uint8_t*>(this); + uint8_t* data = reinterpret_cast<uint8_t*>(this); delete[] data; } @@ -977,7 +977,7 @@ CharClass* CharClassBuilder::GetCharClass() { for (iterator it = begin(); it != end(); ++it) cc->ranges_[n++] = *it; cc->nranges_ = n; - DCHECK_LE(n, static_cast<int>(ranges_.size())); + DCHECK_LE(n, static_cast<int>(ranges_.size())); cc->nrunes_ = nrunes_; cc->folds_ascii_ = FoldsASCII(); return cc; diff --git a/contrib/libs/re2/re2/regexp.h b/contrib/libs/re2/re2/regexp.h index b6446f9fe5..164e93392a 100644 --- a/contrib/libs/re2/re2/regexp.h +++ b/contrib/libs/re2/re2/regexp.h @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_REGEXP_H_ -#define RE2_REGEXP_H_ - +#ifndef RE2_REGEXP_H_ +#define RE2_REGEXP_H_ + // --- SPONSORED LINK -------------------------------------------------- // If you want to use this library for regular expression matching, // you should use re2/re2.h, which provides a class RE2 that @@ -87,15 +87,15 @@ // parsed regular expressions. #include <stddef.h> -#include <stdint.h> -#include <map> -#include <set> -#include <string> +#include <stdint.h> +#include <map> +#include <set> +#include <string> #include "util/util.h" #include "util/logging.h" #include "util/utf.h" -#include "re2/stringpiece.h" +#include "re2/stringpiece.h" namespace re2 { @@ -194,10 +194,10 @@ class RegexpStatus { RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {} ~RegexpStatus() { delete tmp_; } - void set_code(RegexpStatusCode code) { code_ = code; } + void set_code(RegexpStatusCode code) { code_ = code; } void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; } void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; } - RegexpStatusCode code() const { return code_; } + RegexpStatusCode code() const { return code_; } const StringPiece& error_arg() const { return error_arg_; } bool ok() const { return code() == kRegexpSuccess; } @@ -213,12 +213,12 @@ class RegexpStatus { std::string Text() const; private: - RegexpStatusCode code_; // Kind of error + RegexpStatusCode code_; // Kind of error StringPiece error_arg_; // Piece of regexp containing syntax error. std::string* tmp_; // Temporary storage, possibly where error_arg_ is. - RegexpStatus(const RegexpStatus&) = delete; - RegexpStatus& operator=(const RegexpStatus&) = delete; + RegexpStatus(const RegexpStatus&) = delete; + RegexpStatus& operator=(const RegexpStatus&) = delete; }; // Compiled form; see prog.h @@ -268,9 +268,9 @@ class CharClass { int nrunes_; RuneRange *ranges_; int nranges_; - - CharClass(const CharClass&) = delete; - CharClass& operator=(const CharClass&) = delete; + + CharClass(const CharClass&) = delete; + CharClass& operator=(const CharClass&) = delete; }; class Regexp { @@ -278,52 +278,52 @@ class Regexp { // Flags for parsing. Can be ORed together. enum ParseFlags { - NoParseFlags = 0, - FoldCase = 1<<0, // Fold case during matching (case-insensitive). - Literal = 1<<1, // Treat s as literal string instead of a regexp. - ClassNL = 1<<2, // Allow char classes like [^a-z] and \D and \s - // and [[:space:]] to match newline. - DotNL = 1<<3, // Allow . to match newline. - MatchNL = ClassNL | DotNL, - OneLine = 1<<4, // Treat ^ and $ as only matching at beginning and - // end of text, not around embedded newlines. - // (Perl's default) - Latin1 = 1<<5, // Regexp and text are in Latin1, not UTF-8. - NonGreedy = 1<<6, // Repetition operators are non-greedy by default. - PerlClasses = 1<<7, // Allow Perl character classes like \d. - PerlB = 1<<8, // Allow Perl's \b and \B. - PerlX = 1<<9, // Perl extensions: - // non-capturing parens - (?: ) - // non-greedy operators - *? +? ?? {}? - // flag edits - (?i) (?-i) (?i: ) - // i - FoldCase - // m - !OneLine - // s - DotNL - // U - NonGreedy - // line ends: \A \z - // \Q and \E to disable/enable metacharacters - // (?P<name>expr) for named captures - // \C to match any single byte - UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group - // and \P{Han} for its negation. - NeverNL = 1<<11, // Never match NL, even if the regexp mentions - // it explicitly. - NeverCapture = 1<<12, // Parse all parens as non-capturing. + NoParseFlags = 0, + FoldCase = 1<<0, // Fold case during matching (case-insensitive). + Literal = 1<<1, // Treat s as literal string instead of a regexp. + ClassNL = 1<<2, // Allow char classes like [^a-z] and \D and \s + // and [[:space:]] to match newline. + DotNL = 1<<3, // Allow . to match newline. + MatchNL = ClassNL | DotNL, + OneLine = 1<<4, // Treat ^ and $ as only matching at beginning and + // end of text, not around embedded newlines. + // (Perl's default) + Latin1 = 1<<5, // Regexp and text are in Latin1, not UTF-8. + NonGreedy = 1<<6, // Repetition operators are non-greedy by default. + PerlClasses = 1<<7, // Allow Perl character classes like \d. + PerlB = 1<<8, // Allow Perl's \b and \B. + PerlX = 1<<9, // Perl extensions: + // non-capturing parens - (?: ) + // non-greedy operators - *? +? ?? {}? + // flag edits - (?i) (?-i) (?i: ) + // i - FoldCase + // m - !OneLine + // s - DotNL + // U - NonGreedy + // line ends: \A \z + // \Q and \E to disable/enable metacharacters + // (?P<name>expr) for named captures + // \C to match any single byte + UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group + // and \P{Han} for its negation. + NeverNL = 1<<11, // Never match NL, even if the regexp mentions + // it explicitly. + NeverCapture = 1<<12, // Parse all parens as non-capturing. // As close to Perl as we can get. - LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX | - UnicodeGroups, + LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX | + UnicodeGroups, // Internal use only. - WasDollar = 1<<13, // on kRegexpEndText: was $ in regexp text - AllParseFlags = (1<<14)-1, + WasDollar = 1<<13, // on kRegexpEndText: was $ in regexp text + AllParseFlags = (1<<14)-1, }; // Get. No set, Regexps are logically immutable once created. RegexpOp op() { return static_cast<RegexpOp>(op_); } int nsub() { return nsub_; } - bool simple() { return simple_ != 0; } - ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); } + bool simple() { return simple_ != 0; } + ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); } int Ref(); // For testing. Regexp** sub() { @@ -363,7 +363,7 @@ class Regexp { // removed. The result will capture exactly the same // subexpressions the original did, unless formatted with ToString. Regexp* Simplify(); - friend class CoalesceWalker; + friend class CoalesceWalker; friend class SimplifyWalker; // Parses the regexp src and then simplifies it and sets *dst to the @@ -420,8 +420,8 @@ class Regexp { // Construction and execution of prog will // stay within approximately max_mem bytes of memory. // If max_mem <= 0, a reasonable default is used. - Prog* CompileToProg(int64_t max_mem); - Prog* CompileToReverseProg(int64_t max_mem); + Prog* CompileToProg(int64_t max_mem); + Prog* CompileToReverseProg(int64_t max_mem); // Whether to expect this library to find exactly the same answer as PCRE // when running this regexp. Most regexps do mimic PCRE exactly, but a few @@ -465,7 +465,7 @@ class Regexp { // Helpers for Parse. Listed here so they can edit Regexps. class ParseState; - + friend class ParseState; friend bool ParseCharClass(StringPiece* s, Regexp** out_re, RegexpStatus* status); @@ -476,10 +476,10 @@ class Regexp { // Computes whether Regexp is already simple. bool ComputeSimple(); - // Constructor that generates a Star, Plus or Quest, - // squashing the pair if sub is also a Star, Plus or Quest. - static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags); - + // Constructor that generates a Star, Plus or Quest, + // squashing the pair if sub is also a Star, Plus or Quest. + static Regexp* StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags); + // Constructor that generates a concatenation or alternation, // enforcing the limit on the number of subexpressions for // a particular Regexp. @@ -516,7 +516,7 @@ class Regexp { // Allocate space for n sub-regexps. void AllocSub(int n) { - DCHECK(n >= 0 && static_cast<uint16_t>(n) == n); + DCHECK(n >= 0 && static_cast<uint16_t>(n) == n); if (n > 1) submany_ = new Regexp*[n]; nsub_ = static_cast<uint16_t>(n); @@ -529,38 +529,38 @@ class Regexp { void Swap(Regexp *that); // Operator. See description of operators above. - // uint8_t instead of RegexpOp to control space usage. - uint8_t op_; + // uint8_t instead of RegexpOp to control space usage. + uint8_t op_; // Is this regexp structure already simple // (has it been returned by Simplify)? - // uint8_t instead of bool to control space usage. - uint8_t simple_; + // uint8_t instead of bool to control space usage. + uint8_t simple_; // Flags saved from parsing and used during execution. // (Only FoldCase is used.) - // uint16_t instead of ParseFlags to control space usage. - uint16_t parse_flags_; + // uint16_t instead of ParseFlags to control space usage. + uint16_t parse_flags_; // Reference count. Exists so that SimplifyRegexp can build // regexp structures that are dags rather than trees to avoid // exponential blowup in space requirements. - // uint16_t to control space usage. + // uint16_t to control space usage. // The standard regexp routines will never generate a - // ref greater than the maximum repeat count (kMaxRepeat), + // ref greater than the maximum repeat count (kMaxRepeat), // but even so, Incref and Decref consult an overflow map // when ref_ reaches kMaxRef. - uint16_t ref_; - static const uint16_t kMaxRef = 0xffff; + uint16_t ref_; + static const uint16_t kMaxRef = 0xffff; // Subexpressions. - // uint16_t to control space usage. + // uint16_t to control space usage. // Concat and Alternate handle larger numbers of subexpressions // by building concatenation or alternation trees. // Other routines should call Concat or Alternate instead of // filling in sub() by hand. - uint16_t nsub_; - static const uint16_t kMaxNsub = 0xffff; + uint16_t nsub_; + static const uint16_t kMaxNsub = 0xffff; union { Regexp** submany_; // if nsub_ > 1 Regexp* subone_; // if nsub_ == 1 @@ -595,12 +595,12 @@ class Regexp { void *the_union_[2]; // as big as any other element, for memset }; - Regexp(const Regexp&) = delete; - Regexp& operator=(const Regexp&) = delete; + Regexp(const Regexp&) = delete; + Regexp& operator=(const Regexp&) = delete; }; // Character class set: contains non-overlapping, non-abutting RuneRanges. -typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet; +typedef std::set<RuneRange, RuneRangeLess> RuneRangeSet; class CharClassBuilder { public: @@ -625,41 +625,41 @@ class CharClassBuilder { void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags); private: - static const uint32_t AlphaMask = (1<<26) - 1; - uint32_t upper_; // bitmap of A-Z - uint32_t lower_; // bitmap of a-z + static const uint32_t AlphaMask = (1<<26) - 1; + uint32_t upper_; // bitmap of A-Z + uint32_t lower_; // bitmap of a-z int nrunes_; RuneRangeSet ranges_; - - CharClassBuilder(const CharClassBuilder&) = delete; - CharClassBuilder& operator=(const CharClassBuilder&) = delete; + + CharClassBuilder(const CharClassBuilder&) = delete; + CharClassBuilder& operator=(const CharClassBuilder&) = delete; }; -// Bitwise ops on ParseFlags produce ParseFlags. -inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, - Regexp::ParseFlags b) { - return static_cast<Regexp::ParseFlags>( - static_cast<int>(a) | static_cast<int>(b)); +// Bitwise ops on ParseFlags produce ParseFlags. +inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, + Regexp::ParseFlags b) { + return static_cast<Regexp::ParseFlags>( + static_cast<int>(a) | static_cast<int>(b)); } -inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, - Regexp::ParseFlags b) { - return static_cast<Regexp::ParseFlags>( - static_cast<int>(a) ^ static_cast<int>(b)); +inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, + Regexp::ParseFlags b) { + return static_cast<Regexp::ParseFlags>( + static_cast<int>(a) ^ static_cast<int>(b)); } -inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, - Regexp::ParseFlags b) { - return static_cast<Regexp::ParseFlags>( - static_cast<int>(a) & static_cast<int>(b)); +inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, + Regexp::ParseFlags b) { + return static_cast<Regexp::ParseFlags>( + static_cast<int>(a) & static_cast<int>(b)); } -inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) { - // Attempting to produce a value out of enum's range has undefined behaviour. - return static_cast<Regexp::ParseFlags>( - ~static_cast<int>(a) & static_cast<int>(Regexp::AllParseFlags)); +inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) { + // Attempting to produce a value out of enum's range has undefined behaviour. + return static_cast<Regexp::ParseFlags>( + ~static_cast<int>(a) & static_cast<int>(Regexp::AllParseFlags)); } } // namespace re2 - -#endif // RE2_REGEXP_H_ + +#endif // RE2_REGEXP_H_ diff --git a/contrib/libs/re2/re2/set.cc b/contrib/libs/re2/re2/set.cc index 18705663a5..81b100c0d4 100644 --- a/contrib/libs/re2/re2/set.cc +++ b/contrib/libs/re2/re2/set.cc @@ -2,22 +2,22 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include "re2/set.h" +#include "re2/set.h" -#include <stddef.h> +#include <stddef.h> #include <algorithm> #include <memory> #include <utility> - -#include "util/util.h" -#include "util/logging.h" + +#include "util/util.h" +#include "util/logging.h" #include "re2/pod_array.h" #include "re2/prog.h" -#include "re2/re2.h" +#include "re2/re2.h" #include "re2/regexp.h" #include "re2/stringpiece.h" -namespace re2 { +namespace re2 { RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) : options_(options), @@ -61,7 +61,7 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) { Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( options_.ParseFlags()); RegexpStatus status; - re2::Regexp* re = Regexp::Parse(pattern, pf, &status); + re2::Regexp* re = Regexp::Parse(pattern, pf, &status); if (re == NULL) { if (error != NULL) *error = status.Text(); @@ -72,7 +72,7 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) { // Concatenate with match index and push on vector. int n = static_cast<int>(elem_.size()); - re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); + re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); if (re->op() == kRegexpConcat) { int nsub = re->nsub(); PODArray<re2::Regexp*> sub(nsub + 1); @@ -82,10 +82,10 @@ int RE2::Set::Add(const StringPiece& pattern, std::string* error) { re->Decref(); re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); } else { - re2::Regexp* sub[2]; + re2::Regexp* sub[2]; sub[0] = re; sub[1] = m; - re = re2::Regexp::Concat(sub, 2, pf); + re = re2::Regexp::Concat(sub, 2, pf); } elem_.emplace_back(std::string(pattern), re); return n; @@ -140,20 +140,20 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, std::unique_ptr<SparseSet> matches; if (v != NULL) { matches.reset(new SparseSet(size_)); - v->clear(); + v->clear(); } bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch, NULL, &dfa_failed, matches.get()); - if (dfa_failed) { - if (options_.log_errors()) + if (dfa_failed) { + if (options_.log_errors()) LOG(ERROR) << "DFA out of memory: " << "program size " << prog_->size() << ", " << "list count " << prog_->list_count() << ", " << "bytemap range " << prog_->bytemap_range(); if (error_info != NULL) error_info->kind = kOutOfMemory; - return false; - } + return false; + } if (ret == false) { if (error_info != NULL) error_info->kind = kNoError; @@ -172,5 +172,5 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, error_info->kind = kNoError; return true; } - -} // namespace re2 + +} // namespace re2 diff --git a/contrib/libs/re2/re2/set.h b/contrib/libs/re2/re2/set.h index 8d64f30ccd..a23cc6cc21 100644 --- a/contrib/libs/re2/re2/set.h +++ b/contrib/libs/re2/re2/set.h @@ -1,28 +1,28 @@ -// Copyright 2010 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef RE2_SET_H_ -#define RE2_SET_H_ - +// Copyright 2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_SET_H_ +#define RE2_SET_H_ + #include <memory> -#include <string> +#include <string> #include <utility> -#include <vector> - -#include "re2/re2.h" - -namespace re2 { -class Prog; -class Regexp; -} // namespace re2 - -namespace re2 { - -// An RE2::Set represents a collection of regexps that can -// be searched for simultaneously. -class RE2::Set { - public: +#include <vector> + +#include "re2/re2.h" + +namespace re2 { +class Prog; +class Regexp; +} // namespace re2 + +namespace re2 { + +// An RE2::Set represents a collection of regexps that can +// be searched for simultaneously. +class RE2::Set { + public: enum ErrorKind { kNoError = 0, kNotCompiled, // The set is not compiled. @@ -34,9 +34,9 @@ class RE2::Set { ErrorKind kind; }; - Set(const RE2::Options& options, RE2::Anchor anchor); - ~Set(); - + Set(const RE2::Options& options, RE2::Anchor anchor); + ~Set(); + // Not copyable. Set(const Set&) = delete; Set& operator=(const Set&) = delete; @@ -47,39 +47,39 @@ class RE2::Set { // Adds pattern to the set using the options passed to the constructor. // Returns the index that will identify the regexp in the output of Match(), // or -1 if the regexp cannot be parsed. - // Indices are assigned in sequential order starting from 0. + // Indices are assigned in sequential order starting from 0. // Errors do not increment the index; if error is not NULL, *error will hold // the error message from the parser. int Add(const StringPiece& pattern, std::string* error); - + // Compiles the set in preparation for matching. // Returns false if the compiler runs out of memory. // Add() must not be called again after Compile(). // Compile() must be called before Match(). - bool Compile(); - + bool Compile(); + // Returns true if text matches at least one of the regexps in the set. // Fills v (if not NULL) with the indices of the matching regexps. - // Callers must not expect v to be sorted. - bool Match(const StringPiece& text, std::vector<int>* v) const; - + // Callers must not expect v to be sorted. + bool Match(const StringPiece& text, std::vector<int>* v) const; + // As above, but populates error_info (if not NULL) when none of the regexps // in the set matched. This can inform callers when DFA execution fails, for // example, because they might wish to handle that case differently. bool Match(const StringPiece& text, std::vector<int>* v, ErrorInfo* error_info) const; - private: + private: typedef std::pair<std::string, re2::Regexp*> Elem; - RE2::Options options_; - RE2::Anchor anchor_; + RE2::Options options_; + RE2::Anchor anchor_; std::vector<Elem> elem_; - bool compiled_; + bool compiled_; int size_; std::unique_ptr<re2::Prog> prog_; -}; - -} // namespace re2 - -#endif // RE2_SET_H_ +}; + +} // namespace re2 + +#endif // RE2_SET_H_ diff --git a/contrib/libs/re2/re2/simplify.cc b/contrib/libs/re2/re2/simplify.cc index 663d5fcd45..8a53ee2eac 100644 --- a/contrib/libs/re2/re2/simplify.cc +++ b/contrib/libs/re2/re2/simplify.cc @@ -6,11 +6,11 @@ // to use simple extended regular expression features. // Also sort and simplify character classes. -#include <string> - -#include "util/util.h" -#include "util/logging.h" -#include "util/utf.h" +#include <string> + +#include "util/util.h" +#include "util/logging.h" +#include "util/utf.h" #include "re2/pod_array.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -63,7 +63,7 @@ bool Regexp::ComputeSimple() { // These are simple as long as the subpieces are simple. subs = sub(); for (int i = 0; i < nsub_; i++) - if (!subs[i]->simple()) + if (!subs[i]->simple()) return false; return true; case kRegexpCharClass: @@ -73,12 +73,12 @@ bool Regexp::ComputeSimple() { return !cc_->empty() && !cc_->full(); case kRegexpCapture: subs = sub(); - return subs[0]->simple(); + return subs[0]->simple(); case kRegexpStar: case kRegexpPlus: case kRegexpQuest: subs = sub(); - if (!subs[0]->simple()) + if (!subs[0]->simple()) return false; switch (subs[0]->op_) { case kRegexpStar: @@ -99,37 +99,37 @@ bool Regexp::ComputeSimple() { } // Walker subclass used by Simplify. -// Coalesces runs of star/plus/quest/repeat of the same literal along with any -// occurrences of that literal into repeats of that literal. It also works for -// char classes, any char and any byte. -// PostVisit creates the coalesced result, which should then be simplified. -class CoalesceWalker : public Regexp::Walker<Regexp*> { - public: - CoalesceWalker() {} - virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, - Regexp** child_args, int nchild_args); - virtual Regexp* Copy(Regexp* re); - virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); - - private: - // These functions are declared inside CoalesceWalker so that - // they can edit the private fields of the Regexps they construct. - - // Returns true if r1 and r2 can be coalesced. In particular, ensures that - // the parse flags are consistent. (They will not be checked again later.) - static bool CanCoalesce(Regexp* r1, Regexp* r2); - - // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards - // will be empty match and the coalesced op. In other cases, where part of a - // literal string was removed to be coalesced, the array elements afterwards - // will be the coalesced op and the remainder of the literal string. - static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr); - - CoalesceWalker(const CoalesceWalker&) = delete; - CoalesceWalker& operator=(const CoalesceWalker&) = delete; -}; - -// Walker subclass used by Simplify. +// Coalesces runs of star/plus/quest/repeat of the same literal along with any +// occurrences of that literal into repeats of that literal. It also works for +// char classes, any char and any byte. +// PostVisit creates the coalesced result, which should then be simplified. +class CoalesceWalker : public Regexp::Walker<Regexp*> { + public: + CoalesceWalker() {} + virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, + Regexp** child_args, int nchild_args); + virtual Regexp* Copy(Regexp* re); + virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); + + private: + // These functions are declared inside CoalesceWalker so that + // they can edit the private fields of the Regexps they construct. + + // Returns true if r1 and r2 can be coalesced. In particular, ensures that + // the parse flags are consistent. (They will not be checked again later.) + static bool CanCoalesce(Regexp* r1, Regexp* r2); + + // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards + // will be empty match and the coalesced op. In other cases, where part of a + // literal string was removed to be coalesced, the array elements afterwards + // will be the coalesced op and the remainder of the literal string. + static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr); + + CoalesceWalker(const CoalesceWalker&) = delete; + CoalesceWalker& operator=(const CoalesceWalker&) = delete; +}; + +// Walker subclass used by Simplify. // The simplify walk is purely post-recursive: given the simplified children, // PostVisit creates the simplified result. // The child_args are simplified Regexp*s. @@ -137,7 +137,7 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { public: SimplifyWalker() {} virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop); - virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, + virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, Regexp** child_args, int nchild_args); virtual Regexp* Copy(Regexp* re); virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); @@ -161,8 +161,8 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { // Caller must Decref return value when done with it. static Regexp* SimplifyCharClass(Regexp* re); - SimplifyWalker(const SimplifyWalker&) = delete; - SimplifyWalker& operator=(const SimplifyWalker&) = delete; + SimplifyWalker(const SimplifyWalker&) = delete; + SimplifyWalker& operator=(const SimplifyWalker&) = delete; }; // Simplifies a regular expression, returning a new regexp. @@ -175,272 +175,272 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { // Caller must Decref() return value when done with it. Regexp* Regexp::Simplify() { - CoalesceWalker cw; - Regexp* cre = cw.Walk(this, NULL); - if (cre == NULL) + CoalesceWalker cw; + Regexp* cre = cw.Walk(this, NULL); + if (cre == NULL) return NULL; if (cw.stopped_early()) { cre->Decref(); return NULL; } - SimplifyWalker sw; - Regexp* sre = sw.Walk(cre, NULL); - cre->Decref(); + SimplifyWalker sw; + Regexp* sre = sw.Walk(cre, NULL); + cre->Decref(); if (sre == NULL) return NULL; if (sw.stopped_early()) { sre->Decref(); return NULL; } - return sre; + return sre; } #define Simplify DontCallSimplify // Avoid accidental recursion -// Utility function for PostVisit implementations that compares re->sub() with -// child_args to determine whether any child_args changed. In the common case, -// where nothing changed, calls Decref() for all child_args and returns false, -// so PostVisit must return re->Incref(). Otherwise, returns true. -static bool ChildArgsChanged(Regexp* re, Regexp** child_args) { - for (int i = 0; i < re->nsub(); i++) { - Regexp* sub = re->sub()[i]; - Regexp* newsub = child_args[i]; - if (newsub != sub) - return true; - } - for (int i = 0; i < re->nsub(); i++) { - Regexp* newsub = child_args[i]; - newsub->Decref(); - } - return false; -} - -Regexp* CoalesceWalker::Copy(Regexp* re) { - return re->Incref(); -} - -Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { +// Utility function for PostVisit implementations that compares re->sub() with +// child_args to determine whether any child_args changed. In the common case, +// where nothing changed, calls Decref() for all child_args and returns false, +// so PostVisit must return re->Incref(). Otherwise, returns true. +static bool ChildArgsChanged(Regexp* re, Regexp** child_args) { + for (int i = 0; i < re->nsub(); i++) { + Regexp* sub = re->sub()[i]; + Regexp* newsub = child_args[i]; + if (newsub != sub) + return true; + } + for (int i = 0; i < re->nsub(); i++) { + Regexp* newsub = child_args[i]; + newsub->Decref(); + } + return false; +} + +Regexp* CoalesceWalker::Copy(Regexp* re) { + return re->Incref(); +} + +Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { // Should never be called: we use Walk(), not WalkExponential(). #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - LOG(DFATAL) << "CoalesceWalker::ShortVisit called"; + LOG(DFATAL) << "CoalesceWalker::ShortVisit called"; #endif - return re->Incref(); -} - -Regexp* CoalesceWalker::PostVisit(Regexp* re, - Regexp* parent_arg, - Regexp* pre_arg, - Regexp** child_args, - int nchild_args) { - if (re->nsub() == 0) - return re->Incref(); - - if (re->op() != kRegexpConcat) { - if (!ChildArgsChanged(re, child_args)) - return re->Incref(); - - // Something changed. Build a new op. - Regexp* nre = new Regexp(re->op(), re->parse_flags()); - nre->AllocSub(re->nsub()); - Regexp** nre_subs = nre->sub(); - for (int i = 0; i < re->nsub(); i++) - nre_subs[i] = child_args[i]; - // Repeats and Captures have additional data that must be copied. - if (re->op() == kRegexpRepeat) { - nre->min_ = re->min(); - nre->max_ = re->max(); - } else if (re->op() == kRegexpCapture) { - nre->cap_ = re->cap(); - } - return nre; - } - - bool can_coalesce = false; - for (int i = 0; i < re->nsub(); i++) { - if (i+1 < re->nsub() && - CanCoalesce(child_args[i], child_args[i+1])) { - can_coalesce = true; - break; - } - } - if (!can_coalesce) { - if (!ChildArgsChanged(re, child_args)) - return re->Incref(); - - // Something changed. Build a new op. - Regexp* nre = new Regexp(re->op(), re->parse_flags()); - nre->AllocSub(re->nsub()); - Regexp** nre_subs = nre->sub(); - for (int i = 0; i < re->nsub(); i++) - nre_subs[i] = child_args[i]; - return nre; - } - - for (int i = 0; i < re->nsub(); i++) { - if (i+1 < re->nsub() && - CanCoalesce(child_args[i], child_args[i+1])) - DoCoalesce(&child_args[i], &child_args[i+1]); - } - // Determine how many empty matches were left by DoCoalesce. - int n = 0; - for (int i = n; i < re->nsub(); i++) { - if (child_args[i]->op() == kRegexpEmptyMatch) - n++; - } - // Build a new op. - Regexp* nre = new Regexp(re->op(), re->parse_flags()); - nre->AllocSub(re->nsub() - n); - Regexp** nre_subs = nre->sub(); - for (int i = 0, j = 0; i < re->nsub(); i++) { - if (child_args[i]->op() == kRegexpEmptyMatch) { - child_args[i]->Decref(); - continue; - } - nre_subs[j] = child_args[i]; - j++; - } - return nre; -} - -bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) { - // r1 must be a star/plus/quest/repeat of a literal, char class, any char or - // any byte. - if ((r1->op() == kRegexpStar || - r1->op() == kRegexpPlus || - r1->op() == kRegexpQuest || - r1->op() == kRegexpRepeat) && - (r1->sub()[0]->op() == kRegexpLiteral || - r1->sub()[0]->op() == kRegexpCharClass || - r1->sub()[0]->op() == kRegexpAnyChar || - r1->sub()[0]->op() == kRegexpAnyByte)) { - // r2 must be a star/plus/quest/repeat of the same literal, char class, - // any char or any byte. - if ((r2->op() == kRegexpStar || - r2->op() == kRegexpPlus || - r2->op() == kRegexpQuest || - r2->op() == kRegexpRepeat) && - Regexp::Equal(r1->sub()[0], r2->sub()[0]) && - // The parse flags must be consistent. - ((r1->parse_flags() & Regexp::NonGreedy) == - (r2->parse_flags() & Regexp::NonGreedy))) { - return true; - } - // ... OR an occurrence of that literal, char class, any char or any byte - if (Regexp::Equal(r1->sub()[0], r2)) { - return true; - } - // ... OR a literal string that begins with that literal. - if (r1->sub()[0]->op() == kRegexpLiteral && - r2->op() == kRegexpLiteralString && - r2->runes()[0] == r1->sub()[0]->rune() && - // The parse flags must be consistent. - ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) == - (r2->parse_flags() & Regexp::FoldCase))) { - return true; - } - } - return false; -} - -void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) { - Regexp* r1 = *r1ptr; - Regexp* r2 = *r2ptr; - - Regexp* nre = Regexp::Repeat( - r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0); - - switch (r1->op()) { - case kRegexpStar: - nre->min_ = 0; - nre->max_ = -1; - break; - - case kRegexpPlus: - nre->min_ = 1; - nre->max_ = -1; - break; - - case kRegexpQuest: - nre->min_ = 0; - nre->max_ = 1; - break; - - case kRegexpRepeat: - nre->min_ = r1->min(); - nre->max_ = r1->max(); - break; - - default: - LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op(); - nre->Decref(); - return; - } - - switch (r2->op()) { - case kRegexpStar: - nre->max_ = -1; - goto LeaveEmpty; - - case kRegexpPlus: - nre->min_++; - nre->max_ = -1; - goto LeaveEmpty; - - case kRegexpQuest: - if (nre->max() != -1) - nre->max_++; - goto LeaveEmpty; - - case kRegexpRepeat: - nre->min_ += r2->min(); - if (r2->max() == -1) - nre->max_ = -1; - else if (nre->max() != -1) - nre->max_ += r2->max(); - goto LeaveEmpty; - - case kRegexpLiteral: - case kRegexpCharClass: - case kRegexpAnyChar: - case kRegexpAnyByte: - nre->min_++; - if (nre->max() != -1) - nre->max_++; - goto LeaveEmpty; - - LeaveEmpty: - *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags); - *r2ptr = nre; - break; - - case kRegexpLiteralString: { - Rune r = r1->sub()[0]->rune(); - // Determine how much of the literal string is removed. - // We know that we have at least one rune. :) - int n = 1; - while (n < r2->nrunes() && r2->runes()[n] == r) - n++; - nre->min_ += n; - if (nre->max() != -1) - nre->max_ += n; - if (n == r2->nrunes()) - goto LeaveEmpty; - *r1ptr = nre; - *r2ptr = Regexp::LiteralString( - &r2->runes()[n], r2->nrunes() - n, r2->parse_flags()); - break; - } - - default: - LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op(); - nre->Decref(); - return; - } - - r1->Decref(); - r2->Decref(); -} - + return re->Incref(); +} + +Regexp* CoalesceWalker::PostVisit(Regexp* re, + Regexp* parent_arg, + Regexp* pre_arg, + Regexp** child_args, + int nchild_args) { + if (re->nsub() == 0) + return re->Incref(); + + if (re->op() != kRegexpConcat) { + if (!ChildArgsChanged(re, child_args)) + return re->Incref(); + + // Something changed. Build a new op. + Regexp* nre = new Regexp(re->op(), re->parse_flags()); + nre->AllocSub(re->nsub()); + Regexp** nre_subs = nre->sub(); + for (int i = 0; i < re->nsub(); i++) + nre_subs[i] = child_args[i]; + // Repeats and Captures have additional data that must be copied. + if (re->op() == kRegexpRepeat) { + nre->min_ = re->min(); + nre->max_ = re->max(); + } else if (re->op() == kRegexpCapture) { + nre->cap_ = re->cap(); + } + return nre; + } + + bool can_coalesce = false; + for (int i = 0; i < re->nsub(); i++) { + if (i+1 < re->nsub() && + CanCoalesce(child_args[i], child_args[i+1])) { + can_coalesce = true; + break; + } + } + if (!can_coalesce) { + if (!ChildArgsChanged(re, child_args)) + return re->Incref(); + + // Something changed. Build a new op. + Regexp* nre = new Regexp(re->op(), re->parse_flags()); + nre->AllocSub(re->nsub()); + Regexp** nre_subs = nre->sub(); + for (int i = 0; i < re->nsub(); i++) + nre_subs[i] = child_args[i]; + return nre; + } + + for (int i = 0; i < re->nsub(); i++) { + if (i+1 < re->nsub() && + CanCoalesce(child_args[i], child_args[i+1])) + DoCoalesce(&child_args[i], &child_args[i+1]); + } + // Determine how many empty matches were left by DoCoalesce. + int n = 0; + for (int i = n; i < re->nsub(); i++) { + if (child_args[i]->op() == kRegexpEmptyMatch) + n++; + } + // Build a new op. + Regexp* nre = new Regexp(re->op(), re->parse_flags()); + nre->AllocSub(re->nsub() - n); + Regexp** nre_subs = nre->sub(); + for (int i = 0, j = 0; i < re->nsub(); i++) { + if (child_args[i]->op() == kRegexpEmptyMatch) { + child_args[i]->Decref(); + continue; + } + nre_subs[j] = child_args[i]; + j++; + } + return nre; +} + +bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) { + // r1 must be a star/plus/quest/repeat of a literal, char class, any char or + // any byte. + if ((r1->op() == kRegexpStar || + r1->op() == kRegexpPlus || + r1->op() == kRegexpQuest || + r1->op() == kRegexpRepeat) && + (r1->sub()[0]->op() == kRegexpLiteral || + r1->sub()[0]->op() == kRegexpCharClass || + r1->sub()[0]->op() == kRegexpAnyChar || + r1->sub()[0]->op() == kRegexpAnyByte)) { + // r2 must be a star/plus/quest/repeat of the same literal, char class, + // any char or any byte. + if ((r2->op() == kRegexpStar || + r2->op() == kRegexpPlus || + r2->op() == kRegexpQuest || + r2->op() == kRegexpRepeat) && + Regexp::Equal(r1->sub()[0], r2->sub()[0]) && + // The parse flags must be consistent. + ((r1->parse_flags() & Regexp::NonGreedy) == + (r2->parse_flags() & Regexp::NonGreedy))) { + return true; + } + // ... OR an occurrence of that literal, char class, any char or any byte + if (Regexp::Equal(r1->sub()[0], r2)) { + return true; + } + // ... OR a literal string that begins with that literal. + if (r1->sub()[0]->op() == kRegexpLiteral && + r2->op() == kRegexpLiteralString && + r2->runes()[0] == r1->sub()[0]->rune() && + // The parse flags must be consistent. + ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) == + (r2->parse_flags() & Regexp::FoldCase))) { + return true; + } + } + return false; +} + +void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) { + Regexp* r1 = *r1ptr; + Regexp* r2 = *r2ptr; + + Regexp* nre = Regexp::Repeat( + r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0); + + switch (r1->op()) { + case kRegexpStar: + nre->min_ = 0; + nre->max_ = -1; + break; + + case kRegexpPlus: + nre->min_ = 1; + nre->max_ = -1; + break; + + case kRegexpQuest: + nre->min_ = 0; + nre->max_ = 1; + break; + + case kRegexpRepeat: + nre->min_ = r1->min(); + nre->max_ = r1->max(); + break; + + default: + LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op(); + nre->Decref(); + return; + } + + switch (r2->op()) { + case kRegexpStar: + nre->max_ = -1; + goto LeaveEmpty; + + case kRegexpPlus: + nre->min_++; + nre->max_ = -1; + goto LeaveEmpty; + + case kRegexpQuest: + if (nre->max() != -1) + nre->max_++; + goto LeaveEmpty; + + case kRegexpRepeat: + nre->min_ += r2->min(); + if (r2->max() == -1) + nre->max_ = -1; + else if (nre->max() != -1) + nre->max_ += r2->max(); + goto LeaveEmpty; + + case kRegexpLiteral: + case kRegexpCharClass: + case kRegexpAnyChar: + case kRegexpAnyByte: + nre->min_++; + if (nre->max() != -1) + nre->max_++; + goto LeaveEmpty; + + LeaveEmpty: + *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags); + *r2ptr = nre; + break; + + case kRegexpLiteralString: { + Rune r = r1->sub()[0]->rune(); + // Determine how much of the literal string is removed. + // We know that we have at least one rune. :) + int n = 1; + while (n < r2->nrunes() && r2->runes()[n] == r) + n++; + nre->min_ += n; + if (nre->max() != -1) + nre->max_ += n; + if (n == r2->nrunes()) + goto LeaveEmpty; + *r1ptr = nre; + *r2ptr = Regexp::LiteralString( + &r2->runes()[n], r2->nrunes() - n, r2->parse_flags()); + break; + } + + default: + LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op(); + nre->Decref(); + return; + } + + r1->Decref(); + r2->Decref(); +} + Regexp* SimplifyWalker::Copy(Regexp* re) { return re->Incref(); } @@ -454,7 +454,7 @@ Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { } Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) { - if (re->simple()) { + if (re->simple()) { *stop = true; return re->Incref(); } @@ -487,14 +487,14 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re, case kRegexpConcat: case kRegexpAlternate: { // These are simple as long as the subpieces are simple. - if (!ChildArgsChanged(re, child_args)) { + if (!ChildArgsChanged(re, child_args)) { re->simple_ = true; return re->Incref(); } Regexp* nre = new Regexp(re->op(), re->parse_flags()); - nre->AllocSub(re->nsub()); + nre->AllocSub(re->nsub()); Regexp** nre_subs = nre->sub(); - for (int i = 0; i < re->nsub(); i++) + for (int i = 0; i < re->nsub(); i++) nre_subs[i] = child_args[i]; nre->simple_ = true; return nre; @@ -510,7 +510,7 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re, Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags()); nre->AllocSub(1); nre->sub()[0] = newsub; - nre->cap_ = re->cap(); + nre->cap_ = re->cap(); nre->simple_ = true; return nre; } diff --git a/contrib/libs/re2/re2/sparse_array.h b/contrib/libs/re2/re2/sparse_array.h index 09ffe086b7..d519912bdc 100644 --- a/contrib/libs/re2/re2/sparse_array.h +++ b/contrib/libs/re2/re2/sparse_array.h @@ -4,51 +4,51 @@ #ifndef RE2_SPARSE_ARRAY_H_ #define RE2_SPARSE_ARRAY_H_ - + // DESCRIPTION -// +// // SparseArray<T>(m) is a map from integers in [0, m) to T values. // It requires (sizeof(T)+sizeof(int))*m memory, but it provides // fast iteration through the elements in the array and fast clearing // of the array. The array has a concept of certain elements being // uninitialized (having no value). -// +// // Insertion and deletion are constant time operations. -// -// Allocating the array is a constant time operation +// +// Allocating the array is a constant time operation // when memory allocation is a constant time operation. -// +// // Clearing the array is a constant time operation (unusual!). -// +// // Iterating through the array is an O(n) operation, where n // is the number of items in the array (not O(m)). // -// The array iterator visits entries in the order they were first +// The array iterator visits entries in the order they were first // inserted into the array. It is safe to add items to the array while // using an iterator: the iterator will visit indices added to the array // during the iteration, but will not re-visit indices whose values // change after visiting. Thus SparseArray can be a convenient // implementation of a work queue. -// +// // The SparseArray implementation is NOT thread-safe. It is up to the // caller to make sure only one thread is accessing the array. (Typically // these arrays are temporary values and used in situations where speed is // important.) -// +// // The SparseArray interface does not present all the usual STL bells and // whistles. -// +// // Implemented with reference to Briggs & Torczon, An Efficient // Representation for Sparse Sets, ACM Letters on Programming Languages // and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. -// +// // Briggs & Torczon popularized this technique, but it had been known // long before their paper. They point out that Aho, Hopcroft, and // Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's // 1986 Programming Pearls both hint at the technique in exercises to the // reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1 // exercise 8). -// +// // Briggs & Torczon describe a sparse set implementation. I have // trivially generalized it to create a sparse array (actually the original // target of the AHU and Bentley exercises). @@ -57,7 +57,7 @@ // // SparseArray is an array dense_ and an array sparse_ of identical size. // At any point, the number of elements in the sparse array is size_. -// +// // The array dense_ contains the size_ elements in the sparse array (with // their indices), // in the order that the elements were first inserted. This array is dense: @@ -67,13 +67,13 @@ // For indices present in the array, dense_[sparse_[i]].index_ == i. // For indices not present in the array, sparse_ can contain any value at all, // perhaps outside the range [0, size_) but perhaps not. -// +// // The lax requirement on sparse_ values makes clearing the array very easy: // set size_ to 0. Lookups are slightly more complicated. // An index i has a value in the array if and only if: // sparse_[i] is in [0, size_) AND // dense_[sparse_[i]].index_ == i. -// If both these properties hold, only then it is safe to refer to +// If both these properties hold, only then it is safe to refer to // dense_[sparse_[i]].value_ // as the value associated with index i. // @@ -85,22 +85,22 @@ // array through a call to resize(). They immediately become inaccessible, but // they are only guaranteed to be destroyed when the SparseArray destructor is // called. -// -// A moved-from SparseArray will be empty. +// +// A moved-from SparseArray will be empty. // Doing this simplifies the logic below. #ifndef __has_feature #define __has_feature(x) 0 #endif -#include <assert.h> -#include <stdint.h> +#include <assert.h> +#include <stdint.h> #if __has_feature(memory_sanitizer) #include <sanitizer/msan_interface.h> #endif -#include <algorithm> -#include <memory> -#include <utility> +#include <algorithm> +#include <memory> +#include <utility> #include "re2/pod_array.h" @@ -110,7 +110,7 @@ template<typename Value> class SparseArray { public: SparseArray(); - explicit SparseArray(int max_size); + explicit SparseArray(int max_size); ~SparseArray(); // IndexValue pairs: exposed in SparseArray::iterator. @@ -119,22 +119,22 @@ class SparseArray { typedef IndexValue* iterator; typedef const IndexValue* const_iterator; - SparseArray(const SparseArray& src); + SparseArray(const SparseArray& src); SparseArray(SparseArray&& src); - SparseArray& operator=(const SparseArray& src); + SparseArray& operator=(const SparseArray& src); SparseArray& operator=(SparseArray&& src); - + // Return the number of entries in the array. int size() const { return size_; } - // Indicate whether the array is empty. - int empty() const { - return size_ == 0; - } - + // Indicate whether the array is empty. + int empty() const { + return size_ == 0; + } + // Iterate over the array. iterator begin() { return dense_.data(); @@ -169,82 +169,82 @@ class SparseArray { } // Check whether index i is in the array. - bool has_index(int i) const; + bool has_index(int i) const; // Comparison function for sorting. // Can sort the sparse array so that future iterations // will visit indices in increasing order using - // std::sort(arr.begin(), arr.end(), arr.less); + // std::sort(arr.begin(), arr.end(), arr.less); static bool less(const IndexValue& a, const IndexValue& b); public: // Set the value at index i to v. - iterator set(int i, const Value& v) { - return SetInternal(true, i, v); - } + iterator set(int i, const Value& v) { + return SetInternal(true, i, v); + } // Set the value at new index i to v. // Fast but unsafe: only use if has_index(i) is false. iterator set_new(int i, const Value& v) { return SetInternal(false, i, v); - } + } // Set the value at index i to v. // Fast but unsafe: only use if has_index(i) is true. - iterator set_existing(int i, const Value& v) { - return SetExistingInternal(i, v); - } + iterator set_existing(int i, const Value& v) { + return SetExistingInternal(i, v); + } // Get the value at index i. // Fast but unsafe: only use if has_index(i) is true. Value& get_existing(int i) { assert(has_index(i)); return dense_[sparse_[i]].value_; - } + } const Value& get_existing(int i) const { assert(has_index(i)); return dense_[sparse_[i]].value_; - } + } private: iterator SetInternal(bool allow_existing, int i, const Value& v) { - DebugCheckInvariants(); + DebugCheckInvariants(); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { - assert(false && "illegal index"); - // Semantically, end() would be better here, but we already know - // the user did something stupid, so begin() insulates them from - // dereferencing an invalid pointer. - return begin(); - } + assert(false && "illegal index"); + // Semantically, end() would be better here, but we already know + // the user did something stupid, so begin() insulates them from + // dereferencing an invalid pointer. + return begin(); + } if (!allow_existing) { - assert(!has_index(i)); - create_index(i); - } else { - if (!has_index(i)) - create_index(i); - } + assert(!has_index(i)); + create_index(i); + } else { + if (!has_index(i)) + create_index(i); + } return SetExistingInternal(i, v); - } - + } + iterator SetExistingInternal(int i, const Value& v) { - DebugCheckInvariants(); - assert(has_index(i)); + DebugCheckInvariants(); + assert(has_index(i)); dense_[sparse_[i]].value_ = v; - DebugCheckInvariants(); + DebugCheckInvariants(); return dense_.data() + sparse_[i]; - } - + } + // Add the index i to the array. // Only use if has_index(i) is known to be false. // Since it doesn't set the value associated with i, // this function is private, only intended as a helper // for other methods. - void create_index(int i); + void create_index(int i); // In debug mode, verify that some invariant properties of the class // are being maintained. This is called at the end of the constructor // and at the beginning and end of all public non-const member functions. - void DebugCheckInvariants() const; + void DebugCheckInvariants() const; // Initializes memory for elements [min, max). void MaybeInitializeMemory(int min, int max) { @@ -257,54 +257,54 @@ class SparseArray { #endif } - int size_ = 0; + int size_ = 0; PODArray<int> sparse_; PODArray<IndexValue> dense_; }; template<typename Value> -SparseArray<Value>::SparseArray() = default; +SparseArray<Value>::SparseArray() = default; -template<typename Value> -SparseArray<Value>::SparseArray(const SparseArray& src) - : size_(src.size_), +template<typename Value> +SparseArray<Value>::SparseArray(const SparseArray& src) + : size_(src.size_), sparse_(src.max_size()), dense_(src.max_size()) { std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data()); std::copy_n(src.dense_.data(), src.max_size(), dense_.data()); -} - -template<typename Value> +} + +template<typename Value> SparseArray<Value>::SparseArray(SparseArray&& src) - : size_(src.size_), + : size_(src.size_), sparse_(std::move(src.sparse_)), - dense_(std::move(src.dense_)) { - src.size_ = 0; -} - -template<typename Value> -SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) { + dense_(std::move(src.dense_)) { + src.size_ = 0; +} + +template<typename Value> +SparseArray<Value>& SparseArray<Value>::operator=(const SparseArray& src) { // Construct these first for exception safety. PODArray<int> a(src.max_size()); PODArray<IndexValue> b(src.max_size()); - size_ = src.size_; + size_ = src.size_; sparse_ = std::move(a); dense_ = std::move(b); std::copy_n(src.sparse_.data(), src.max_size(), sparse_.data()); std::copy_n(src.dense_.data(), src.max_size(), dense_.data()); - return *this; -} - -template<typename Value> + return *this; +} + +template<typename Value> SparseArray<Value>& SparseArray<Value>::operator=(SparseArray&& src) { - size_ = src.size_; + size_ = src.size_; sparse_ = std::move(src.sparse_); - dense_ = std::move(src.dense_); - src.size_ = 0; - return *this; -} - + dense_ = std::move(src.dense_); + src.size_ = 0; + return *this; +} + // IndexValue pairs: exposed in SparseArray::iterator. template<typename Value> class SparseArray<Value>::IndexValue { @@ -313,9 +313,9 @@ class SparseArray<Value>::IndexValue { Value& value() { return value_; } const Value& value() const { return value_; } - private: + private: friend class SparseArray; - int index_; + int index_; Value value_; }; @@ -330,7 +330,7 @@ void SparseArray<Value>::resize(int new_max_size) { // Construct these first for exception safety. PODArray<int> a(new_max_size); PODArray<IndexValue> b(new_max_size); - + std::copy_n(sparse_.data(), old_max_size, a.data()); std::copy_n(dense_.data(), old_max_size, b.data()); @@ -347,7 +347,7 @@ void SparseArray<Value>::resize(int new_max_size) { // Check whether index i is in the array. template<typename Value> bool SparseArray<Value>::has_index(int i) const { - assert(i >= 0); + assert(i >= 0); assert(i < max_size()); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { return false; @@ -359,7 +359,7 @@ bool SparseArray<Value>::has_index(int i) const { template<typename Value> void SparseArray<Value>::create_index(int i) { - assert(!has_index(i)); + assert(!has_index(i)); assert(size_ < max_size()); sparse_[i] = size_; dense_[size_].index_ = i; @@ -377,7 +377,7 @@ template<typename Value> SparseArray<Value>::~SparseArray() { } template<typename Value> void SparseArray<Value>::DebugCheckInvariants() const { - assert(0 <= size_); + assert(0 <= size_); assert(size_ <= max_size()); } diff --git a/contrib/libs/re2/re2/sparse_set.h b/contrib/libs/re2/re2/sparse_set.h index 06ed88d81b..6f4b6fb926 100644 --- a/contrib/libs/re2/re2/sparse_set.h +++ b/contrib/libs/re2/re2/sparse_set.h @@ -4,172 +4,172 @@ #ifndef RE2_SPARSE_SET_H_ #define RE2_SPARSE_SET_H_ - + // DESCRIPTION -// -// SparseSet(m) is a set of integers in [0, m). +// +// SparseSet(m) is a set of integers in [0, m). // It requires sizeof(int)*m memory, but it provides // fast iteration through the elements in the set and fast clearing // of the set. -// +// // Insertion and deletion are constant time operations. -// -// Allocating the set is a constant time operation +// +// Allocating the set is a constant time operation // when memory allocation is a constant time operation. -// +// // Clearing the set is a constant time operation (unusual!). -// +// // Iterating through the set is an O(n) operation, where n // is the number of items in the set (not O(m)). // -// The set iterator visits entries in the order they were first -// inserted into the set. It is safe to add items to the set while +// The set iterator visits entries in the order they were first +// inserted into the set. It is safe to add items to the set while // using an iterator: the iterator will visit indices added to the set // during the iteration, but will not re-visit indices whose values // change after visiting. Thus SparseSet can be a convenient // implementation of a work queue. -// +// // The SparseSet implementation is NOT thread-safe. It is up to the // caller to make sure only one thread is accessing the set. (Typically // these sets are temporary values and used in situations where speed is // important.) -// +// // The SparseSet interface does not present all the usual STL bells and // whistles. -// +// // Implemented with reference to Briggs & Torczon, An Efficient // Representation for Sparse Sets, ACM Letters on Programming Languages // and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. -// -// This is a specialization of sparse array; see sparse_array.h. +// +// This is a specialization of sparse array; see sparse_array.h. // IMPLEMENTATION // -// See sparse_array.h for implementation details. +// See sparse_array.h for implementation details. // Doing this simplifies the logic below. #ifndef __has_feature #define __has_feature(x) 0 #endif -#include <assert.h> -#include <stdint.h> +#include <assert.h> +#include <stdint.h> #if __has_feature(memory_sanitizer) #include <sanitizer/msan_interface.h> #endif -#include <algorithm> -#include <memory> -#include <utility> +#include <algorithm> +#include <memory> +#include <utility> #include "re2/pod_array.h" namespace re2 { -template<typename Value> -class SparseSetT { +template<typename Value> +class SparseSetT { public: - SparseSetT(); - explicit SparseSetT(int max_size); - ~SparseSetT(); + SparseSetT(); + explicit SparseSetT(int max_size); + ~SparseSetT(); typedef int* iterator; typedef const int* const_iterator; - - // Return the number of entries in the set. - int size() const { - return size_; + + // Return the number of entries in the set. + int size() const { + return size_; } - // Indicate whether the set is empty. - int empty() const { - return size_ == 0; + // Indicate whether the set is empty. + int empty() const { + return size_ == 0; } - // Iterate over the set. - iterator begin() { + // Iterate over the set. + iterator begin() { return dense_.data(); - } - iterator end() { + } + iterator end() { return dense_.data() + size_; - } + } - const_iterator begin() const { + const_iterator begin() const { return dense_.data(); - } - const_iterator end() const { + } + const_iterator end() const { return dense_.data() + size_; - } + } - // Change the maximum size of the set. + // Change the maximum size of the set. // Invalidates all iterators. void resize(int new_max_size); - // Return the maximum size of the set. - // Indices can be in the range [0, max_size). - int max_size() const { + // Return the maximum size of the set. + // Indices can be in the range [0, max_size). + int max_size() const { if (dense_.data() != NULL) return dense_.size(); else return 0; } - // Clear the set. - void clear() { - size_ = 0; - } - - // Check whether index i is in the set. - bool contains(int i) const; - - // Comparison function for sorting. - // Can sort the sparse set so that future iterations - // will visit indices in increasing order using - // std::sort(arr.begin(), arr.end(), arr.less); - static bool less(int a, int b); - - public: - // Insert index i into the set. - iterator insert(int i) { - return InsertInternal(true, i); + // Clear the set. + void clear() { + size_ = 0; + } + + // Check whether index i is in the set. + bool contains(int i) const; + + // Comparison function for sorting. + // Can sort the sparse set so that future iterations + // will visit indices in increasing order using + // std::sort(arr.begin(), arr.end(), arr.less); + static bool less(int a, int b); + + public: + // Insert index i into the set. + iterator insert(int i) { + return InsertInternal(true, i); } - // Insert index i into the set. - // Fast but unsafe: only use if contains(i) is false. - iterator insert_new(int i) { - return InsertInternal(false, i); + // Insert index i into the set. + // Fast but unsafe: only use if contains(i) is false. + iterator insert_new(int i) { + return InsertInternal(false, i); } - private: - iterator InsertInternal(bool allow_existing, int i) { - DebugCheckInvariants(); + private: + iterator InsertInternal(bool allow_existing, int i) { + DebugCheckInvariants(); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { - assert(false && "illegal index"); + assert(false && "illegal index"); // Semantically, end() would be better here, but we already know // the user did something stupid, so begin() insulates them from // dereferencing an invalid pointer. - return begin(); + return begin(); } - if (!allow_existing) { - assert(!contains(i)); - create_index(i); - } else { - if (!contains(i)) - create_index(i); - } - DebugCheckInvariants(); + if (!allow_existing) { + assert(!contains(i)); + create_index(i); + } else { + if (!contains(i)) + create_index(i); + } + DebugCheckInvariants(); return dense_.data() + sparse_[i]; } - // Add the index i to the set. - // Only use if contains(i) is known to be false. - // This function is private, only intended as a helper - // for other methods. - void create_index(int i); + // Add the index i to the set. + // Only use if contains(i) is known to be false. + // This function is private, only intended as a helper + // for other methods. + void create_index(int i); - // In debug mode, verify that some invariant properties of the class - // are being maintained. This is called at the end of the constructor - // and at the beginning and end of all public non-const member functions. - void DebugCheckInvariants() const; + // In debug mode, verify that some invariant properties of the class + // are being maintained. This is called at the end of the constructor + // and at the beginning and end of all public non-const member functions. + void DebugCheckInvariants() const; // Initializes memory for elements [min, max). void MaybeInitializeMemory(int min, int max) { @@ -182,26 +182,26 @@ class SparseSetT { #endif } - int size_ = 0; + int size_ = 0; PODArray<int> sparse_; PODArray<int> dense_; }; -template<typename Value> -SparseSetT<Value>::SparseSetT() = default; - -// Change the maximum size of the set. -// Invalidates all iterators. -template<typename Value> +template<typename Value> +SparseSetT<Value>::SparseSetT() = default; + +// Change the maximum size of the set. +// Invalidates all iterators. +template<typename Value> void SparseSetT<Value>::resize(int new_max_size) { - DebugCheckInvariants(); + DebugCheckInvariants(); if (new_max_size > max_size()) { const int old_max_size = max_size(); - + // Construct these first for exception safety. PODArray<int> a(new_max_size); PODArray<int> b(new_max_size); - + std::copy_n(sparse_.data(), old_max_size, a.data()); std::copy_n(dense_.data(), old_max_size, b.data()); @@ -209,56 +209,56 @@ void SparseSetT<Value>::resize(int new_max_size) { dense_ = std::move(b); MaybeInitializeMemory(old_max_size, new_max_size); - } + } if (size_ > new_max_size) size_ = new_max_size; - DebugCheckInvariants(); -} - -// Check whether index i is in the set. -template<typename Value> -bool SparseSetT<Value>::contains(int i) const { - assert(i >= 0); + DebugCheckInvariants(); +} + +// Check whether index i is in the set. +template<typename Value> +bool SparseSetT<Value>::contains(int i) const { + assert(i >= 0); assert(i < max_size()); if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) { - return false; - } + return false; + } // Unsigned comparison avoids checking sparse_[i] < 0. return (uint32_t)sparse_[i] < (uint32_t)size_ && dense_[sparse_[i]] == i; -} - -template<typename Value> -void SparseSetT<Value>::create_index(int i) { - assert(!contains(i)); +} + +template<typename Value> +void SparseSetT<Value>::create_index(int i) { + assert(!contains(i)); assert(size_ < max_size()); sparse_[i] = size_; - dense_[size_] = i; - size_++; -} - + dense_[size_] = i; + size_++; +} + template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) : sparse_(max_size), dense_(max_size) { MaybeInitializeMemory(size_, max_size); - DebugCheckInvariants(); -} - -template<typename Value> SparseSetT<Value>::~SparseSetT() { - DebugCheckInvariants(); -} - -template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const { - assert(0 <= size_); + DebugCheckInvariants(); +} + +template<typename Value> SparseSetT<Value>::~SparseSetT() { + DebugCheckInvariants(); +} + +template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const { + assert(0 <= size_); assert(size_ <= max_size()); -} - -// Comparison function for sorting. -template<typename Value> bool SparseSetT<Value>::less(int a, int b) { - return a < b; -} - -typedef SparseSetT<void> SparseSet; - +} + +// Comparison function for sorting. +template<typename Value> bool SparseSetT<Value>::less(int a, int b) { + return a < b; +} + +typedef SparseSetT<void> SparseSet; + } // namespace re2 #endif // RE2_SPARSE_SET_H_ diff --git a/contrib/libs/re2/re2/stringpiece.cc b/contrib/libs/re2/re2/stringpiece.cc index ef2e2874ea..61721c19cd 100644 --- a/contrib/libs/re2/re2/stringpiece.cc +++ b/contrib/libs/re2/re2/stringpiece.cc @@ -1,65 +1,65 @@ -// Copyright 2004 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re2/stringpiece.h" - -#include <ostream> - -#include "util/util.h" - -namespace re2 { - -const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h - -StringPiece::size_type StringPiece::copy(char* buf, size_type n, - size_type pos) const { - size_type ret = std::min(size_ - pos, n); - memcpy(buf, data_ + pos, ret); - return ret; -} - -StringPiece StringPiece::substr(size_type pos, size_type n) const { - if (pos > size_) pos = size_; - if (n > size_ - pos) n = size_ - pos; - return StringPiece(data_ + pos, n); -} - -StringPiece::size_type StringPiece::find(const StringPiece& s, - size_type pos) const { - if (pos > size_) return npos; - const_pointer result = std::search(data_ + pos, data_ + size_, - s.data_, s.data_ + s.size_); - size_type xpos = result - data_; - return xpos + s.size_ <= size_ ? xpos : npos; -} - -StringPiece::size_type StringPiece::find(char c, size_type pos) const { - if (size_ <= 0 || pos >= size_) return npos; - const_pointer result = std::find(data_ + pos, data_ + size_, c); - return result != data_ + size_ ? result - data_ : npos; -} - -StringPiece::size_type StringPiece::rfind(const StringPiece& s, - size_type pos) const { - if (size_ < s.size_) return npos; - if (s.size_ == 0) return std::min(size_, pos); - const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_; - const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_); - return result != last ? result - data_ : npos; -} - -StringPiece::size_type StringPiece::rfind(char c, size_type pos) const { - if (size_ <= 0) return npos; - for (size_t i = std::min(pos + 1, size_); i != 0;) { - if (data_[--i] == c) return i; - } - return npos; -} - -std::ostream& operator<<(std::ostream& o, const StringPiece& p) { - o.write(p.data(), p.size()); - return o; -} - -} // namespace re2 +// Copyright 2004 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "re2/stringpiece.h" + +#include <ostream> + +#include "util/util.h" + +namespace re2 { + +const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h + +StringPiece::size_type StringPiece::copy(char* buf, size_type n, + size_type pos) const { + size_type ret = std::min(size_ - pos, n); + memcpy(buf, data_ + pos, ret); + return ret; +} + +StringPiece StringPiece::substr(size_type pos, size_type n) const { + if (pos > size_) pos = size_; + if (n > size_ - pos) n = size_ - pos; + return StringPiece(data_ + pos, n); +} + +StringPiece::size_type StringPiece::find(const StringPiece& s, + size_type pos) const { + if (pos > size_) return npos; + const_pointer result = std::search(data_ + pos, data_ + size_, + s.data_, s.data_ + s.size_); + size_type xpos = result - data_; + return xpos + s.size_ <= size_ ? xpos : npos; +} + +StringPiece::size_type StringPiece::find(char c, size_type pos) const { + if (size_ <= 0 || pos >= size_) return npos; + const_pointer result = std::find(data_ + pos, data_ + size_, c); + return result != data_ + size_ ? result - data_ : npos; +} + +StringPiece::size_type StringPiece::rfind(const StringPiece& s, + size_type pos) const { + if (size_ < s.size_) return npos; + if (s.size_ == 0) return std::min(size_, pos); + const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_; + const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_); + return result != last ? result - data_ : npos; +} + +StringPiece::size_type StringPiece::rfind(char c, size_type pos) const { + if (size_ <= 0) return npos; + for (size_t i = std::min(pos + 1, size_); i != 0;) { + if (data_[--i] == c) return i; + } + return npos; +} + +std::ostream& operator<<(std::ostream& o, const StringPiece& p) { + o.write(p.data(), p.size()); + return o; +} + +} // namespace re2 diff --git a/contrib/libs/re2/re2/stringpiece.h b/contrib/libs/re2/re2/stringpiece.h index ef73683401..0c74c73a41 100644 --- a/contrib/libs/re2/re2/stringpiece.h +++ b/contrib/libs/re2/re2/stringpiece.h @@ -1,115 +1,115 @@ -// Copyright 2001-2010 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef RE2_STRINGPIECE_H_ -#define RE2_STRINGPIECE_H_ - -// A string-like object that points to a sized piece of memory. -// -// Functions or methods may use const StringPiece& parameters to accept either -// a "const char*" or a "string" value that will be implicitly converted to -// a StringPiece. The implicit conversion means that it is often appropriate -// to include this .h file in other files rather than forward-declaring -// StringPiece as would be appropriate for most other Google classes. -// -// Systematic usage of StringPiece is encouraged as it will reduce unnecessary -// conversions from "const char*" to "string" and back again. -// -// -// Arghh! I wish C++ literals were "string". - +// Copyright 2001-2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_STRINGPIECE_H_ +#define RE2_STRINGPIECE_H_ + +// A string-like object that points to a sized piece of memory. +// +// Functions or methods may use const StringPiece& parameters to accept either +// a "const char*" or a "string" value that will be implicitly converted to +// a StringPiece. The implicit conversion means that it is often appropriate +// to include this .h file in other files rather than forward-declaring +// StringPiece as would be appropriate for most other Google classes. +// +// Systematic usage of StringPiece is encouraged as it will reduce unnecessary +// conversions from "const char*" to "string" and back again. +// +// +// Arghh! I wish C++ literals were "string". + // Doing this simplifies the logic below. #ifndef __has_include #define __has_include(x) 0 #endif -#include <stddef.h> -#include <string.h> -#include <algorithm> -#include <iosfwd> -#include <iterator> -#include <string> +#include <stddef.h> +#include <string.h> +#include <algorithm> +#include <iosfwd> +#include <iterator> +#include <string> #if __has_include(<string_view>) && __cplusplus >= 201703L #include <string_view> #endif #if defined(ARCADIA_ROOT) -#include <util/generic/string.h> +#include <util/generic/string.h> #endif - -namespace re2 { - -class StringPiece { - public: + +namespace re2 { + +class StringPiece { + public: typedef std::char_traits<char> traits_type; - typedef char value_type; - typedef char* pointer; - typedef const char* const_pointer; - typedef char& reference; - typedef const char& const_reference; - typedef const char* const_iterator; - typedef const_iterator iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - typedef const_reverse_iterator reverse_iterator; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - static const size_type npos = static_cast<size_type>(-1); - - // We provide non-explicit singleton constructors so users can pass - // in a "const char*" or a "string" wherever a "StringPiece" is - // expected. - StringPiece() - : data_(NULL), size_(0) {} + typedef char value_type; + typedef char* pointer; + typedef const char* const_pointer; + typedef char& reference; + typedef const char& const_reference; + typedef const char* const_iterator; + typedef const_iterator iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + typedef const_reverse_iterator reverse_iterator; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + static const size_type npos = static_cast<size_type>(-1); + + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected. + StringPiece() + : data_(NULL), size_(0) {} #if __has_include(<string_view>) && __cplusplus >= 201703L StringPiece(const std::string_view& str) : data_(str.data()), size_(str.size()) {} #endif - StringPiece(const std::string& str) - : data_(str.data()), size_(str.size()) {} - StringPiece(const char* str) - : data_(str), size_(str == NULL ? 0 : strlen(str)) {} - StringPiece(const char* str, size_type len) - : data_(str), size_(len) {} + StringPiece(const std::string& str) + : data_(str.data()), size_(str.size()) {} + StringPiece(const char* str) + : data_(str), size_(str == NULL ? 0 : strlen(str)) {} + StringPiece(const char* str, size_type len) + : data_(str), size_(len) {} #if defined(ARCADIA_ROOT) StringPiece(const TString& str) : StringPiece(str.data(), str.size()) {} #endif - - const_iterator begin() const { return data_; } - const_iterator end() const { return data_ + size_; } - const_reverse_iterator rbegin() const { - return const_reverse_iterator(data_ + size_); - } - const_reverse_iterator rend() const { - return const_reverse_iterator(data_); - } - - size_type size() const { return size_; } - size_type length() const { return size_; } - bool empty() const { return size_ == 0; } - - const_reference operator[](size_type i) const { return data_[i]; } - const_pointer data() const { return data_; } - - void remove_prefix(size_type n) { - data_ += n; - size_ -= n; - } - - void remove_suffix(size_type n) { - size_ -= n; - } - - void set(const char* str) { - data_ = str; - size_ = str == NULL ? 0 : strlen(str); - } - - void set(const char* str, size_type len) { - data_ = str; - size_ = len; - } - + + const_iterator begin() const { return data_; } + const_iterator end() const { return data_ + size_; } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(data_ + size_); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(data_); + } + + size_type size() const { return size_; } + size_type length() const { return size_; } + bool empty() const { return size_ == 0; } + + const_reference operator[](size_type i) const { return data_[i]; } + const_pointer data() const { return data_; } + + void remove_prefix(size_type n) { + data_ += n; + size_ -= n; + } + + void remove_suffix(size_type n) { + size_ -= n; + } + + void set(const char* str) { + data_ = str; + size_ = str == NULL ? 0 : strlen(str); + } + + void set(const char* str, size_type len) { + data_ = str; + size_ = len; + } + // Converts to `std::basic_string`. template <typename A> explicit operator std::basic_string<char, traits_type, A>() const { @@ -117,101 +117,101 @@ class StringPiece { return std::basic_string<char, traits_type, A>(data_, size_); } - std::string as_string() const { - return std::string(data_, size_); - } - - // We also define ToString() here, since many other string-like - // interfaces name the routine that converts to a C++ string - // "ToString", and it's confusing to have the method that does that - // for a StringPiece be called "as_string()". We also leave the - // "as_string()" method defined here for existing code. - std::string ToString() const { - return std::string(data_, size_); - } - - void CopyToString(std::string* target) const { - target->assign(data_, size_); - } - - void AppendToString(std::string* target) const { - target->append(data_, size_); - } - - size_type copy(char* buf, size_type n, size_type pos = 0) const; - StringPiece substr(size_type pos = 0, size_type n = npos) const; - - int compare(const StringPiece& x) const { - size_type min_size = std::min(size(), x.size()); - if (min_size > 0) { - int r = memcmp(data(), x.data(), min_size); - if (r < 0) return -1; - if (r > 0) return 1; - } - if (size() < x.size()) return -1; - if (size() > x.size()) return 1; - return 0; - } - - // Does "this" start with "x"? - bool starts_with(const StringPiece& x) const { - return x.empty() || - (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0); - } - - // Does "this" end with "x"? - bool ends_with(const StringPiece& x) const { - return x.empty() || - (size() >= x.size() && - memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0); - } - - bool contains(const StringPiece& s) const { - return find(s) != npos; - } - - size_type find(const StringPiece& s, size_type pos = 0) const; - size_type find(char c, size_type pos = 0) const; - size_type rfind(const StringPiece& s, size_type pos = npos) const; - size_type rfind(char c, size_type pos = npos) const; - - private: - const_pointer data_; - size_type size_; -}; - -inline bool operator==(const StringPiece& x, const StringPiece& y) { - StringPiece::size_type len = x.size(); - if (len != y.size()) return false; - return x.data() == y.data() || len == 0 || - memcmp(x.data(), y.data(), len) == 0; -} - -inline bool operator!=(const StringPiece& x, const StringPiece& y) { - return !(x == y); -} - -inline bool operator<(const StringPiece& x, const StringPiece& y) { - StringPiece::size_type min_size = std::min(x.size(), y.size()); - int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); - return (r < 0) || (r == 0 && x.size() < y.size()); -} - -inline bool operator>(const StringPiece& x, const StringPiece& y) { - return y < x; -} - -inline bool operator<=(const StringPiece& x, const StringPiece& y) { - return !(x > y); -} - -inline bool operator>=(const StringPiece& x, const StringPiece& y) { - return !(x < y); -} - -// Allow StringPiece to be logged. -std::ostream& operator<<(std::ostream& o, const StringPiece& p); - -} // namespace re2 - -#endif // RE2_STRINGPIECE_H_ + std::string as_string() const { + return std::string(data_, size_); + } + + // We also define ToString() here, since many other string-like + // interfaces name the routine that converts to a C++ string + // "ToString", and it's confusing to have the method that does that + // for a StringPiece be called "as_string()". We also leave the + // "as_string()" method defined here for existing code. + std::string ToString() const { + return std::string(data_, size_); + } + + void CopyToString(std::string* target) const { + target->assign(data_, size_); + } + + void AppendToString(std::string* target) const { + target->append(data_, size_); + } + + size_type copy(char* buf, size_type n, size_type pos = 0) const; + StringPiece substr(size_type pos = 0, size_type n = npos) const; + + int compare(const StringPiece& x) const { + size_type min_size = std::min(size(), x.size()); + if (min_size > 0) { + int r = memcmp(data(), x.data(), min_size); + if (r < 0) return -1; + if (r > 0) return 1; + } + if (size() < x.size()) return -1; + if (size() > x.size()) return 1; + return 0; + } + + // Does "this" start with "x"? + bool starts_with(const StringPiece& x) const { + return x.empty() || + (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0); + } + + // Does "this" end with "x"? + bool ends_with(const StringPiece& x) const { + return x.empty() || + (size() >= x.size() && + memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0); + } + + bool contains(const StringPiece& s) const { + return find(s) != npos; + } + + size_type find(const StringPiece& s, size_type pos = 0) const; + size_type find(char c, size_type pos = 0) const; + size_type rfind(const StringPiece& s, size_type pos = npos) const; + size_type rfind(char c, size_type pos = npos) const; + + private: + const_pointer data_; + size_type size_; +}; + +inline bool operator==(const StringPiece& x, const StringPiece& y) { + StringPiece::size_type len = x.size(); + if (len != y.size()) return false; + return x.data() == y.data() || len == 0 || + memcmp(x.data(), y.data(), len) == 0; +} + +inline bool operator!=(const StringPiece& x, const StringPiece& y) { + return !(x == y); +} + +inline bool operator<(const StringPiece& x, const StringPiece& y) { + StringPiece::size_type min_size = std::min(x.size(), y.size()); + int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); + return (r < 0) || (r == 0 && x.size() < y.size()); +} + +inline bool operator>(const StringPiece& x, const StringPiece& y) { + return y < x; +} + +inline bool operator<=(const StringPiece& x, const StringPiece& y) { + return !(x > y); +} + +inline bool operator>=(const StringPiece& x, const StringPiece& y) { + return !(x < y); +} + +// Allow StringPiece to be logged. +std::ostream& operator<<(std::ostream& o, const StringPiece& p); + +} // namespace re2 + +#endif // RE2_STRINGPIECE_H_ diff --git a/contrib/libs/re2/re2/tostring.cc b/contrib/libs/re2/re2/tostring.cc index 9c1c038ca6..edc6375584 100644 --- a/contrib/libs/re2/re2/tostring.cc +++ b/contrib/libs/re2/re2/tostring.cc @@ -5,13 +5,13 @@ // Format a regular expression structure as a string. // Tested by parse_test.cc -#include <string.h> -#include <string> - -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "util/utf.h" +#include <string.h> +#include <string> + +#include "util/util.h" +#include "util/logging.h" +#include "util/strutil.h" +#include "util/utf.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -48,8 +48,8 @@ class ToStringWalker : public Regexp::Walker<int> { private: std::string* t_; // The string the walker appends to. - ToStringWalker(const ToStringWalker&) = delete; - ToStringWalker& operator=(const ToStringWalker&) = delete; + ToStringWalker(const ToStringWalker&) = delete; + ToStringWalker& operator=(const ToStringWalker&) = delete; }; std::string Regexp::ToString() { @@ -101,8 +101,8 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { case kRegexpCapture: t_->append("("); - if (re->cap() == 0) - LOG(DFATAL) << "kRegexpCapture cap() == 0"; + if (re->cap() == 0) + LOG(DFATAL) << "kRegexpCapture cap() == 0"; if (re->name()) { t_->append("?P<"); t_->append(*re->name()); @@ -129,12 +129,12 @@ int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { static void AppendLiteral(std::string *t, Rune r, bool foldcase) { if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { t->append(1, '\\'); - t->append(1, static_cast<char>(r)); + t->append(1, static_cast<char>(r)); } else if (foldcase && 'a' <= r && r <= 'z') { - r -= 'a' - 'A'; + r -= 'a' - 'A'; t->append(1, '['); - t->append(1, static_cast<char>(r)); - t->append(1, static_cast<char>(r) + 'a' - 'A'); + t->append(1, static_cast<char>(r)); + t->append(1, static_cast<char>(r) + 'a' - 'A'); t->append(1, ']'); } else { AppendCCRange(t, r, r); @@ -162,14 +162,14 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, break; case kRegexpLiteral: - AppendLiteral(t_, re->rune(), - (re->parse_flags() & Regexp::FoldCase) != 0); + AppendLiteral(t_, re->rune(), + (re->parse_flags() & Regexp::FoldCase) != 0); break; case kRegexpLiteralString: for (int i = 0; i < re->nrunes(); i++) - AppendLiteral(t_, re->runes()[i], - (re->parse_flags() & Regexp::FoldCase) != 0); + AppendLiteral(t_, re->runes()[i], + (re->parse_flags() & Regexp::FoldCase) != 0); if (prec < PrecConcat) t_->append(")"); break; @@ -307,7 +307,7 @@ static void AppendCCChar(std::string* t, Rune r) { if (0x20 <= r && r <= 0x7E) { if (strchr("[]^-\\", r)) t->append("\\"); - t->append(1, static_cast<char>(r)); + t->append(1, static_cast<char>(r)); return; } switch (r) { diff --git a/contrib/libs/re2/re2/unicode_casefold.cc b/contrib/libs/re2/re2/unicode_casefold.cc index d9de2821d5..72e0645652 100644 --- a/contrib/libs/re2/re2/unicode_casefold.cc +++ b/contrib/libs/re2/re2/unicode_casefold.cc @@ -1,191 +1,191 @@ - -// GENERATED BY make_unicode_casefold.py; DO NOT EDIT. -// make_unicode_casefold.py >unicode_casefold.cc - -#include "re2/unicode_casefold.h" - -namespace re2 { - - + +// GENERATED BY make_unicode_casefold.py; DO NOT EDIT. +// make_unicode_casefold.py >unicode_casefold.cc + +#include "re2/unicode_casefold.h" + +namespace re2 { + + // 1424 groups, 2878 pairs, 367 ranges -const CaseFold unicode_casefold[] = { - { 65, 90, 32 }, - { 97, 106, -32 }, - { 107, 107, 8383 }, - { 108, 114, -32 }, - { 115, 115, 268 }, - { 116, 122, -32 }, - { 181, 181, 743 }, - { 192, 214, 32 }, - { 216, 222, 32 }, - { 223, 223, 7615 }, - { 224, 228, -32 }, - { 229, 229, 8262 }, - { 230, 246, -32 }, - { 248, 254, -32 }, - { 255, 255, 121 }, - { 256, 303, EvenOdd }, - { 306, 311, EvenOdd }, - { 313, 328, OddEven }, - { 330, 375, EvenOdd }, - { 376, 376, -121 }, - { 377, 382, OddEven }, - { 383, 383, -300 }, - { 384, 384, 195 }, - { 385, 385, 210 }, - { 386, 389, EvenOdd }, - { 390, 390, 206 }, - { 391, 392, OddEven }, - { 393, 394, 205 }, - { 395, 396, OddEven }, - { 398, 398, 79 }, - { 399, 399, 202 }, - { 400, 400, 203 }, - { 401, 402, OddEven }, - { 403, 403, 205 }, - { 404, 404, 207 }, - { 405, 405, 97 }, - { 406, 406, 211 }, - { 407, 407, 209 }, - { 408, 409, EvenOdd }, - { 410, 410, 163 }, - { 412, 412, 211 }, - { 413, 413, 213 }, - { 414, 414, 130 }, - { 415, 415, 214 }, - { 416, 421, EvenOdd }, - { 422, 422, 218 }, - { 423, 424, OddEven }, - { 425, 425, 218 }, - { 428, 429, EvenOdd }, - { 430, 430, 218 }, - { 431, 432, OddEven }, - { 433, 434, 217 }, - { 435, 438, OddEven }, - { 439, 439, 219 }, - { 440, 441, EvenOdd }, - { 444, 445, EvenOdd }, - { 447, 447, 56 }, - { 452, 452, EvenOdd }, - { 453, 453, OddEven }, - { 454, 454, -2 }, - { 455, 455, OddEven }, - { 456, 456, EvenOdd }, - { 457, 457, -2 }, - { 458, 458, EvenOdd }, - { 459, 459, OddEven }, - { 460, 460, -2 }, - { 461, 476, OddEven }, - { 477, 477, -79 }, - { 478, 495, EvenOdd }, - { 497, 497, OddEven }, - { 498, 498, EvenOdd }, - { 499, 499, -2 }, - { 500, 501, EvenOdd }, - { 502, 502, -97 }, - { 503, 503, -56 }, - { 504, 543, EvenOdd }, - { 544, 544, -130 }, - { 546, 563, EvenOdd }, - { 570, 570, 10795 }, - { 571, 572, OddEven }, - { 573, 573, -163 }, - { 574, 574, 10792 }, - { 575, 576, 10815 }, - { 577, 578, OddEven }, - { 579, 579, -195 }, - { 580, 580, 69 }, - { 581, 581, 71 }, - { 582, 591, EvenOdd }, - { 592, 592, 10783 }, - { 593, 593, 10780 }, - { 594, 594, 10782 }, - { 595, 595, -210 }, - { 596, 596, -206 }, - { 598, 599, -205 }, - { 601, 601, -202 }, - { 603, 603, -203 }, - { 604, 604, 42319 }, - { 608, 608, -205 }, - { 609, 609, 42315 }, - { 611, 611, -207 }, - { 613, 613, 42280 }, - { 614, 614, 42308 }, - { 616, 616, -209 }, - { 617, 617, -211 }, +const CaseFold unicode_casefold[] = { + { 65, 90, 32 }, + { 97, 106, -32 }, + { 107, 107, 8383 }, + { 108, 114, -32 }, + { 115, 115, 268 }, + { 116, 122, -32 }, + { 181, 181, 743 }, + { 192, 214, 32 }, + { 216, 222, 32 }, + { 223, 223, 7615 }, + { 224, 228, -32 }, + { 229, 229, 8262 }, + { 230, 246, -32 }, + { 248, 254, -32 }, + { 255, 255, 121 }, + { 256, 303, EvenOdd }, + { 306, 311, EvenOdd }, + { 313, 328, OddEven }, + { 330, 375, EvenOdd }, + { 376, 376, -121 }, + { 377, 382, OddEven }, + { 383, 383, -300 }, + { 384, 384, 195 }, + { 385, 385, 210 }, + { 386, 389, EvenOdd }, + { 390, 390, 206 }, + { 391, 392, OddEven }, + { 393, 394, 205 }, + { 395, 396, OddEven }, + { 398, 398, 79 }, + { 399, 399, 202 }, + { 400, 400, 203 }, + { 401, 402, OddEven }, + { 403, 403, 205 }, + { 404, 404, 207 }, + { 405, 405, 97 }, + { 406, 406, 211 }, + { 407, 407, 209 }, + { 408, 409, EvenOdd }, + { 410, 410, 163 }, + { 412, 412, 211 }, + { 413, 413, 213 }, + { 414, 414, 130 }, + { 415, 415, 214 }, + { 416, 421, EvenOdd }, + { 422, 422, 218 }, + { 423, 424, OddEven }, + { 425, 425, 218 }, + { 428, 429, EvenOdd }, + { 430, 430, 218 }, + { 431, 432, OddEven }, + { 433, 434, 217 }, + { 435, 438, OddEven }, + { 439, 439, 219 }, + { 440, 441, EvenOdd }, + { 444, 445, EvenOdd }, + { 447, 447, 56 }, + { 452, 452, EvenOdd }, + { 453, 453, OddEven }, + { 454, 454, -2 }, + { 455, 455, OddEven }, + { 456, 456, EvenOdd }, + { 457, 457, -2 }, + { 458, 458, EvenOdd }, + { 459, 459, OddEven }, + { 460, 460, -2 }, + { 461, 476, OddEven }, + { 477, 477, -79 }, + { 478, 495, EvenOdd }, + { 497, 497, OddEven }, + { 498, 498, EvenOdd }, + { 499, 499, -2 }, + { 500, 501, EvenOdd }, + { 502, 502, -97 }, + { 503, 503, -56 }, + { 504, 543, EvenOdd }, + { 544, 544, -130 }, + { 546, 563, EvenOdd }, + { 570, 570, 10795 }, + { 571, 572, OddEven }, + { 573, 573, -163 }, + { 574, 574, 10792 }, + { 575, 576, 10815 }, + { 577, 578, OddEven }, + { 579, 579, -195 }, + { 580, 580, 69 }, + { 581, 581, 71 }, + { 582, 591, EvenOdd }, + { 592, 592, 10783 }, + { 593, 593, 10780 }, + { 594, 594, 10782 }, + { 595, 595, -210 }, + { 596, 596, -206 }, + { 598, 599, -205 }, + { 601, 601, -202 }, + { 603, 603, -203 }, + { 604, 604, 42319 }, + { 608, 608, -205 }, + { 609, 609, 42315 }, + { 611, 611, -207 }, + { 613, 613, 42280 }, + { 614, 614, 42308 }, + { 616, 616, -209 }, + { 617, 617, -211 }, { 618, 618, 42308 }, - { 619, 619, 10743 }, - { 620, 620, 42305 }, - { 623, 623, -211 }, - { 625, 625, 10749 }, - { 626, 626, -213 }, - { 629, 629, -214 }, - { 637, 637, 10727 }, - { 640, 640, -218 }, + { 619, 619, 10743 }, + { 620, 620, 42305 }, + { 623, 623, -211 }, + { 625, 625, 10749 }, + { 626, 626, -213 }, + { 629, 629, -214 }, + { 637, 637, 10727 }, + { 640, 640, -218 }, { 642, 642, 42307 }, - { 643, 643, -218 }, - { 647, 647, 42282 }, - { 648, 648, -218 }, - { 649, 649, -69 }, - { 650, 651, -217 }, - { 652, 652, -71 }, - { 658, 658, -219 }, - { 669, 669, 42261 }, - { 670, 670, 42258 }, - { 837, 837, 84 }, - { 880, 883, EvenOdd }, - { 886, 887, EvenOdd }, - { 891, 893, 130 }, - { 895, 895, 116 }, - { 902, 902, 38 }, - { 904, 906, 37 }, - { 908, 908, 64 }, - { 910, 911, 63 }, - { 913, 929, 32 }, - { 931, 931, 31 }, - { 932, 939, 32 }, - { 940, 940, -38 }, - { 941, 943, -37 }, - { 945, 945, -32 }, - { 946, 946, 30 }, - { 947, 948, -32 }, - { 949, 949, 64 }, - { 950, 951, -32 }, - { 952, 952, 25 }, - { 953, 953, 7173 }, - { 954, 954, 54 }, - { 955, 955, -32 }, - { 956, 956, -775 }, - { 957, 959, -32 }, - { 960, 960, 22 }, - { 961, 961, 48 }, - { 962, 962, EvenOdd }, - { 963, 965, -32 }, - { 966, 966, 15 }, - { 967, 968, -32 }, - { 969, 969, 7517 }, - { 970, 971, -32 }, - { 972, 972, -64 }, - { 973, 974, -63 }, - { 975, 975, 8 }, - { 976, 976, -62 }, - { 977, 977, 35 }, - { 981, 981, -47 }, - { 982, 982, -54 }, - { 983, 983, -8 }, - { 984, 1007, EvenOdd }, - { 1008, 1008, -86 }, - { 1009, 1009, -80 }, - { 1010, 1010, 7 }, - { 1011, 1011, -116 }, - { 1012, 1012, -92 }, - { 1013, 1013, -96 }, - { 1015, 1016, OddEven }, - { 1017, 1017, -7 }, - { 1018, 1019, EvenOdd }, - { 1021, 1023, -130 }, - { 1024, 1039, 80 }, - { 1040, 1071, 32 }, + { 643, 643, -218 }, + { 647, 647, 42282 }, + { 648, 648, -218 }, + { 649, 649, -69 }, + { 650, 651, -217 }, + { 652, 652, -71 }, + { 658, 658, -219 }, + { 669, 669, 42261 }, + { 670, 670, 42258 }, + { 837, 837, 84 }, + { 880, 883, EvenOdd }, + { 886, 887, EvenOdd }, + { 891, 893, 130 }, + { 895, 895, 116 }, + { 902, 902, 38 }, + { 904, 906, 37 }, + { 908, 908, 64 }, + { 910, 911, 63 }, + { 913, 929, 32 }, + { 931, 931, 31 }, + { 932, 939, 32 }, + { 940, 940, -38 }, + { 941, 943, -37 }, + { 945, 945, -32 }, + { 946, 946, 30 }, + { 947, 948, -32 }, + { 949, 949, 64 }, + { 950, 951, -32 }, + { 952, 952, 25 }, + { 953, 953, 7173 }, + { 954, 954, 54 }, + { 955, 955, -32 }, + { 956, 956, -775 }, + { 957, 959, -32 }, + { 960, 960, 22 }, + { 961, 961, 48 }, + { 962, 962, EvenOdd }, + { 963, 965, -32 }, + { 966, 966, 15 }, + { 967, 968, -32 }, + { 969, 969, 7517 }, + { 970, 971, -32 }, + { 972, 972, -64 }, + { 973, 974, -63 }, + { 975, 975, 8 }, + { 976, 976, -62 }, + { 977, 977, 35 }, + { 981, 981, -47 }, + { 982, 982, -54 }, + { 983, 983, -8 }, + { 984, 1007, EvenOdd }, + { 1008, 1008, -86 }, + { 1009, 1009, -80 }, + { 1010, 1010, 7 }, + { 1011, 1011, -116 }, + { 1012, 1012, -92 }, + { 1013, 1013, -96 }, + { 1015, 1016, OddEven }, + { 1017, 1017, -7 }, + { 1018, 1019, EvenOdd }, + { 1021, 1023, -130 }, + { 1024, 1039, 80 }, + { 1040, 1071, 32 }, { 1072, 1073, -32 }, { 1074, 1074, 6222 }, { 1075, 1075, -32 }, @@ -197,25 +197,25 @@ const CaseFold unicode_casefold[] = { { 1091, 1097, -32 }, { 1098, 1098, 6204 }, { 1099, 1103, -32 }, - { 1104, 1119, -80 }, + { 1104, 1119, -80 }, { 1120, 1122, EvenOdd }, { 1123, 1123, 6180 }, { 1124, 1153, EvenOdd }, - { 1162, 1215, EvenOdd }, - { 1216, 1216, 15 }, - { 1217, 1230, OddEven }, - { 1231, 1231, -15 }, - { 1232, 1327, EvenOdd }, - { 1329, 1366, 48 }, - { 1377, 1414, -48 }, - { 4256, 4293, 7264 }, - { 4295, 4295, 7264 }, - { 4301, 4301, 7264 }, + { 1162, 1215, EvenOdd }, + { 1216, 1216, 15 }, + { 1217, 1230, OddEven }, + { 1231, 1231, -15 }, + { 1232, 1327, EvenOdd }, + { 1329, 1366, 48 }, + { 1377, 1414, -48 }, + { 4256, 4293, 7264 }, + { 4295, 4295, 7264 }, + { 4301, 4301, 7264 }, { 4304, 4346, 3008 }, { 4349, 4351, 3008 }, - { 5024, 5103, 38864 }, - { 5104, 5109, 8 }, - { 5112, 5117, -8 }, + { 5024, 5103, 38864 }, + { 5104, 5109, 8 }, + { 5112, 5117, -8 }, { 7296, 7296, -6254 }, { 7297, 7297, -6253 }, { 7298, 7298, -6244 }, @@ -227,123 +227,123 @@ const CaseFold unicode_casefold[] = { { 7304, 7304, 35266 }, { 7312, 7354, -3008 }, { 7357, 7359, -3008 }, - { 7545, 7545, 35332 }, - { 7549, 7549, 3814 }, + { 7545, 7545, 35332 }, + { 7549, 7549, 3814 }, { 7566, 7566, 35384 }, - { 7680, 7776, EvenOdd }, - { 7777, 7777, 58 }, - { 7778, 7829, EvenOdd }, - { 7835, 7835, -59 }, - { 7838, 7838, -7615 }, - { 7840, 7935, EvenOdd }, - { 7936, 7943, 8 }, - { 7944, 7951, -8 }, - { 7952, 7957, 8 }, - { 7960, 7965, -8 }, - { 7968, 7975, 8 }, - { 7976, 7983, -8 }, - { 7984, 7991, 8 }, - { 7992, 7999, -8 }, - { 8000, 8005, 8 }, - { 8008, 8013, -8 }, - { 8017, 8017, 8 }, - { 8019, 8019, 8 }, - { 8021, 8021, 8 }, - { 8023, 8023, 8 }, - { 8025, 8025, -8 }, - { 8027, 8027, -8 }, - { 8029, 8029, -8 }, - { 8031, 8031, -8 }, - { 8032, 8039, 8 }, - { 8040, 8047, -8 }, - { 8048, 8049, 74 }, - { 8050, 8053, 86 }, - { 8054, 8055, 100 }, - { 8056, 8057, 128 }, - { 8058, 8059, 112 }, - { 8060, 8061, 126 }, - { 8064, 8071, 8 }, - { 8072, 8079, -8 }, - { 8080, 8087, 8 }, - { 8088, 8095, -8 }, - { 8096, 8103, 8 }, - { 8104, 8111, -8 }, - { 8112, 8113, 8 }, - { 8115, 8115, 9 }, - { 8120, 8121, -8 }, - { 8122, 8123, -74 }, - { 8124, 8124, -9 }, - { 8126, 8126, -7289 }, - { 8131, 8131, 9 }, - { 8136, 8139, -86 }, - { 8140, 8140, -9 }, - { 8144, 8145, 8 }, - { 8152, 8153, -8 }, - { 8154, 8155, -100 }, - { 8160, 8161, 8 }, - { 8165, 8165, 7 }, - { 8168, 8169, -8 }, - { 8170, 8171, -112 }, - { 8172, 8172, -7 }, - { 8179, 8179, 9 }, - { 8184, 8185, -128 }, - { 8186, 8187, -126 }, - { 8188, 8188, -9 }, - { 8486, 8486, -7549 }, - { 8490, 8490, -8415 }, - { 8491, 8491, -8294 }, - { 8498, 8498, 28 }, - { 8526, 8526, -28 }, - { 8544, 8559, 16 }, - { 8560, 8575, -16 }, - { 8579, 8580, OddEven }, - { 9398, 9423, 26 }, - { 9424, 9449, -26 }, + { 7680, 7776, EvenOdd }, + { 7777, 7777, 58 }, + { 7778, 7829, EvenOdd }, + { 7835, 7835, -59 }, + { 7838, 7838, -7615 }, + { 7840, 7935, EvenOdd }, + { 7936, 7943, 8 }, + { 7944, 7951, -8 }, + { 7952, 7957, 8 }, + { 7960, 7965, -8 }, + { 7968, 7975, 8 }, + { 7976, 7983, -8 }, + { 7984, 7991, 8 }, + { 7992, 7999, -8 }, + { 8000, 8005, 8 }, + { 8008, 8013, -8 }, + { 8017, 8017, 8 }, + { 8019, 8019, 8 }, + { 8021, 8021, 8 }, + { 8023, 8023, 8 }, + { 8025, 8025, -8 }, + { 8027, 8027, -8 }, + { 8029, 8029, -8 }, + { 8031, 8031, -8 }, + { 8032, 8039, 8 }, + { 8040, 8047, -8 }, + { 8048, 8049, 74 }, + { 8050, 8053, 86 }, + { 8054, 8055, 100 }, + { 8056, 8057, 128 }, + { 8058, 8059, 112 }, + { 8060, 8061, 126 }, + { 8064, 8071, 8 }, + { 8072, 8079, -8 }, + { 8080, 8087, 8 }, + { 8088, 8095, -8 }, + { 8096, 8103, 8 }, + { 8104, 8111, -8 }, + { 8112, 8113, 8 }, + { 8115, 8115, 9 }, + { 8120, 8121, -8 }, + { 8122, 8123, -74 }, + { 8124, 8124, -9 }, + { 8126, 8126, -7289 }, + { 8131, 8131, 9 }, + { 8136, 8139, -86 }, + { 8140, 8140, -9 }, + { 8144, 8145, 8 }, + { 8152, 8153, -8 }, + { 8154, 8155, -100 }, + { 8160, 8161, 8 }, + { 8165, 8165, 7 }, + { 8168, 8169, -8 }, + { 8170, 8171, -112 }, + { 8172, 8172, -7 }, + { 8179, 8179, 9 }, + { 8184, 8185, -128 }, + { 8186, 8187, -126 }, + { 8188, 8188, -9 }, + { 8486, 8486, -7549 }, + { 8490, 8490, -8415 }, + { 8491, 8491, -8294 }, + { 8498, 8498, 28 }, + { 8526, 8526, -28 }, + { 8544, 8559, 16 }, + { 8560, 8575, -16 }, + { 8579, 8580, OddEven }, + { 9398, 9423, 26 }, + { 9424, 9449, -26 }, { 11264, 11311, 48 }, { 11312, 11359, -48 }, - { 11360, 11361, EvenOdd }, - { 11362, 11362, -10743 }, - { 11363, 11363, -3814 }, - { 11364, 11364, -10727 }, - { 11365, 11365, -10795 }, - { 11366, 11366, -10792 }, - { 11367, 11372, OddEven }, - { 11373, 11373, -10780 }, - { 11374, 11374, -10749 }, - { 11375, 11375, -10783 }, - { 11376, 11376, -10782 }, - { 11378, 11379, EvenOdd }, - { 11381, 11382, OddEven }, - { 11390, 11391, -10815 }, - { 11392, 11491, EvenOdd }, - { 11499, 11502, OddEven }, - { 11506, 11507, EvenOdd }, - { 11520, 11557, -7264 }, - { 11559, 11559, -7264 }, - { 11565, 11565, -7264 }, + { 11360, 11361, EvenOdd }, + { 11362, 11362, -10743 }, + { 11363, 11363, -3814 }, + { 11364, 11364, -10727 }, + { 11365, 11365, -10795 }, + { 11366, 11366, -10792 }, + { 11367, 11372, OddEven }, + { 11373, 11373, -10780 }, + { 11374, 11374, -10749 }, + { 11375, 11375, -10783 }, + { 11376, 11376, -10782 }, + { 11378, 11379, EvenOdd }, + { 11381, 11382, OddEven }, + { 11390, 11391, -10815 }, + { 11392, 11491, EvenOdd }, + { 11499, 11502, OddEven }, + { 11506, 11507, EvenOdd }, + { 11520, 11557, -7264 }, + { 11559, 11559, -7264 }, + { 11565, 11565, -7264 }, { 42560, 42570, EvenOdd }, { 42571, 42571, -35267 }, { 42572, 42605, EvenOdd }, - { 42624, 42651, EvenOdd }, - { 42786, 42799, EvenOdd }, - { 42802, 42863, EvenOdd }, - { 42873, 42876, OddEven }, - { 42877, 42877, -35332 }, - { 42878, 42887, EvenOdd }, - { 42891, 42892, OddEven }, - { 42893, 42893, -42280 }, - { 42896, 42899, EvenOdd }, + { 42624, 42651, EvenOdd }, + { 42786, 42799, EvenOdd }, + { 42802, 42863, EvenOdd }, + { 42873, 42876, OddEven }, + { 42877, 42877, -35332 }, + { 42878, 42887, EvenOdd }, + { 42891, 42892, OddEven }, + { 42893, 42893, -42280 }, + { 42896, 42899, EvenOdd }, { 42900, 42900, 48 }, - { 42902, 42921, EvenOdd }, - { 42922, 42922, -42308 }, - { 42923, 42923, -42319 }, - { 42924, 42924, -42315 }, - { 42925, 42925, -42305 }, + { 42902, 42921, EvenOdd }, + { 42922, 42922, -42308 }, + { 42923, 42923, -42319 }, + { 42924, 42924, -42315 }, + { 42925, 42925, -42305 }, { 42926, 42926, -42308 }, - { 42928, 42928, -42258 }, - { 42929, 42929, -42282 }, - { 42930, 42930, -42261 }, - { 42931, 42931, 928 }, + { 42928, 42928, -42258 }, + { 42929, 42929, -42282 }, + { 42930, 42930, -42261 }, + { 42931, 42931, 928 }, { 42932, 42947, EvenOdd }, { 42948, 42948, -48 }, { 42949, 42949, -42307 }, @@ -352,12 +352,12 @@ const CaseFold unicode_casefold[] = { { 42960, 42961, EvenOdd }, { 42966, 42969, EvenOdd }, { 42997, 42998, OddEven }, - { 43859, 43859, -928 }, - { 43888, 43967, -38864 }, - { 65313, 65338, 32 }, - { 65345, 65370, -32 }, - { 66560, 66599, 40 }, - { 66600, 66639, -40 }, + { 43859, 43859, -928 }, + { 43888, 43967, -38864 }, + { 65313, 65338, 32 }, + { 65345, 65370, -32 }, + { 66560, 66599, 40 }, + { 66600, 66639, -40 }, { 66736, 66771, 40 }, { 66776, 66811, -40 }, { 66928, 66938, 39 }, @@ -368,120 +368,120 @@ const CaseFold unicode_casefold[] = { { 66979, 66993, -39 }, { 66995, 67001, -39 }, { 67003, 67004, -39 }, - { 68736, 68786, 64 }, - { 68800, 68850, -64 }, - { 71840, 71871, 32 }, - { 71872, 71903, -32 }, + { 68736, 68786, 64 }, + { 68800, 68850, -64 }, + { 71840, 71871, 32 }, + { 71872, 71903, -32 }, { 93760, 93791, 32 }, { 93792, 93823, -32 }, { 125184, 125217, 34 }, { 125218, 125251, -34 }, -}; +}; const int num_unicode_casefold = 367; - + // 1424 groups, 1454 pairs, 205 ranges -const CaseFold unicode_tolower[] = { - { 65, 90, 32 }, - { 181, 181, 775 }, - { 192, 214, 32 }, - { 216, 222, 32 }, - { 256, 302, EvenOddSkip }, - { 306, 310, EvenOddSkip }, - { 313, 327, OddEvenSkip }, - { 330, 374, EvenOddSkip }, - { 376, 376, -121 }, - { 377, 381, OddEvenSkip }, - { 383, 383, -268 }, - { 385, 385, 210 }, - { 386, 388, EvenOddSkip }, - { 390, 390, 206 }, - { 391, 391, OddEven }, - { 393, 394, 205 }, - { 395, 395, OddEven }, - { 398, 398, 79 }, - { 399, 399, 202 }, - { 400, 400, 203 }, - { 401, 401, OddEven }, - { 403, 403, 205 }, - { 404, 404, 207 }, - { 406, 406, 211 }, - { 407, 407, 209 }, - { 408, 408, EvenOdd }, - { 412, 412, 211 }, - { 413, 413, 213 }, - { 415, 415, 214 }, - { 416, 420, EvenOddSkip }, - { 422, 422, 218 }, - { 423, 423, OddEven }, - { 425, 425, 218 }, - { 428, 428, EvenOdd }, - { 430, 430, 218 }, - { 431, 431, OddEven }, - { 433, 434, 217 }, - { 435, 437, OddEvenSkip }, - { 439, 439, 219 }, - { 440, 440, EvenOdd }, - { 444, 444, EvenOdd }, - { 452, 452, 2 }, - { 453, 453, OddEven }, - { 455, 455, 2 }, - { 456, 456, EvenOdd }, - { 458, 458, 2 }, - { 459, 475, OddEvenSkip }, - { 478, 494, EvenOddSkip }, - { 497, 497, 2 }, - { 498, 500, EvenOddSkip }, - { 502, 502, -97 }, - { 503, 503, -56 }, - { 504, 542, EvenOddSkip }, - { 544, 544, -130 }, - { 546, 562, EvenOddSkip }, - { 570, 570, 10795 }, - { 571, 571, OddEven }, - { 573, 573, -163 }, - { 574, 574, 10792 }, - { 577, 577, OddEven }, - { 579, 579, -195 }, - { 580, 580, 69 }, - { 581, 581, 71 }, - { 582, 590, EvenOddSkip }, - { 837, 837, 116 }, - { 880, 882, EvenOddSkip }, - { 886, 886, EvenOdd }, - { 895, 895, 116 }, - { 902, 902, 38 }, - { 904, 906, 37 }, - { 908, 908, 64 }, - { 910, 911, 63 }, - { 913, 929, 32 }, - { 931, 939, 32 }, - { 962, 962, EvenOdd }, - { 975, 975, 8 }, - { 976, 976, -30 }, - { 977, 977, -25 }, - { 981, 981, -15 }, - { 982, 982, -22 }, - { 984, 1006, EvenOddSkip }, - { 1008, 1008, -54 }, - { 1009, 1009, -48 }, - { 1012, 1012, -60 }, - { 1013, 1013, -64 }, - { 1015, 1015, OddEven }, - { 1017, 1017, -7 }, - { 1018, 1018, EvenOdd }, - { 1021, 1023, -130 }, - { 1024, 1039, 80 }, - { 1040, 1071, 32 }, - { 1120, 1152, EvenOddSkip }, - { 1162, 1214, EvenOddSkip }, - { 1216, 1216, 15 }, - { 1217, 1229, OddEvenSkip }, - { 1232, 1326, EvenOddSkip }, - { 1329, 1366, 48 }, - { 4256, 4293, 7264 }, - { 4295, 4295, 7264 }, - { 4301, 4301, 7264 }, - { 5112, 5117, -8 }, +const CaseFold unicode_tolower[] = { + { 65, 90, 32 }, + { 181, 181, 775 }, + { 192, 214, 32 }, + { 216, 222, 32 }, + { 256, 302, EvenOddSkip }, + { 306, 310, EvenOddSkip }, + { 313, 327, OddEvenSkip }, + { 330, 374, EvenOddSkip }, + { 376, 376, -121 }, + { 377, 381, OddEvenSkip }, + { 383, 383, -268 }, + { 385, 385, 210 }, + { 386, 388, EvenOddSkip }, + { 390, 390, 206 }, + { 391, 391, OddEven }, + { 393, 394, 205 }, + { 395, 395, OddEven }, + { 398, 398, 79 }, + { 399, 399, 202 }, + { 400, 400, 203 }, + { 401, 401, OddEven }, + { 403, 403, 205 }, + { 404, 404, 207 }, + { 406, 406, 211 }, + { 407, 407, 209 }, + { 408, 408, EvenOdd }, + { 412, 412, 211 }, + { 413, 413, 213 }, + { 415, 415, 214 }, + { 416, 420, EvenOddSkip }, + { 422, 422, 218 }, + { 423, 423, OddEven }, + { 425, 425, 218 }, + { 428, 428, EvenOdd }, + { 430, 430, 218 }, + { 431, 431, OddEven }, + { 433, 434, 217 }, + { 435, 437, OddEvenSkip }, + { 439, 439, 219 }, + { 440, 440, EvenOdd }, + { 444, 444, EvenOdd }, + { 452, 452, 2 }, + { 453, 453, OddEven }, + { 455, 455, 2 }, + { 456, 456, EvenOdd }, + { 458, 458, 2 }, + { 459, 475, OddEvenSkip }, + { 478, 494, EvenOddSkip }, + { 497, 497, 2 }, + { 498, 500, EvenOddSkip }, + { 502, 502, -97 }, + { 503, 503, -56 }, + { 504, 542, EvenOddSkip }, + { 544, 544, -130 }, + { 546, 562, EvenOddSkip }, + { 570, 570, 10795 }, + { 571, 571, OddEven }, + { 573, 573, -163 }, + { 574, 574, 10792 }, + { 577, 577, OddEven }, + { 579, 579, -195 }, + { 580, 580, 69 }, + { 581, 581, 71 }, + { 582, 590, EvenOddSkip }, + { 837, 837, 116 }, + { 880, 882, EvenOddSkip }, + { 886, 886, EvenOdd }, + { 895, 895, 116 }, + { 902, 902, 38 }, + { 904, 906, 37 }, + { 908, 908, 64 }, + { 910, 911, 63 }, + { 913, 929, 32 }, + { 931, 939, 32 }, + { 962, 962, EvenOdd }, + { 975, 975, 8 }, + { 976, 976, -30 }, + { 977, 977, -25 }, + { 981, 981, -15 }, + { 982, 982, -22 }, + { 984, 1006, EvenOddSkip }, + { 1008, 1008, -54 }, + { 1009, 1009, -48 }, + { 1012, 1012, -60 }, + { 1013, 1013, -64 }, + { 1015, 1015, OddEven }, + { 1017, 1017, -7 }, + { 1018, 1018, EvenOdd }, + { 1021, 1023, -130 }, + { 1024, 1039, 80 }, + { 1040, 1071, 32 }, + { 1120, 1152, EvenOddSkip }, + { 1162, 1214, EvenOddSkip }, + { 1216, 1216, 15 }, + { 1217, 1229, OddEvenSkip }, + { 1232, 1326, EvenOddSkip }, + { 1329, 1366, 48 }, + { 4256, 4293, 7264 }, + { 4295, 4295, 7264 }, + { 4301, 4301, 7264 }, + { 5112, 5117, -8 }, { 7296, 7296, -6222 }, { 7297, 7297, -6221 }, { 7298, 7298, -6212 }, @@ -492,80 +492,80 @@ const CaseFold unicode_tolower[] = { { 7304, 7304, 35267 }, { 7312, 7354, -3008 }, { 7357, 7359, -3008 }, - { 7680, 7828, EvenOddSkip }, - { 7835, 7835, -58 }, - { 7838, 7838, -7615 }, - { 7840, 7934, EvenOddSkip }, - { 7944, 7951, -8 }, - { 7960, 7965, -8 }, - { 7976, 7983, -8 }, - { 7992, 7999, -8 }, - { 8008, 8013, -8 }, - { 8025, 8025, -8 }, - { 8027, 8027, -8 }, - { 8029, 8029, -8 }, - { 8031, 8031, -8 }, - { 8040, 8047, -8 }, - { 8072, 8079, -8 }, - { 8088, 8095, -8 }, - { 8104, 8111, -8 }, - { 8120, 8121, -8 }, - { 8122, 8123, -74 }, - { 8124, 8124, -9 }, - { 8126, 8126, -7173 }, - { 8136, 8139, -86 }, - { 8140, 8140, -9 }, - { 8152, 8153, -8 }, - { 8154, 8155, -100 }, - { 8168, 8169, -8 }, - { 8170, 8171, -112 }, - { 8172, 8172, -7 }, - { 8184, 8185, -128 }, - { 8186, 8187, -126 }, - { 8188, 8188, -9 }, - { 8486, 8486, -7517 }, - { 8490, 8490, -8383 }, - { 8491, 8491, -8262 }, - { 8498, 8498, 28 }, - { 8544, 8559, 16 }, - { 8579, 8579, OddEven }, - { 9398, 9423, 26 }, + { 7680, 7828, EvenOddSkip }, + { 7835, 7835, -58 }, + { 7838, 7838, -7615 }, + { 7840, 7934, EvenOddSkip }, + { 7944, 7951, -8 }, + { 7960, 7965, -8 }, + { 7976, 7983, -8 }, + { 7992, 7999, -8 }, + { 8008, 8013, -8 }, + { 8025, 8025, -8 }, + { 8027, 8027, -8 }, + { 8029, 8029, -8 }, + { 8031, 8031, -8 }, + { 8040, 8047, -8 }, + { 8072, 8079, -8 }, + { 8088, 8095, -8 }, + { 8104, 8111, -8 }, + { 8120, 8121, -8 }, + { 8122, 8123, -74 }, + { 8124, 8124, -9 }, + { 8126, 8126, -7173 }, + { 8136, 8139, -86 }, + { 8140, 8140, -9 }, + { 8152, 8153, -8 }, + { 8154, 8155, -100 }, + { 8168, 8169, -8 }, + { 8170, 8171, -112 }, + { 8172, 8172, -7 }, + { 8184, 8185, -128 }, + { 8186, 8187, -126 }, + { 8188, 8188, -9 }, + { 8486, 8486, -7517 }, + { 8490, 8490, -8383 }, + { 8491, 8491, -8262 }, + { 8498, 8498, 28 }, + { 8544, 8559, 16 }, + { 8579, 8579, OddEven }, + { 9398, 9423, 26 }, { 11264, 11311, 48 }, - { 11360, 11360, EvenOdd }, - { 11362, 11362, -10743 }, - { 11363, 11363, -3814 }, - { 11364, 11364, -10727 }, - { 11367, 11371, OddEvenSkip }, - { 11373, 11373, -10780 }, - { 11374, 11374, -10749 }, - { 11375, 11375, -10783 }, - { 11376, 11376, -10782 }, - { 11378, 11378, EvenOdd }, - { 11381, 11381, OddEven }, - { 11390, 11391, -10815 }, - { 11392, 11490, EvenOddSkip }, - { 11499, 11501, OddEvenSkip }, - { 11506, 11506, EvenOdd }, - { 42560, 42604, EvenOddSkip }, - { 42624, 42650, EvenOddSkip }, - { 42786, 42798, EvenOddSkip }, - { 42802, 42862, EvenOddSkip }, - { 42873, 42875, OddEvenSkip }, - { 42877, 42877, -35332 }, - { 42878, 42886, EvenOddSkip }, - { 42891, 42891, OddEven }, - { 42893, 42893, -42280 }, - { 42896, 42898, EvenOddSkip }, - { 42902, 42920, EvenOddSkip }, - { 42922, 42922, -42308 }, - { 42923, 42923, -42319 }, - { 42924, 42924, -42315 }, - { 42925, 42925, -42305 }, + { 11360, 11360, EvenOdd }, + { 11362, 11362, -10743 }, + { 11363, 11363, -3814 }, + { 11364, 11364, -10727 }, + { 11367, 11371, OddEvenSkip }, + { 11373, 11373, -10780 }, + { 11374, 11374, -10749 }, + { 11375, 11375, -10783 }, + { 11376, 11376, -10782 }, + { 11378, 11378, EvenOdd }, + { 11381, 11381, OddEven }, + { 11390, 11391, -10815 }, + { 11392, 11490, EvenOddSkip }, + { 11499, 11501, OddEvenSkip }, + { 11506, 11506, EvenOdd }, + { 42560, 42604, EvenOddSkip }, + { 42624, 42650, EvenOddSkip }, + { 42786, 42798, EvenOddSkip }, + { 42802, 42862, EvenOddSkip }, + { 42873, 42875, OddEvenSkip }, + { 42877, 42877, -35332 }, + { 42878, 42886, EvenOddSkip }, + { 42891, 42891, OddEven }, + { 42893, 42893, -42280 }, + { 42896, 42898, EvenOddSkip }, + { 42902, 42920, EvenOddSkip }, + { 42922, 42922, -42308 }, + { 42923, 42923, -42319 }, + { 42924, 42924, -42315 }, + { 42925, 42925, -42305 }, { 42926, 42926, -42308 }, - { 42928, 42928, -42258 }, - { 42929, 42929, -42282 }, - { 42930, 42930, -42261 }, - { 42931, 42931, 928 }, + { 42928, 42928, -42258 }, + { 42929, 42929, -42282 }, + { 42930, 42930, -42261 }, + { 42931, 42931, 928 }, { 42932, 42946, EvenOddSkip }, { 42948, 42948, -48 }, { 42949, 42949, -42307 }, @@ -574,23 +574,23 @@ const CaseFold unicode_tolower[] = { { 42960, 42960, EvenOdd }, { 42966, 42968, EvenOddSkip }, { 42997, 42997, OddEven }, - { 43888, 43967, -38864 }, - { 65313, 65338, 32 }, - { 66560, 66599, 40 }, + { 43888, 43967, -38864 }, + { 65313, 65338, 32 }, + { 66560, 66599, 40 }, { 66736, 66771, 40 }, { 66928, 66938, 39 }, { 66940, 66954, 39 }, { 66956, 66962, 39 }, { 66964, 66965, 39 }, - { 68736, 68786, 64 }, - { 71840, 71871, 32 }, + { 68736, 68786, 64 }, + { 71840, 71871, 32 }, { 93760, 93791, 32 }, { 125184, 125217, 34 }, -}; +}; const int num_unicode_tolower = 205; - - - -} // namespace re2 - - + + + +} // namespace re2 + + diff --git a/contrib/libs/re2/re2/unicode_casefold.h b/contrib/libs/re2/re2/unicode_casefold.h index 8bdbb42fbc..e8158740a8 100644 --- a/contrib/libs/re2/re2/unicode_casefold.h +++ b/contrib/libs/re2/re2/unicode_casefold.h @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_UNICODE_CASEFOLD_H_ -#define RE2_UNICODE_CASEFOLD_H_ - +#ifndef RE2_UNICODE_CASEFOLD_H_ +#define RE2_UNICODE_CASEFOLD_H_ + // Unicode case folding tables. // The Unicode case folding tables encode the mapping from one Unicode point @@ -19,7 +19,7 @@ // 'K' -> 'K' // // Like everything Unicode, these tables are big. If we represent the table -// as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB. +// as a sorted list of uint32_t pairs, it has 2049 entries and is 16 kB. // Most table entries look like the ones around them: // 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc. // Instead of listing all the pairs explicitly, we make a list of ranges @@ -39,40 +39,40 @@ // The grouped form also allows for efficient fold range calculations // rather than looping one character at a time. -#include <stdint.h> +#include <stdint.h> -#include "util/util.h" -#include "util/utf.h" +#include "util/util.h" +#include "util/utf.h" namespace re2 { enum { EvenOdd = 1, - OddEven = -1, - EvenOddSkip = 1<<30, - OddEvenSkip, + OddEven = -1, + EvenOddSkip = 1<<30, + OddEvenSkip, }; struct CaseFold { - Rune lo; - Rune hi; - int32_t delta; + Rune lo; + Rune hi; + int32_t delta; }; extern const CaseFold unicode_casefold[]; extern const int num_unicode_casefold; -extern const CaseFold unicode_tolower[]; -extern const int num_unicode_tolower; - +extern const CaseFold unicode_tolower[]; +extern const int num_unicode_tolower; + // Returns the CaseFold* in the tables that contains rune. // If rune is not in the tables, returns the first CaseFold* after rune. // If rune is larger than any value in the tables, returns NULL. -extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune); - -// Returns the result of applying the fold f to the rune r. -extern Rune ApplyFold(const CaseFold *f, Rune r); +extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune); +// Returns the result of applying the fold f to the rune r. +extern Rune ApplyFold(const CaseFold *f, Rune r); + } // namespace re2 -#endif // RE2_UNICODE_CASEFOLD_H_ +#endif // RE2_UNICODE_CASEFOLD_H_ diff --git a/contrib/libs/re2/re2/unicode_groups.cc b/contrib/libs/re2/re2/unicode_groups.cc index 2a8d7dae1f..d564572650 100644 --- a/contrib/libs/re2/re2/unicode_groups.cc +++ b/contrib/libs/re2/re2/unicode_groups.cc @@ -1,12 +1,12 @@ - -// GENERATED BY make_unicode_groups.py; DO NOT EDIT. -// make_unicode_groups.py >unicode_groups.cc - -#include "re2/unicode_groups.h" - -namespace re2 { - - + +// GENERATED BY make_unicode_groups.py; DO NOT EDIT. +// make_unicode_groups.py >unicode_groups.cc + +#include "re2/unicode_groups.h" + +namespace re2 { + + static const URange16 C_range16[] = { { 0, 31 }, { 127, 159 }, @@ -25,7 +25,7 @@ static const URange16 C_range16[] = { { 55296, 63743 }, { 65279, 65279 }, { 65529, 65531 }, -}; +}; static const URange32 C_range32[] = { { 69821, 69821 }, { 69837, 69837 }, @@ -36,11 +36,11 @@ static const URange32 C_range32[] = { { 917536, 917631 }, { 983040, 1048573 }, { 1048576, 1114109 }, -}; +}; static const URange16 Cc_range16[] = { { 0, 31 }, { 127, 159 }, -}; +}; static const URange16 Cf_range16[] = { { 173, 173 }, { 1536, 1541 }, @@ -56,7 +56,7 @@ static const URange16 Cf_range16[] = { { 8294, 8303 }, { 65279, 65279 }, { 65529, 65531 }, -}; +}; static const URange32 Cf_range32[] = { { 69821, 69821 }, { 69837, 69837 }, @@ -65,7 +65,7 @@ static const URange32 Cf_range32[] = { { 119155, 119162 }, { 917505, 917505 }, { 917536, 917631 }, -}; +}; static const URange16 Co_range16[] = { { 57344, 63743 }, }; @@ -79,9 +79,9 @@ static const URange16 Cs_range16[] = { static const URange16 L_range16[] = { { 65, 90 }, { 97, 122 }, - { 170, 170 }, + { 170, 170 }, { 181, 181 }, - { 186, 186 }, + { 186, 186 }, { 192, 214 }, { 216, 246 }, { 248, 705 }, @@ -103,211 +103,211 @@ static const URange16 L_range16[] = { { 1329, 1366 }, { 1369, 1369 }, { 1376, 1416 }, - { 1488, 1514 }, + { 1488, 1514 }, { 1519, 1522 }, { 1568, 1610 }, - { 1646, 1647 }, - { 1649, 1747 }, - { 1749, 1749 }, + { 1646, 1647 }, + { 1649, 1747 }, + { 1749, 1749 }, { 1765, 1766 }, - { 1774, 1775 }, - { 1786, 1788 }, - { 1791, 1791 }, - { 1808, 1808 }, - { 1810, 1839 }, - { 1869, 1957 }, - { 1969, 1969 }, - { 1994, 2026 }, + { 1774, 1775 }, + { 1786, 1788 }, + { 1791, 1791 }, + { 1808, 1808 }, + { 1810, 1839 }, + { 1869, 1957 }, + { 1969, 1969 }, + { 1994, 2026 }, { 2036, 2037 }, { 2042, 2042 }, - { 2048, 2069 }, + { 2048, 2069 }, { 2074, 2074 }, { 2084, 2084 }, { 2088, 2088 }, - { 2112, 2136 }, + { 2112, 2136 }, { 2144, 2154 }, { 2160, 2183 }, { 2185, 2190 }, { 2208, 2249 }, - { 2308, 2361 }, - { 2365, 2365 }, - { 2384, 2384 }, - { 2392, 2401 }, + { 2308, 2361 }, + { 2365, 2365 }, + { 2384, 2384 }, + { 2392, 2401 }, { 2417, 2432 }, - { 2437, 2444 }, - { 2447, 2448 }, - { 2451, 2472 }, - { 2474, 2480 }, - { 2482, 2482 }, - { 2486, 2489 }, - { 2493, 2493 }, - { 2510, 2510 }, - { 2524, 2525 }, - { 2527, 2529 }, - { 2544, 2545 }, + { 2437, 2444 }, + { 2447, 2448 }, + { 2451, 2472 }, + { 2474, 2480 }, + { 2482, 2482 }, + { 2486, 2489 }, + { 2493, 2493 }, + { 2510, 2510 }, + { 2524, 2525 }, + { 2527, 2529 }, + { 2544, 2545 }, { 2556, 2556 }, - { 2565, 2570 }, - { 2575, 2576 }, - { 2579, 2600 }, - { 2602, 2608 }, - { 2610, 2611 }, - { 2613, 2614 }, - { 2616, 2617 }, - { 2649, 2652 }, - { 2654, 2654 }, - { 2674, 2676 }, - { 2693, 2701 }, - { 2703, 2705 }, - { 2707, 2728 }, - { 2730, 2736 }, - { 2738, 2739 }, - { 2741, 2745 }, - { 2749, 2749 }, - { 2768, 2768 }, - { 2784, 2785 }, - { 2809, 2809 }, - { 2821, 2828 }, - { 2831, 2832 }, - { 2835, 2856 }, - { 2858, 2864 }, - { 2866, 2867 }, - { 2869, 2873 }, - { 2877, 2877 }, - { 2908, 2909 }, - { 2911, 2913 }, - { 2929, 2929 }, - { 2947, 2947 }, - { 2949, 2954 }, - { 2958, 2960 }, - { 2962, 2965 }, - { 2969, 2970 }, - { 2972, 2972 }, - { 2974, 2975 }, - { 2979, 2980 }, - { 2984, 2986 }, - { 2990, 3001 }, - { 3024, 3024 }, - { 3077, 3084 }, - { 3086, 3088 }, - { 3090, 3112 }, - { 3114, 3129 }, - { 3133, 3133 }, - { 3160, 3162 }, + { 2565, 2570 }, + { 2575, 2576 }, + { 2579, 2600 }, + { 2602, 2608 }, + { 2610, 2611 }, + { 2613, 2614 }, + { 2616, 2617 }, + { 2649, 2652 }, + { 2654, 2654 }, + { 2674, 2676 }, + { 2693, 2701 }, + { 2703, 2705 }, + { 2707, 2728 }, + { 2730, 2736 }, + { 2738, 2739 }, + { 2741, 2745 }, + { 2749, 2749 }, + { 2768, 2768 }, + { 2784, 2785 }, + { 2809, 2809 }, + { 2821, 2828 }, + { 2831, 2832 }, + { 2835, 2856 }, + { 2858, 2864 }, + { 2866, 2867 }, + { 2869, 2873 }, + { 2877, 2877 }, + { 2908, 2909 }, + { 2911, 2913 }, + { 2929, 2929 }, + { 2947, 2947 }, + { 2949, 2954 }, + { 2958, 2960 }, + { 2962, 2965 }, + { 2969, 2970 }, + { 2972, 2972 }, + { 2974, 2975 }, + { 2979, 2980 }, + { 2984, 2986 }, + { 2990, 3001 }, + { 3024, 3024 }, + { 3077, 3084 }, + { 3086, 3088 }, + { 3090, 3112 }, + { 3114, 3129 }, + { 3133, 3133 }, + { 3160, 3162 }, { 3165, 3165 }, - { 3168, 3169 }, + { 3168, 3169 }, { 3200, 3200 }, - { 3205, 3212 }, - { 3214, 3216 }, - { 3218, 3240 }, - { 3242, 3251 }, - { 3253, 3257 }, - { 3261, 3261 }, + { 3205, 3212 }, + { 3214, 3216 }, + { 3218, 3240 }, + { 3242, 3251 }, + { 3253, 3257 }, + { 3261, 3261 }, { 3293, 3294 }, - { 3296, 3297 }, - { 3313, 3314 }, + { 3296, 3297 }, + { 3313, 3314 }, { 3332, 3340 }, - { 3342, 3344 }, - { 3346, 3386 }, - { 3389, 3389 }, - { 3406, 3406 }, + { 3342, 3344 }, + { 3346, 3386 }, + { 3389, 3389 }, + { 3406, 3406 }, { 3412, 3414 }, - { 3423, 3425 }, - { 3450, 3455 }, - { 3461, 3478 }, - { 3482, 3505 }, - { 3507, 3515 }, - { 3517, 3517 }, - { 3520, 3526 }, - { 3585, 3632 }, - { 3634, 3635 }, + { 3423, 3425 }, + { 3450, 3455 }, + { 3461, 3478 }, + { 3482, 3505 }, + { 3507, 3515 }, + { 3517, 3517 }, + { 3520, 3526 }, + { 3585, 3632 }, + { 3634, 3635 }, { 3648, 3654 }, - { 3713, 3714 }, - { 3716, 3716 }, + { 3713, 3714 }, + { 3716, 3716 }, { 3718, 3722 }, { 3724, 3747 }, - { 3749, 3749 }, + { 3749, 3749 }, { 3751, 3760 }, - { 3762, 3763 }, - { 3773, 3773 }, - { 3776, 3780 }, + { 3762, 3763 }, + { 3773, 3773 }, + { 3776, 3780 }, { 3782, 3782 }, - { 3804, 3807 }, - { 3840, 3840 }, - { 3904, 3911 }, - { 3913, 3948 }, - { 3976, 3980 }, - { 4096, 4138 }, - { 4159, 4159 }, - { 4176, 4181 }, - { 4186, 4189 }, - { 4193, 4193 }, - { 4197, 4198 }, - { 4206, 4208 }, - { 4213, 4225 }, - { 4238, 4238 }, + { 3804, 3807 }, + { 3840, 3840 }, + { 3904, 3911 }, + { 3913, 3948 }, + { 3976, 3980 }, + { 4096, 4138 }, + { 4159, 4159 }, + { 4176, 4181 }, + { 4186, 4189 }, + { 4193, 4193 }, + { 4197, 4198 }, + { 4206, 4208 }, + { 4213, 4225 }, + { 4238, 4238 }, { 4256, 4293 }, { 4295, 4295 }, { 4301, 4301 }, - { 4304, 4346 }, + { 4304, 4346 }, { 4348, 4680 }, - { 4682, 4685 }, - { 4688, 4694 }, - { 4696, 4696 }, - { 4698, 4701 }, - { 4704, 4744 }, - { 4746, 4749 }, - { 4752, 4784 }, - { 4786, 4789 }, - { 4792, 4798 }, - { 4800, 4800 }, - { 4802, 4805 }, - { 4808, 4822 }, - { 4824, 4880 }, - { 4882, 4885 }, - { 4888, 4954 }, - { 4992, 5007 }, + { 4682, 4685 }, + { 4688, 4694 }, + { 4696, 4696 }, + { 4698, 4701 }, + { 4704, 4744 }, + { 4746, 4749 }, + { 4752, 4784 }, + { 4786, 4789 }, + { 4792, 4798 }, + { 4800, 4800 }, + { 4802, 4805 }, + { 4808, 4822 }, + { 4824, 4880 }, + { 4882, 4885 }, + { 4888, 4954 }, + { 4992, 5007 }, { 5024, 5109 }, { 5112, 5117 }, - { 5121, 5740 }, - { 5743, 5759 }, - { 5761, 5786 }, - { 5792, 5866 }, - { 5873, 5880 }, + { 5121, 5740 }, + { 5743, 5759 }, + { 5761, 5786 }, + { 5792, 5866 }, + { 5873, 5880 }, { 5888, 5905 }, { 5919, 5937 }, - { 5952, 5969 }, - { 5984, 5996 }, - { 5998, 6000 }, - { 6016, 6067 }, + { 5952, 5969 }, + { 5984, 5996 }, + { 5998, 6000 }, + { 6016, 6067 }, { 6103, 6103 }, - { 6108, 6108 }, + { 6108, 6108 }, { 6176, 6264 }, { 6272, 6276 }, { 6279, 6312 }, - { 6314, 6314 }, - { 6320, 6389 }, - { 6400, 6430 }, - { 6480, 6509 }, - { 6512, 6516 }, - { 6528, 6571 }, - { 6576, 6601 }, - { 6656, 6678 }, - { 6688, 6740 }, + { 6314, 6314 }, + { 6320, 6389 }, + { 6400, 6430 }, + { 6480, 6509 }, + { 6512, 6516 }, + { 6528, 6571 }, + { 6576, 6601 }, + { 6656, 6678 }, + { 6688, 6740 }, { 6823, 6823 }, - { 6917, 6963 }, + { 6917, 6963 }, { 6981, 6988 }, - { 7043, 7072 }, - { 7086, 7087 }, - { 7098, 7141 }, - { 7168, 7203 }, - { 7245, 7247 }, + { 7043, 7072 }, + { 7086, 7087 }, + { 7098, 7141 }, + { 7168, 7203 }, + { 7245, 7247 }, { 7258, 7293 }, { 7296, 7304 }, { 7312, 7354 }, { 7357, 7359 }, - { 7401, 7404 }, + { 7401, 7404 }, { 7406, 7411 }, - { 7413, 7414 }, + { 7413, 7414 }, { 7418, 7418 }, { 7424, 7615 }, { 7680, 7957 }, @@ -352,38 +352,38 @@ static const URange16 L_range16[] = { { 11520, 11557 }, { 11559, 11559 }, { 11565, 11565 }, - { 11568, 11623 }, + { 11568, 11623 }, { 11631, 11631 }, - { 11648, 11670 }, - { 11680, 11686 }, - { 11688, 11694 }, - { 11696, 11702 }, - { 11704, 11710 }, - { 11712, 11718 }, - { 11720, 11726 }, - { 11728, 11734 }, - { 11736, 11742 }, + { 11648, 11670 }, + { 11680, 11686 }, + { 11688, 11694 }, + { 11696, 11702 }, + { 11704, 11710 }, + { 11712, 11718 }, + { 11720, 11726 }, + { 11728, 11734 }, + { 11736, 11742 }, { 11823, 11823 }, { 12293, 12294 }, { 12337, 12341 }, { 12347, 12348 }, - { 12353, 12438 }, + { 12353, 12438 }, { 12445, 12447 }, - { 12449, 12538 }, + { 12449, 12538 }, { 12540, 12543 }, { 12549, 12591 }, - { 12593, 12686 }, + { 12593, 12686 }, { 12704, 12735 }, - { 12784, 12799 }, + { 12784, 12799 }, { 13312, 19903 }, { 19968, 42124 }, { 42192, 42237 }, { 42240, 42508 }, - { 42512, 42527 }, - { 42538, 42539 }, + { 42512, 42527 }, + { 42538, 42539 }, { 42560, 42606 }, { 42623, 42653 }, - { 42656, 42725 }, + { 42656, 42725 }, { 42775, 42783 }, { 42786, 42888 }, { 42891, 42954 }, @@ -391,95 +391,95 @@ static const URange16 L_range16[] = { { 42963, 42963 }, { 42965, 42969 }, { 42994, 43009 }, - { 43011, 43013 }, - { 43015, 43018 }, - { 43020, 43042 }, - { 43072, 43123 }, - { 43138, 43187 }, - { 43250, 43255 }, - { 43259, 43259 }, + { 43011, 43013 }, + { 43015, 43018 }, + { 43020, 43042 }, + { 43072, 43123 }, + { 43138, 43187 }, + { 43250, 43255 }, + { 43259, 43259 }, { 43261, 43262 }, - { 43274, 43301 }, - { 43312, 43334 }, - { 43360, 43388 }, - { 43396, 43442 }, + { 43274, 43301 }, + { 43312, 43334 }, + { 43360, 43388 }, + { 43396, 43442 }, { 43471, 43471 }, - { 43488, 43492 }, + { 43488, 43492 }, { 43494, 43503 }, - { 43514, 43518 }, - { 43520, 43560 }, - { 43584, 43586 }, - { 43588, 43595 }, + { 43514, 43518 }, + { 43520, 43560 }, + { 43584, 43586 }, + { 43588, 43595 }, { 43616, 43638 }, - { 43642, 43642 }, - { 43646, 43695 }, - { 43697, 43697 }, - { 43701, 43702 }, - { 43705, 43709 }, - { 43712, 43712 }, - { 43714, 43714 }, + { 43642, 43642 }, + { 43646, 43695 }, + { 43697, 43697 }, + { 43701, 43702 }, + { 43705, 43709 }, + { 43712, 43712 }, + { 43714, 43714 }, { 43739, 43741 }, - { 43744, 43754 }, + { 43744, 43754 }, { 43762, 43764 }, - { 43777, 43782 }, - { 43785, 43790 }, - { 43793, 43798 }, - { 43808, 43814 }, - { 43816, 43822 }, + { 43777, 43782 }, + { 43785, 43790 }, + { 43793, 43798 }, + { 43808, 43814 }, + { 43816, 43822 }, { 43824, 43866 }, { 43868, 43881 }, { 43888, 44002 }, - { 44032, 55203 }, - { 55216, 55238 }, - { 55243, 55291 }, - { 63744, 64109 }, - { 64112, 64217 }, + { 44032, 55203 }, + { 55216, 55238 }, + { 55243, 55291 }, + { 63744, 64109 }, + { 64112, 64217 }, { 64256, 64262 }, { 64275, 64279 }, - { 64285, 64285 }, - { 64287, 64296 }, - { 64298, 64310 }, - { 64312, 64316 }, - { 64318, 64318 }, - { 64320, 64321 }, - { 64323, 64324 }, - { 64326, 64433 }, - { 64467, 64829 }, - { 64848, 64911 }, - { 64914, 64967 }, - { 65008, 65019 }, - { 65136, 65140 }, - { 65142, 65276 }, + { 64285, 64285 }, + { 64287, 64296 }, + { 64298, 64310 }, + { 64312, 64316 }, + { 64318, 64318 }, + { 64320, 64321 }, + { 64323, 64324 }, + { 64326, 64433 }, + { 64467, 64829 }, + { 64848, 64911 }, + { 64914, 64967 }, + { 65008, 65019 }, + { 65136, 65140 }, + { 65142, 65276 }, { 65313, 65338 }, { 65345, 65370 }, { 65382, 65470 }, - { 65474, 65479 }, - { 65482, 65487 }, - { 65490, 65495 }, - { 65498, 65500 }, -}; + { 65474, 65479 }, + { 65482, 65487 }, + { 65490, 65495 }, + { 65498, 65500 }, +}; static const URange32 L_range32[] = { - { 65536, 65547 }, - { 65549, 65574 }, - { 65576, 65594 }, - { 65596, 65597 }, - { 65599, 65613 }, - { 65616, 65629 }, - { 65664, 65786 }, - { 66176, 66204 }, - { 66208, 66256 }, - { 66304, 66335 }, + { 65536, 65547 }, + { 65549, 65574 }, + { 65576, 65594 }, + { 65596, 65597 }, + { 65599, 65613 }, + { 65616, 65629 }, + { 65664, 65786 }, + { 66176, 66204 }, + { 66208, 66256 }, + { 66304, 66335 }, { 66349, 66368 }, - { 66370, 66377 }, - { 66384, 66421 }, - { 66432, 66461 }, - { 66464, 66499 }, - { 66504, 66511 }, + { 66370, 66377 }, + { 66384, 66421 }, + { 66432, 66461 }, + { 66464, 66499 }, + { 66504, 66511 }, { 66560, 66717 }, { 66736, 66771 }, { 66776, 66811 }, - { 66816, 66855 }, - { 66864, 66915 }, + { 66816, 66855 }, + { 66864, 66915 }, { 66928, 66938 }, { 66940, 66954 }, { 66956, 66962 }, @@ -488,39 +488,39 @@ static const URange32 L_range32[] = { { 66979, 66993 }, { 66995, 67001 }, { 67003, 67004 }, - { 67072, 67382 }, - { 67392, 67413 }, - { 67424, 67431 }, + { 67072, 67382 }, + { 67392, 67413 }, + { 67424, 67431 }, { 67456, 67461 }, { 67463, 67504 }, { 67506, 67514 }, - { 67584, 67589 }, - { 67592, 67592 }, - { 67594, 67637 }, - { 67639, 67640 }, - { 67644, 67644 }, - { 67647, 67669 }, - { 67680, 67702 }, - { 67712, 67742 }, - { 67808, 67826 }, - { 67828, 67829 }, - { 67840, 67861 }, - { 67872, 67897 }, - { 67968, 68023 }, - { 68030, 68031 }, - { 68096, 68096 }, - { 68112, 68115 }, - { 68117, 68119 }, + { 67584, 67589 }, + { 67592, 67592 }, + { 67594, 67637 }, + { 67639, 67640 }, + { 67644, 67644 }, + { 67647, 67669 }, + { 67680, 67702 }, + { 67712, 67742 }, + { 67808, 67826 }, + { 67828, 67829 }, + { 67840, 67861 }, + { 67872, 67897 }, + { 67968, 68023 }, + { 68030, 68031 }, + { 68096, 68096 }, + { 68112, 68115 }, + { 68117, 68119 }, { 68121, 68149 }, - { 68192, 68220 }, - { 68224, 68252 }, - { 68288, 68295 }, - { 68297, 68324 }, - { 68352, 68405 }, - { 68416, 68437 }, - { 68448, 68466 }, - { 68480, 68497 }, - { 68608, 68680 }, + { 68192, 68220 }, + { 68224, 68252 }, + { 68288, 68295 }, + { 68297, 68324 }, + { 68352, 68405 }, + { 68416, 68437 }, + { 68448, 68466 }, + { 68480, 68497 }, + { 68608, 68680 }, { 68736, 68786 }, { 68800, 68850 }, { 68864, 68899 }, @@ -532,48 +532,48 @@ static const URange32 L_range32[] = { { 69488, 69505 }, { 69552, 69572 }, { 69600, 69622 }, - { 69635, 69687 }, + { 69635, 69687 }, { 69745, 69746 }, { 69749, 69749 }, - { 69763, 69807 }, - { 69840, 69864 }, - { 69891, 69926 }, + { 69763, 69807 }, + { 69840, 69864 }, + { 69891, 69926 }, { 69956, 69956 }, { 69959, 69959 }, - { 69968, 70002 }, - { 70006, 70006 }, - { 70019, 70066 }, - { 70081, 70084 }, - { 70106, 70106 }, - { 70108, 70108 }, - { 70144, 70161 }, - { 70163, 70187 }, - { 70272, 70278 }, - { 70280, 70280 }, - { 70282, 70285 }, - { 70287, 70301 }, - { 70303, 70312 }, - { 70320, 70366 }, - { 70405, 70412 }, - { 70415, 70416 }, - { 70419, 70440 }, - { 70442, 70448 }, - { 70450, 70451 }, - { 70453, 70457 }, - { 70461, 70461 }, - { 70480, 70480 }, - { 70493, 70497 }, + { 69968, 70002 }, + { 70006, 70006 }, + { 70019, 70066 }, + { 70081, 70084 }, + { 70106, 70106 }, + { 70108, 70108 }, + { 70144, 70161 }, + { 70163, 70187 }, + { 70272, 70278 }, + { 70280, 70280 }, + { 70282, 70285 }, + { 70287, 70301 }, + { 70303, 70312 }, + { 70320, 70366 }, + { 70405, 70412 }, + { 70415, 70416 }, + { 70419, 70440 }, + { 70442, 70448 }, + { 70450, 70451 }, + { 70453, 70457 }, + { 70461, 70461 }, + { 70480, 70480 }, + { 70493, 70497 }, { 70656, 70708 }, { 70727, 70730 }, { 70751, 70753 }, - { 70784, 70831 }, - { 70852, 70853 }, - { 70855, 70855 }, - { 71040, 71086 }, - { 71128, 71131 }, - { 71168, 71215 }, - { 71236, 71236 }, - { 71296, 71338 }, + { 70784, 70831 }, + { 70852, 70853 }, + { 70855, 70855 }, + { 71040, 71086 }, + { 71128, 71131 }, + { 71168, 71215 }, + { 71236, 71236 }, + { 71296, 71338 }, { 71352, 71352 }, { 71424, 71450 }, { 71488, 71494 }, @@ -611,22 +611,22 @@ static const URange32 L_range32[] = { { 73112, 73112 }, { 73440, 73458 }, { 73648, 73648 }, - { 73728, 74649 }, - { 74880, 75075 }, + { 73728, 74649 }, + { 74880, 75075 }, { 77712, 77808 }, - { 77824, 78894 }, - { 82944, 83526 }, - { 92160, 92728 }, - { 92736, 92766 }, + { 77824, 78894 }, + { 82944, 83526 }, + { 92160, 92728 }, + { 92736, 92766 }, { 92784, 92862 }, - { 92880, 92909 }, - { 92928, 92975 }, + { 92880, 92909 }, + { 92928, 92975 }, { 92992, 92995 }, - { 93027, 93047 }, - { 93053, 93071 }, + { 93027, 93047 }, + { 93053, 93071 }, { 93760, 93823 }, { 93952, 94026 }, - { 94032, 94032 }, + { 94032, 94032 }, { 94099, 94111 }, { 94176, 94177 }, { 94179, 94179 }, @@ -640,10 +640,10 @@ static const URange32 L_range32[] = { { 110928, 110930 }, { 110948, 110951 }, { 110960, 111355 }, - { 113664, 113770 }, - { 113776, 113788 }, - { 113792, 113800 }, - { 113808, 113817 }, + { 113664, 113770 }, + { 113776, 113788 }, + { 113792, 113800 }, + { 113808, 113817 }, { 119808, 119892 }, { 119894, 119964 }, { 119966, 119967 }, @@ -684,647 +684,647 @@ static const URange32 L_range32[] = { { 124904, 124907 }, { 124909, 124910 }, { 124912, 124926 }, - { 124928, 125124 }, + { 124928, 125124 }, { 125184, 125251 }, { 125259, 125259 }, - { 126464, 126467 }, - { 126469, 126495 }, - { 126497, 126498 }, - { 126500, 126500 }, - { 126503, 126503 }, - { 126505, 126514 }, - { 126516, 126519 }, - { 126521, 126521 }, - { 126523, 126523 }, - { 126530, 126530 }, - { 126535, 126535 }, - { 126537, 126537 }, - { 126539, 126539 }, - { 126541, 126543 }, - { 126545, 126546 }, - { 126548, 126548 }, - { 126551, 126551 }, - { 126553, 126553 }, - { 126555, 126555 }, - { 126557, 126557 }, - { 126559, 126559 }, - { 126561, 126562 }, - { 126564, 126564 }, - { 126567, 126570 }, - { 126572, 126578 }, - { 126580, 126583 }, - { 126585, 126588 }, - { 126590, 126590 }, - { 126592, 126601 }, - { 126603, 126619 }, - { 126625, 126627 }, - { 126629, 126633 }, - { 126635, 126651 }, + { 126464, 126467 }, + { 126469, 126495 }, + { 126497, 126498 }, + { 126500, 126500 }, + { 126503, 126503 }, + { 126505, 126514 }, + { 126516, 126519 }, + { 126521, 126521 }, + { 126523, 126523 }, + { 126530, 126530 }, + { 126535, 126535 }, + { 126537, 126537 }, + { 126539, 126539 }, + { 126541, 126543 }, + { 126545, 126546 }, + { 126548, 126548 }, + { 126551, 126551 }, + { 126553, 126553 }, + { 126555, 126555 }, + { 126557, 126557 }, + { 126559, 126559 }, + { 126561, 126562 }, + { 126564, 126564 }, + { 126567, 126570 }, + { 126572, 126578 }, + { 126580, 126583 }, + { 126585, 126588 }, + { 126590, 126590 }, + { 126592, 126601 }, + { 126603, 126619 }, + { 126625, 126627 }, + { 126629, 126633 }, + { 126635, 126651 }, { 131072, 173791 }, { 173824, 177976 }, - { 177984, 178205 }, - { 178208, 183969 }, + { 177984, 178205 }, + { 178208, 183969 }, { 183984, 191456 }, - { 194560, 195101 }, + { 194560, 195101 }, { 196608, 201546 }, -}; -static const URange16 Ll_range16[] = { - { 97, 122 }, - { 181, 181 }, - { 223, 246 }, - { 248, 255 }, - { 257, 257 }, - { 259, 259 }, - { 261, 261 }, - { 263, 263 }, - { 265, 265 }, - { 267, 267 }, - { 269, 269 }, - { 271, 271 }, - { 273, 273 }, - { 275, 275 }, - { 277, 277 }, - { 279, 279 }, - { 281, 281 }, - { 283, 283 }, - { 285, 285 }, - { 287, 287 }, - { 289, 289 }, - { 291, 291 }, - { 293, 293 }, - { 295, 295 }, - { 297, 297 }, - { 299, 299 }, - { 301, 301 }, - { 303, 303 }, - { 305, 305 }, - { 307, 307 }, - { 309, 309 }, - { 311, 312 }, - { 314, 314 }, - { 316, 316 }, - { 318, 318 }, - { 320, 320 }, - { 322, 322 }, - { 324, 324 }, - { 326, 326 }, - { 328, 329 }, - { 331, 331 }, - { 333, 333 }, - { 335, 335 }, - { 337, 337 }, - { 339, 339 }, - { 341, 341 }, - { 343, 343 }, - { 345, 345 }, - { 347, 347 }, - { 349, 349 }, - { 351, 351 }, - { 353, 353 }, - { 355, 355 }, - { 357, 357 }, - { 359, 359 }, - { 361, 361 }, - { 363, 363 }, - { 365, 365 }, - { 367, 367 }, - { 369, 369 }, - { 371, 371 }, - { 373, 373 }, - { 375, 375 }, - { 378, 378 }, - { 380, 380 }, - { 382, 384 }, - { 387, 387 }, - { 389, 389 }, - { 392, 392 }, - { 396, 397 }, - { 402, 402 }, - { 405, 405 }, - { 409, 411 }, - { 414, 414 }, - { 417, 417 }, - { 419, 419 }, - { 421, 421 }, - { 424, 424 }, - { 426, 427 }, - { 429, 429 }, - { 432, 432 }, - { 436, 436 }, - { 438, 438 }, - { 441, 442 }, - { 445, 447 }, - { 454, 454 }, - { 457, 457 }, - { 460, 460 }, - { 462, 462 }, - { 464, 464 }, - { 466, 466 }, - { 468, 468 }, - { 470, 470 }, - { 472, 472 }, - { 474, 474 }, - { 476, 477 }, - { 479, 479 }, - { 481, 481 }, - { 483, 483 }, - { 485, 485 }, - { 487, 487 }, - { 489, 489 }, - { 491, 491 }, - { 493, 493 }, - { 495, 496 }, - { 499, 499 }, - { 501, 501 }, - { 505, 505 }, - { 507, 507 }, - { 509, 509 }, - { 511, 511 }, - { 513, 513 }, - { 515, 515 }, - { 517, 517 }, - { 519, 519 }, - { 521, 521 }, - { 523, 523 }, - { 525, 525 }, - { 527, 527 }, - { 529, 529 }, - { 531, 531 }, - { 533, 533 }, - { 535, 535 }, - { 537, 537 }, - { 539, 539 }, - { 541, 541 }, - { 543, 543 }, - { 545, 545 }, - { 547, 547 }, - { 549, 549 }, - { 551, 551 }, - { 553, 553 }, - { 555, 555 }, - { 557, 557 }, - { 559, 559 }, - { 561, 561 }, - { 563, 569 }, - { 572, 572 }, - { 575, 576 }, - { 578, 578 }, - { 583, 583 }, - { 585, 585 }, - { 587, 587 }, - { 589, 589 }, - { 591, 659 }, - { 661, 687 }, - { 881, 881 }, - { 883, 883 }, - { 887, 887 }, - { 891, 893 }, - { 912, 912 }, - { 940, 974 }, - { 976, 977 }, - { 981, 983 }, - { 985, 985 }, - { 987, 987 }, - { 989, 989 }, - { 991, 991 }, - { 993, 993 }, - { 995, 995 }, - { 997, 997 }, - { 999, 999 }, - { 1001, 1001 }, - { 1003, 1003 }, - { 1005, 1005 }, - { 1007, 1011 }, - { 1013, 1013 }, - { 1016, 1016 }, - { 1019, 1020 }, - { 1072, 1119 }, - { 1121, 1121 }, - { 1123, 1123 }, - { 1125, 1125 }, - { 1127, 1127 }, - { 1129, 1129 }, - { 1131, 1131 }, - { 1133, 1133 }, - { 1135, 1135 }, - { 1137, 1137 }, - { 1139, 1139 }, - { 1141, 1141 }, - { 1143, 1143 }, - { 1145, 1145 }, - { 1147, 1147 }, - { 1149, 1149 }, - { 1151, 1151 }, - { 1153, 1153 }, - { 1163, 1163 }, - { 1165, 1165 }, - { 1167, 1167 }, - { 1169, 1169 }, - { 1171, 1171 }, - { 1173, 1173 }, - { 1175, 1175 }, - { 1177, 1177 }, - { 1179, 1179 }, - { 1181, 1181 }, - { 1183, 1183 }, - { 1185, 1185 }, - { 1187, 1187 }, - { 1189, 1189 }, - { 1191, 1191 }, - { 1193, 1193 }, - { 1195, 1195 }, - { 1197, 1197 }, - { 1199, 1199 }, - { 1201, 1201 }, - { 1203, 1203 }, - { 1205, 1205 }, - { 1207, 1207 }, - { 1209, 1209 }, - { 1211, 1211 }, - { 1213, 1213 }, - { 1215, 1215 }, - { 1218, 1218 }, - { 1220, 1220 }, - { 1222, 1222 }, - { 1224, 1224 }, - { 1226, 1226 }, - { 1228, 1228 }, - { 1230, 1231 }, - { 1233, 1233 }, - { 1235, 1235 }, - { 1237, 1237 }, - { 1239, 1239 }, - { 1241, 1241 }, - { 1243, 1243 }, - { 1245, 1245 }, - { 1247, 1247 }, - { 1249, 1249 }, - { 1251, 1251 }, - { 1253, 1253 }, - { 1255, 1255 }, - { 1257, 1257 }, - { 1259, 1259 }, - { 1261, 1261 }, - { 1263, 1263 }, - { 1265, 1265 }, - { 1267, 1267 }, - { 1269, 1269 }, - { 1271, 1271 }, - { 1273, 1273 }, - { 1275, 1275 }, - { 1277, 1277 }, - { 1279, 1279 }, - { 1281, 1281 }, - { 1283, 1283 }, - { 1285, 1285 }, - { 1287, 1287 }, - { 1289, 1289 }, - { 1291, 1291 }, - { 1293, 1293 }, - { 1295, 1295 }, - { 1297, 1297 }, - { 1299, 1299 }, - { 1301, 1301 }, - { 1303, 1303 }, - { 1305, 1305 }, - { 1307, 1307 }, - { 1309, 1309 }, - { 1311, 1311 }, - { 1313, 1313 }, - { 1315, 1315 }, - { 1317, 1317 }, - { 1319, 1319 }, - { 1321, 1321 }, - { 1323, 1323 }, - { 1325, 1325 }, - { 1327, 1327 }, +}; +static const URange16 Ll_range16[] = { + { 97, 122 }, + { 181, 181 }, + { 223, 246 }, + { 248, 255 }, + { 257, 257 }, + { 259, 259 }, + { 261, 261 }, + { 263, 263 }, + { 265, 265 }, + { 267, 267 }, + { 269, 269 }, + { 271, 271 }, + { 273, 273 }, + { 275, 275 }, + { 277, 277 }, + { 279, 279 }, + { 281, 281 }, + { 283, 283 }, + { 285, 285 }, + { 287, 287 }, + { 289, 289 }, + { 291, 291 }, + { 293, 293 }, + { 295, 295 }, + { 297, 297 }, + { 299, 299 }, + { 301, 301 }, + { 303, 303 }, + { 305, 305 }, + { 307, 307 }, + { 309, 309 }, + { 311, 312 }, + { 314, 314 }, + { 316, 316 }, + { 318, 318 }, + { 320, 320 }, + { 322, 322 }, + { 324, 324 }, + { 326, 326 }, + { 328, 329 }, + { 331, 331 }, + { 333, 333 }, + { 335, 335 }, + { 337, 337 }, + { 339, 339 }, + { 341, 341 }, + { 343, 343 }, + { 345, 345 }, + { 347, 347 }, + { 349, 349 }, + { 351, 351 }, + { 353, 353 }, + { 355, 355 }, + { 357, 357 }, + { 359, 359 }, + { 361, 361 }, + { 363, 363 }, + { 365, 365 }, + { 367, 367 }, + { 369, 369 }, + { 371, 371 }, + { 373, 373 }, + { 375, 375 }, + { 378, 378 }, + { 380, 380 }, + { 382, 384 }, + { 387, 387 }, + { 389, 389 }, + { 392, 392 }, + { 396, 397 }, + { 402, 402 }, + { 405, 405 }, + { 409, 411 }, + { 414, 414 }, + { 417, 417 }, + { 419, 419 }, + { 421, 421 }, + { 424, 424 }, + { 426, 427 }, + { 429, 429 }, + { 432, 432 }, + { 436, 436 }, + { 438, 438 }, + { 441, 442 }, + { 445, 447 }, + { 454, 454 }, + { 457, 457 }, + { 460, 460 }, + { 462, 462 }, + { 464, 464 }, + { 466, 466 }, + { 468, 468 }, + { 470, 470 }, + { 472, 472 }, + { 474, 474 }, + { 476, 477 }, + { 479, 479 }, + { 481, 481 }, + { 483, 483 }, + { 485, 485 }, + { 487, 487 }, + { 489, 489 }, + { 491, 491 }, + { 493, 493 }, + { 495, 496 }, + { 499, 499 }, + { 501, 501 }, + { 505, 505 }, + { 507, 507 }, + { 509, 509 }, + { 511, 511 }, + { 513, 513 }, + { 515, 515 }, + { 517, 517 }, + { 519, 519 }, + { 521, 521 }, + { 523, 523 }, + { 525, 525 }, + { 527, 527 }, + { 529, 529 }, + { 531, 531 }, + { 533, 533 }, + { 535, 535 }, + { 537, 537 }, + { 539, 539 }, + { 541, 541 }, + { 543, 543 }, + { 545, 545 }, + { 547, 547 }, + { 549, 549 }, + { 551, 551 }, + { 553, 553 }, + { 555, 555 }, + { 557, 557 }, + { 559, 559 }, + { 561, 561 }, + { 563, 569 }, + { 572, 572 }, + { 575, 576 }, + { 578, 578 }, + { 583, 583 }, + { 585, 585 }, + { 587, 587 }, + { 589, 589 }, + { 591, 659 }, + { 661, 687 }, + { 881, 881 }, + { 883, 883 }, + { 887, 887 }, + { 891, 893 }, + { 912, 912 }, + { 940, 974 }, + { 976, 977 }, + { 981, 983 }, + { 985, 985 }, + { 987, 987 }, + { 989, 989 }, + { 991, 991 }, + { 993, 993 }, + { 995, 995 }, + { 997, 997 }, + { 999, 999 }, + { 1001, 1001 }, + { 1003, 1003 }, + { 1005, 1005 }, + { 1007, 1011 }, + { 1013, 1013 }, + { 1016, 1016 }, + { 1019, 1020 }, + { 1072, 1119 }, + { 1121, 1121 }, + { 1123, 1123 }, + { 1125, 1125 }, + { 1127, 1127 }, + { 1129, 1129 }, + { 1131, 1131 }, + { 1133, 1133 }, + { 1135, 1135 }, + { 1137, 1137 }, + { 1139, 1139 }, + { 1141, 1141 }, + { 1143, 1143 }, + { 1145, 1145 }, + { 1147, 1147 }, + { 1149, 1149 }, + { 1151, 1151 }, + { 1153, 1153 }, + { 1163, 1163 }, + { 1165, 1165 }, + { 1167, 1167 }, + { 1169, 1169 }, + { 1171, 1171 }, + { 1173, 1173 }, + { 1175, 1175 }, + { 1177, 1177 }, + { 1179, 1179 }, + { 1181, 1181 }, + { 1183, 1183 }, + { 1185, 1185 }, + { 1187, 1187 }, + { 1189, 1189 }, + { 1191, 1191 }, + { 1193, 1193 }, + { 1195, 1195 }, + { 1197, 1197 }, + { 1199, 1199 }, + { 1201, 1201 }, + { 1203, 1203 }, + { 1205, 1205 }, + { 1207, 1207 }, + { 1209, 1209 }, + { 1211, 1211 }, + { 1213, 1213 }, + { 1215, 1215 }, + { 1218, 1218 }, + { 1220, 1220 }, + { 1222, 1222 }, + { 1224, 1224 }, + { 1226, 1226 }, + { 1228, 1228 }, + { 1230, 1231 }, + { 1233, 1233 }, + { 1235, 1235 }, + { 1237, 1237 }, + { 1239, 1239 }, + { 1241, 1241 }, + { 1243, 1243 }, + { 1245, 1245 }, + { 1247, 1247 }, + { 1249, 1249 }, + { 1251, 1251 }, + { 1253, 1253 }, + { 1255, 1255 }, + { 1257, 1257 }, + { 1259, 1259 }, + { 1261, 1261 }, + { 1263, 1263 }, + { 1265, 1265 }, + { 1267, 1267 }, + { 1269, 1269 }, + { 1271, 1271 }, + { 1273, 1273 }, + { 1275, 1275 }, + { 1277, 1277 }, + { 1279, 1279 }, + { 1281, 1281 }, + { 1283, 1283 }, + { 1285, 1285 }, + { 1287, 1287 }, + { 1289, 1289 }, + { 1291, 1291 }, + { 1293, 1293 }, + { 1295, 1295 }, + { 1297, 1297 }, + { 1299, 1299 }, + { 1301, 1301 }, + { 1303, 1303 }, + { 1305, 1305 }, + { 1307, 1307 }, + { 1309, 1309 }, + { 1311, 1311 }, + { 1313, 1313 }, + { 1315, 1315 }, + { 1317, 1317 }, + { 1319, 1319 }, + { 1321, 1321 }, + { 1323, 1323 }, + { 1325, 1325 }, + { 1327, 1327 }, { 1376, 1416 }, { 4304, 4346 }, { 4349, 4351 }, - { 5112, 5117 }, + { 5112, 5117 }, { 7296, 7304 }, - { 7424, 7467 }, - { 7531, 7543 }, - { 7545, 7578 }, - { 7681, 7681 }, - { 7683, 7683 }, - { 7685, 7685 }, - { 7687, 7687 }, - { 7689, 7689 }, - { 7691, 7691 }, - { 7693, 7693 }, - { 7695, 7695 }, - { 7697, 7697 }, - { 7699, 7699 }, - { 7701, 7701 }, - { 7703, 7703 }, - { 7705, 7705 }, - { 7707, 7707 }, - { 7709, 7709 }, - { 7711, 7711 }, - { 7713, 7713 }, - { 7715, 7715 }, - { 7717, 7717 }, - { 7719, 7719 }, - { 7721, 7721 }, - { 7723, 7723 }, - { 7725, 7725 }, - { 7727, 7727 }, - { 7729, 7729 }, - { 7731, 7731 }, - { 7733, 7733 }, - { 7735, 7735 }, - { 7737, 7737 }, - { 7739, 7739 }, - { 7741, 7741 }, - { 7743, 7743 }, - { 7745, 7745 }, - { 7747, 7747 }, - { 7749, 7749 }, - { 7751, 7751 }, - { 7753, 7753 }, - { 7755, 7755 }, - { 7757, 7757 }, - { 7759, 7759 }, - { 7761, 7761 }, - { 7763, 7763 }, - { 7765, 7765 }, - { 7767, 7767 }, - { 7769, 7769 }, - { 7771, 7771 }, - { 7773, 7773 }, - { 7775, 7775 }, - { 7777, 7777 }, - { 7779, 7779 }, - { 7781, 7781 }, - { 7783, 7783 }, - { 7785, 7785 }, - { 7787, 7787 }, - { 7789, 7789 }, - { 7791, 7791 }, - { 7793, 7793 }, - { 7795, 7795 }, - { 7797, 7797 }, - { 7799, 7799 }, - { 7801, 7801 }, - { 7803, 7803 }, - { 7805, 7805 }, - { 7807, 7807 }, - { 7809, 7809 }, - { 7811, 7811 }, - { 7813, 7813 }, - { 7815, 7815 }, - { 7817, 7817 }, - { 7819, 7819 }, - { 7821, 7821 }, - { 7823, 7823 }, - { 7825, 7825 }, - { 7827, 7827 }, - { 7829, 7837 }, - { 7839, 7839 }, - { 7841, 7841 }, - { 7843, 7843 }, - { 7845, 7845 }, - { 7847, 7847 }, - { 7849, 7849 }, - { 7851, 7851 }, - { 7853, 7853 }, - { 7855, 7855 }, - { 7857, 7857 }, - { 7859, 7859 }, - { 7861, 7861 }, - { 7863, 7863 }, - { 7865, 7865 }, - { 7867, 7867 }, - { 7869, 7869 }, - { 7871, 7871 }, - { 7873, 7873 }, - { 7875, 7875 }, - { 7877, 7877 }, - { 7879, 7879 }, - { 7881, 7881 }, - { 7883, 7883 }, - { 7885, 7885 }, - { 7887, 7887 }, - { 7889, 7889 }, - { 7891, 7891 }, - { 7893, 7893 }, - { 7895, 7895 }, - { 7897, 7897 }, - { 7899, 7899 }, - { 7901, 7901 }, - { 7903, 7903 }, - { 7905, 7905 }, - { 7907, 7907 }, - { 7909, 7909 }, - { 7911, 7911 }, - { 7913, 7913 }, - { 7915, 7915 }, - { 7917, 7917 }, - { 7919, 7919 }, - { 7921, 7921 }, - { 7923, 7923 }, - { 7925, 7925 }, - { 7927, 7927 }, - { 7929, 7929 }, - { 7931, 7931 }, - { 7933, 7933 }, - { 7935, 7943 }, - { 7952, 7957 }, - { 7968, 7975 }, - { 7984, 7991 }, - { 8000, 8005 }, - { 8016, 8023 }, - { 8032, 8039 }, - { 8048, 8061 }, - { 8064, 8071 }, - { 8080, 8087 }, - { 8096, 8103 }, - { 8112, 8116 }, - { 8118, 8119 }, - { 8126, 8126 }, - { 8130, 8132 }, - { 8134, 8135 }, - { 8144, 8147 }, - { 8150, 8151 }, - { 8160, 8167 }, - { 8178, 8180 }, - { 8182, 8183 }, - { 8458, 8458 }, - { 8462, 8463 }, - { 8467, 8467 }, - { 8495, 8495 }, - { 8500, 8500 }, - { 8505, 8505 }, - { 8508, 8509 }, - { 8518, 8521 }, - { 8526, 8526 }, - { 8580, 8580 }, + { 7424, 7467 }, + { 7531, 7543 }, + { 7545, 7578 }, + { 7681, 7681 }, + { 7683, 7683 }, + { 7685, 7685 }, + { 7687, 7687 }, + { 7689, 7689 }, + { 7691, 7691 }, + { 7693, 7693 }, + { 7695, 7695 }, + { 7697, 7697 }, + { 7699, 7699 }, + { 7701, 7701 }, + { 7703, 7703 }, + { 7705, 7705 }, + { 7707, 7707 }, + { 7709, 7709 }, + { 7711, 7711 }, + { 7713, 7713 }, + { 7715, 7715 }, + { 7717, 7717 }, + { 7719, 7719 }, + { 7721, 7721 }, + { 7723, 7723 }, + { 7725, 7725 }, + { 7727, 7727 }, + { 7729, 7729 }, + { 7731, 7731 }, + { 7733, 7733 }, + { 7735, 7735 }, + { 7737, 7737 }, + { 7739, 7739 }, + { 7741, 7741 }, + { 7743, 7743 }, + { 7745, 7745 }, + { 7747, 7747 }, + { 7749, 7749 }, + { 7751, 7751 }, + { 7753, 7753 }, + { 7755, 7755 }, + { 7757, 7757 }, + { 7759, 7759 }, + { 7761, 7761 }, + { 7763, 7763 }, + { 7765, 7765 }, + { 7767, 7767 }, + { 7769, 7769 }, + { 7771, 7771 }, + { 7773, 7773 }, + { 7775, 7775 }, + { 7777, 7777 }, + { 7779, 7779 }, + { 7781, 7781 }, + { 7783, 7783 }, + { 7785, 7785 }, + { 7787, 7787 }, + { 7789, 7789 }, + { 7791, 7791 }, + { 7793, 7793 }, + { 7795, 7795 }, + { 7797, 7797 }, + { 7799, 7799 }, + { 7801, 7801 }, + { 7803, 7803 }, + { 7805, 7805 }, + { 7807, 7807 }, + { 7809, 7809 }, + { 7811, 7811 }, + { 7813, 7813 }, + { 7815, 7815 }, + { 7817, 7817 }, + { 7819, 7819 }, + { 7821, 7821 }, + { 7823, 7823 }, + { 7825, 7825 }, + { 7827, 7827 }, + { 7829, 7837 }, + { 7839, 7839 }, + { 7841, 7841 }, + { 7843, 7843 }, + { 7845, 7845 }, + { 7847, 7847 }, + { 7849, 7849 }, + { 7851, 7851 }, + { 7853, 7853 }, + { 7855, 7855 }, + { 7857, 7857 }, + { 7859, 7859 }, + { 7861, 7861 }, + { 7863, 7863 }, + { 7865, 7865 }, + { 7867, 7867 }, + { 7869, 7869 }, + { 7871, 7871 }, + { 7873, 7873 }, + { 7875, 7875 }, + { 7877, 7877 }, + { 7879, 7879 }, + { 7881, 7881 }, + { 7883, 7883 }, + { 7885, 7885 }, + { 7887, 7887 }, + { 7889, 7889 }, + { 7891, 7891 }, + { 7893, 7893 }, + { 7895, 7895 }, + { 7897, 7897 }, + { 7899, 7899 }, + { 7901, 7901 }, + { 7903, 7903 }, + { 7905, 7905 }, + { 7907, 7907 }, + { 7909, 7909 }, + { 7911, 7911 }, + { 7913, 7913 }, + { 7915, 7915 }, + { 7917, 7917 }, + { 7919, 7919 }, + { 7921, 7921 }, + { 7923, 7923 }, + { 7925, 7925 }, + { 7927, 7927 }, + { 7929, 7929 }, + { 7931, 7931 }, + { 7933, 7933 }, + { 7935, 7943 }, + { 7952, 7957 }, + { 7968, 7975 }, + { 7984, 7991 }, + { 8000, 8005 }, + { 8016, 8023 }, + { 8032, 8039 }, + { 8048, 8061 }, + { 8064, 8071 }, + { 8080, 8087 }, + { 8096, 8103 }, + { 8112, 8116 }, + { 8118, 8119 }, + { 8126, 8126 }, + { 8130, 8132 }, + { 8134, 8135 }, + { 8144, 8147 }, + { 8150, 8151 }, + { 8160, 8167 }, + { 8178, 8180 }, + { 8182, 8183 }, + { 8458, 8458 }, + { 8462, 8463 }, + { 8467, 8467 }, + { 8495, 8495 }, + { 8500, 8500 }, + { 8505, 8505 }, + { 8508, 8509 }, + { 8518, 8521 }, + { 8526, 8526 }, + { 8580, 8580 }, { 11312, 11359 }, - { 11361, 11361 }, - { 11365, 11366 }, - { 11368, 11368 }, - { 11370, 11370 }, - { 11372, 11372 }, - { 11377, 11377 }, - { 11379, 11380 }, - { 11382, 11387 }, - { 11393, 11393 }, - { 11395, 11395 }, - { 11397, 11397 }, - { 11399, 11399 }, - { 11401, 11401 }, - { 11403, 11403 }, - { 11405, 11405 }, - { 11407, 11407 }, - { 11409, 11409 }, - { 11411, 11411 }, - { 11413, 11413 }, - { 11415, 11415 }, - { 11417, 11417 }, - { 11419, 11419 }, - { 11421, 11421 }, - { 11423, 11423 }, - { 11425, 11425 }, - { 11427, 11427 }, - { 11429, 11429 }, - { 11431, 11431 }, - { 11433, 11433 }, - { 11435, 11435 }, - { 11437, 11437 }, - { 11439, 11439 }, - { 11441, 11441 }, - { 11443, 11443 }, - { 11445, 11445 }, - { 11447, 11447 }, - { 11449, 11449 }, - { 11451, 11451 }, - { 11453, 11453 }, - { 11455, 11455 }, - { 11457, 11457 }, - { 11459, 11459 }, - { 11461, 11461 }, - { 11463, 11463 }, - { 11465, 11465 }, - { 11467, 11467 }, - { 11469, 11469 }, - { 11471, 11471 }, - { 11473, 11473 }, - { 11475, 11475 }, - { 11477, 11477 }, - { 11479, 11479 }, - { 11481, 11481 }, - { 11483, 11483 }, - { 11485, 11485 }, - { 11487, 11487 }, - { 11489, 11489 }, - { 11491, 11492 }, - { 11500, 11500 }, - { 11502, 11502 }, - { 11507, 11507 }, - { 11520, 11557 }, - { 11559, 11559 }, - { 11565, 11565 }, - { 42561, 42561 }, - { 42563, 42563 }, - { 42565, 42565 }, - { 42567, 42567 }, - { 42569, 42569 }, - { 42571, 42571 }, - { 42573, 42573 }, - { 42575, 42575 }, - { 42577, 42577 }, - { 42579, 42579 }, - { 42581, 42581 }, - { 42583, 42583 }, - { 42585, 42585 }, - { 42587, 42587 }, - { 42589, 42589 }, - { 42591, 42591 }, - { 42593, 42593 }, - { 42595, 42595 }, - { 42597, 42597 }, - { 42599, 42599 }, - { 42601, 42601 }, - { 42603, 42603 }, - { 42605, 42605 }, - { 42625, 42625 }, - { 42627, 42627 }, - { 42629, 42629 }, - { 42631, 42631 }, - { 42633, 42633 }, - { 42635, 42635 }, - { 42637, 42637 }, - { 42639, 42639 }, - { 42641, 42641 }, - { 42643, 42643 }, - { 42645, 42645 }, - { 42647, 42647 }, - { 42649, 42649 }, - { 42651, 42651 }, - { 42787, 42787 }, - { 42789, 42789 }, - { 42791, 42791 }, - { 42793, 42793 }, - { 42795, 42795 }, - { 42797, 42797 }, - { 42799, 42801 }, - { 42803, 42803 }, - { 42805, 42805 }, - { 42807, 42807 }, - { 42809, 42809 }, - { 42811, 42811 }, - { 42813, 42813 }, - { 42815, 42815 }, - { 42817, 42817 }, - { 42819, 42819 }, - { 42821, 42821 }, - { 42823, 42823 }, - { 42825, 42825 }, - { 42827, 42827 }, - { 42829, 42829 }, - { 42831, 42831 }, - { 42833, 42833 }, - { 42835, 42835 }, - { 42837, 42837 }, - { 42839, 42839 }, - { 42841, 42841 }, - { 42843, 42843 }, - { 42845, 42845 }, - { 42847, 42847 }, - { 42849, 42849 }, - { 42851, 42851 }, - { 42853, 42853 }, - { 42855, 42855 }, - { 42857, 42857 }, - { 42859, 42859 }, - { 42861, 42861 }, - { 42863, 42863 }, - { 42865, 42872 }, - { 42874, 42874 }, - { 42876, 42876 }, - { 42879, 42879 }, - { 42881, 42881 }, - { 42883, 42883 }, - { 42885, 42885 }, - { 42887, 42887 }, - { 42892, 42892 }, - { 42894, 42894 }, - { 42897, 42897 }, - { 42899, 42901 }, - { 42903, 42903 }, - { 42905, 42905 }, - { 42907, 42907 }, - { 42909, 42909 }, - { 42911, 42911 }, - { 42913, 42913 }, - { 42915, 42915 }, - { 42917, 42917 }, - { 42919, 42919 }, - { 42921, 42921 }, + { 11361, 11361 }, + { 11365, 11366 }, + { 11368, 11368 }, + { 11370, 11370 }, + { 11372, 11372 }, + { 11377, 11377 }, + { 11379, 11380 }, + { 11382, 11387 }, + { 11393, 11393 }, + { 11395, 11395 }, + { 11397, 11397 }, + { 11399, 11399 }, + { 11401, 11401 }, + { 11403, 11403 }, + { 11405, 11405 }, + { 11407, 11407 }, + { 11409, 11409 }, + { 11411, 11411 }, + { 11413, 11413 }, + { 11415, 11415 }, + { 11417, 11417 }, + { 11419, 11419 }, + { 11421, 11421 }, + { 11423, 11423 }, + { 11425, 11425 }, + { 11427, 11427 }, + { 11429, 11429 }, + { 11431, 11431 }, + { 11433, 11433 }, + { 11435, 11435 }, + { 11437, 11437 }, + { 11439, 11439 }, + { 11441, 11441 }, + { 11443, 11443 }, + { 11445, 11445 }, + { 11447, 11447 }, + { 11449, 11449 }, + { 11451, 11451 }, + { 11453, 11453 }, + { 11455, 11455 }, + { 11457, 11457 }, + { 11459, 11459 }, + { 11461, 11461 }, + { 11463, 11463 }, + { 11465, 11465 }, + { 11467, 11467 }, + { 11469, 11469 }, + { 11471, 11471 }, + { 11473, 11473 }, + { 11475, 11475 }, + { 11477, 11477 }, + { 11479, 11479 }, + { 11481, 11481 }, + { 11483, 11483 }, + { 11485, 11485 }, + { 11487, 11487 }, + { 11489, 11489 }, + { 11491, 11492 }, + { 11500, 11500 }, + { 11502, 11502 }, + { 11507, 11507 }, + { 11520, 11557 }, + { 11559, 11559 }, + { 11565, 11565 }, + { 42561, 42561 }, + { 42563, 42563 }, + { 42565, 42565 }, + { 42567, 42567 }, + { 42569, 42569 }, + { 42571, 42571 }, + { 42573, 42573 }, + { 42575, 42575 }, + { 42577, 42577 }, + { 42579, 42579 }, + { 42581, 42581 }, + { 42583, 42583 }, + { 42585, 42585 }, + { 42587, 42587 }, + { 42589, 42589 }, + { 42591, 42591 }, + { 42593, 42593 }, + { 42595, 42595 }, + { 42597, 42597 }, + { 42599, 42599 }, + { 42601, 42601 }, + { 42603, 42603 }, + { 42605, 42605 }, + { 42625, 42625 }, + { 42627, 42627 }, + { 42629, 42629 }, + { 42631, 42631 }, + { 42633, 42633 }, + { 42635, 42635 }, + { 42637, 42637 }, + { 42639, 42639 }, + { 42641, 42641 }, + { 42643, 42643 }, + { 42645, 42645 }, + { 42647, 42647 }, + { 42649, 42649 }, + { 42651, 42651 }, + { 42787, 42787 }, + { 42789, 42789 }, + { 42791, 42791 }, + { 42793, 42793 }, + { 42795, 42795 }, + { 42797, 42797 }, + { 42799, 42801 }, + { 42803, 42803 }, + { 42805, 42805 }, + { 42807, 42807 }, + { 42809, 42809 }, + { 42811, 42811 }, + { 42813, 42813 }, + { 42815, 42815 }, + { 42817, 42817 }, + { 42819, 42819 }, + { 42821, 42821 }, + { 42823, 42823 }, + { 42825, 42825 }, + { 42827, 42827 }, + { 42829, 42829 }, + { 42831, 42831 }, + { 42833, 42833 }, + { 42835, 42835 }, + { 42837, 42837 }, + { 42839, 42839 }, + { 42841, 42841 }, + { 42843, 42843 }, + { 42845, 42845 }, + { 42847, 42847 }, + { 42849, 42849 }, + { 42851, 42851 }, + { 42853, 42853 }, + { 42855, 42855 }, + { 42857, 42857 }, + { 42859, 42859 }, + { 42861, 42861 }, + { 42863, 42863 }, + { 42865, 42872 }, + { 42874, 42874 }, + { 42876, 42876 }, + { 42879, 42879 }, + { 42881, 42881 }, + { 42883, 42883 }, + { 42885, 42885 }, + { 42887, 42887 }, + { 42892, 42892 }, + { 42894, 42894 }, + { 42897, 42897 }, + { 42899, 42901 }, + { 42903, 42903 }, + { 42905, 42905 }, + { 42907, 42907 }, + { 42909, 42909 }, + { 42911, 42911 }, + { 42913, 42913 }, + { 42915, 42915 }, + { 42917, 42917 }, + { 42919, 42919 }, + { 42921, 42921 }, { 42927, 42927 }, - { 42933, 42933 }, - { 42935, 42935 }, + { 42933, 42933 }, + { 42935, 42935 }, { 42937, 42937 }, { 42939, 42939 }, { 42941, 42941 }, @@ -1339,121 +1339,121 @@ static const URange16 Ll_range16[] = { { 42967, 42967 }, { 42969, 42969 }, { 42998, 42998 }, - { 43002, 43002 }, - { 43824, 43866 }, + { 43002, 43002 }, + { 43824, 43866 }, { 43872, 43880 }, - { 43888, 43967 }, - { 64256, 64262 }, - { 64275, 64279 }, - { 65345, 65370 }, -}; -static const URange32 Ll_range32[] = { - { 66600, 66639 }, + { 43888, 43967 }, + { 64256, 64262 }, + { 64275, 64279 }, + { 65345, 65370 }, +}; +static const URange32 Ll_range32[] = { + { 66600, 66639 }, { 66776, 66811 }, { 66967, 66977 }, { 66979, 66993 }, { 66995, 67001 }, { 67003, 67004 }, - { 68800, 68850 }, - { 71872, 71903 }, + { 68800, 68850 }, + { 71872, 71903 }, { 93792, 93823 }, - { 119834, 119859 }, - { 119886, 119892 }, - { 119894, 119911 }, - { 119938, 119963 }, - { 119990, 119993 }, - { 119995, 119995 }, - { 119997, 120003 }, - { 120005, 120015 }, - { 120042, 120067 }, - { 120094, 120119 }, - { 120146, 120171 }, - { 120198, 120223 }, - { 120250, 120275 }, - { 120302, 120327 }, - { 120354, 120379 }, - { 120406, 120431 }, - { 120458, 120485 }, - { 120514, 120538 }, - { 120540, 120545 }, - { 120572, 120596 }, - { 120598, 120603 }, - { 120630, 120654 }, - { 120656, 120661 }, - { 120688, 120712 }, - { 120714, 120719 }, - { 120746, 120770 }, - { 120772, 120777 }, - { 120779, 120779 }, + { 119834, 119859 }, + { 119886, 119892 }, + { 119894, 119911 }, + { 119938, 119963 }, + { 119990, 119993 }, + { 119995, 119995 }, + { 119997, 120003 }, + { 120005, 120015 }, + { 120042, 120067 }, + { 120094, 120119 }, + { 120146, 120171 }, + { 120198, 120223 }, + { 120250, 120275 }, + { 120302, 120327 }, + { 120354, 120379 }, + { 120406, 120431 }, + { 120458, 120485 }, + { 120514, 120538 }, + { 120540, 120545 }, + { 120572, 120596 }, + { 120598, 120603 }, + { 120630, 120654 }, + { 120656, 120661 }, + { 120688, 120712 }, + { 120714, 120719 }, + { 120746, 120770 }, + { 120772, 120777 }, + { 120779, 120779 }, { 122624, 122633 }, { 122635, 122654 }, { 125218, 125251 }, -}; -static const URange16 Lm_range16[] = { - { 688, 705 }, - { 710, 721 }, - { 736, 740 }, - { 748, 748 }, - { 750, 750 }, - { 884, 884 }, - { 890, 890 }, - { 1369, 1369 }, - { 1600, 1600 }, - { 1765, 1766 }, - { 2036, 2037 }, - { 2042, 2042 }, - { 2074, 2074 }, - { 2084, 2084 }, - { 2088, 2088 }, +}; +static const URange16 Lm_range16[] = { + { 688, 705 }, + { 710, 721 }, + { 736, 740 }, + { 748, 748 }, + { 750, 750 }, + { 884, 884 }, + { 890, 890 }, + { 1369, 1369 }, + { 1600, 1600 }, + { 1765, 1766 }, + { 2036, 2037 }, + { 2042, 2042 }, + { 2074, 2074 }, + { 2084, 2084 }, + { 2088, 2088 }, { 2249, 2249 }, - { 2417, 2417 }, - { 3654, 3654 }, - { 3782, 3782 }, - { 4348, 4348 }, - { 6103, 6103 }, - { 6211, 6211 }, - { 6823, 6823 }, - { 7288, 7293 }, - { 7468, 7530 }, - { 7544, 7544 }, - { 7579, 7615 }, - { 8305, 8305 }, - { 8319, 8319 }, - { 8336, 8348 }, - { 11388, 11389 }, - { 11631, 11631 }, - { 11823, 11823 }, - { 12293, 12293 }, - { 12337, 12341 }, - { 12347, 12347 }, - { 12445, 12446 }, - { 12540, 12542 }, - { 40981, 40981 }, - { 42232, 42237 }, - { 42508, 42508 }, - { 42623, 42623 }, - { 42652, 42653 }, - { 42775, 42783 }, - { 42864, 42864 }, - { 42888, 42888 }, + { 2417, 2417 }, + { 3654, 3654 }, + { 3782, 3782 }, + { 4348, 4348 }, + { 6103, 6103 }, + { 6211, 6211 }, + { 6823, 6823 }, + { 7288, 7293 }, + { 7468, 7530 }, + { 7544, 7544 }, + { 7579, 7615 }, + { 8305, 8305 }, + { 8319, 8319 }, + { 8336, 8348 }, + { 11388, 11389 }, + { 11631, 11631 }, + { 11823, 11823 }, + { 12293, 12293 }, + { 12337, 12341 }, + { 12347, 12347 }, + { 12445, 12446 }, + { 12540, 12542 }, + { 40981, 40981 }, + { 42232, 42237 }, + { 42508, 42508 }, + { 42623, 42623 }, + { 42652, 42653 }, + { 42775, 42783 }, + { 42864, 42864 }, + { 42888, 42888 }, { 42994, 42996 }, - { 43000, 43001 }, - { 43471, 43471 }, - { 43494, 43494 }, - { 43632, 43632 }, - { 43741, 43741 }, - { 43763, 43764 }, - { 43868, 43871 }, + { 43000, 43001 }, + { 43471, 43471 }, + { 43494, 43494 }, + { 43632, 43632 }, + { 43741, 43741 }, + { 43763, 43764 }, + { 43868, 43871 }, { 43881, 43881 }, - { 65392, 65392 }, - { 65438, 65439 }, -}; -static const URange32 Lm_range32[] = { + { 65392, 65392 }, + { 65438, 65439 }, +}; +static const URange32 Lm_range32[] = { { 67456, 67461 }, { 67463, 67504 }, { 67506, 67514 }, - { 92992, 92995 }, - { 94099, 94111 }, + { 92992, 92995 }, + { 94099, 94111 }, { 94176, 94177 }, { 94179, 94179 }, { 110576, 110579 }, @@ -1461,7 +1461,7 @@ static const URange32 Lm_range32[] = { { 110589, 110590 }, { 123191, 123197 }, { 125259, 125259 }, -}; +}; static const URange16 Lo_range16[] = { { 170, 170 }, { 186, 186 }, @@ -1753,7 +1753,7 @@ static const URange16 Lo_range16[] = { { 65482, 65487 }, { 65490, 65495 }, { 65498, 65500 }, -}; +}; static const URange32 Lo_range32[] = { { 65536, 65547 }, { 65549, 65574 }, @@ -1966,612 +1966,612 @@ static const URange32 Lo_range32[] = { { 183984, 191456 }, { 194560, 195101 }, { 196608, 201546 }, -}; -static const URange16 Lt_range16[] = { - { 453, 453 }, - { 456, 456 }, - { 459, 459 }, - { 498, 498 }, - { 8072, 8079 }, - { 8088, 8095 }, - { 8104, 8111 }, - { 8124, 8124 }, - { 8140, 8140 }, - { 8188, 8188 }, -}; -static const URange16 Lu_range16[] = { - { 65, 90 }, - { 192, 214 }, - { 216, 222 }, - { 256, 256 }, - { 258, 258 }, - { 260, 260 }, - { 262, 262 }, - { 264, 264 }, - { 266, 266 }, - { 268, 268 }, - { 270, 270 }, - { 272, 272 }, - { 274, 274 }, - { 276, 276 }, - { 278, 278 }, - { 280, 280 }, - { 282, 282 }, - { 284, 284 }, - { 286, 286 }, - { 288, 288 }, - { 290, 290 }, - { 292, 292 }, - { 294, 294 }, - { 296, 296 }, - { 298, 298 }, - { 300, 300 }, - { 302, 302 }, - { 304, 304 }, - { 306, 306 }, - { 308, 308 }, - { 310, 310 }, - { 313, 313 }, - { 315, 315 }, - { 317, 317 }, - { 319, 319 }, - { 321, 321 }, - { 323, 323 }, - { 325, 325 }, - { 327, 327 }, - { 330, 330 }, - { 332, 332 }, - { 334, 334 }, - { 336, 336 }, - { 338, 338 }, - { 340, 340 }, - { 342, 342 }, - { 344, 344 }, - { 346, 346 }, - { 348, 348 }, - { 350, 350 }, - { 352, 352 }, - { 354, 354 }, - { 356, 356 }, - { 358, 358 }, - { 360, 360 }, - { 362, 362 }, - { 364, 364 }, - { 366, 366 }, - { 368, 368 }, - { 370, 370 }, - { 372, 372 }, - { 374, 374 }, - { 376, 377 }, - { 379, 379 }, - { 381, 381 }, - { 385, 386 }, - { 388, 388 }, - { 390, 391 }, - { 393, 395 }, - { 398, 401 }, - { 403, 404 }, - { 406, 408 }, - { 412, 413 }, - { 415, 416 }, - { 418, 418 }, - { 420, 420 }, - { 422, 423 }, - { 425, 425 }, - { 428, 428 }, - { 430, 431 }, - { 433, 435 }, - { 437, 437 }, - { 439, 440 }, - { 444, 444 }, - { 452, 452 }, - { 455, 455 }, - { 458, 458 }, - { 461, 461 }, - { 463, 463 }, - { 465, 465 }, - { 467, 467 }, - { 469, 469 }, - { 471, 471 }, - { 473, 473 }, - { 475, 475 }, - { 478, 478 }, - { 480, 480 }, - { 482, 482 }, - { 484, 484 }, - { 486, 486 }, - { 488, 488 }, - { 490, 490 }, - { 492, 492 }, - { 494, 494 }, - { 497, 497 }, - { 500, 500 }, - { 502, 504 }, - { 506, 506 }, - { 508, 508 }, - { 510, 510 }, - { 512, 512 }, - { 514, 514 }, - { 516, 516 }, - { 518, 518 }, - { 520, 520 }, - { 522, 522 }, - { 524, 524 }, - { 526, 526 }, - { 528, 528 }, - { 530, 530 }, - { 532, 532 }, - { 534, 534 }, - { 536, 536 }, - { 538, 538 }, - { 540, 540 }, - { 542, 542 }, - { 544, 544 }, - { 546, 546 }, - { 548, 548 }, - { 550, 550 }, - { 552, 552 }, - { 554, 554 }, - { 556, 556 }, - { 558, 558 }, - { 560, 560 }, - { 562, 562 }, - { 570, 571 }, - { 573, 574 }, - { 577, 577 }, - { 579, 582 }, - { 584, 584 }, - { 586, 586 }, - { 588, 588 }, - { 590, 590 }, - { 880, 880 }, - { 882, 882 }, - { 886, 886 }, - { 895, 895 }, - { 902, 902 }, - { 904, 906 }, - { 908, 908 }, - { 910, 911 }, - { 913, 929 }, - { 931, 939 }, - { 975, 975 }, - { 978, 980 }, - { 984, 984 }, - { 986, 986 }, - { 988, 988 }, - { 990, 990 }, - { 992, 992 }, - { 994, 994 }, - { 996, 996 }, - { 998, 998 }, - { 1000, 1000 }, - { 1002, 1002 }, - { 1004, 1004 }, - { 1006, 1006 }, - { 1012, 1012 }, - { 1015, 1015 }, - { 1017, 1018 }, - { 1021, 1071 }, - { 1120, 1120 }, - { 1122, 1122 }, - { 1124, 1124 }, - { 1126, 1126 }, - { 1128, 1128 }, - { 1130, 1130 }, - { 1132, 1132 }, - { 1134, 1134 }, - { 1136, 1136 }, - { 1138, 1138 }, - { 1140, 1140 }, - { 1142, 1142 }, - { 1144, 1144 }, - { 1146, 1146 }, - { 1148, 1148 }, - { 1150, 1150 }, - { 1152, 1152 }, - { 1162, 1162 }, - { 1164, 1164 }, - { 1166, 1166 }, - { 1168, 1168 }, - { 1170, 1170 }, - { 1172, 1172 }, - { 1174, 1174 }, - { 1176, 1176 }, - { 1178, 1178 }, - { 1180, 1180 }, - { 1182, 1182 }, - { 1184, 1184 }, - { 1186, 1186 }, - { 1188, 1188 }, - { 1190, 1190 }, - { 1192, 1192 }, - { 1194, 1194 }, - { 1196, 1196 }, - { 1198, 1198 }, - { 1200, 1200 }, - { 1202, 1202 }, - { 1204, 1204 }, - { 1206, 1206 }, - { 1208, 1208 }, - { 1210, 1210 }, - { 1212, 1212 }, - { 1214, 1214 }, - { 1216, 1217 }, - { 1219, 1219 }, - { 1221, 1221 }, - { 1223, 1223 }, - { 1225, 1225 }, - { 1227, 1227 }, - { 1229, 1229 }, - { 1232, 1232 }, - { 1234, 1234 }, - { 1236, 1236 }, - { 1238, 1238 }, - { 1240, 1240 }, - { 1242, 1242 }, - { 1244, 1244 }, - { 1246, 1246 }, - { 1248, 1248 }, - { 1250, 1250 }, - { 1252, 1252 }, - { 1254, 1254 }, - { 1256, 1256 }, - { 1258, 1258 }, - { 1260, 1260 }, - { 1262, 1262 }, - { 1264, 1264 }, - { 1266, 1266 }, - { 1268, 1268 }, - { 1270, 1270 }, - { 1272, 1272 }, - { 1274, 1274 }, - { 1276, 1276 }, - { 1278, 1278 }, - { 1280, 1280 }, - { 1282, 1282 }, - { 1284, 1284 }, - { 1286, 1286 }, - { 1288, 1288 }, - { 1290, 1290 }, - { 1292, 1292 }, - { 1294, 1294 }, - { 1296, 1296 }, - { 1298, 1298 }, - { 1300, 1300 }, - { 1302, 1302 }, - { 1304, 1304 }, - { 1306, 1306 }, - { 1308, 1308 }, - { 1310, 1310 }, - { 1312, 1312 }, - { 1314, 1314 }, - { 1316, 1316 }, - { 1318, 1318 }, - { 1320, 1320 }, - { 1322, 1322 }, - { 1324, 1324 }, - { 1326, 1326 }, - { 1329, 1366 }, - { 4256, 4293 }, - { 4295, 4295 }, - { 4301, 4301 }, - { 5024, 5109 }, +}; +static const URange16 Lt_range16[] = { + { 453, 453 }, + { 456, 456 }, + { 459, 459 }, + { 498, 498 }, + { 8072, 8079 }, + { 8088, 8095 }, + { 8104, 8111 }, + { 8124, 8124 }, + { 8140, 8140 }, + { 8188, 8188 }, +}; +static const URange16 Lu_range16[] = { + { 65, 90 }, + { 192, 214 }, + { 216, 222 }, + { 256, 256 }, + { 258, 258 }, + { 260, 260 }, + { 262, 262 }, + { 264, 264 }, + { 266, 266 }, + { 268, 268 }, + { 270, 270 }, + { 272, 272 }, + { 274, 274 }, + { 276, 276 }, + { 278, 278 }, + { 280, 280 }, + { 282, 282 }, + { 284, 284 }, + { 286, 286 }, + { 288, 288 }, + { 290, 290 }, + { 292, 292 }, + { 294, 294 }, + { 296, 296 }, + { 298, 298 }, + { 300, 300 }, + { 302, 302 }, + { 304, 304 }, + { 306, 306 }, + { 308, 308 }, + { 310, 310 }, + { 313, 313 }, + { 315, 315 }, + { 317, 317 }, + { 319, 319 }, + { 321, 321 }, + { 323, 323 }, + { 325, 325 }, + { 327, 327 }, + { 330, 330 }, + { 332, 332 }, + { 334, 334 }, + { 336, 336 }, + { 338, 338 }, + { 340, 340 }, + { 342, 342 }, + { 344, 344 }, + { 346, 346 }, + { 348, 348 }, + { 350, 350 }, + { 352, 352 }, + { 354, 354 }, + { 356, 356 }, + { 358, 358 }, + { 360, 360 }, + { 362, 362 }, + { 364, 364 }, + { 366, 366 }, + { 368, 368 }, + { 370, 370 }, + { 372, 372 }, + { 374, 374 }, + { 376, 377 }, + { 379, 379 }, + { 381, 381 }, + { 385, 386 }, + { 388, 388 }, + { 390, 391 }, + { 393, 395 }, + { 398, 401 }, + { 403, 404 }, + { 406, 408 }, + { 412, 413 }, + { 415, 416 }, + { 418, 418 }, + { 420, 420 }, + { 422, 423 }, + { 425, 425 }, + { 428, 428 }, + { 430, 431 }, + { 433, 435 }, + { 437, 437 }, + { 439, 440 }, + { 444, 444 }, + { 452, 452 }, + { 455, 455 }, + { 458, 458 }, + { 461, 461 }, + { 463, 463 }, + { 465, 465 }, + { 467, 467 }, + { 469, 469 }, + { 471, 471 }, + { 473, 473 }, + { 475, 475 }, + { 478, 478 }, + { 480, 480 }, + { 482, 482 }, + { 484, 484 }, + { 486, 486 }, + { 488, 488 }, + { 490, 490 }, + { 492, 492 }, + { 494, 494 }, + { 497, 497 }, + { 500, 500 }, + { 502, 504 }, + { 506, 506 }, + { 508, 508 }, + { 510, 510 }, + { 512, 512 }, + { 514, 514 }, + { 516, 516 }, + { 518, 518 }, + { 520, 520 }, + { 522, 522 }, + { 524, 524 }, + { 526, 526 }, + { 528, 528 }, + { 530, 530 }, + { 532, 532 }, + { 534, 534 }, + { 536, 536 }, + { 538, 538 }, + { 540, 540 }, + { 542, 542 }, + { 544, 544 }, + { 546, 546 }, + { 548, 548 }, + { 550, 550 }, + { 552, 552 }, + { 554, 554 }, + { 556, 556 }, + { 558, 558 }, + { 560, 560 }, + { 562, 562 }, + { 570, 571 }, + { 573, 574 }, + { 577, 577 }, + { 579, 582 }, + { 584, 584 }, + { 586, 586 }, + { 588, 588 }, + { 590, 590 }, + { 880, 880 }, + { 882, 882 }, + { 886, 886 }, + { 895, 895 }, + { 902, 902 }, + { 904, 906 }, + { 908, 908 }, + { 910, 911 }, + { 913, 929 }, + { 931, 939 }, + { 975, 975 }, + { 978, 980 }, + { 984, 984 }, + { 986, 986 }, + { 988, 988 }, + { 990, 990 }, + { 992, 992 }, + { 994, 994 }, + { 996, 996 }, + { 998, 998 }, + { 1000, 1000 }, + { 1002, 1002 }, + { 1004, 1004 }, + { 1006, 1006 }, + { 1012, 1012 }, + { 1015, 1015 }, + { 1017, 1018 }, + { 1021, 1071 }, + { 1120, 1120 }, + { 1122, 1122 }, + { 1124, 1124 }, + { 1126, 1126 }, + { 1128, 1128 }, + { 1130, 1130 }, + { 1132, 1132 }, + { 1134, 1134 }, + { 1136, 1136 }, + { 1138, 1138 }, + { 1140, 1140 }, + { 1142, 1142 }, + { 1144, 1144 }, + { 1146, 1146 }, + { 1148, 1148 }, + { 1150, 1150 }, + { 1152, 1152 }, + { 1162, 1162 }, + { 1164, 1164 }, + { 1166, 1166 }, + { 1168, 1168 }, + { 1170, 1170 }, + { 1172, 1172 }, + { 1174, 1174 }, + { 1176, 1176 }, + { 1178, 1178 }, + { 1180, 1180 }, + { 1182, 1182 }, + { 1184, 1184 }, + { 1186, 1186 }, + { 1188, 1188 }, + { 1190, 1190 }, + { 1192, 1192 }, + { 1194, 1194 }, + { 1196, 1196 }, + { 1198, 1198 }, + { 1200, 1200 }, + { 1202, 1202 }, + { 1204, 1204 }, + { 1206, 1206 }, + { 1208, 1208 }, + { 1210, 1210 }, + { 1212, 1212 }, + { 1214, 1214 }, + { 1216, 1217 }, + { 1219, 1219 }, + { 1221, 1221 }, + { 1223, 1223 }, + { 1225, 1225 }, + { 1227, 1227 }, + { 1229, 1229 }, + { 1232, 1232 }, + { 1234, 1234 }, + { 1236, 1236 }, + { 1238, 1238 }, + { 1240, 1240 }, + { 1242, 1242 }, + { 1244, 1244 }, + { 1246, 1246 }, + { 1248, 1248 }, + { 1250, 1250 }, + { 1252, 1252 }, + { 1254, 1254 }, + { 1256, 1256 }, + { 1258, 1258 }, + { 1260, 1260 }, + { 1262, 1262 }, + { 1264, 1264 }, + { 1266, 1266 }, + { 1268, 1268 }, + { 1270, 1270 }, + { 1272, 1272 }, + { 1274, 1274 }, + { 1276, 1276 }, + { 1278, 1278 }, + { 1280, 1280 }, + { 1282, 1282 }, + { 1284, 1284 }, + { 1286, 1286 }, + { 1288, 1288 }, + { 1290, 1290 }, + { 1292, 1292 }, + { 1294, 1294 }, + { 1296, 1296 }, + { 1298, 1298 }, + { 1300, 1300 }, + { 1302, 1302 }, + { 1304, 1304 }, + { 1306, 1306 }, + { 1308, 1308 }, + { 1310, 1310 }, + { 1312, 1312 }, + { 1314, 1314 }, + { 1316, 1316 }, + { 1318, 1318 }, + { 1320, 1320 }, + { 1322, 1322 }, + { 1324, 1324 }, + { 1326, 1326 }, + { 1329, 1366 }, + { 4256, 4293 }, + { 4295, 4295 }, + { 4301, 4301 }, + { 5024, 5109 }, { 7312, 7354 }, { 7357, 7359 }, - { 7680, 7680 }, - { 7682, 7682 }, - { 7684, 7684 }, - { 7686, 7686 }, - { 7688, 7688 }, - { 7690, 7690 }, - { 7692, 7692 }, - { 7694, 7694 }, - { 7696, 7696 }, - { 7698, 7698 }, - { 7700, 7700 }, - { 7702, 7702 }, - { 7704, 7704 }, - { 7706, 7706 }, - { 7708, 7708 }, - { 7710, 7710 }, - { 7712, 7712 }, - { 7714, 7714 }, - { 7716, 7716 }, - { 7718, 7718 }, - { 7720, 7720 }, - { 7722, 7722 }, - { 7724, 7724 }, - { 7726, 7726 }, - { 7728, 7728 }, - { 7730, 7730 }, - { 7732, 7732 }, - { 7734, 7734 }, - { 7736, 7736 }, - { 7738, 7738 }, - { 7740, 7740 }, - { 7742, 7742 }, - { 7744, 7744 }, - { 7746, 7746 }, - { 7748, 7748 }, - { 7750, 7750 }, - { 7752, 7752 }, - { 7754, 7754 }, - { 7756, 7756 }, - { 7758, 7758 }, - { 7760, 7760 }, - { 7762, 7762 }, - { 7764, 7764 }, - { 7766, 7766 }, - { 7768, 7768 }, - { 7770, 7770 }, - { 7772, 7772 }, - { 7774, 7774 }, - { 7776, 7776 }, - { 7778, 7778 }, - { 7780, 7780 }, - { 7782, 7782 }, - { 7784, 7784 }, - { 7786, 7786 }, - { 7788, 7788 }, - { 7790, 7790 }, - { 7792, 7792 }, - { 7794, 7794 }, - { 7796, 7796 }, - { 7798, 7798 }, - { 7800, 7800 }, - { 7802, 7802 }, - { 7804, 7804 }, - { 7806, 7806 }, - { 7808, 7808 }, - { 7810, 7810 }, - { 7812, 7812 }, - { 7814, 7814 }, - { 7816, 7816 }, - { 7818, 7818 }, - { 7820, 7820 }, - { 7822, 7822 }, - { 7824, 7824 }, - { 7826, 7826 }, - { 7828, 7828 }, - { 7838, 7838 }, - { 7840, 7840 }, - { 7842, 7842 }, - { 7844, 7844 }, - { 7846, 7846 }, - { 7848, 7848 }, - { 7850, 7850 }, - { 7852, 7852 }, - { 7854, 7854 }, - { 7856, 7856 }, - { 7858, 7858 }, - { 7860, 7860 }, - { 7862, 7862 }, - { 7864, 7864 }, - { 7866, 7866 }, - { 7868, 7868 }, - { 7870, 7870 }, - { 7872, 7872 }, - { 7874, 7874 }, - { 7876, 7876 }, - { 7878, 7878 }, - { 7880, 7880 }, - { 7882, 7882 }, - { 7884, 7884 }, - { 7886, 7886 }, - { 7888, 7888 }, - { 7890, 7890 }, - { 7892, 7892 }, - { 7894, 7894 }, - { 7896, 7896 }, - { 7898, 7898 }, - { 7900, 7900 }, - { 7902, 7902 }, - { 7904, 7904 }, - { 7906, 7906 }, - { 7908, 7908 }, - { 7910, 7910 }, - { 7912, 7912 }, - { 7914, 7914 }, - { 7916, 7916 }, - { 7918, 7918 }, - { 7920, 7920 }, - { 7922, 7922 }, - { 7924, 7924 }, - { 7926, 7926 }, - { 7928, 7928 }, - { 7930, 7930 }, - { 7932, 7932 }, - { 7934, 7934 }, - { 7944, 7951 }, - { 7960, 7965 }, - { 7976, 7983 }, - { 7992, 7999 }, - { 8008, 8013 }, - { 8025, 8025 }, - { 8027, 8027 }, - { 8029, 8029 }, - { 8031, 8031 }, - { 8040, 8047 }, - { 8120, 8123 }, - { 8136, 8139 }, - { 8152, 8155 }, - { 8168, 8172 }, - { 8184, 8187 }, - { 8450, 8450 }, - { 8455, 8455 }, - { 8459, 8461 }, - { 8464, 8466 }, - { 8469, 8469 }, - { 8473, 8477 }, - { 8484, 8484 }, - { 8486, 8486 }, - { 8488, 8488 }, - { 8490, 8493 }, - { 8496, 8499 }, - { 8510, 8511 }, - { 8517, 8517 }, - { 8579, 8579 }, + { 7680, 7680 }, + { 7682, 7682 }, + { 7684, 7684 }, + { 7686, 7686 }, + { 7688, 7688 }, + { 7690, 7690 }, + { 7692, 7692 }, + { 7694, 7694 }, + { 7696, 7696 }, + { 7698, 7698 }, + { 7700, 7700 }, + { 7702, 7702 }, + { 7704, 7704 }, + { 7706, 7706 }, + { 7708, 7708 }, + { 7710, 7710 }, + { 7712, 7712 }, + { 7714, 7714 }, + { 7716, 7716 }, + { 7718, 7718 }, + { 7720, 7720 }, + { 7722, 7722 }, + { 7724, 7724 }, + { 7726, 7726 }, + { 7728, 7728 }, + { 7730, 7730 }, + { 7732, 7732 }, + { 7734, 7734 }, + { 7736, 7736 }, + { 7738, 7738 }, + { 7740, 7740 }, + { 7742, 7742 }, + { 7744, 7744 }, + { 7746, 7746 }, + { 7748, 7748 }, + { 7750, 7750 }, + { 7752, 7752 }, + { 7754, 7754 }, + { 7756, 7756 }, + { 7758, 7758 }, + { 7760, 7760 }, + { 7762, 7762 }, + { 7764, 7764 }, + { 7766, 7766 }, + { 7768, 7768 }, + { 7770, 7770 }, + { 7772, 7772 }, + { 7774, 7774 }, + { 7776, 7776 }, + { 7778, 7778 }, + { 7780, 7780 }, + { 7782, 7782 }, + { 7784, 7784 }, + { 7786, 7786 }, + { 7788, 7788 }, + { 7790, 7790 }, + { 7792, 7792 }, + { 7794, 7794 }, + { 7796, 7796 }, + { 7798, 7798 }, + { 7800, 7800 }, + { 7802, 7802 }, + { 7804, 7804 }, + { 7806, 7806 }, + { 7808, 7808 }, + { 7810, 7810 }, + { 7812, 7812 }, + { 7814, 7814 }, + { 7816, 7816 }, + { 7818, 7818 }, + { 7820, 7820 }, + { 7822, 7822 }, + { 7824, 7824 }, + { 7826, 7826 }, + { 7828, 7828 }, + { 7838, 7838 }, + { 7840, 7840 }, + { 7842, 7842 }, + { 7844, 7844 }, + { 7846, 7846 }, + { 7848, 7848 }, + { 7850, 7850 }, + { 7852, 7852 }, + { 7854, 7854 }, + { 7856, 7856 }, + { 7858, 7858 }, + { 7860, 7860 }, + { 7862, 7862 }, + { 7864, 7864 }, + { 7866, 7866 }, + { 7868, 7868 }, + { 7870, 7870 }, + { 7872, 7872 }, + { 7874, 7874 }, + { 7876, 7876 }, + { 7878, 7878 }, + { 7880, 7880 }, + { 7882, 7882 }, + { 7884, 7884 }, + { 7886, 7886 }, + { 7888, 7888 }, + { 7890, 7890 }, + { 7892, 7892 }, + { 7894, 7894 }, + { 7896, 7896 }, + { 7898, 7898 }, + { 7900, 7900 }, + { 7902, 7902 }, + { 7904, 7904 }, + { 7906, 7906 }, + { 7908, 7908 }, + { 7910, 7910 }, + { 7912, 7912 }, + { 7914, 7914 }, + { 7916, 7916 }, + { 7918, 7918 }, + { 7920, 7920 }, + { 7922, 7922 }, + { 7924, 7924 }, + { 7926, 7926 }, + { 7928, 7928 }, + { 7930, 7930 }, + { 7932, 7932 }, + { 7934, 7934 }, + { 7944, 7951 }, + { 7960, 7965 }, + { 7976, 7983 }, + { 7992, 7999 }, + { 8008, 8013 }, + { 8025, 8025 }, + { 8027, 8027 }, + { 8029, 8029 }, + { 8031, 8031 }, + { 8040, 8047 }, + { 8120, 8123 }, + { 8136, 8139 }, + { 8152, 8155 }, + { 8168, 8172 }, + { 8184, 8187 }, + { 8450, 8450 }, + { 8455, 8455 }, + { 8459, 8461 }, + { 8464, 8466 }, + { 8469, 8469 }, + { 8473, 8477 }, + { 8484, 8484 }, + { 8486, 8486 }, + { 8488, 8488 }, + { 8490, 8493 }, + { 8496, 8499 }, + { 8510, 8511 }, + { 8517, 8517 }, + { 8579, 8579 }, { 11264, 11311 }, - { 11360, 11360 }, - { 11362, 11364 }, - { 11367, 11367 }, - { 11369, 11369 }, - { 11371, 11371 }, - { 11373, 11376 }, - { 11378, 11378 }, - { 11381, 11381 }, - { 11390, 11392 }, - { 11394, 11394 }, - { 11396, 11396 }, - { 11398, 11398 }, - { 11400, 11400 }, - { 11402, 11402 }, - { 11404, 11404 }, - { 11406, 11406 }, - { 11408, 11408 }, - { 11410, 11410 }, - { 11412, 11412 }, - { 11414, 11414 }, - { 11416, 11416 }, - { 11418, 11418 }, - { 11420, 11420 }, - { 11422, 11422 }, - { 11424, 11424 }, - { 11426, 11426 }, - { 11428, 11428 }, - { 11430, 11430 }, - { 11432, 11432 }, - { 11434, 11434 }, - { 11436, 11436 }, - { 11438, 11438 }, - { 11440, 11440 }, - { 11442, 11442 }, - { 11444, 11444 }, - { 11446, 11446 }, - { 11448, 11448 }, - { 11450, 11450 }, - { 11452, 11452 }, - { 11454, 11454 }, - { 11456, 11456 }, - { 11458, 11458 }, - { 11460, 11460 }, - { 11462, 11462 }, - { 11464, 11464 }, - { 11466, 11466 }, - { 11468, 11468 }, - { 11470, 11470 }, - { 11472, 11472 }, - { 11474, 11474 }, - { 11476, 11476 }, - { 11478, 11478 }, - { 11480, 11480 }, - { 11482, 11482 }, - { 11484, 11484 }, - { 11486, 11486 }, - { 11488, 11488 }, - { 11490, 11490 }, - { 11499, 11499 }, - { 11501, 11501 }, - { 11506, 11506 }, - { 42560, 42560 }, - { 42562, 42562 }, - { 42564, 42564 }, - { 42566, 42566 }, - { 42568, 42568 }, - { 42570, 42570 }, - { 42572, 42572 }, - { 42574, 42574 }, - { 42576, 42576 }, - { 42578, 42578 }, - { 42580, 42580 }, - { 42582, 42582 }, - { 42584, 42584 }, - { 42586, 42586 }, - { 42588, 42588 }, - { 42590, 42590 }, - { 42592, 42592 }, - { 42594, 42594 }, - { 42596, 42596 }, - { 42598, 42598 }, - { 42600, 42600 }, - { 42602, 42602 }, - { 42604, 42604 }, - { 42624, 42624 }, - { 42626, 42626 }, - { 42628, 42628 }, - { 42630, 42630 }, - { 42632, 42632 }, - { 42634, 42634 }, - { 42636, 42636 }, - { 42638, 42638 }, - { 42640, 42640 }, - { 42642, 42642 }, - { 42644, 42644 }, - { 42646, 42646 }, - { 42648, 42648 }, - { 42650, 42650 }, - { 42786, 42786 }, - { 42788, 42788 }, - { 42790, 42790 }, - { 42792, 42792 }, - { 42794, 42794 }, - { 42796, 42796 }, - { 42798, 42798 }, - { 42802, 42802 }, - { 42804, 42804 }, - { 42806, 42806 }, - { 42808, 42808 }, - { 42810, 42810 }, - { 42812, 42812 }, - { 42814, 42814 }, - { 42816, 42816 }, - { 42818, 42818 }, - { 42820, 42820 }, - { 42822, 42822 }, - { 42824, 42824 }, - { 42826, 42826 }, - { 42828, 42828 }, - { 42830, 42830 }, - { 42832, 42832 }, - { 42834, 42834 }, - { 42836, 42836 }, - { 42838, 42838 }, - { 42840, 42840 }, - { 42842, 42842 }, - { 42844, 42844 }, - { 42846, 42846 }, - { 42848, 42848 }, - { 42850, 42850 }, - { 42852, 42852 }, - { 42854, 42854 }, - { 42856, 42856 }, - { 42858, 42858 }, - { 42860, 42860 }, - { 42862, 42862 }, - { 42873, 42873 }, - { 42875, 42875 }, - { 42877, 42878 }, - { 42880, 42880 }, - { 42882, 42882 }, - { 42884, 42884 }, - { 42886, 42886 }, - { 42891, 42891 }, - { 42893, 42893 }, - { 42896, 42896 }, - { 42898, 42898 }, - { 42902, 42902 }, - { 42904, 42904 }, - { 42906, 42906 }, - { 42908, 42908 }, - { 42910, 42910 }, - { 42912, 42912 }, - { 42914, 42914 }, - { 42916, 42916 }, - { 42918, 42918 }, - { 42920, 42920 }, + { 11360, 11360 }, + { 11362, 11364 }, + { 11367, 11367 }, + { 11369, 11369 }, + { 11371, 11371 }, + { 11373, 11376 }, + { 11378, 11378 }, + { 11381, 11381 }, + { 11390, 11392 }, + { 11394, 11394 }, + { 11396, 11396 }, + { 11398, 11398 }, + { 11400, 11400 }, + { 11402, 11402 }, + { 11404, 11404 }, + { 11406, 11406 }, + { 11408, 11408 }, + { 11410, 11410 }, + { 11412, 11412 }, + { 11414, 11414 }, + { 11416, 11416 }, + { 11418, 11418 }, + { 11420, 11420 }, + { 11422, 11422 }, + { 11424, 11424 }, + { 11426, 11426 }, + { 11428, 11428 }, + { 11430, 11430 }, + { 11432, 11432 }, + { 11434, 11434 }, + { 11436, 11436 }, + { 11438, 11438 }, + { 11440, 11440 }, + { 11442, 11442 }, + { 11444, 11444 }, + { 11446, 11446 }, + { 11448, 11448 }, + { 11450, 11450 }, + { 11452, 11452 }, + { 11454, 11454 }, + { 11456, 11456 }, + { 11458, 11458 }, + { 11460, 11460 }, + { 11462, 11462 }, + { 11464, 11464 }, + { 11466, 11466 }, + { 11468, 11468 }, + { 11470, 11470 }, + { 11472, 11472 }, + { 11474, 11474 }, + { 11476, 11476 }, + { 11478, 11478 }, + { 11480, 11480 }, + { 11482, 11482 }, + { 11484, 11484 }, + { 11486, 11486 }, + { 11488, 11488 }, + { 11490, 11490 }, + { 11499, 11499 }, + { 11501, 11501 }, + { 11506, 11506 }, + { 42560, 42560 }, + { 42562, 42562 }, + { 42564, 42564 }, + { 42566, 42566 }, + { 42568, 42568 }, + { 42570, 42570 }, + { 42572, 42572 }, + { 42574, 42574 }, + { 42576, 42576 }, + { 42578, 42578 }, + { 42580, 42580 }, + { 42582, 42582 }, + { 42584, 42584 }, + { 42586, 42586 }, + { 42588, 42588 }, + { 42590, 42590 }, + { 42592, 42592 }, + { 42594, 42594 }, + { 42596, 42596 }, + { 42598, 42598 }, + { 42600, 42600 }, + { 42602, 42602 }, + { 42604, 42604 }, + { 42624, 42624 }, + { 42626, 42626 }, + { 42628, 42628 }, + { 42630, 42630 }, + { 42632, 42632 }, + { 42634, 42634 }, + { 42636, 42636 }, + { 42638, 42638 }, + { 42640, 42640 }, + { 42642, 42642 }, + { 42644, 42644 }, + { 42646, 42646 }, + { 42648, 42648 }, + { 42650, 42650 }, + { 42786, 42786 }, + { 42788, 42788 }, + { 42790, 42790 }, + { 42792, 42792 }, + { 42794, 42794 }, + { 42796, 42796 }, + { 42798, 42798 }, + { 42802, 42802 }, + { 42804, 42804 }, + { 42806, 42806 }, + { 42808, 42808 }, + { 42810, 42810 }, + { 42812, 42812 }, + { 42814, 42814 }, + { 42816, 42816 }, + { 42818, 42818 }, + { 42820, 42820 }, + { 42822, 42822 }, + { 42824, 42824 }, + { 42826, 42826 }, + { 42828, 42828 }, + { 42830, 42830 }, + { 42832, 42832 }, + { 42834, 42834 }, + { 42836, 42836 }, + { 42838, 42838 }, + { 42840, 42840 }, + { 42842, 42842 }, + { 42844, 42844 }, + { 42846, 42846 }, + { 42848, 42848 }, + { 42850, 42850 }, + { 42852, 42852 }, + { 42854, 42854 }, + { 42856, 42856 }, + { 42858, 42858 }, + { 42860, 42860 }, + { 42862, 42862 }, + { 42873, 42873 }, + { 42875, 42875 }, + { 42877, 42878 }, + { 42880, 42880 }, + { 42882, 42882 }, + { 42884, 42884 }, + { 42886, 42886 }, + { 42891, 42891 }, + { 42893, 42893 }, + { 42896, 42896 }, + { 42898, 42898 }, + { 42902, 42902 }, + { 42904, 42904 }, + { 42906, 42906 }, + { 42908, 42908 }, + { 42910, 42910 }, + { 42912, 42912 }, + { 42914, 42914 }, + { 42916, 42916 }, + { 42918, 42918 }, + { 42920, 42920 }, { 42922, 42926 }, - { 42928, 42932 }, - { 42934, 42934 }, + { 42928, 42932 }, + { 42934, 42934 }, { 42936, 42936 }, { 42938, 42938 }, { 42940, 42940 }, @@ -2584,51 +2584,51 @@ static const URange16 Lu_range16[] = { { 42966, 42966 }, { 42968, 42968 }, { 42997, 42997 }, - { 65313, 65338 }, -}; -static const URange32 Lu_range32[] = { - { 66560, 66599 }, + { 65313, 65338 }, +}; +static const URange32 Lu_range32[] = { + { 66560, 66599 }, { 66736, 66771 }, { 66928, 66938 }, { 66940, 66954 }, { 66956, 66962 }, { 66964, 66965 }, - { 68736, 68786 }, - { 71840, 71871 }, + { 68736, 68786 }, + { 71840, 71871 }, { 93760, 93791 }, - { 119808, 119833 }, - { 119860, 119885 }, - { 119912, 119937 }, - { 119964, 119964 }, - { 119966, 119967 }, - { 119970, 119970 }, - { 119973, 119974 }, - { 119977, 119980 }, - { 119982, 119989 }, - { 120016, 120041 }, - { 120068, 120069 }, - { 120071, 120074 }, - { 120077, 120084 }, - { 120086, 120092 }, - { 120120, 120121 }, - { 120123, 120126 }, - { 120128, 120132 }, - { 120134, 120134 }, - { 120138, 120144 }, - { 120172, 120197 }, - { 120224, 120249 }, - { 120276, 120301 }, - { 120328, 120353 }, - { 120380, 120405 }, - { 120432, 120457 }, - { 120488, 120512 }, - { 120546, 120570 }, - { 120604, 120628 }, - { 120662, 120686 }, - { 120720, 120744 }, - { 120778, 120778 }, + { 119808, 119833 }, + { 119860, 119885 }, + { 119912, 119937 }, + { 119964, 119964 }, + { 119966, 119967 }, + { 119970, 119970 }, + { 119973, 119974 }, + { 119977, 119980 }, + { 119982, 119989 }, + { 120016, 120041 }, + { 120068, 120069 }, + { 120071, 120074 }, + { 120077, 120084 }, + { 120086, 120092 }, + { 120120, 120121 }, + { 120123, 120126 }, + { 120128, 120132 }, + { 120134, 120134 }, + { 120138, 120144 }, + { 120172, 120197 }, + { 120224, 120249 }, + { 120276, 120301 }, + { 120328, 120353 }, + { 120380, 120405 }, + { 120432, 120457 }, + { 120488, 120512 }, + { 120546, 120570 }, + { 120604, 120628 }, + { 120662, 120686 }, + { 120720, 120744 }, + { 120778, 120778 }, { 125184, 125217 }, -}; +}; static const URange16 M_range16[] = { { 768, 879 }, { 1155, 1161 }, @@ -2819,7 +2819,7 @@ static const URange16 M_range16[] = { { 64286, 64286 }, { 65024, 65039 }, { 65056, 65071 }, -}; +}; static const URange32 M_range32[] = { { 66045, 66045 }, { 66272, 66272 }, @@ -2931,161 +2931,161 @@ static const URange32 M_range32[] = { { 125136, 125142 }, { 125252, 125258 }, { 917760, 917999 }, -}; -static const URange16 Mc_range16[] = { - { 2307, 2307 }, - { 2363, 2363 }, - { 2366, 2368 }, - { 2377, 2380 }, - { 2382, 2383 }, - { 2434, 2435 }, - { 2494, 2496 }, - { 2503, 2504 }, - { 2507, 2508 }, - { 2519, 2519 }, - { 2563, 2563 }, - { 2622, 2624 }, - { 2691, 2691 }, - { 2750, 2752 }, - { 2761, 2761 }, - { 2763, 2764 }, - { 2818, 2819 }, - { 2878, 2878 }, - { 2880, 2880 }, - { 2887, 2888 }, - { 2891, 2892 }, - { 2903, 2903 }, - { 3006, 3007 }, - { 3009, 3010 }, - { 3014, 3016 }, - { 3018, 3020 }, - { 3031, 3031 }, - { 3073, 3075 }, - { 3137, 3140 }, - { 3202, 3203 }, - { 3262, 3262 }, - { 3264, 3268 }, - { 3271, 3272 }, - { 3274, 3275 }, - { 3285, 3286 }, - { 3330, 3331 }, - { 3390, 3392 }, - { 3398, 3400 }, - { 3402, 3404 }, - { 3415, 3415 }, - { 3458, 3459 }, - { 3535, 3537 }, - { 3544, 3551 }, - { 3570, 3571 }, - { 3902, 3903 }, - { 3967, 3967 }, - { 4139, 4140 }, - { 4145, 4145 }, - { 4152, 4152 }, - { 4155, 4156 }, - { 4182, 4183 }, - { 4194, 4196 }, - { 4199, 4205 }, - { 4227, 4228 }, - { 4231, 4236 }, - { 4239, 4239 }, - { 4250, 4252 }, +}; +static const URange16 Mc_range16[] = { + { 2307, 2307 }, + { 2363, 2363 }, + { 2366, 2368 }, + { 2377, 2380 }, + { 2382, 2383 }, + { 2434, 2435 }, + { 2494, 2496 }, + { 2503, 2504 }, + { 2507, 2508 }, + { 2519, 2519 }, + { 2563, 2563 }, + { 2622, 2624 }, + { 2691, 2691 }, + { 2750, 2752 }, + { 2761, 2761 }, + { 2763, 2764 }, + { 2818, 2819 }, + { 2878, 2878 }, + { 2880, 2880 }, + { 2887, 2888 }, + { 2891, 2892 }, + { 2903, 2903 }, + { 3006, 3007 }, + { 3009, 3010 }, + { 3014, 3016 }, + { 3018, 3020 }, + { 3031, 3031 }, + { 3073, 3075 }, + { 3137, 3140 }, + { 3202, 3203 }, + { 3262, 3262 }, + { 3264, 3268 }, + { 3271, 3272 }, + { 3274, 3275 }, + { 3285, 3286 }, + { 3330, 3331 }, + { 3390, 3392 }, + { 3398, 3400 }, + { 3402, 3404 }, + { 3415, 3415 }, + { 3458, 3459 }, + { 3535, 3537 }, + { 3544, 3551 }, + { 3570, 3571 }, + { 3902, 3903 }, + { 3967, 3967 }, + { 4139, 4140 }, + { 4145, 4145 }, + { 4152, 4152 }, + { 4155, 4156 }, + { 4182, 4183 }, + { 4194, 4196 }, + { 4199, 4205 }, + { 4227, 4228 }, + { 4231, 4236 }, + { 4239, 4239 }, + { 4250, 4252 }, { 5909, 5909 }, { 5940, 5940 }, - { 6070, 6070 }, - { 6078, 6085 }, - { 6087, 6088 }, - { 6435, 6438 }, - { 6441, 6443 }, - { 6448, 6449 }, - { 6451, 6456 }, - { 6681, 6682 }, - { 6741, 6741 }, - { 6743, 6743 }, - { 6753, 6753 }, - { 6755, 6756 }, - { 6765, 6770 }, - { 6916, 6916 }, - { 6965, 6965 }, - { 6971, 6971 }, - { 6973, 6977 }, - { 6979, 6980 }, - { 7042, 7042 }, - { 7073, 7073 }, - { 7078, 7079 }, - { 7082, 7082 }, - { 7143, 7143 }, - { 7146, 7148 }, - { 7150, 7150 }, - { 7154, 7155 }, - { 7204, 7211 }, - { 7220, 7221 }, - { 7393, 7393 }, + { 6070, 6070 }, + { 6078, 6085 }, + { 6087, 6088 }, + { 6435, 6438 }, + { 6441, 6443 }, + { 6448, 6449 }, + { 6451, 6456 }, + { 6681, 6682 }, + { 6741, 6741 }, + { 6743, 6743 }, + { 6753, 6753 }, + { 6755, 6756 }, + { 6765, 6770 }, + { 6916, 6916 }, + { 6965, 6965 }, + { 6971, 6971 }, + { 6973, 6977 }, + { 6979, 6980 }, + { 7042, 7042 }, + { 7073, 7073 }, + { 7078, 7079 }, + { 7082, 7082 }, + { 7143, 7143 }, + { 7146, 7148 }, + { 7150, 7150 }, + { 7154, 7155 }, + { 7204, 7211 }, + { 7220, 7221 }, + { 7393, 7393 }, { 7415, 7415 }, - { 12334, 12335 }, - { 43043, 43044 }, - { 43047, 43047 }, - { 43136, 43137 }, - { 43188, 43203 }, - { 43346, 43347 }, - { 43395, 43395 }, - { 43444, 43445 }, - { 43450, 43451 }, + { 12334, 12335 }, + { 43043, 43044 }, + { 43047, 43047 }, + { 43136, 43137 }, + { 43188, 43203 }, + { 43346, 43347 }, + { 43395, 43395 }, + { 43444, 43445 }, + { 43450, 43451 }, { 43454, 43456 }, - { 43567, 43568 }, - { 43571, 43572 }, - { 43597, 43597 }, - { 43643, 43643 }, - { 43645, 43645 }, - { 43755, 43755 }, - { 43758, 43759 }, - { 43765, 43765 }, - { 44003, 44004 }, - { 44006, 44007 }, - { 44009, 44010 }, - { 44012, 44012 }, -}; -static const URange32 Mc_range32[] = { - { 69632, 69632 }, - { 69634, 69634 }, - { 69762, 69762 }, - { 69808, 69810 }, - { 69815, 69816 }, - { 69932, 69932 }, + { 43567, 43568 }, + { 43571, 43572 }, + { 43597, 43597 }, + { 43643, 43643 }, + { 43645, 43645 }, + { 43755, 43755 }, + { 43758, 43759 }, + { 43765, 43765 }, + { 44003, 44004 }, + { 44006, 44007 }, + { 44009, 44010 }, + { 44012, 44012 }, +}; +static const URange32 Mc_range32[] = { + { 69632, 69632 }, + { 69634, 69634 }, + { 69762, 69762 }, + { 69808, 69810 }, + { 69815, 69816 }, + { 69932, 69932 }, { 69957, 69958 }, - { 70018, 70018 }, - { 70067, 70069 }, - { 70079, 70080 }, + { 70018, 70018 }, + { 70067, 70069 }, + { 70079, 70080 }, { 70094, 70094 }, - { 70188, 70190 }, - { 70194, 70195 }, - { 70197, 70197 }, - { 70368, 70370 }, - { 70402, 70403 }, - { 70462, 70463 }, - { 70465, 70468 }, - { 70471, 70472 }, - { 70475, 70477 }, - { 70487, 70487 }, - { 70498, 70499 }, + { 70188, 70190 }, + { 70194, 70195 }, + { 70197, 70197 }, + { 70368, 70370 }, + { 70402, 70403 }, + { 70462, 70463 }, + { 70465, 70468 }, + { 70471, 70472 }, + { 70475, 70477 }, + { 70487, 70487 }, + { 70498, 70499 }, { 70709, 70711 }, { 70720, 70721 }, { 70725, 70725 }, - { 70832, 70834 }, - { 70841, 70841 }, - { 70843, 70846 }, - { 70849, 70849 }, - { 71087, 71089 }, - { 71096, 71099 }, - { 71102, 71102 }, - { 71216, 71218 }, - { 71227, 71228 }, - { 71230, 71230 }, - { 71340, 71340 }, - { 71342, 71343 }, - { 71350, 71350 }, - { 71456, 71457 }, - { 71462, 71462 }, + { 70832, 70834 }, + { 70841, 70841 }, + { 70843, 70846 }, + { 70849, 70849 }, + { 71087, 71089 }, + { 71096, 71099 }, + { 71102, 71102 }, + { 71216, 71218 }, + { 71227, 71228 }, + { 71230, 71230 }, + { 71340, 71340 }, + { 71342, 71343 }, + { 71350, 71350 }, + { 71456, 71457 }, + { 71462, 71462 }, { 71724, 71726 }, { 71736, 71736 }, { 71984, 71989 }, @@ -3110,9 +3110,9 @@ static const URange32 Mc_range32[] = { { 73461, 73462 }, { 94033, 94087 }, { 94192, 94193 }, - { 119141, 119142 }, - { 119149, 119154 }, -}; + { 119141, 119142 }, + { 119149, 119154 }, +}; static const URange16 Me_range16[] = { { 1160, 1161 }, { 6846, 6846 }, @@ -3120,283 +3120,283 @@ static const URange16 Me_range16[] = { { 8418, 8420 }, { 42608, 42610 }, }; -static const URange16 Mn_range16[] = { - { 768, 879 }, - { 1155, 1159 }, - { 1425, 1469 }, - { 1471, 1471 }, - { 1473, 1474 }, - { 1476, 1477 }, - { 1479, 1479 }, - { 1552, 1562 }, - { 1611, 1631 }, - { 1648, 1648 }, - { 1750, 1756 }, - { 1759, 1764 }, - { 1767, 1768 }, - { 1770, 1773 }, - { 1809, 1809 }, - { 1840, 1866 }, - { 1958, 1968 }, - { 2027, 2035 }, +static const URange16 Mn_range16[] = { + { 768, 879 }, + { 1155, 1159 }, + { 1425, 1469 }, + { 1471, 1471 }, + { 1473, 1474 }, + { 1476, 1477 }, + { 1479, 1479 }, + { 1552, 1562 }, + { 1611, 1631 }, + { 1648, 1648 }, + { 1750, 1756 }, + { 1759, 1764 }, + { 1767, 1768 }, + { 1770, 1773 }, + { 1809, 1809 }, + { 1840, 1866 }, + { 1958, 1968 }, + { 2027, 2035 }, { 2045, 2045 }, - { 2070, 2073 }, - { 2075, 2083 }, - { 2085, 2087 }, - { 2089, 2093 }, - { 2137, 2139 }, + { 2070, 2073 }, + { 2075, 2083 }, + { 2085, 2087 }, + { 2089, 2093 }, + { 2137, 2139 }, { 2200, 2207 }, { 2250, 2273 }, - { 2275, 2306 }, - { 2362, 2362 }, - { 2364, 2364 }, - { 2369, 2376 }, - { 2381, 2381 }, - { 2385, 2391 }, - { 2402, 2403 }, - { 2433, 2433 }, - { 2492, 2492 }, - { 2497, 2500 }, - { 2509, 2509 }, - { 2530, 2531 }, + { 2275, 2306 }, + { 2362, 2362 }, + { 2364, 2364 }, + { 2369, 2376 }, + { 2381, 2381 }, + { 2385, 2391 }, + { 2402, 2403 }, + { 2433, 2433 }, + { 2492, 2492 }, + { 2497, 2500 }, + { 2509, 2509 }, + { 2530, 2531 }, { 2558, 2558 }, - { 2561, 2562 }, - { 2620, 2620 }, - { 2625, 2626 }, - { 2631, 2632 }, - { 2635, 2637 }, - { 2641, 2641 }, - { 2672, 2673 }, - { 2677, 2677 }, - { 2689, 2690 }, - { 2748, 2748 }, - { 2753, 2757 }, - { 2759, 2760 }, - { 2765, 2765 }, - { 2786, 2787 }, + { 2561, 2562 }, + { 2620, 2620 }, + { 2625, 2626 }, + { 2631, 2632 }, + { 2635, 2637 }, + { 2641, 2641 }, + { 2672, 2673 }, + { 2677, 2677 }, + { 2689, 2690 }, + { 2748, 2748 }, + { 2753, 2757 }, + { 2759, 2760 }, + { 2765, 2765 }, + { 2786, 2787 }, { 2810, 2815 }, - { 2817, 2817 }, - { 2876, 2876 }, - { 2879, 2879 }, - { 2881, 2884 }, - { 2893, 2893 }, + { 2817, 2817 }, + { 2876, 2876 }, + { 2879, 2879 }, + { 2881, 2884 }, + { 2893, 2893 }, { 2901, 2902 }, - { 2914, 2915 }, - { 2946, 2946 }, - { 3008, 3008 }, - { 3021, 3021 }, - { 3072, 3072 }, + { 2914, 2915 }, + { 2946, 2946 }, + { 3008, 3008 }, + { 3021, 3021 }, + { 3072, 3072 }, { 3076, 3076 }, { 3132, 3132 }, - { 3134, 3136 }, - { 3142, 3144 }, - { 3146, 3149 }, - { 3157, 3158 }, - { 3170, 3171 }, - { 3201, 3201 }, - { 3260, 3260 }, - { 3263, 3263 }, - { 3270, 3270 }, - { 3276, 3277 }, - { 3298, 3299 }, + { 3134, 3136 }, + { 3142, 3144 }, + { 3146, 3149 }, + { 3157, 3158 }, + { 3170, 3171 }, + { 3201, 3201 }, + { 3260, 3260 }, + { 3263, 3263 }, + { 3270, 3270 }, + { 3276, 3277 }, + { 3298, 3299 }, { 3328, 3329 }, { 3387, 3388 }, - { 3393, 3396 }, - { 3405, 3405 }, - { 3426, 3427 }, + { 3393, 3396 }, + { 3405, 3405 }, + { 3426, 3427 }, { 3457, 3457 }, - { 3530, 3530 }, - { 3538, 3540 }, - { 3542, 3542 }, - { 3633, 3633 }, - { 3636, 3642 }, - { 3655, 3662 }, - { 3761, 3761 }, + { 3530, 3530 }, + { 3538, 3540 }, + { 3542, 3542 }, + { 3633, 3633 }, + { 3636, 3642 }, + { 3655, 3662 }, + { 3761, 3761 }, { 3764, 3772 }, - { 3784, 3789 }, - { 3864, 3865 }, - { 3893, 3893 }, - { 3895, 3895 }, - { 3897, 3897 }, - { 3953, 3966 }, - { 3968, 3972 }, - { 3974, 3975 }, - { 3981, 3991 }, - { 3993, 4028 }, - { 4038, 4038 }, - { 4141, 4144 }, - { 4146, 4151 }, - { 4153, 4154 }, - { 4157, 4158 }, - { 4184, 4185 }, - { 4190, 4192 }, - { 4209, 4212 }, - { 4226, 4226 }, - { 4229, 4230 }, - { 4237, 4237 }, - { 4253, 4253 }, - { 4957, 4959 }, - { 5906, 5908 }, + { 3784, 3789 }, + { 3864, 3865 }, + { 3893, 3893 }, + { 3895, 3895 }, + { 3897, 3897 }, + { 3953, 3966 }, + { 3968, 3972 }, + { 3974, 3975 }, + { 3981, 3991 }, + { 3993, 4028 }, + { 4038, 4038 }, + { 4141, 4144 }, + { 4146, 4151 }, + { 4153, 4154 }, + { 4157, 4158 }, + { 4184, 4185 }, + { 4190, 4192 }, + { 4209, 4212 }, + { 4226, 4226 }, + { 4229, 4230 }, + { 4237, 4237 }, + { 4253, 4253 }, + { 4957, 4959 }, + { 5906, 5908 }, { 5938, 5939 }, - { 5970, 5971 }, - { 6002, 6003 }, - { 6068, 6069 }, - { 6071, 6077 }, - { 6086, 6086 }, - { 6089, 6099 }, - { 6109, 6109 }, - { 6155, 6157 }, + { 5970, 5971 }, + { 6002, 6003 }, + { 6068, 6069 }, + { 6071, 6077 }, + { 6086, 6086 }, + { 6089, 6099 }, + { 6109, 6109 }, + { 6155, 6157 }, { 6159, 6159 }, { 6277, 6278 }, - { 6313, 6313 }, - { 6432, 6434 }, - { 6439, 6440 }, - { 6450, 6450 }, - { 6457, 6459 }, - { 6679, 6680 }, - { 6683, 6683 }, - { 6742, 6742 }, - { 6744, 6750 }, - { 6752, 6752 }, - { 6754, 6754 }, - { 6757, 6764 }, - { 6771, 6780 }, - { 6783, 6783 }, - { 6832, 6845 }, + { 6313, 6313 }, + { 6432, 6434 }, + { 6439, 6440 }, + { 6450, 6450 }, + { 6457, 6459 }, + { 6679, 6680 }, + { 6683, 6683 }, + { 6742, 6742 }, + { 6744, 6750 }, + { 6752, 6752 }, + { 6754, 6754 }, + { 6757, 6764 }, + { 6771, 6780 }, + { 6783, 6783 }, + { 6832, 6845 }, { 6847, 6862 }, - { 6912, 6915 }, - { 6964, 6964 }, - { 6966, 6970 }, - { 6972, 6972 }, - { 6978, 6978 }, - { 7019, 7027 }, - { 7040, 7041 }, - { 7074, 7077 }, - { 7080, 7081 }, - { 7083, 7085 }, - { 7142, 7142 }, - { 7144, 7145 }, - { 7149, 7149 }, - { 7151, 7153 }, - { 7212, 7219 }, - { 7222, 7223 }, - { 7376, 7378 }, - { 7380, 7392 }, - { 7394, 7400 }, - { 7405, 7405 }, - { 7412, 7412 }, - { 7416, 7417 }, + { 6912, 6915 }, + { 6964, 6964 }, + { 6966, 6970 }, + { 6972, 6972 }, + { 6978, 6978 }, + { 7019, 7027 }, + { 7040, 7041 }, + { 7074, 7077 }, + { 7080, 7081 }, + { 7083, 7085 }, + { 7142, 7142 }, + { 7144, 7145 }, + { 7149, 7149 }, + { 7151, 7153 }, + { 7212, 7219 }, + { 7222, 7223 }, + { 7376, 7378 }, + { 7380, 7392 }, + { 7394, 7400 }, + { 7405, 7405 }, + { 7412, 7412 }, + { 7416, 7417 }, { 7616, 7679 }, - { 8400, 8412 }, - { 8417, 8417 }, - { 8421, 8432 }, - { 11503, 11505 }, - { 11647, 11647 }, - { 11744, 11775 }, - { 12330, 12333 }, - { 12441, 12442 }, - { 42607, 42607 }, - { 42612, 42621 }, - { 42654, 42655 }, - { 42736, 42737 }, - { 43010, 43010 }, - { 43014, 43014 }, - { 43019, 43019 }, - { 43045, 43046 }, + { 8400, 8412 }, + { 8417, 8417 }, + { 8421, 8432 }, + { 11503, 11505 }, + { 11647, 11647 }, + { 11744, 11775 }, + { 12330, 12333 }, + { 12441, 12442 }, + { 42607, 42607 }, + { 42612, 42621 }, + { 42654, 42655 }, + { 42736, 42737 }, + { 43010, 43010 }, + { 43014, 43014 }, + { 43019, 43019 }, + { 43045, 43046 }, { 43052, 43052 }, { 43204, 43205 }, - { 43232, 43249 }, + { 43232, 43249 }, { 43263, 43263 }, - { 43302, 43309 }, - { 43335, 43345 }, - { 43392, 43394 }, - { 43443, 43443 }, - { 43446, 43449 }, + { 43302, 43309 }, + { 43335, 43345 }, + { 43392, 43394 }, + { 43443, 43443 }, + { 43446, 43449 }, { 43452, 43453 }, - { 43493, 43493 }, - { 43561, 43566 }, - { 43569, 43570 }, - { 43573, 43574 }, - { 43587, 43587 }, - { 43596, 43596 }, - { 43644, 43644 }, - { 43696, 43696 }, - { 43698, 43700 }, - { 43703, 43704 }, - { 43710, 43711 }, - { 43713, 43713 }, - { 43756, 43757 }, - { 43766, 43766 }, - { 44005, 44005 }, - { 44008, 44008 }, - { 44013, 44013 }, - { 64286, 64286 }, - { 65024, 65039 }, - { 65056, 65071 }, -}; -static const URange32 Mn_range32[] = { - { 66045, 66045 }, - { 66272, 66272 }, - { 66422, 66426 }, - { 68097, 68099 }, - { 68101, 68102 }, - { 68108, 68111 }, - { 68152, 68154 }, - { 68159, 68159 }, - { 68325, 68326 }, + { 43493, 43493 }, + { 43561, 43566 }, + { 43569, 43570 }, + { 43573, 43574 }, + { 43587, 43587 }, + { 43596, 43596 }, + { 43644, 43644 }, + { 43696, 43696 }, + { 43698, 43700 }, + { 43703, 43704 }, + { 43710, 43711 }, + { 43713, 43713 }, + { 43756, 43757 }, + { 43766, 43766 }, + { 44005, 44005 }, + { 44008, 44008 }, + { 44013, 44013 }, + { 64286, 64286 }, + { 65024, 65039 }, + { 65056, 65071 }, +}; +static const URange32 Mn_range32[] = { + { 66045, 66045 }, + { 66272, 66272 }, + { 66422, 66426 }, + { 68097, 68099 }, + { 68101, 68102 }, + { 68108, 68111 }, + { 68152, 68154 }, + { 68159, 68159 }, + { 68325, 68326 }, { 68900, 68903 }, { 69291, 69292 }, { 69446, 69456 }, { 69506, 69509 }, - { 69633, 69633 }, - { 69688, 69702 }, + { 69633, 69633 }, + { 69688, 69702 }, { 69744, 69744 }, { 69747, 69748 }, - { 69759, 69761 }, - { 69811, 69814 }, - { 69817, 69818 }, + { 69759, 69761 }, + { 69811, 69814 }, + { 69817, 69818 }, { 69826, 69826 }, - { 69888, 69890 }, - { 69927, 69931 }, - { 69933, 69940 }, - { 70003, 70003 }, - { 70016, 70017 }, - { 70070, 70078 }, + { 69888, 69890 }, + { 69927, 69931 }, + { 69933, 69940 }, + { 70003, 70003 }, + { 70016, 70017 }, + { 70070, 70078 }, { 70089, 70092 }, { 70095, 70095 }, - { 70191, 70193 }, - { 70196, 70196 }, - { 70198, 70199 }, + { 70191, 70193 }, + { 70196, 70196 }, + { 70198, 70199 }, { 70206, 70206 }, - { 70367, 70367 }, - { 70371, 70378 }, - { 70400, 70401 }, + { 70367, 70367 }, + { 70371, 70378 }, + { 70400, 70401 }, { 70459, 70460 }, - { 70464, 70464 }, - { 70502, 70508 }, - { 70512, 70516 }, + { 70464, 70464 }, + { 70502, 70508 }, + { 70512, 70516 }, { 70712, 70719 }, { 70722, 70724 }, { 70726, 70726 }, { 70750, 70750 }, - { 70835, 70840 }, - { 70842, 70842 }, - { 70847, 70848 }, - { 70850, 70851 }, - { 71090, 71093 }, - { 71100, 71101 }, - { 71103, 71104 }, - { 71132, 71133 }, - { 71219, 71226 }, - { 71229, 71229 }, - { 71231, 71232 }, - { 71339, 71339 }, - { 71341, 71341 }, - { 71344, 71349 }, - { 71351, 71351 }, - { 71453, 71455 }, - { 71458, 71461 }, - { 71463, 71467 }, + { 70835, 70840 }, + { 70842, 70842 }, + { 70847, 70848 }, + { 70850, 70851 }, + { 71090, 71093 }, + { 71100, 71101 }, + { 71103, 71104 }, + { 71132, 71133 }, + { 71219, 71226 }, + { 71229, 71229 }, + { 71231, 71232 }, + { 71339, 71339 }, + { 71341, 71341 }, + { 71344, 71349 }, + { 71351, 71351 }, + { 71453, 71455 }, + { 71458, 71461 }, + { 71463, 71467 }, { 71727, 71735 }, { 71737, 71738 }, { 71995, 71996 }, @@ -3429,25 +3429,25 @@ static const URange32 Mn_range32[] = { { 73109, 73109 }, { 73111, 73111 }, { 73459, 73460 }, - { 92912, 92916 }, - { 92976, 92982 }, + { 92912, 92916 }, + { 92976, 92982 }, { 94031, 94031 }, - { 94095, 94098 }, + { 94095, 94098 }, { 94180, 94180 }, - { 113821, 113822 }, + { 113821, 113822 }, { 118528, 118573 }, { 118576, 118598 }, - { 119143, 119145 }, - { 119163, 119170 }, - { 119173, 119179 }, - { 119210, 119213 }, - { 119362, 119364 }, - { 121344, 121398 }, - { 121403, 121452 }, - { 121461, 121461 }, - { 121476, 121476 }, - { 121499, 121503 }, - { 121505, 121519 }, + { 119143, 119145 }, + { 119163, 119170 }, + { 119173, 119179 }, + { 119210, 119213 }, + { 119362, 119364 }, + { 121344, 121398 }, + { 121403, 121452 }, + { 121461, 121461 }, + { 121476, 121476 }, + { 121499, 121503 }, + { 121505, 121519 }, { 122880, 122886 }, { 122888, 122904 }, { 122907, 122913 }, @@ -3456,148 +3456,148 @@ static const URange32 Mn_range32[] = { { 123184, 123190 }, { 123566, 123566 }, { 123628, 123631 }, - { 125136, 125142 }, + { 125136, 125142 }, { 125252, 125258 }, - { 917760, 917999 }, -}; -static const URange16 N_range16[] = { - { 48, 57 }, - { 178, 179 }, - { 185, 185 }, - { 188, 190 }, - { 1632, 1641 }, - { 1776, 1785 }, - { 1984, 1993 }, - { 2406, 2415 }, - { 2534, 2543 }, - { 2548, 2553 }, - { 2662, 2671 }, - { 2790, 2799 }, - { 2918, 2927 }, - { 2930, 2935 }, - { 3046, 3058 }, - { 3174, 3183 }, - { 3192, 3198 }, - { 3302, 3311 }, + { 917760, 917999 }, +}; +static const URange16 N_range16[] = { + { 48, 57 }, + { 178, 179 }, + { 185, 185 }, + { 188, 190 }, + { 1632, 1641 }, + { 1776, 1785 }, + { 1984, 1993 }, + { 2406, 2415 }, + { 2534, 2543 }, + { 2548, 2553 }, + { 2662, 2671 }, + { 2790, 2799 }, + { 2918, 2927 }, + { 2930, 2935 }, + { 3046, 3058 }, + { 3174, 3183 }, + { 3192, 3198 }, + { 3302, 3311 }, { 3416, 3422 }, { 3430, 3448 }, - { 3558, 3567 }, - { 3664, 3673 }, - { 3792, 3801 }, - { 3872, 3891 }, - { 4160, 4169 }, - { 4240, 4249 }, - { 4969, 4988 }, - { 5870, 5872 }, - { 6112, 6121 }, - { 6128, 6137 }, - { 6160, 6169 }, - { 6470, 6479 }, - { 6608, 6618 }, - { 6784, 6793 }, - { 6800, 6809 }, - { 6992, 7001 }, - { 7088, 7097 }, - { 7232, 7241 }, - { 7248, 7257 }, - { 8304, 8304 }, - { 8308, 8313 }, - { 8320, 8329 }, - { 8528, 8578 }, - { 8581, 8585 }, - { 9312, 9371 }, - { 9450, 9471 }, - { 10102, 10131 }, - { 11517, 11517 }, - { 12295, 12295 }, - { 12321, 12329 }, - { 12344, 12346 }, - { 12690, 12693 }, - { 12832, 12841 }, - { 12872, 12879 }, - { 12881, 12895 }, - { 12928, 12937 }, - { 12977, 12991 }, - { 42528, 42537 }, - { 42726, 42735 }, - { 43056, 43061 }, - { 43216, 43225 }, - { 43264, 43273 }, - { 43472, 43481 }, - { 43504, 43513 }, - { 43600, 43609 }, - { 44016, 44025 }, - { 65296, 65305 }, -}; -static const URange32 N_range32[] = { - { 65799, 65843 }, - { 65856, 65912 }, - { 65930, 65931 }, - { 66273, 66299 }, - { 66336, 66339 }, - { 66369, 66369 }, - { 66378, 66378 }, - { 66513, 66517 }, - { 66720, 66729 }, - { 67672, 67679 }, - { 67705, 67711 }, - { 67751, 67759 }, - { 67835, 67839 }, - { 67862, 67867 }, - { 68028, 68029 }, - { 68032, 68047 }, - { 68050, 68095 }, + { 3558, 3567 }, + { 3664, 3673 }, + { 3792, 3801 }, + { 3872, 3891 }, + { 4160, 4169 }, + { 4240, 4249 }, + { 4969, 4988 }, + { 5870, 5872 }, + { 6112, 6121 }, + { 6128, 6137 }, + { 6160, 6169 }, + { 6470, 6479 }, + { 6608, 6618 }, + { 6784, 6793 }, + { 6800, 6809 }, + { 6992, 7001 }, + { 7088, 7097 }, + { 7232, 7241 }, + { 7248, 7257 }, + { 8304, 8304 }, + { 8308, 8313 }, + { 8320, 8329 }, + { 8528, 8578 }, + { 8581, 8585 }, + { 9312, 9371 }, + { 9450, 9471 }, + { 10102, 10131 }, + { 11517, 11517 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12346 }, + { 12690, 12693 }, + { 12832, 12841 }, + { 12872, 12879 }, + { 12881, 12895 }, + { 12928, 12937 }, + { 12977, 12991 }, + { 42528, 42537 }, + { 42726, 42735 }, + { 43056, 43061 }, + { 43216, 43225 }, + { 43264, 43273 }, + { 43472, 43481 }, + { 43504, 43513 }, + { 43600, 43609 }, + { 44016, 44025 }, + { 65296, 65305 }, +}; +static const URange32 N_range32[] = { + { 65799, 65843 }, + { 65856, 65912 }, + { 65930, 65931 }, + { 66273, 66299 }, + { 66336, 66339 }, + { 66369, 66369 }, + { 66378, 66378 }, + { 66513, 66517 }, + { 66720, 66729 }, + { 67672, 67679 }, + { 67705, 67711 }, + { 67751, 67759 }, + { 67835, 67839 }, + { 67862, 67867 }, + { 68028, 68029 }, + { 68032, 68047 }, + { 68050, 68095 }, { 68160, 68168 }, - { 68221, 68222 }, - { 68253, 68255 }, - { 68331, 68335 }, - { 68440, 68447 }, - { 68472, 68479 }, - { 68521, 68527 }, - { 68858, 68863 }, + { 68221, 68222 }, + { 68253, 68255 }, + { 68331, 68335 }, + { 68440, 68447 }, + { 68472, 68479 }, + { 68521, 68527 }, + { 68858, 68863 }, { 68912, 68921 }, - { 69216, 69246 }, + { 69216, 69246 }, { 69405, 69414 }, { 69457, 69460 }, { 69573, 69579 }, - { 69714, 69743 }, - { 69872, 69881 }, - { 69942, 69951 }, - { 70096, 70105 }, - { 70113, 70132 }, - { 70384, 70393 }, + { 69714, 69743 }, + { 69872, 69881 }, + { 69942, 69951 }, + { 70096, 70105 }, + { 70113, 70132 }, + { 70384, 70393 }, { 70736, 70745 }, - { 70864, 70873 }, - { 71248, 71257 }, - { 71360, 71369 }, - { 71472, 71483 }, - { 71904, 71922 }, + { 70864, 70873 }, + { 71248, 71257 }, + { 71360, 71369 }, + { 71472, 71483 }, + { 71904, 71922 }, { 72016, 72025 }, { 72784, 72812 }, { 73040, 73049 }, { 73120, 73129 }, { 73664, 73684 }, - { 74752, 74862 }, - { 92768, 92777 }, + { 74752, 74862 }, + { 92768, 92777 }, { 92864, 92873 }, - { 93008, 93017 }, - { 93019, 93025 }, + { 93008, 93017 }, + { 93019, 93025 }, { 93824, 93846 }, { 119520, 119539 }, { 119648, 119672 }, - { 120782, 120831 }, + { 120782, 120831 }, { 123200, 123209 }, { 123632, 123641 }, - { 125127, 125135 }, + { 125127, 125135 }, { 125264, 125273 }, { 126065, 126123 }, { 126125, 126127 }, { 126129, 126132 }, { 126209, 126253 }, { 126255, 126269 }, - { 127232, 127244 }, + { 127232, 127244 }, { 130032, 130041 }, -}; +}; static const URange16 Nd_range16[] = { { 48, 57 }, { 1632, 1641 }, @@ -3636,7 +3636,7 @@ static const URange16 Nd_range16[] = { { 43600, 43609 }, { 44016, 44025 }, { 65296, 65305 }, -}; +}; static const URange32 Nd_range32[] = { { 66720, 66729 }, { 68912, 68921 }, @@ -3663,7 +3663,7 @@ static const URange32 Nd_range32[] = { { 123632, 123641 }, { 125264, 125273 }, { 130032, 130041 }, -}; +}; static const URange16 Nl_range16[] = { { 5870, 5872 }, { 8544, 8578 }, @@ -3755,177 +3755,177 @@ static const URange32 No_range32[] = { { 126255, 126269 }, { 127232, 127244 }, }; -static const URange16 P_range16[] = { - { 33, 35 }, - { 37, 42 }, - { 44, 47 }, - { 58, 59 }, - { 63, 64 }, - { 91, 93 }, - { 95, 95 }, - { 123, 123 }, - { 125, 125 }, - { 161, 161 }, - { 167, 167 }, - { 171, 171 }, - { 182, 183 }, - { 187, 187 }, - { 191, 191 }, - { 894, 894 }, - { 903, 903 }, - { 1370, 1375 }, - { 1417, 1418 }, - { 1470, 1470 }, - { 1472, 1472 }, - { 1475, 1475 }, - { 1478, 1478 }, - { 1523, 1524 }, - { 1545, 1546 }, - { 1548, 1549 }, - { 1563, 1563 }, +static const URange16 P_range16[] = { + { 33, 35 }, + { 37, 42 }, + { 44, 47 }, + { 58, 59 }, + { 63, 64 }, + { 91, 93 }, + { 95, 95 }, + { 123, 123 }, + { 125, 125 }, + { 161, 161 }, + { 167, 167 }, + { 171, 171 }, + { 182, 183 }, + { 187, 187 }, + { 191, 191 }, + { 894, 894 }, + { 903, 903 }, + { 1370, 1375 }, + { 1417, 1418 }, + { 1470, 1470 }, + { 1472, 1472 }, + { 1475, 1475 }, + { 1478, 1478 }, + { 1523, 1524 }, + { 1545, 1546 }, + { 1548, 1549 }, + { 1563, 1563 }, { 1565, 1567 }, - { 1642, 1645 }, - { 1748, 1748 }, - { 1792, 1805 }, - { 2039, 2041 }, - { 2096, 2110 }, - { 2142, 2142 }, - { 2404, 2405 }, - { 2416, 2416 }, + { 1642, 1645 }, + { 1748, 1748 }, + { 1792, 1805 }, + { 2039, 2041 }, + { 2096, 2110 }, + { 2142, 2142 }, + { 2404, 2405 }, + { 2416, 2416 }, { 2557, 2557 }, { 2678, 2678 }, - { 2800, 2800 }, + { 2800, 2800 }, { 3191, 3191 }, { 3204, 3204 }, - { 3572, 3572 }, - { 3663, 3663 }, - { 3674, 3675 }, - { 3844, 3858 }, - { 3860, 3860 }, - { 3898, 3901 }, - { 3973, 3973 }, - { 4048, 4052 }, - { 4057, 4058 }, - { 4170, 4175 }, - { 4347, 4347 }, - { 4960, 4968 }, - { 5120, 5120 }, + { 3572, 3572 }, + { 3663, 3663 }, + { 3674, 3675 }, + { 3844, 3858 }, + { 3860, 3860 }, + { 3898, 3901 }, + { 3973, 3973 }, + { 4048, 4052 }, + { 4057, 4058 }, + { 4170, 4175 }, + { 4347, 4347 }, + { 4960, 4968 }, + { 5120, 5120 }, { 5742, 5742 }, - { 5787, 5788 }, - { 5867, 5869 }, - { 5941, 5942 }, - { 6100, 6102 }, - { 6104, 6106 }, - { 6144, 6154 }, - { 6468, 6469 }, - { 6686, 6687 }, - { 6816, 6822 }, - { 6824, 6829 }, - { 7002, 7008 }, + { 5787, 5788 }, + { 5867, 5869 }, + { 5941, 5942 }, + { 6100, 6102 }, + { 6104, 6106 }, + { 6144, 6154 }, + { 6468, 6469 }, + { 6686, 6687 }, + { 6816, 6822 }, + { 6824, 6829 }, + { 7002, 7008 }, { 7037, 7038 }, - { 7164, 7167 }, - { 7227, 7231 }, - { 7294, 7295 }, - { 7360, 7367 }, - { 7379, 7379 }, - { 8208, 8231 }, - { 8240, 8259 }, - { 8261, 8273 }, - { 8275, 8286 }, - { 8317, 8318 }, - { 8333, 8334 }, - { 8968, 8971 }, - { 9001, 9002 }, - { 10088, 10101 }, - { 10181, 10182 }, - { 10214, 10223 }, - { 10627, 10648 }, - { 10712, 10715 }, - { 10748, 10749 }, - { 11513, 11516 }, - { 11518, 11519 }, - { 11632, 11632 }, - { 11776, 11822 }, + { 7164, 7167 }, + { 7227, 7231 }, + { 7294, 7295 }, + { 7360, 7367 }, + { 7379, 7379 }, + { 8208, 8231 }, + { 8240, 8259 }, + { 8261, 8273 }, + { 8275, 8286 }, + { 8317, 8318 }, + { 8333, 8334 }, + { 8968, 8971 }, + { 9001, 9002 }, + { 10088, 10101 }, + { 10181, 10182 }, + { 10214, 10223 }, + { 10627, 10648 }, + { 10712, 10715 }, + { 10748, 10749 }, + { 11513, 11516 }, + { 11518, 11519 }, + { 11632, 11632 }, + { 11776, 11822 }, { 11824, 11855 }, { 11858, 11869 }, - { 12289, 12291 }, - { 12296, 12305 }, - { 12308, 12319 }, - { 12336, 12336 }, - { 12349, 12349 }, - { 12448, 12448 }, - { 12539, 12539 }, - { 42238, 42239 }, - { 42509, 42511 }, - { 42611, 42611 }, - { 42622, 42622 }, - { 42738, 42743 }, - { 43124, 43127 }, - { 43214, 43215 }, - { 43256, 43258 }, - { 43260, 43260 }, - { 43310, 43311 }, - { 43359, 43359 }, - { 43457, 43469 }, - { 43486, 43487 }, - { 43612, 43615 }, - { 43742, 43743 }, - { 43760, 43761 }, - { 44011, 44011 }, - { 64830, 64831 }, - { 65040, 65049 }, - { 65072, 65106 }, - { 65108, 65121 }, - { 65123, 65123 }, - { 65128, 65128 }, - { 65130, 65131 }, - { 65281, 65283 }, - { 65285, 65290 }, - { 65292, 65295 }, - { 65306, 65307 }, - { 65311, 65312 }, - { 65339, 65341 }, - { 65343, 65343 }, - { 65371, 65371 }, - { 65373, 65373 }, - { 65375, 65381 }, -}; -static const URange32 P_range32[] = { - { 65792, 65794 }, - { 66463, 66463 }, - { 66512, 66512 }, - { 66927, 66927 }, - { 67671, 67671 }, - { 67871, 67871 }, - { 67903, 67903 }, - { 68176, 68184 }, - { 68223, 68223 }, - { 68336, 68342 }, - { 68409, 68415 }, - { 68505, 68508 }, + { 12289, 12291 }, + { 12296, 12305 }, + { 12308, 12319 }, + { 12336, 12336 }, + { 12349, 12349 }, + { 12448, 12448 }, + { 12539, 12539 }, + { 42238, 42239 }, + { 42509, 42511 }, + { 42611, 42611 }, + { 42622, 42622 }, + { 42738, 42743 }, + { 43124, 43127 }, + { 43214, 43215 }, + { 43256, 43258 }, + { 43260, 43260 }, + { 43310, 43311 }, + { 43359, 43359 }, + { 43457, 43469 }, + { 43486, 43487 }, + { 43612, 43615 }, + { 43742, 43743 }, + { 43760, 43761 }, + { 44011, 44011 }, + { 64830, 64831 }, + { 65040, 65049 }, + { 65072, 65106 }, + { 65108, 65121 }, + { 65123, 65123 }, + { 65128, 65128 }, + { 65130, 65131 }, + { 65281, 65283 }, + { 65285, 65290 }, + { 65292, 65295 }, + { 65306, 65307 }, + { 65311, 65312 }, + { 65339, 65341 }, + { 65343, 65343 }, + { 65371, 65371 }, + { 65373, 65373 }, + { 65375, 65381 }, +}; +static const URange32 P_range32[] = { + { 65792, 65794 }, + { 66463, 66463 }, + { 66512, 66512 }, + { 66927, 66927 }, + { 67671, 67671 }, + { 67871, 67871 }, + { 67903, 67903 }, + { 68176, 68184 }, + { 68223, 68223 }, + { 68336, 68342 }, + { 68409, 68415 }, + { 68505, 68508 }, { 69293, 69293 }, { 69461, 69465 }, { 69510, 69513 }, - { 69703, 69709 }, - { 69819, 69820 }, - { 69822, 69825 }, - { 69952, 69955 }, - { 70004, 70005 }, + { 69703, 69709 }, + { 69819, 69820 }, + { 69822, 69825 }, + { 69952, 69955 }, + { 70004, 70005 }, { 70085, 70088 }, - { 70093, 70093 }, - { 70107, 70107 }, - { 70109, 70111 }, - { 70200, 70205 }, - { 70313, 70313 }, + { 70093, 70093 }, + { 70107, 70107 }, + { 70109, 70111 }, + { 70200, 70205 }, + { 70313, 70313 }, { 70731, 70735 }, { 70746, 70747 }, { 70749, 70749 }, - { 70854, 70854 }, - { 71105, 71127 }, - { 71233, 71235 }, + { 70854, 70854 }, + { 71105, 71127 }, + { 71233, 71235 }, { 71264, 71276 }, { 71353, 71353 }, - { 71484, 71486 }, + { 71484, 71486 }, { 71739, 71739 }, { 72004, 72006 }, { 72162, 72162 }, @@ -3936,18 +3936,18 @@ static const URange32 P_range32[] = { { 72816, 72817 }, { 73463, 73464 }, { 73727, 73727 }, - { 74864, 74868 }, + { 74864, 74868 }, { 77809, 77810 }, - { 92782, 92783 }, - { 92917, 92917 }, - { 92983, 92987 }, - { 92996, 92996 }, + { 92782, 92783 }, + { 92917, 92917 }, + { 92983, 92987 }, + { 92996, 92996 }, { 93847, 93850 }, { 94178, 94178 }, - { 113823, 113823 }, - { 121479, 121483 }, + { 113823, 113823 }, + { 121479, 121483 }, { 125278, 125279 }, -}; +}; static const URange16 Pc_range16[] = { { 95, 95 }, { 8255, 8256 }, @@ -4352,229 +4352,229 @@ static const URange16 Ps_range16[] = { { 65375, 65375 }, { 65378, 65378 }, }; -static const URange16 S_range16[] = { - { 36, 36 }, - { 43, 43 }, - { 60, 62 }, - { 94, 94 }, - { 96, 96 }, - { 124, 124 }, - { 126, 126 }, - { 162, 166 }, - { 168, 169 }, - { 172, 172 }, - { 174, 177 }, - { 180, 180 }, - { 184, 184 }, - { 215, 215 }, - { 247, 247 }, - { 706, 709 }, - { 722, 735 }, - { 741, 747 }, - { 749, 749 }, - { 751, 767 }, - { 885, 885 }, - { 900, 901 }, - { 1014, 1014 }, - { 1154, 1154 }, - { 1421, 1423 }, - { 1542, 1544 }, - { 1547, 1547 }, - { 1550, 1551 }, - { 1758, 1758 }, - { 1769, 1769 }, - { 1789, 1790 }, - { 2038, 2038 }, +static const URange16 S_range16[] = { + { 36, 36 }, + { 43, 43 }, + { 60, 62 }, + { 94, 94 }, + { 96, 96 }, + { 124, 124 }, + { 126, 126 }, + { 162, 166 }, + { 168, 169 }, + { 172, 172 }, + { 174, 177 }, + { 180, 180 }, + { 184, 184 }, + { 215, 215 }, + { 247, 247 }, + { 706, 709 }, + { 722, 735 }, + { 741, 747 }, + { 749, 749 }, + { 751, 767 }, + { 885, 885 }, + { 900, 901 }, + { 1014, 1014 }, + { 1154, 1154 }, + { 1421, 1423 }, + { 1542, 1544 }, + { 1547, 1547 }, + { 1550, 1551 }, + { 1758, 1758 }, + { 1769, 1769 }, + { 1789, 1790 }, + { 2038, 2038 }, { 2046, 2047 }, { 2184, 2184 }, - { 2546, 2547 }, - { 2554, 2555 }, - { 2801, 2801 }, - { 2928, 2928 }, - { 3059, 3066 }, - { 3199, 3199 }, + { 2546, 2547 }, + { 2554, 2555 }, + { 2801, 2801 }, + { 2928, 2928 }, + { 3059, 3066 }, + { 3199, 3199 }, { 3407, 3407 }, - { 3449, 3449 }, - { 3647, 3647 }, - { 3841, 3843 }, - { 3859, 3859 }, - { 3861, 3863 }, - { 3866, 3871 }, - { 3892, 3892 }, - { 3894, 3894 }, - { 3896, 3896 }, - { 4030, 4037 }, - { 4039, 4044 }, - { 4046, 4047 }, - { 4053, 4056 }, - { 4254, 4255 }, - { 5008, 5017 }, + { 3449, 3449 }, + { 3647, 3647 }, + { 3841, 3843 }, + { 3859, 3859 }, + { 3861, 3863 }, + { 3866, 3871 }, + { 3892, 3892 }, + { 3894, 3894 }, + { 3896, 3896 }, + { 4030, 4037 }, + { 4039, 4044 }, + { 4046, 4047 }, + { 4053, 4056 }, + { 4254, 4255 }, + { 5008, 5017 }, { 5741, 5741 }, - { 6107, 6107 }, - { 6464, 6464 }, - { 6622, 6655 }, - { 7009, 7018 }, - { 7028, 7036 }, - { 8125, 8125 }, - { 8127, 8129 }, - { 8141, 8143 }, - { 8157, 8159 }, - { 8173, 8175 }, - { 8189, 8190 }, - { 8260, 8260 }, - { 8274, 8274 }, - { 8314, 8316 }, - { 8330, 8332 }, + { 6107, 6107 }, + { 6464, 6464 }, + { 6622, 6655 }, + { 7009, 7018 }, + { 7028, 7036 }, + { 8125, 8125 }, + { 8127, 8129 }, + { 8141, 8143 }, + { 8157, 8159 }, + { 8173, 8175 }, + { 8189, 8190 }, + { 8260, 8260 }, + { 8274, 8274 }, + { 8314, 8316 }, + { 8330, 8332 }, { 8352, 8384 }, - { 8448, 8449 }, - { 8451, 8454 }, - { 8456, 8457 }, - { 8468, 8468 }, - { 8470, 8472 }, - { 8478, 8483 }, - { 8485, 8485 }, - { 8487, 8487 }, - { 8489, 8489 }, - { 8494, 8494 }, - { 8506, 8507 }, - { 8512, 8516 }, - { 8522, 8525 }, - { 8527, 8527 }, - { 8586, 8587 }, - { 8592, 8967 }, - { 8972, 9000 }, + { 8448, 8449 }, + { 8451, 8454 }, + { 8456, 8457 }, + { 8468, 8468 }, + { 8470, 8472 }, + { 8478, 8483 }, + { 8485, 8485 }, + { 8487, 8487 }, + { 8489, 8489 }, + { 8494, 8494 }, + { 8506, 8507 }, + { 8512, 8516 }, + { 8522, 8525 }, + { 8527, 8527 }, + { 8586, 8587 }, + { 8592, 8967 }, + { 8972, 9000 }, { 9003, 9254 }, - { 9280, 9290 }, - { 9372, 9449 }, - { 9472, 10087 }, - { 10132, 10180 }, - { 10183, 10213 }, - { 10224, 10626 }, - { 10649, 10711 }, - { 10716, 10747 }, - { 10750, 11123 }, - { 11126, 11157 }, + { 9280, 9290 }, + { 9372, 9449 }, + { 9472, 10087 }, + { 10132, 10180 }, + { 10183, 10213 }, + { 10224, 10626 }, + { 10649, 10711 }, + { 10716, 10747 }, + { 10750, 11123 }, + { 11126, 11157 }, { 11159, 11263 }, - { 11493, 11498 }, + { 11493, 11498 }, { 11856, 11857 }, - { 11904, 11929 }, - { 11931, 12019 }, - { 12032, 12245 }, - { 12272, 12283 }, - { 12292, 12292 }, - { 12306, 12307 }, - { 12320, 12320 }, - { 12342, 12343 }, - { 12350, 12351 }, - { 12443, 12444 }, - { 12688, 12689 }, - { 12694, 12703 }, - { 12736, 12771 }, - { 12800, 12830 }, - { 12842, 12871 }, - { 12880, 12880 }, - { 12896, 12927 }, - { 12938, 12976 }, + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12272, 12283 }, + { 12292, 12292 }, + { 12306, 12307 }, + { 12320, 12320 }, + { 12342, 12343 }, + { 12350, 12351 }, + { 12443, 12444 }, + { 12688, 12689 }, + { 12694, 12703 }, + { 12736, 12771 }, + { 12800, 12830 }, + { 12842, 12871 }, + { 12880, 12880 }, + { 12896, 12927 }, + { 12938, 12976 }, { 12992, 13311 }, - { 19904, 19967 }, - { 42128, 42182 }, - { 42752, 42774 }, - { 42784, 42785 }, - { 42889, 42890 }, - { 43048, 43051 }, - { 43062, 43065 }, - { 43639, 43641 }, - { 43867, 43867 }, + { 19904, 19967 }, + { 42128, 42182 }, + { 42752, 42774 }, + { 42784, 42785 }, + { 42889, 42890 }, + { 43048, 43051 }, + { 43062, 43065 }, + { 43639, 43641 }, + { 43867, 43867 }, { 43882, 43883 }, - { 64297, 64297 }, + { 64297, 64297 }, { 64434, 64450 }, { 64832, 64847 }, { 64975, 64975 }, { 65020, 65023 }, - { 65122, 65122 }, - { 65124, 65126 }, - { 65129, 65129 }, - { 65284, 65284 }, - { 65291, 65291 }, - { 65308, 65310 }, - { 65342, 65342 }, - { 65344, 65344 }, - { 65372, 65372 }, - { 65374, 65374 }, - { 65504, 65510 }, - { 65512, 65518 }, - { 65532, 65533 }, -}; -static const URange32 S_range32[] = { - { 65847, 65855 }, - { 65913, 65929 }, + { 65122, 65122 }, + { 65124, 65126 }, + { 65129, 65129 }, + { 65284, 65284 }, + { 65291, 65291 }, + { 65308, 65310 }, + { 65342, 65342 }, + { 65344, 65344 }, + { 65372, 65372 }, + { 65374, 65374 }, + { 65504, 65510 }, + { 65512, 65518 }, + { 65532, 65533 }, +}; +static const URange32 S_range32[] = { + { 65847, 65855 }, + { 65913, 65929 }, { 65932, 65934 }, { 65936, 65948 }, - { 65952, 65952 }, - { 66000, 66044 }, - { 67703, 67704 }, - { 68296, 68296 }, - { 71487, 71487 }, + { 65952, 65952 }, + { 66000, 66044 }, + { 67703, 67704 }, + { 68296, 68296 }, + { 71487, 71487 }, { 73685, 73713 }, - { 92988, 92991 }, - { 92997, 92997 }, - { 113820, 113820 }, + { 92988, 92991 }, + { 92997, 92997 }, + { 113820, 113820 }, { 118608, 118723 }, - { 118784, 119029 }, - { 119040, 119078 }, - { 119081, 119140 }, - { 119146, 119148 }, - { 119171, 119172 }, - { 119180, 119209 }, + { 118784, 119029 }, + { 119040, 119078 }, + { 119081, 119140 }, + { 119146, 119148 }, + { 119171, 119172 }, + { 119180, 119209 }, { 119214, 119274 }, - { 119296, 119361 }, - { 119365, 119365 }, - { 119552, 119638 }, - { 120513, 120513 }, - { 120539, 120539 }, - { 120571, 120571 }, - { 120597, 120597 }, - { 120629, 120629 }, - { 120655, 120655 }, - { 120687, 120687 }, - { 120713, 120713 }, - { 120745, 120745 }, - { 120771, 120771 }, - { 120832, 121343 }, - { 121399, 121402 }, - { 121453, 121460 }, - { 121462, 121475 }, - { 121477, 121478 }, + { 119296, 119361 }, + { 119365, 119365 }, + { 119552, 119638 }, + { 120513, 120513 }, + { 120539, 120539 }, + { 120571, 120571 }, + { 120597, 120597 }, + { 120629, 120629 }, + { 120655, 120655 }, + { 120687, 120687 }, + { 120713, 120713 }, + { 120745, 120745 }, + { 120771, 120771 }, + { 120832, 121343 }, + { 121399, 121402 }, + { 121453, 121460 }, + { 121462, 121475 }, + { 121477, 121478 }, { 123215, 123215 }, { 123647, 123647 }, { 126124, 126124 }, { 126128, 126128 }, { 126254, 126254 }, - { 126704, 126705 }, - { 126976, 127019 }, - { 127024, 127123 }, - { 127136, 127150 }, - { 127153, 127167 }, - { 127169, 127183 }, - { 127185, 127221 }, + { 126704, 126705 }, + { 126976, 127019 }, + { 127024, 127123 }, + { 127136, 127150 }, + { 127153, 127167 }, + { 127169, 127183 }, + { 127185, 127221 }, { 127245, 127405 }, - { 127462, 127490 }, + { 127462, 127490 }, { 127504, 127547 }, - { 127552, 127560 }, - { 127568, 127569 }, + { 127552, 127560 }, + { 127568, 127569 }, { 127584, 127589 }, { 127744, 128727 }, { 128733, 128748 }, { 128752, 128764 }, - { 128768, 128883 }, + { 128768, 128883 }, { 128896, 128984 }, { 128992, 129003 }, { 129008, 129008 }, - { 129024, 129035 }, - { 129040, 129095 }, - { 129104, 129113 }, - { 129120, 129159 }, - { 129168, 129197 }, + { 129024, 129035 }, + { 129040, 129095 }, + { 129104, 129113 }, + { 129120, 129159 }, + { 129168, 129197 }, { 129200, 129201 }, { 129280, 129619 }, { 129632, 129645 }, @@ -4589,7 +4589,7 @@ static const URange32 S_range32[] = { { 129776, 129782 }, { 129792, 129938 }, { 129940, 129994 }, -}; +}; static const URange16 Sc_range16[] = { { 36, 36 }, { 162, 165 }, @@ -4718,181 +4718,181 @@ static const URange32 Sm_range32[] = { { 120771, 120771 }, { 126704, 126705 }, }; -static const URange16 So_range16[] = { - { 166, 166 }, - { 169, 169 }, - { 174, 174 }, - { 176, 176 }, - { 1154, 1154 }, - { 1421, 1422 }, - { 1550, 1551 }, - { 1758, 1758 }, - { 1769, 1769 }, - { 1789, 1790 }, - { 2038, 2038 }, - { 2554, 2554 }, - { 2928, 2928 }, - { 3059, 3064 }, - { 3066, 3066 }, - { 3199, 3199 }, +static const URange16 So_range16[] = { + { 166, 166 }, + { 169, 169 }, + { 174, 174 }, + { 176, 176 }, + { 1154, 1154 }, + { 1421, 1422 }, + { 1550, 1551 }, + { 1758, 1758 }, + { 1769, 1769 }, + { 1789, 1790 }, + { 2038, 2038 }, + { 2554, 2554 }, + { 2928, 2928 }, + { 3059, 3064 }, + { 3066, 3066 }, + { 3199, 3199 }, { 3407, 3407 }, - { 3449, 3449 }, - { 3841, 3843 }, - { 3859, 3859 }, - { 3861, 3863 }, - { 3866, 3871 }, - { 3892, 3892 }, - { 3894, 3894 }, - { 3896, 3896 }, - { 4030, 4037 }, - { 4039, 4044 }, - { 4046, 4047 }, - { 4053, 4056 }, - { 4254, 4255 }, - { 5008, 5017 }, + { 3449, 3449 }, + { 3841, 3843 }, + { 3859, 3859 }, + { 3861, 3863 }, + { 3866, 3871 }, + { 3892, 3892 }, + { 3894, 3894 }, + { 3896, 3896 }, + { 4030, 4037 }, + { 4039, 4044 }, + { 4046, 4047 }, + { 4053, 4056 }, + { 4254, 4255 }, + { 5008, 5017 }, { 5741, 5741 }, - { 6464, 6464 }, - { 6622, 6655 }, - { 7009, 7018 }, - { 7028, 7036 }, - { 8448, 8449 }, - { 8451, 8454 }, - { 8456, 8457 }, - { 8468, 8468 }, - { 8470, 8471 }, - { 8478, 8483 }, - { 8485, 8485 }, - { 8487, 8487 }, - { 8489, 8489 }, - { 8494, 8494 }, - { 8506, 8507 }, - { 8522, 8522 }, - { 8524, 8525 }, - { 8527, 8527 }, - { 8586, 8587 }, - { 8597, 8601 }, - { 8604, 8607 }, - { 8609, 8610 }, - { 8612, 8613 }, - { 8615, 8621 }, - { 8623, 8653 }, - { 8656, 8657 }, - { 8659, 8659 }, - { 8661, 8691 }, - { 8960, 8967 }, - { 8972, 8991 }, - { 8994, 9000 }, - { 9003, 9083 }, - { 9085, 9114 }, - { 9140, 9179 }, + { 6464, 6464 }, + { 6622, 6655 }, + { 7009, 7018 }, + { 7028, 7036 }, + { 8448, 8449 }, + { 8451, 8454 }, + { 8456, 8457 }, + { 8468, 8468 }, + { 8470, 8471 }, + { 8478, 8483 }, + { 8485, 8485 }, + { 8487, 8487 }, + { 8489, 8489 }, + { 8494, 8494 }, + { 8506, 8507 }, + { 8522, 8522 }, + { 8524, 8525 }, + { 8527, 8527 }, + { 8586, 8587 }, + { 8597, 8601 }, + { 8604, 8607 }, + { 8609, 8610 }, + { 8612, 8613 }, + { 8615, 8621 }, + { 8623, 8653 }, + { 8656, 8657 }, + { 8659, 8659 }, + { 8661, 8691 }, + { 8960, 8967 }, + { 8972, 8991 }, + { 8994, 9000 }, + { 9003, 9083 }, + { 9085, 9114 }, + { 9140, 9179 }, { 9186, 9254 }, - { 9280, 9290 }, - { 9372, 9449 }, - { 9472, 9654 }, - { 9656, 9664 }, - { 9666, 9719 }, - { 9728, 9838 }, - { 9840, 10087 }, - { 10132, 10175 }, - { 10240, 10495 }, - { 11008, 11055 }, - { 11077, 11078 }, - { 11085, 11123 }, - { 11126, 11157 }, + { 9280, 9290 }, + { 9372, 9449 }, + { 9472, 9654 }, + { 9656, 9664 }, + { 9666, 9719 }, + { 9728, 9838 }, + { 9840, 10087 }, + { 10132, 10175 }, + { 10240, 10495 }, + { 11008, 11055 }, + { 11077, 11078 }, + { 11085, 11123 }, + { 11126, 11157 }, { 11159, 11263 }, - { 11493, 11498 }, + { 11493, 11498 }, { 11856, 11857 }, - { 11904, 11929 }, - { 11931, 12019 }, - { 12032, 12245 }, - { 12272, 12283 }, - { 12292, 12292 }, - { 12306, 12307 }, - { 12320, 12320 }, - { 12342, 12343 }, - { 12350, 12351 }, - { 12688, 12689 }, - { 12694, 12703 }, - { 12736, 12771 }, - { 12800, 12830 }, - { 12842, 12871 }, - { 12880, 12880 }, - { 12896, 12927 }, - { 12938, 12976 }, + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12272, 12283 }, + { 12292, 12292 }, + { 12306, 12307 }, + { 12320, 12320 }, + { 12342, 12343 }, + { 12350, 12351 }, + { 12688, 12689 }, + { 12694, 12703 }, + { 12736, 12771 }, + { 12800, 12830 }, + { 12842, 12871 }, + { 12880, 12880 }, + { 12896, 12927 }, + { 12938, 12976 }, { 12992, 13311 }, - { 19904, 19967 }, - { 42128, 42182 }, - { 43048, 43051 }, - { 43062, 43063 }, - { 43065, 43065 }, - { 43639, 43641 }, + { 19904, 19967 }, + { 42128, 42182 }, + { 43048, 43051 }, + { 43062, 43063 }, + { 43065, 43065 }, + { 43639, 43641 }, { 64832, 64847 }, { 64975, 64975 }, { 65021, 65023 }, - { 65508, 65508 }, - { 65512, 65512 }, - { 65517, 65518 }, - { 65532, 65533 }, -}; -static const URange32 So_range32[] = { - { 65847, 65855 }, - { 65913, 65929 }, + { 65508, 65508 }, + { 65512, 65512 }, + { 65517, 65518 }, + { 65532, 65533 }, +}; +static const URange32 So_range32[] = { + { 65847, 65855 }, + { 65913, 65929 }, { 65932, 65934 }, { 65936, 65948 }, - { 65952, 65952 }, - { 66000, 66044 }, - { 67703, 67704 }, - { 68296, 68296 }, - { 71487, 71487 }, + { 65952, 65952 }, + { 66000, 66044 }, + { 67703, 67704 }, + { 68296, 68296 }, + { 71487, 71487 }, { 73685, 73692 }, { 73697, 73713 }, - { 92988, 92991 }, - { 92997, 92997 }, - { 113820, 113820 }, + { 92988, 92991 }, + { 92997, 92997 }, + { 113820, 113820 }, { 118608, 118723 }, - { 118784, 119029 }, - { 119040, 119078 }, - { 119081, 119140 }, - { 119146, 119148 }, - { 119171, 119172 }, - { 119180, 119209 }, + { 118784, 119029 }, + { 119040, 119078 }, + { 119081, 119140 }, + { 119146, 119148 }, + { 119171, 119172 }, + { 119180, 119209 }, { 119214, 119274 }, - { 119296, 119361 }, - { 119365, 119365 }, - { 119552, 119638 }, - { 120832, 121343 }, - { 121399, 121402 }, - { 121453, 121460 }, - { 121462, 121475 }, - { 121477, 121478 }, + { 119296, 119361 }, + { 119365, 119365 }, + { 119552, 119638 }, + { 120832, 121343 }, + { 121399, 121402 }, + { 121453, 121460 }, + { 121462, 121475 }, + { 121477, 121478 }, { 123215, 123215 }, { 126124, 126124 }, { 126254, 126254 }, - { 126976, 127019 }, - { 127024, 127123 }, - { 127136, 127150 }, - { 127153, 127167 }, - { 127169, 127183 }, - { 127185, 127221 }, + { 126976, 127019 }, + { 127024, 127123 }, + { 127136, 127150 }, + { 127153, 127167 }, + { 127169, 127183 }, + { 127185, 127221 }, { 127245, 127405 }, - { 127462, 127490 }, + { 127462, 127490 }, { 127504, 127547 }, - { 127552, 127560 }, - { 127568, 127569 }, + { 127552, 127560 }, + { 127568, 127569 }, { 127584, 127589 }, - { 127744, 127994 }, + { 127744, 127994 }, { 128000, 128727 }, { 128733, 128748 }, { 128752, 128764 }, - { 128768, 128883 }, + { 128768, 128883 }, { 128896, 128984 }, { 128992, 129003 }, { 129008, 129008 }, - { 129024, 129035 }, - { 129040, 129095 }, - { 129104, 129113 }, - { 129120, 129159 }, - { 129168, 129197 }, + { 129024, 129035 }, + { 129040, 129095 }, + { 129104, 129113 }, + { 129120, 129159 }, + { 129168, 129197 }, { 129200, 129201 }, { 129280, 129619 }, { 129632, 129645 }, @@ -4907,23 +4907,23 @@ static const URange32 So_range32[] = { { 129776, 129782 }, { 129792, 129938 }, { 129940, 129994 }, -}; -static const URange16 Z_range16[] = { - { 32, 32 }, - { 160, 160 }, - { 5760, 5760 }, - { 8192, 8202 }, - { 8232, 8233 }, - { 8239, 8239 }, - { 8287, 8287 }, - { 12288, 12288 }, -}; -static const URange16 Zl_range16[] = { - { 8232, 8232 }, -}; +}; +static const URange16 Z_range16[] = { + { 32, 32 }, + { 160, 160 }, + { 5760, 5760 }, + { 8192, 8202 }, + { 8232, 8233 }, + { 8239, 8239 }, + { 8287, 8287 }, + { 12288, 12288 }, +}; +static const URange16 Zl_range16[] = { + { 8232, 8232 }, +}; static const URange16 Zp_range16[] = { { 8233, 8233 }, -}; +}; static const URange16 Zs_range16[] = { { 32, 32 }, { 160, 160 }, @@ -4932,12 +4932,12 @@ static const URange16 Zs_range16[] = { { 8239, 8239 }, { 8287, 8287 }, { 12288, 12288 }, -}; +}; static const URange32 Adlam_range32[] = { { 125184, 125259 }, { 125264, 125273 }, { 125278, 125279 }, -}; +}; static const URange32 Ahom_range32[] = { { 71424, 71450 }, { 71453, 71467 }, @@ -5129,7 +5129,7 @@ static const URange16 Common_range16[] = { { 1563, 1563 }, { 1567, 1567 }, { 1600, 1600 }, - { 1757, 1757 }, + { 1757, 1757 }, { 2274, 2274 }, { 2404, 2405 }, { 3647, 3647 }, @@ -5192,7 +5192,7 @@ static const URange16 Common_range16[] = { { 65072, 65106 }, { 65108, 65126 }, { 65128, 65131 }, - { 65279, 65279 }, + { 65279, 65279 }, { 65281, 65312 }, { 65339, 65344 }, { 65371, 65381 }, @@ -5201,7 +5201,7 @@ static const URange16 Common_range16[] = { { 65504, 65510 }, { 65512, 65518 }, { 65529, 65533 }, -}; +}; static const URange32 Common_range32[] = { { 65792, 65794 }, { 65799, 65843 }, @@ -5209,7 +5209,7 @@ static const URange32 Common_range32[] = { { 65936, 65948 }, { 66000, 66044 }, { 66273, 66299 }, - { 113824, 113827 }, + { 113824, 113827 }, { 118608, 118723 }, { 118784, 119029 }, { 119040, 119078 }, @@ -5284,20 +5284,20 @@ static const URange32 Common_range32[] = { { 129792, 129938 }, { 129940, 129994 }, { 130032, 130041 }, - { 917505, 917505 }, - { 917536, 917631 }, -}; + { 917505, 917505 }, + { 917536, 917631 }, +}; static const URange16 Coptic_range16[] = { { 994, 1007 }, { 11392, 11507 }, { 11513, 11519 }, -}; +}; static const URange32 Cuneiform_range32[] = { { 73728, 74649 }, { 74752, 74862 }, { 74864, 74868 }, { 74880, 75075 }, -}; +}; static const URange32 Cypriot_range32[] = { { 67584, 67589 }, { 67592, 67592 }, @@ -5305,29 +5305,29 @@ static const URange32 Cypriot_range32[] = { { 67639, 67640 }, { 67644, 67644 }, { 67647, 67647 }, -}; +}; static const URange32 Cypro_Minoan_range32[] = { { 77712, 77810 }, }; -static const URange16 Cyrillic_range16[] = { - { 1024, 1156 }, - { 1159, 1327 }, +static const URange16 Cyrillic_range16[] = { + { 1024, 1156 }, + { 1159, 1327 }, { 7296, 7304 }, - { 7467, 7467 }, - { 7544, 7544 }, - { 11744, 11775 }, - { 42560, 42655 }, - { 65070, 65071 }, -}; + { 7467, 7467 }, + { 7544, 7544 }, + { 11744, 11775 }, + { 42560, 42655 }, + { 65070, 65071 }, +}; static const URange32 Deseret_range32[] = { { 66560, 66639 }, -}; +}; static const URange16 Devanagari_range16[] = { { 2304, 2384 }, { 2389, 2403 }, { 2406, 2431 }, { 43232, 43263 }, -}; +}; static const URange32 Dives_Akuru_range32[] = { { 71936, 71942 }, { 71945, 71945 }, @@ -5358,40 +5358,40 @@ static const URange32 Elbasan_range32[] = { static const URange32 Elymaic_range32[] = { { 69600, 69622 }, }; -static const URange16 Ethiopic_range16[] = { - { 4608, 4680 }, - { 4682, 4685 }, - { 4688, 4694 }, - { 4696, 4696 }, - { 4698, 4701 }, - { 4704, 4744 }, - { 4746, 4749 }, - { 4752, 4784 }, - { 4786, 4789 }, - { 4792, 4798 }, - { 4800, 4800 }, - { 4802, 4805 }, - { 4808, 4822 }, - { 4824, 4880 }, - { 4882, 4885 }, - { 4888, 4954 }, - { 4957, 4988 }, - { 4992, 5017 }, - { 11648, 11670 }, - { 11680, 11686 }, - { 11688, 11694 }, - { 11696, 11702 }, - { 11704, 11710 }, - { 11712, 11718 }, - { 11720, 11726 }, - { 11728, 11734 }, - { 11736, 11742 }, - { 43777, 43782 }, - { 43785, 43790 }, - { 43793, 43798 }, - { 43808, 43814 }, - { 43816, 43822 }, -}; +static const URange16 Ethiopic_range16[] = { + { 4608, 4680 }, + { 4682, 4685 }, + { 4688, 4694 }, + { 4696, 4696 }, + { 4698, 4701 }, + { 4704, 4744 }, + { 4746, 4749 }, + { 4752, 4784 }, + { 4786, 4789 }, + { 4792, 4798 }, + { 4800, 4800 }, + { 4802, 4805 }, + { 4808, 4822 }, + { 4824, 4880 }, + { 4882, 4885 }, + { 4888, 4954 }, + { 4957, 4988 }, + { 4992, 5017 }, + { 11648, 11670 }, + { 11680, 11686 }, + { 11688, 11694 }, + { 11696, 11702 }, + { 11704, 11710 }, + { 11712, 11718 }, + { 11720, 11726 }, + { 11728, 11734 }, + { 11736, 11742 }, + { 43777, 43782 }, + { 43785, 43790 }, + { 43793, 43798 }, + { 43808, 43814 }, + { 43816, 43822 }, +}; static const URange32 Ethiopic_range32[] = { { 124896, 124902 }, { 124904, 124907 }, @@ -5409,20 +5409,20 @@ static const URange16 Georgian_range16[] = { { 11520, 11557 }, { 11559, 11559 }, { 11565, 11565 }, -}; +}; static const URange16 Glagolitic_range16[] = { { 11264, 11359 }, -}; +}; static const URange32 Glagolitic_range32[] = { { 122880, 122886 }, { 122888, 122904 }, { 122907, 122913 }, { 122915, 122916 }, { 122918, 122922 }, -}; +}; static const URange32 Gothic_range32[] = { { 66352, 66378 }, -}; +}; static const URange32 Grantha_range32[] = { { 70400, 70403 }, { 70405, 70412 }, @@ -5522,30 +5522,30 @@ static const URange16 Gurmukhi_range16[] = { { 2654, 2654 }, { 2662, 2678 }, }; -static const URange16 Han_range16[] = { - { 11904, 11929 }, - { 11931, 12019 }, - { 12032, 12245 }, - { 12293, 12293 }, - { 12295, 12295 }, - { 12321, 12329 }, - { 12344, 12347 }, +static const URange16 Han_range16[] = { + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12293, 12293 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12347 }, { 13312, 19903 }, { 19968, 40959 }, - { 63744, 64109 }, - { 64112, 64217 }, -}; -static const URange32 Han_range32[] = { + { 63744, 64109 }, + { 64112, 64217 }, +}; +static const URange32 Han_range32[] = { { 94178, 94179 }, { 94192, 94193 }, { 131072, 173791 }, { 173824, 177976 }, - { 177984, 178205 }, - { 178208, 183969 }, + { 177984, 178205 }, + { 178208, 183969 }, { 183984, 191456 }, - { 194560, 195101 }, + { 194560, 195101 }, { 196608, 201546 }, -}; +}; static const URange16 Hangul_range16[] = { { 4352, 4607 }, { 12334, 12335 }, @@ -5561,19 +5561,19 @@ static const URange16 Hangul_range16[] = { { 65482, 65487 }, { 65490, 65495 }, { 65498, 65500 }, -}; +}; static const URange32 Hanifi_Rohingya_range32[] = { { 68864, 68903 }, { 68912, 68921 }, -}; +}; static const URange16 Hanunoo_range16[] = { { 5920, 5940 }, -}; +}; static const URange32 Hatran_range32[] = { { 67808, 67826 }, { 67828, 67829 }, { 67835, 67839 }, -}; +}; static const URange16 Hebrew_range16[] = { { 1425, 1479 }, { 1488, 1514 }, @@ -5584,20 +5584,20 @@ static const URange16 Hebrew_range16[] = { { 64320, 64321 }, { 64323, 64324 }, { 64326, 64335 }, -}; -static const URange16 Hiragana_range16[] = { - { 12353, 12438 }, - { 12445, 12447 }, -}; -static const URange32 Hiragana_range32[] = { +}; +static const URange16 Hiragana_range16[] = { + { 12353, 12438 }, + { 12445, 12447 }, +}; +static const URange32 Hiragana_range32[] = { { 110593, 110879 }, { 110928, 110930 }, - { 127488, 127488 }, -}; + { 127488, 127488 }, +}; static const URange32 Imperial_Aramaic_range32[] = { { 67648, 67669 }, { 67671, 67679 }, -}; +}; static const URange16 Inherited_range16[] = { { 768, 879 }, { 1157, 1158 }, @@ -5618,7 +5618,7 @@ static const URange16 Inherited_range16[] = { { 12441, 12442 }, { 65024, 65039 }, { 65056, 65069 }, -}; +}; static const URange32 Inherited_range32[] = { { 66045, 66045 }, { 66272, 66272 }, @@ -5630,11 +5630,11 @@ static const URange32 Inherited_range32[] = { { 119173, 119179 }, { 119210, 119213 }, { 917760, 917999 }, -}; +}; static const URange32 Inscriptional_Pahlavi_range32[] = { { 68448, 68466 }, { 68472, 68479 }, -}; +}; static const URange32 Inscriptional_Parthian_range32[] = { { 68416, 68437 }, { 68440, 68447 }, @@ -5680,38 +5680,38 @@ static const URange32 Katakana_range32[] = { { 110880, 110882 }, { 110948, 110951 }, }; -static const URange16 Kayah_Li_range16[] = { - { 43264, 43309 }, - { 43311, 43311 }, -}; -static const URange32 Kharoshthi_range32[] = { - { 68096, 68099 }, - { 68101, 68102 }, - { 68108, 68115 }, - { 68117, 68119 }, +static const URange16 Kayah_Li_range16[] = { + { 43264, 43309 }, + { 43311, 43311 }, +}; +static const URange32 Kharoshthi_range32[] = { + { 68096, 68099 }, + { 68101, 68102 }, + { 68108, 68115 }, + { 68117, 68119 }, { 68121, 68149 }, - { 68152, 68154 }, + { 68152, 68154 }, { 68159, 68168 }, - { 68176, 68184 }, -}; + { 68176, 68184 }, +}; static const URange32 Khitan_Small_Script_range32[] = { { 94180, 94180 }, { 101120, 101589 }, -}; +}; static const URange16 Khmer_range16[] = { { 6016, 6109 }, { 6112, 6121 }, { 6128, 6137 }, { 6624, 6655 }, -}; +}; static const URange32 Khojki_range32[] = { { 70144, 70161 }, { 70163, 70206 }, -}; +}; static const URange32 Khudawadi_range32[] = { { 70320, 70378 }, { 70384, 70393 }, -}; +}; static const URange16 Lao_range16[] = { { 3713, 3714 }, { 3716, 3716 }, @@ -5725,42 +5725,42 @@ static const URange16 Lao_range16[] = { { 3792, 3801 }, { 3804, 3807 }, }; -static const URange16 Latin_range16[] = { - { 65, 90 }, - { 97, 122 }, - { 170, 170 }, - { 186, 186 }, - { 192, 214 }, - { 216, 246 }, - { 248, 696 }, - { 736, 740 }, - { 7424, 7461 }, - { 7468, 7516 }, - { 7522, 7525 }, - { 7531, 7543 }, - { 7545, 7614 }, - { 7680, 7935 }, - { 8305, 8305 }, - { 8319, 8319 }, - { 8336, 8348 }, - { 8490, 8491 }, - { 8498, 8498 }, - { 8526, 8526 }, - { 8544, 8584 }, - { 11360, 11391 }, - { 42786, 42887 }, +static const URange16 Latin_range16[] = { + { 65, 90 }, + { 97, 122 }, + { 170, 170 }, + { 186, 186 }, + { 192, 214 }, + { 216, 246 }, + { 248, 696 }, + { 736, 740 }, + { 7424, 7461 }, + { 7468, 7516 }, + { 7522, 7525 }, + { 7531, 7543 }, + { 7545, 7614 }, + { 7680, 7935 }, + { 8305, 8305 }, + { 8319, 8319 }, + { 8336, 8348 }, + { 8490, 8491 }, + { 8498, 8498 }, + { 8526, 8526 }, + { 8544, 8584 }, + { 11360, 11391 }, + { 42786, 42887 }, { 42891, 42954 }, { 42960, 42961 }, { 42963, 42963 }, { 42965, 42969 }, { 42994, 43007 }, - { 43824, 43866 }, - { 43868, 43876 }, + { 43824, 43866 }, + { 43868, 43876 }, { 43878, 43881 }, - { 64256, 64262 }, - { 65313, 65338 }, - { 65345, 65370 }, -}; + { 64256, 64262 }, + { 65313, 65338 }, + { 65345, 65370 }, +}; static const URange32 Latin_range32[] = { { 67456, 67461 }, { 67463, 67504 }, @@ -5771,19 +5771,19 @@ static const URange16 Lepcha_range16[] = { { 7168, 7223 }, { 7227, 7241 }, { 7245, 7247 }, -}; +}; static const URange16 Limbu_range16[] = { { 6400, 6430 }, { 6432, 6443 }, { 6448, 6459 }, { 6464, 6464 }, { 6468, 6479 }, -}; +}; static const URange32 Linear_A_range32[] = { { 67072, 67382 }, { 67392, 67413 }, { 67424, 67431 }, -}; +}; static const URange32 Linear_B_range32[] = { { 65536, 65547 }, { 65549, 65574 }, @@ -5792,26 +5792,26 @@ static const URange32 Linear_B_range32[] = { { 65599, 65613 }, { 65616, 65629 }, { 65664, 65786 }, -}; +}; static const URange16 Lisu_range16[] = { { 42192, 42239 }, -}; +}; static const URange32 Lisu_range32[] = { { 73648, 73648 }, -}; +}; static const URange32 Lycian_range32[] = { { 66176, 66204 }, -}; +}; static const URange32 Lydian_range32[] = { { 67872, 67897 }, { 67903, 67903 }, -}; +}; static const URange32 Mahajani_range32[] = { { 69968, 70006 }, -}; +}; static const URange32 Makasar_range32[] = { { 73440, 73464 }, -}; +}; static const URange16 Malayalam_range16[] = { { 3328, 3340 }, { 3342, 3344 }, @@ -5820,20 +5820,20 @@ static const URange16 Malayalam_range16[] = { { 3402, 3407 }, { 3412, 3427 }, { 3430, 3455 }, -}; +}; static const URange16 Mandaic_range16[] = { { 2112, 2139 }, { 2142, 2142 }, -}; +}; static const URange32 Manichaean_range32[] = { { 68288, 68326 }, { 68331, 68342 }, -}; +}; static const URange32 Marchen_range32[] = { { 72816, 72847 }, { 72850, 72871 }, { 72873, 72886 }, -}; +}; static const URange32 Masaram_Gondi_range32[] = { { 72960, 72966 }, { 72968, 72969 }, @@ -5842,78 +5842,78 @@ static const URange32 Masaram_Gondi_range32[] = { { 73020, 73021 }, { 73023, 73031 }, { 73040, 73049 }, -}; +}; static const URange32 Medefaidrin_range32[] = { { 93760, 93850 }, -}; +}; static const URange16 Meetei_Mayek_range16[] = { { 43744, 43766 }, { 43968, 44013 }, { 44016, 44025 }, }; -static const URange32 Mende_Kikakui_range32[] = { - { 124928, 125124 }, - { 125127, 125142 }, -}; +static const URange32 Mende_Kikakui_range32[] = { + { 124928, 125124 }, + { 125127, 125142 }, +}; static const URange32 Meroitic_Cursive_range32[] = { { 68000, 68023 }, { 68028, 68047 }, { 68050, 68095 }, -}; +}; static const URange32 Meroitic_Hieroglyphs_range32[] = { { 67968, 67999 }, -}; +}; static const URange32 Miao_range32[] = { { 93952, 94026 }, { 94031, 94087 }, { 94095, 94111 }, -}; +}; static const URange32 Modi_range32[] = { { 71168, 71236 }, { 71248, 71257 }, -}; +}; static const URange16 Mongolian_range16[] = { { 6144, 6145 }, { 6148, 6148 }, { 6150, 6169 }, { 6176, 6264 }, { 6272, 6314 }, -}; +}; static const URange32 Mongolian_range32[] = { { 71264, 71276 }, -}; +}; static const URange32 Mro_range32[] = { { 92736, 92766 }, { 92768, 92777 }, { 92782, 92783 }, -}; +}; static const URange32 Multani_range32[] = { { 70272, 70278 }, { 70280, 70280 }, { 70282, 70285 }, { 70287, 70301 }, { 70303, 70313 }, -}; +}; static const URange16 Myanmar_range16[] = { { 4096, 4255 }, { 43488, 43518 }, { 43616, 43647 }, -}; +}; static const URange32 Nabataean_range32[] = { { 67712, 67742 }, { 67751, 67759 }, -}; +}; static const URange32 Nandinagari_range32[] = { { 72096, 72103 }, { 72106, 72151 }, { 72154, 72164 }, -}; -static const URange16 New_Tai_Lue_range16[] = { - { 6528, 6571 }, - { 6576, 6601 }, - { 6608, 6618 }, - { 6622, 6623 }, -}; +}; +static const URange16 New_Tai_Lue_range16[] = { + { 6528, 6571 }, + { 6576, 6601 }, + { 6608, 6618 }, + { 6622, 6623 }, +}; static const URange32 Newa_range32[] = { { 70656, 70747 }, { 70749, 70753 }, @@ -5935,24 +5935,24 @@ static const URange32 Nyiakeng_Puachue_Hmong_range32[] = { static const URange16 Ogham_range16[] = { { 5760, 5788 }, }; -static const URange16 Ol_Chiki_range16[] = { - { 7248, 7295 }, -}; +static const URange16 Ol_Chiki_range16[] = { + { 7248, 7295 }, +}; static const URange32 Old_Hungarian_range32[] = { { 68736, 68786 }, { 68800, 68850 }, { 68858, 68863 }, -}; +}; static const URange32 Old_Italic_range32[] = { { 66304, 66339 }, { 66349, 66351 }, -}; +}; static const URange32 Old_North_Arabian_range32[] = { { 68224, 68255 }, -}; +}; static const URange32 Old_Permic_range32[] = { { 66384, 66426 }, -}; +}; static const URange32 Old_Persian_range32[] = { { 66464, 66499 }, { 66504, 66517 }, @@ -5969,154 +5969,154 @@ static const URange32 Old_Turkic_range32[] = { static const URange32 Old_Uyghur_range32[] = { { 69488, 69513 }, }; -static const URange16 Oriya_range16[] = { - { 2817, 2819 }, - { 2821, 2828 }, - { 2831, 2832 }, - { 2835, 2856 }, - { 2858, 2864 }, - { 2866, 2867 }, - { 2869, 2873 }, - { 2876, 2884 }, - { 2887, 2888 }, - { 2891, 2893 }, +static const URange16 Oriya_range16[] = { + { 2817, 2819 }, + { 2821, 2828 }, + { 2831, 2832 }, + { 2835, 2856 }, + { 2858, 2864 }, + { 2866, 2867 }, + { 2869, 2873 }, + { 2876, 2884 }, + { 2887, 2888 }, + { 2891, 2893 }, { 2901, 2903 }, - { 2908, 2909 }, - { 2911, 2915 }, - { 2918, 2935 }, -}; + { 2908, 2909 }, + { 2911, 2915 }, + { 2918, 2935 }, +}; static const URange32 Osage_range32[] = { { 66736, 66771 }, { 66776, 66811 }, -}; +}; static const URange32 Osmanya_range32[] = { { 66688, 66717 }, { 66720, 66729 }, -}; +}; static const URange32 Pahawh_Hmong_range32[] = { { 92928, 92997 }, { 93008, 93017 }, { 93019, 93025 }, { 93027, 93047 }, { 93053, 93071 }, -}; +}; static const URange32 Palmyrene_range32[] = { { 67680, 67711 }, -}; +}; static const URange32 Pau_Cin_Hau_range32[] = { { 72384, 72440 }, -}; +}; static const URange16 Phags_Pa_range16[] = { { 43072, 43127 }, -}; -static const URange32 Phoenician_range32[] = { - { 67840, 67867 }, - { 67871, 67871 }, -}; +}; +static const URange32 Phoenician_range32[] = { + { 67840, 67867 }, + { 67871, 67871 }, +}; static const URange32 Psalter_Pahlavi_range32[] = { { 68480, 68497 }, { 68505, 68508 }, { 68521, 68527 }, -}; +}; static const URange16 Rejang_range16[] = { { 43312, 43347 }, { 43359, 43359 }, -}; +}; static const URange16 Runic_range16[] = { { 5792, 5866 }, { 5870, 5880 }, -}; +}; static const URange16 Samaritan_range16[] = { { 2048, 2093 }, { 2096, 2110 }, -}; +}; static const URange16 Saurashtra_range16[] = { { 43136, 43205 }, { 43214, 43225 }, -}; +}; static const URange32 Sharada_range32[] = { { 70016, 70111 }, -}; +}; static const URange32 Shavian_range32[] = { { 66640, 66687 }, -}; +}; static const URange32 Siddham_range32[] = { { 71040, 71093 }, { 71096, 71133 }, -}; +}; static const URange32 SignWriting_range32[] = { { 120832, 121483 }, { 121499, 121503 }, { 121505, 121519 }, -}; -static const URange16 Sinhala_range16[] = { +}; +static const URange16 Sinhala_range16[] = { { 3457, 3459 }, - { 3461, 3478 }, - { 3482, 3505 }, - { 3507, 3515 }, - { 3517, 3517 }, - { 3520, 3526 }, - { 3530, 3530 }, - { 3535, 3540 }, - { 3542, 3542 }, - { 3544, 3551 }, - { 3558, 3567 }, - { 3570, 3572 }, -}; -static const URange32 Sinhala_range32[] = { - { 70113, 70132 }, -}; + { 3461, 3478 }, + { 3482, 3505 }, + { 3507, 3515 }, + { 3517, 3517 }, + { 3520, 3526 }, + { 3530, 3530 }, + { 3535, 3540 }, + { 3542, 3542 }, + { 3544, 3551 }, + { 3558, 3567 }, + { 3570, 3572 }, +}; +static const URange32 Sinhala_range32[] = { + { 70113, 70132 }, +}; static const URange32 Sogdian_range32[] = { { 69424, 69465 }, -}; +}; static const URange32 Sora_Sompeng_range32[] = { { 69840, 69864 }, { 69872, 69881 }, -}; +}; static const URange32 Soyombo_range32[] = { { 72272, 72354 }, -}; +}; static const URange16 Sundanese_range16[] = { { 7040, 7103 }, { 7360, 7367 }, -}; +}; static const URange16 Syloti_Nagri_range16[] = { { 43008, 43052 }, -}; +}; static const URange16 Syriac_range16[] = { { 1792, 1805 }, { 1807, 1866 }, { 1869, 1871 }, { 2144, 2154 }, -}; +}; static const URange16 Tagalog_range16[] = { { 5888, 5909 }, { 5919, 5919 }, -}; +}; static const URange16 Tagbanwa_range16[] = { { 5984, 5996 }, { 5998, 6000 }, { 6002, 6003 }, -}; +}; static const URange16 Tai_Le_range16[] = { { 6480, 6509 }, { 6512, 6516 }, -}; +}; static const URange16 Tai_Tham_range16[] = { { 6688, 6750 }, { 6752, 6780 }, { 6783, 6793 }, { 6800, 6809 }, { 6816, 6829 }, -}; +}; static const URange16 Tai_Viet_range16[] = { { 43648, 43714 }, { 43739, 43743 }, -}; +}; static const URange32 Takri_range32[] = { { 71296, 71353 }, { 71360, 71369 }, -}; +}; static const URange16 Tamil_range16[] = { { 2946, 2947 }, { 2949, 2954 }, @@ -6134,11 +6134,11 @@ static const URange16 Tamil_range16[] = { { 3024, 3024 }, { 3031, 3031 }, { 3046, 3066 }, -}; +}; static const URange32 Tamil_range32[] = { { 73664, 73713 }, { 73727, 73727 }, -}; +}; static const URange32 Tangsa_range32[] = { { 92784, 92862 }, { 92864, 92873 }, @@ -6148,7 +6148,7 @@ static const URange32 Tangut_range32[] = { { 94208, 100343 }, { 100352, 101119 }, { 101632, 101640 }, -}; +}; static const URange16 Telugu_range16[] = { { 3072, 3084 }, { 3086, 3088 }, @@ -6163,14 +6163,14 @@ static const URange16 Telugu_range16[] = { { 3168, 3171 }, { 3174, 3183 }, { 3191, 3199 }, -}; +}; static const URange16 Thaana_range16[] = { { 1920, 1969 }, -}; +}; static const URange16 Thai_range16[] = { { 3585, 3642 }, { 3648, 3675 }, -}; +}; static const URange16 Tibetan_range16[] = { { 3840, 3911 }, { 3913, 3948 }, @@ -6179,26 +6179,26 @@ static const URange16 Tibetan_range16[] = { { 4030, 4044 }, { 4046, 4052 }, { 4057, 4058 }, -}; +}; static const URange16 Tifinagh_range16[] = { { 11568, 11623 }, { 11631, 11632 }, { 11647, 11647 }, -}; -static const URange32 Tirhuta_range32[] = { - { 70784, 70855 }, - { 70864, 70873 }, -}; +}; +static const URange32 Tirhuta_range32[] = { + { 70784, 70855 }, + { 70864, 70873 }, +}; static const URange32 Toto_range32[] = { { 123536, 123566 }, }; static const URange32 Ugaritic_range32[] = { { 66432, 66461 }, { 66463, 66463 }, -}; +}; static const URange16 Vai_range16[] = { { 42240, 42539 }, -}; +}; static const URange32 Vithkuqi_range32[] = { { 66928, 66938 }, { 66940, 66954 }, @@ -6212,226 +6212,226 @@ static const URange32 Vithkuqi_range32[] = { static const URange32 Wancho_range32[] = { { 123584, 123641 }, { 123647, 123647 }, -}; +}; static const URange32 Warang_Citi_range32[] = { { 71840, 71922 }, { 71935, 71935 }, -}; +}; static const URange32 Yezidi_range32[] = { { 69248, 69289 }, { 69291, 69293 }, { 69296, 69297 }, -}; +}; static const URange16 Yi_range16[] = { { 40960, 42124 }, { 42128, 42182 }, -}; +}; static const URange32 Zanabazar_Square_range32[] = { { 72192, 72263 }, -}; +}; // 4038 16-bit ranges, 1712 32-bit ranges -const UGroup unicode_groups[] = { +const UGroup unicode_groups[] = { { "Adlam", +1, 0, 0, Adlam_range32, 3 }, - { "Ahom", +1, 0, 0, Ahom_range32, 3 }, - { "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 }, + { "Ahom", +1, 0, 0, Ahom_range32, 3 }, + { "Anatolian_Hieroglyphs", +1, 0, 0, Anatolian_Hieroglyphs_range32, 1 }, { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 }, { "Armenian", +1, Armenian_range16, 4, 0, 0 }, - { "Avestan", +1, 0, 0, Avestan_range32, 2 }, - { "Balinese", +1, Balinese_range16, 2, 0, 0 }, - { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 }, - { "Bassa_Vah", +1, 0, 0, Bassa_Vah_range32, 2 }, - { "Batak", +1, Batak_range16, 2, 0, 0 }, - { "Bengali", +1, Bengali_range16, 14, 0, 0 }, + { "Avestan", +1, 0, 0, Avestan_range32, 2 }, + { "Balinese", +1, Balinese_range16, 2, 0, 0 }, + { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 }, + { "Bassa_Vah", +1, 0, 0, Bassa_Vah_range32, 2 }, + { "Batak", +1, Batak_range16, 2, 0, 0 }, + { "Bengali", +1, Bengali_range16, 14, 0, 0 }, { "Bhaiksuki", +1, 0, 0, Bhaiksuki_range32, 4 }, - { "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 }, - { "Brahmi", +1, 0, 0, Brahmi_range32, 3 }, - { "Braille", +1, Braille_range16, 1, 0, 0 }, - { "Buginese", +1, Buginese_range16, 2, 0, 0 }, - { "Buhid", +1, Buhid_range16, 1, 0, 0 }, + { "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 }, + { "Brahmi", +1, 0, 0, Brahmi_range32, 3 }, + { "Braille", +1, Braille_range16, 1, 0, 0 }, + { "Buginese", +1, Buginese_range16, 2, 0, 0 }, + { "Buhid", +1, Buhid_range16, 1, 0, 0 }, { "C", +1, C_range16, 17, C_range32, 9 }, { "Canadian_Aboriginal", +1, Canadian_Aboriginal_range16, 2, Canadian_Aboriginal_range32, 1 }, - { "Carian", +1, 0, 0, Carian_range32, 1 }, - { "Caucasian_Albanian", +1, 0, 0, Caucasian_Albanian_range32, 2 }, - { "Cc", +1, Cc_range16, 2, 0, 0 }, + { "Carian", +1, 0, 0, Carian_range32, 1 }, + { "Caucasian_Albanian", +1, 0, 0, Caucasian_Albanian_range32, 2 }, + { "Cc", +1, Cc_range16, 2, 0, 0 }, { "Cf", +1, Cf_range16, 14, Cf_range32, 7 }, - { "Chakma", +1, 0, 0, Chakma_range32, 2 }, - { "Cham", +1, Cham_range16, 4, 0, 0 }, - { "Cherokee", +1, Cherokee_range16, 3, 0, 0 }, + { "Chakma", +1, 0, 0, Chakma_range32, 2 }, + { "Cham", +1, Cham_range16, 4, 0, 0 }, + { "Cherokee", +1, Cherokee_range16, 3, 0, 0 }, { "Chorasmian", +1, 0, 0, Chorasmian_range32, 1 }, - { "Co", +1, Co_range16, 1, Co_range32, 2 }, + { "Co", +1, Co_range16, 1, Co_range32, 2 }, { "Common", +1, Common_range16, 91, Common_range32, 83 }, - { "Coptic", +1, Coptic_range16, 3, 0, 0 }, - { "Cs", +1, Cs_range16, 1, 0, 0 }, - { "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 }, - { "Cypriot", +1, 0, 0, Cypriot_range32, 6 }, + { "Coptic", +1, Coptic_range16, 3, 0, 0 }, + { "Cs", +1, Cs_range16, 1, 0, 0 }, + { "Cuneiform", +1, 0, 0, Cuneiform_range32, 4 }, + { "Cypriot", +1, 0, 0, Cypriot_range32, 6 }, { "Cypro_Minoan", +1, 0, 0, Cypro_Minoan_range32, 1 }, { "Cyrillic", +1, Cyrillic_range16, 8, 0, 0 }, - { "Deseret", +1, 0, 0, Deseret_range32, 1 }, - { "Devanagari", +1, Devanagari_range16, 4, 0, 0 }, + { "Deseret", +1, 0, 0, Deseret_range32, 1 }, + { "Devanagari", +1, Devanagari_range16, 4, 0, 0 }, { "Dives_Akuru", +1, 0, 0, Dives_Akuru_range32, 8 }, { "Dogra", +1, 0, 0, Dogra_range32, 1 }, - { "Duployan", +1, 0, 0, Duployan_range32, 5 }, + { "Duployan", +1, 0, 0, Duployan_range32, 5 }, { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 2 }, - { "Elbasan", +1, 0, 0, Elbasan_range32, 1 }, + { "Elbasan", +1, 0, 0, Elbasan_range32, 1 }, { "Elymaic", +1, 0, 0, Elymaic_range32, 1 }, { "Ethiopic", +1, Ethiopic_range16, 32, Ethiopic_range32, 4 }, { "Georgian", +1, Georgian_range16, 10, 0, 0 }, { "Glagolitic", +1, Glagolitic_range16, 1, Glagolitic_range32, 5 }, - { "Gothic", +1, 0, 0, Gothic_range32, 1 }, - { "Grantha", +1, 0, 0, Grantha_range32, 15 }, - { "Greek", +1, Greek_range16, 33, Greek_range32, 3 }, - { "Gujarati", +1, Gujarati_range16, 14, 0, 0 }, + { "Gothic", +1, 0, 0, Gothic_range32, 1 }, + { "Grantha", +1, 0, 0, Grantha_range32, 15 }, + { "Greek", +1, Greek_range16, 33, Greek_range32, 3 }, + { "Gujarati", +1, Gujarati_range16, 14, 0, 0 }, { "Gunjala_Gondi", +1, 0, 0, Gunjala_Gondi_range32, 6 }, - { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 }, + { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 }, { "Han", +1, Han_range16, 11, Han_range32, 9 }, - { "Hangul", +1, Hangul_range16, 14, 0, 0 }, + { "Hangul", +1, Hangul_range16, 14, 0, 0 }, { "Hanifi_Rohingya", +1, 0, 0, Hanifi_Rohingya_range32, 2 }, - { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 }, - { "Hatran", +1, 0, 0, Hatran_range32, 3 }, - { "Hebrew", +1, Hebrew_range16, 9, 0, 0 }, + { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 }, + { "Hatran", +1, 0, 0, Hatran_range32, 3 }, + { "Hebrew", +1, Hebrew_range16, 9, 0, 0 }, { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 3 }, - { "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 }, + { "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 }, { "Inherited", +1, Inherited_range16, 19, Inherited_range32, 10 }, - { "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 }, - { "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 }, - { "Javanese", +1, Javanese_range16, 3, 0, 0 }, + { "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 }, + { "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 }, + { "Javanese", +1, Javanese_range16, 3, 0, 0 }, { "Kaithi", +1, 0, 0, Kaithi_range32, 2 }, { "Kannada", +1, Kannada_range16, 13, 0, 0 }, { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 6 }, - { "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 }, - { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 }, + { "Kayah_Li", +1, Kayah_Li_range16, 2, 0, 0 }, + { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 }, { "Khitan_Small_Script", +1, 0, 0, Khitan_Small_Script_range32, 2 }, - { "Khmer", +1, Khmer_range16, 4, 0, 0 }, - { "Khojki", +1, 0, 0, Khojki_range32, 2 }, - { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 }, + { "Khmer", +1, Khmer_range16, 4, 0, 0 }, + { "Khojki", +1, 0, 0, Khojki_range32, 2 }, + { "Khudawadi", +1, 0, 0, Khudawadi_range32, 2 }, { "L", +1, L_range16, 380, L_range32, 268 }, { "Lao", +1, Lao_range16, 11, 0, 0 }, { "Latin", +1, Latin_range16, 34, Latin_range32, 4 }, - { "Lepcha", +1, Lepcha_range16, 3, 0, 0 }, - { "Limbu", +1, Limbu_range16, 5, 0, 0 }, - { "Linear_A", +1, 0, 0, Linear_A_range32, 3 }, - { "Linear_B", +1, 0, 0, Linear_B_range32, 7 }, + { "Lepcha", +1, Lepcha_range16, 3, 0, 0 }, + { "Limbu", +1, Limbu_range16, 5, 0, 0 }, + { "Linear_A", +1, 0, 0, Linear_A_range32, 3 }, + { "Linear_B", +1, 0, 0, Linear_B_range32, 7 }, { "Lisu", +1, Lisu_range16, 1, Lisu_range32, 1 }, { "Ll", +1, Ll_range16, 617, Ll_range32, 40 }, { "Lm", +1, Lm_range16, 57, Lm_range32, 12 }, { "Lo", +1, Lo_range16, 290, Lo_range32, 211 }, - { "Lt", +1, Lt_range16, 10, 0, 0 }, + { "Lt", +1, Lt_range16, 10, 0, 0 }, { "Lu", +1, Lu_range16, 605, Lu_range32, 41 }, - { "Lycian", +1, 0, 0, Lycian_range32, 1 }, - { "Lydian", +1, 0, 0, Lydian_range32, 2 }, + { "Lycian", +1, 0, 0, Lycian_range32, 1 }, + { "Lydian", +1, 0, 0, Lydian_range32, 2 }, { "M", +1, M_range16, 189, M_range32, 110 }, - { "Mahajani", +1, 0, 0, Mahajani_range32, 1 }, + { "Mahajani", +1, 0, 0, Mahajani_range32, 1 }, { "Makasar", +1, 0, 0, Makasar_range32, 1 }, { "Malayalam", +1, Malayalam_range16, 7, 0, 0 }, - { "Mandaic", +1, Mandaic_range16, 2, 0, 0 }, - { "Manichaean", +1, 0, 0, Manichaean_range32, 2 }, + { "Mandaic", +1, Mandaic_range16, 2, 0, 0 }, + { "Manichaean", +1, 0, 0, Manichaean_range32, 2 }, { "Marchen", +1, 0, 0, Marchen_range32, 3 }, { "Masaram_Gondi", +1, 0, 0, Masaram_Gondi_range32, 7 }, { "Mc", +1, Mc_range16, 111, Mc_range32, 66 }, - { "Me", +1, Me_range16, 5, 0, 0 }, + { "Me", +1, Me_range16, 5, 0, 0 }, { "Medefaidrin", +1, 0, 0, Medefaidrin_range32, 1 }, - { "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 }, - { "Mende_Kikakui", +1, 0, 0, Mende_Kikakui_range32, 2 }, - { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 }, - { "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 }, - { "Miao", +1, 0, 0, Miao_range32, 3 }, + { "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 }, + { "Mende_Kikakui", +1, 0, 0, Mende_Kikakui_range32, 2 }, + { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 3 }, + { "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 }, + { "Miao", +1, 0, 0, Miao_range32, 3 }, { "Mn", +1, Mn_range16, 212, Mn_range32, 124 }, - { "Modi", +1, 0, 0, Modi_range32, 2 }, + { "Modi", +1, 0, 0, Modi_range32, 2 }, { "Mongolian", +1, Mongolian_range16, 5, Mongolian_range32, 1 }, - { "Mro", +1, 0, 0, Mro_range32, 3 }, - { "Multani", +1, 0, 0, Multani_range32, 5 }, - { "Myanmar", +1, Myanmar_range16, 3, 0, 0 }, + { "Mro", +1, 0, 0, Mro_range32, 3 }, + { "Multani", +1, 0, 0, Multani_range32, 5 }, + { "Myanmar", +1, Myanmar_range16, 3, 0, 0 }, { "N", +1, N_range16, 67, N_range32, 67 }, - { "Nabataean", +1, 0, 0, Nabataean_range32, 2 }, + { "Nabataean", +1, 0, 0, Nabataean_range32, 2 }, { "Nandinagari", +1, 0, 0, Nandinagari_range32, 3 }, { "Nd", +1, Nd_range16, 37, Nd_range32, 25 }, - { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 }, + { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 }, { "Newa", +1, 0, 0, Newa_range32, 2 }, { "Nko", +1, Nko_range16, 2, 0, 0 }, - { "Nl", +1, Nl_range16, 7, Nl_range32, 5 }, + { "Nl", +1, Nl_range16, 7, Nl_range32, 5 }, { "No", +1, No_range16, 29, No_range32, 42 }, { "Nushu", +1, 0, 0, Nushu_range32, 2 }, { "Nyiakeng_Puachue_Hmong", +1, 0, 0, Nyiakeng_Puachue_Hmong_range32, 4 }, - { "Ogham", +1, Ogham_range16, 1, 0, 0 }, - { "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 }, - { "Old_Hungarian", +1, 0, 0, Old_Hungarian_range32, 3 }, + { "Ogham", +1, Ogham_range16, 1, 0, 0 }, + { "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 }, + { "Old_Hungarian", +1, 0, 0, Old_Hungarian_range32, 3 }, { "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 }, - { "Old_North_Arabian", +1, 0, 0, Old_North_Arabian_range32, 1 }, - { "Old_Permic", +1, 0, 0, Old_Permic_range32, 1 }, - { "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 }, + { "Old_North_Arabian", +1, 0, 0, Old_North_Arabian_range32, 1 }, + { "Old_Permic", +1, 0, 0, Old_Permic_range32, 1 }, + { "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 }, { "Old_Sogdian", +1, 0, 0, Old_Sogdian_range32, 1 }, - { "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 }, - { "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 }, + { "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 }, + { "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 }, { "Old_Uyghur", +1, 0, 0, Old_Uyghur_range32, 1 }, - { "Oriya", +1, Oriya_range16, 14, 0, 0 }, + { "Oriya", +1, Oriya_range16, 14, 0, 0 }, { "Osage", +1, 0, 0, Osage_range32, 2 }, - { "Osmanya", +1, 0, 0, Osmanya_range32, 2 }, + { "Osmanya", +1, 0, 0, Osmanya_range32, 2 }, { "P", +1, P_range16, 133, P_range32, 56 }, - { "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 }, - { "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 }, - { "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 }, - { "Pc", +1, Pc_range16, 6, 0, 0 }, + { "Pahawh_Hmong", +1, 0, 0, Pahawh_Hmong_range32, 5 }, + { "Palmyrene", +1, 0, 0, Palmyrene_range32, 1 }, + { "Pau_Cin_Hau", +1, 0, 0, Pau_Cin_Hau_range32, 1 }, + { "Pc", +1, Pc_range16, 6, 0, 0 }, { "Pd", +1, Pd_range16, 18, Pd_range32, 1 }, { "Pe", +1, Pe_range16, 76, 0, 0 }, - { "Pf", +1, Pf_range16, 10, 0, 0 }, - { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 }, - { "Phoenician", +1, 0, 0, Phoenician_range32, 2 }, - { "Pi", +1, Pi_range16, 11, 0, 0 }, + { "Pf", +1, Pf_range16, 10, 0, 0 }, + { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 }, + { "Phoenician", +1, 0, 0, Phoenician_range32, 2 }, + { "Pi", +1, Pi_range16, 11, 0, 0 }, { "Po", +1, Po_range16, 130, Po_range32, 55 }, { "Ps", +1, Ps_range16, 79, 0, 0 }, - { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 }, - { "Rejang", +1, Rejang_range16, 2, 0, 0 }, - { "Runic", +1, Runic_range16, 2, 0, 0 }, + { "Psalter_Pahlavi", +1, 0, 0, Psalter_Pahlavi_range32, 3 }, + { "Rejang", +1, Rejang_range16, 2, 0, 0 }, + { "Runic", +1, Runic_range16, 2, 0, 0 }, { "S", +1, S_range16, 151, S_range32, 83 }, - { "Samaritan", +1, Samaritan_range16, 2, 0, 0 }, - { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 }, + { "Samaritan", +1, Samaritan_range16, 2, 0, 0 }, + { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 }, { "Sc", +1, Sc_range16, 18, Sc_range32, 3 }, { "Sharada", +1, 0, 0, Sharada_range32, 1 }, - { "Shavian", +1, 0, 0, Shavian_range32, 1 }, - { "Siddham", +1, 0, 0, Siddham_range32, 2 }, - { "SignWriting", +1, 0, 0, SignWriting_range32, 3 }, - { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 }, + { "Shavian", +1, 0, 0, Shavian_range32, 1 }, + { "Siddham", +1, 0, 0, Siddham_range32, 2 }, + { "SignWriting", +1, 0, 0, SignWriting_range32, 3 }, + { "Sinhala", +1, Sinhala_range16, 12, Sinhala_range32, 1 }, { "Sk", +1, Sk_range16, 30, Sk_range32, 1 }, - { "Sm", +1, Sm_range16, 53, Sm_range32, 11 }, + { "Sm", +1, Sm_range16, 53, Sm_range32, 11 }, { "So", +1, So_range16, 114, So_range32, 72 }, { "Sogdian", +1, 0, 0, Sogdian_range32, 1 }, - { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 }, + { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 }, { "Soyombo", +1, 0, 0, Soyombo_range32, 1 }, - { "Sundanese", +1, Sundanese_range16, 2, 0, 0 }, - { "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 }, + { "Sundanese", +1, Sundanese_range16, 2, 0, 0 }, + { "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 }, { "Syriac", +1, Syriac_range16, 4, 0, 0 }, - { "Tagalog", +1, Tagalog_range16, 2, 0, 0 }, - { "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 }, - { "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 }, - { "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 }, - { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 }, - { "Takri", +1, 0, 0, Takri_range32, 2 }, + { "Tagalog", +1, Tagalog_range16, 2, 0, 0 }, + { "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 }, + { "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 }, + { "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 }, + { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 }, + { "Takri", +1, 0, 0, Takri_range32, 2 }, { "Tamil", +1, Tamil_range16, 16, Tamil_range32, 2 }, { "Tangsa", +1, 0, 0, Tangsa_range32, 2 }, { "Tangut", +1, 0, 0, Tangut_range32, 4 }, { "Telugu", +1, Telugu_range16, 13, 0, 0 }, - { "Thaana", +1, Thaana_range16, 1, 0, 0 }, - { "Thai", +1, Thai_range16, 2, 0, 0 }, - { "Tibetan", +1, Tibetan_range16, 7, 0, 0 }, - { "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 }, - { "Tirhuta", +1, 0, 0, Tirhuta_range32, 2 }, + { "Thaana", +1, Thaana_range16, 1, 0, 0 }, + { "Thai", +1, Thai_range16, 2, 0, 0 }, + { "Tibetan", +1, Tibetan_range16, 7, 0, 0 }, + { "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 }, + { "Tirhuta", +1, 0, 0, Tirhuta_range32, 2 }, { "Toto", +1, 0, 0, Toto_range32, 1 }, - { "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 }, - { "Vai", +1, Vai_range16, 1, 0, 0 }, + { "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 }, + { "Vai", +1, Vai_range16, 1, 0, 0 }, { "Vithkuqi", +1, 0, 0, Vithkuqi_range32, 8 }, { "Wancho", +1, 0, 0, Wancho_range32, 2 }, - { "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 }, + { "Warang_Citi", +1, 0, 0, Warang_Citi_range32, 2 }, { "Yezidi", +1, 0, 0, Yezidi_range32, 3 }, - { "Yi", +1, Yi_range16, 2, 0, 0 }, - { "Z", +1, Z_range16, 8, 0, 0 }, + { "Yi", +1, Yi_range16, 2, 0, 0 }, + { "Z", +1, Z_range16, 8, 0, 0 }, { "Zanabazar_Square", +1, 0, 0, Zanabazar_Square_range32, 1 }, - { "Zl", +1, Zl_range16, 1, 0, 0 }, - { "Zp", +1, Zp_range16, 1, 0, 0 }, - { "Zs", +1, Zs_range16, 7, 0, 0 }, -}; + { "Zl", +1, Zl_range16, 1, 0, 0 }, + { "Zp", +1, Zp_range16, 1, 0, 0 }, + { "Zs", +1, Zs_range16, 7, 0, 0 }, +}; const int num_unicode_groups = 197; - - -} // namespace re2 - - + + +} // namespace re2 + + diff --git a/contrib/libs/re2/re2/unicode_groups.h b/contrib/libs/re2/re2/unicode_groups.h index 75f55daa61..7e6857b785 100644 --- a/contrib/libs/re2/re2/unicode_groups.h +++ b/contrib/libs/re2/re2/unicode_groups.h @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_UNICODE_GROUPS_H_ -#define RE2_UNICODE_GROUPS_H_ - +#ifndef RE2_UNICODE_GROUPS_H_ +#define RE2_UNICODE_GROUPS_H_ + // Unicode character groups. // The codes get split into ranges of 16-bit codes @@ -18,23 +18,23 @@ // to 16.5 kB of data but make the data harder to use; // we don't bother. -#include <stdint.h> +#include <stdint.h> -#include "util/util.h" -#include "util/utf.h" +#include "util/util.h" +#include "util/utf.h" namespace re2 { struct URange16 { - uint16_t lo; - uint16_t hi; + uint16_t lo; + uint16_t hi; }; struct URange32 { - Rune lo; - Rune hi; + Rune lo; + Rune hi; }; struct UGroup @@ -64,4 +64,4 @@ extern const int num_perl_groups; } // namespace re2 -#endif // RE2_UNICODE_GROUPS_H_ +#endif // RE2_UNICODE_GROUPS_H_ diff --git a/contrib/libs/re2/re2/walker-inl.h b/contrib/libs/re2/re2/walker-inl.h index 4d064a0970..0527e530e2 100644 --- a/contrib/libs/re2/re2/walker-inl.h +++ b/contrib/libs/re2/re2/walker-inl.h @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#ifndef RE2_WALKER_INL_H_ -#define RE2_WALKER_INL_H_ - +#ifndef RE2_WALKER_INL_H_ +#define RE2_WALKER_INL_H_ + // Helper class for traversing Regexps without recursion. // Clients should declare their own subclasses that override // the PreVisit and PostVisit methods, which are called before @@ -13,9 +13,9 @@ // Not quite the Visitor pattern, because (among other things) // the Visitor pattern is recursive. -#include <stack> +#include <stack> -#include "util/logging.h" +#include "util/logging.h" #include "re2/regexp.h" namespace re2 { @@ -95,8 +95,8 @@ template<typename T> class Regexp::Walker { T WalkInternal(Regexp* re, T top_arg, bool use_copy); - Walker(const Walker&) = delete; - Walker& operator=(const Walker&) = delete; + Walker(const Walker&) = delete; + Walker& operator=(const Walker&) = delete; }; template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re, @@ -190,7 +190,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg, s->child_args = &s->child_arg; else if (re->nsub_ > 1) s->child_args = new T[re->nsub_]; - FALLTHROUGH_INTENDED; + FALLTHROUGH_INTENDED; } default: { if (re->nsub_ > 0) { @@ -244,4 +244,4 @@ template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg, } // namespace re2 -#endif // RE2_WALKER_INL_H_ +#endif // RE2_WALKER_INL_H_ diff --git a/contrib/libs/re2/util/logging.h b/contrib/libs/re2/util/logging.h index 5b2217f29c..924e2165f6 100644 --- a/contrib/libs/re2/util/logging.h +++ b/contrib/libs/re2/util/logging.h @@ -1,109 +1,109 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_LOGGING_H_ -#define UTIL_LOGGING_H_ - -// Simplified version of Google's logging. - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <ostream> -#include <sstream> - -#include "util/util.h" - -// Debug-only checking. -#define DCHECK(condition) assert(condition) -#define DCHECK_EQ(val1, val2) assert((val1) == (val2)) -#define DCHECK_NE(val1, val2) assert((val1) != (val2)) -#define DCHECK_LE(val1, val2) assert((val1) <= (val2)) -#define DCHECK_LT(val1, val2) assert((val1) < (val2)) -#define DCHECK_GE(val1, val2) assert((val1) >= (val2)) -#define DCHECK_GT(val1, val2) assert((val1) > (val2)) - -// Always-on checking -#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x -#define CHECK_LT(x, y) CHECK((x) < (y)) -#define CHECK_GT(x, y) CHECK((x) > (y)) -#define CHECK_LE(x, y) CHECK((x) <= (y)) -#define CHECK_GE(x, y) CHECK((x) >= (y)) -#define CHECK_EQ(x, y) CHECK((x) == (y)) -#define CHECK_NE(x, y) CHECK((x) != (y)) - -#define LOG_INFO LogMessage(__FILE__, __LINE__) -#define LOG_WARNING LogMessage(__FILE__, __LINE__) -#define LOG_ERROR LogMessage(__FILE__, __LINE__) -#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__) -#define LOG_QFATAL LOG_FATAL - -// It seems that one of the Windows header files defines ERROR as 0. -#ifdef _WIN32 -#define LOG_0 LOG_INFO -#endif - -#ifdef NDEBUG -#define LOG_DFATAL LOG_ERROR -#else -#define LOG_DFATAL LOG_FATAL -#endif - -#define LOG(severity) LOG_ ## severity.stream() - -#define VLOG(x) if((x)>0){}else LOG_INFO.stream() - -class LogMessage { - public: - LogMessage(const char* file, int line) - : flushed_(false) { - stream() << file << ":" << line << ": "; - } - void Flush() { - stream() << "\n"; +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_LOGGING_H_ +#define UTIL_LOGGING_H_ + +// Simplified version of Google's logging. + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <ostream> +#include <sstream> + +#include "util/util.h" + +// Debug-only checking. +#define DCHECK(condition) assert(condition) +#define DCHECK_EQ(val1, val2) assert((val1) == (val2)) +#define DCHECK_NE(val1, val2) assert((val1) != (val2)) +#define DCHECK_LE(val1, val2) assert((val1) <= (val2)) +#define DCHECK_LT(val1, val2) assert((val1) < (val2)) +#define DCHECK_GE(val1, val2) assert((val1) >= (val2)) +#define DCHECK_GT(val1, val2) assert((val1) > (val2)) + +// Always-on checking +#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x +#define CHECK_LT(x, y) CHECK((x) < (y)) +#define CHECK_GT(x, y) CHECK((x) > (y)) +#define CHECK_LE(x, y) CHECK((x) <= (y)) +#define CHECK_GE(x, y) CHECK((x) >= (y)) +#define CHECK_EQ(x, y) CHECK((x) == (y)) +#define CHECK_NE(x, y) CHECK((x) != (y)) + +#define LOG_INFO LogMessage(__FILE__, __LINE__) +#define LOG_WARNING LogMessage(__FILE__, __LINE__) +#define LOG_ERROR LogMessage(__FILE__, __LINE__) +#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__) +#define LOG_QFATAL LOG_FATAL + +// It seems that one of the Windows header files defines ERROR as 0. +#ifdef _WIN32 +#define LOG_0 LOG_INFO +#endif + +#ifdef NDEBUG +#define LOG_DFATAL LOG_ERROR +#else +#define LOG_DFATAL LOG_FATAL +#endif + +#define LOG(severity) LOG_ ## severity.stream() + +#define VLOG(x) if((x)>0){}else LOG_INFO.stream() + +class LogMessage { + public: + LogMessage(const char* file, int line) + : flushed_(false) { + stream() << file << ":" << line << ": "; + } + void Flush() { + stream() << "\n"; std::string s = str_.str(); - size_t n = s.size(); - if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc - flushed_ = true; - } - ~LogMessage() { - if (!flushed_) { - Flush(); - } - } - std::ostream& stream() { return str_; } - - private: - bool flushed_; - std::ostringstream str_; - - LogMessage(const LogMessage&) = delete; - LogMessage& operator=(const LogMessage&) = delete; -}; - -// Silence "destructor never returns" warning for ~LogMessageFatal(). -// Since this is a header file, push and then pop to limit the scope. -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4722) -#endif - -class LogMessageFatal : public LogMessage { - public: - LogMessageFatal(const char* file, int line) - : LogMessage(file, line) {} + size_t n = s.size(); + if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc + flushed_ = true; + } + ~LogMessage() { + if (!flushed_) { + Flush(); + } + } + std::ostream& stream() { return str_; } + + private: + bool flushed_; + std::ostringstream str_; + + LogMessage(const LogMessage&) = delete; + LogMessage& operator=(const LogMessage&) = delete; +}; + +// Silence "destructor never returns" warning for ~LogMessageFatal(). +// Since this is a header file, push and then pop to limit the scope. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4722) +#endif + +class LogMessageFatal : public LogMessage { + public: + LogMessageFatal(const char* file, int line) + : LogMessage(file, line) {} ATTRIBUTE_NORETURN ~LogMessageFatal() { - Flush(); - abort(); - } - private: - LogMessageFatal(const LogMessageFatal&) = delete; - LogMessageFatal& operator=(const LogMessageFatal&) = delete; -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#endif // UTIL_LOGGING_H_ + Flush(); + abort(); + } + private: + LogMessageFatal(const LogMessageFatal&) = delete; + LogMessageFatal& operator=(const LogMessageFatal&) = delete; +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // UTIL_LOGGING_H_ diff --git a/contrib/libs/re2/util/mix.h b/contrib/libs/re2/util/mix.h index d85c172ab0..4404a27761 100644 --- a/contrib/libs/re2/util/mix.h +++ b/contrib/libs/re2/util/mix.h @@ -1,41 +1,41 @@ -// Copyright 2016 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_MIX_H_ -#define UTIL_MIX_H_ - -#include <stddef.h> -#include <limits> - -namespace re2 { - -// Silence "truncation of constant value" warning for kMul in 32-bit mode. -// Since this is a header file, push and then pop to limit the scope. -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4309) -#endif - -class HashMix { - public: - HashMix() : hash_(1) {} - explicit HashMix(size_t val) : hash_(val + 83) {} - void Mix(size_t val) { - static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL); - hash_ *= kMul; - hash_ = ((hash_ << 19) | - (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val; - } - size_t get() const { return hash_; } - private: - size_t hash_; -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -} // namespace re2 - -#endif // UTIL_MIX_H_ +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_MIX_H_ +#define UTIL_MIX_H_ + +#include <stddef.h> +#include <limits> + +namespace re2 { + +// Silence "truncation of constant value" warning for kMul in 32-bit mode. +// Since this is a header file, push and then pop to limit the scope. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4309) +#endif + +class HashMix { + public: + HashMix() : hash_(1) {} + explicit HashMix(size_t val) : hash_(val + 83) {} + void Mix(size_t val) { + static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL); + hash_ *= kMul; + hash_ = ((hash_ << 19) | + (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val; + } + size_t get() const { return hash_; } + private: + size_t hash_; +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +} // namespace re2 + +#endif // UTIL_MIX_H_ diff --git a/contrib/libs/re2/util/mutex.h b/contrib/libs/re2/util/mutex.h index 158046bb5c..6619daa4c4 100644 --- a/contrib/libs/re2/util/mutex.h +++ b/contrib/libs/re2/util/mutex.h @@ -1,15 +1,15 @@ -// Copyright 2007 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_MUTEX_H_ -#define UTIL_MUTEX_H_ - -/* - * A simple mutex wrapper, supporting locks and read-write locks. - * You should assume the locks are *not* re-entrant. - */ - +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_MUTEX_H_ +#define UTIL_MUTEX_H_ + +/* + * A simple mutex wrapper, supporting locks and read-write locks. + * You should assume the locks are *not* re-entrant. + */ + #ifdef _WIN32 // Requires Windows Vista or Windows Server 2008 at minimum. #include <windows.h> @@ -17,55 +17,55 @@ #define MUTEX_IS_WIN32_SRWLOCK #endif #else -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif -#include <unistd.h> -#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0 -#define MUTEX_IS_PTHREAD_RWLOCK -#endif -#endif - +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif +#include <unistd.h> +#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0 +#define MUTEX_IS_PTHREAD_RWLOCK +#endif +#endif + #if defined(MUTEX_IS_WIN32_SRWLOCK) typedef SRWLOCK MutexType; #elif defined(MUTEX_IS_PTHREAD_RWLOCK) -#include <pthread.h> -#include <stdlib.h> -typedef pthread_rwlock_t MutexType; -#else -#include <mutex> -typedef std::mutex MutexType; -#endif - -namespace re2 { - -class Mutex { - public: - inline Mutex(); - inline ~Mutex(); - inline void Lock(); // Block if needed until free then acquire exclusively - inline void Unlock(); // Release a lock acquired via Lock() - // Note that on systems that don't support read-write locks, these may - // be implemented as synonyms to Lock() and Unlock(). So you can use - // these for efficiency, but don't use them anyplace where being able - // to do shared reads is necessary to avoid deadlock. - inline void ReaderLock(); // Block until free or shared then acquire a share - inline void ReaderUnlock(); // Release a read share of this Mutex - inline void WriterLock() { Lock(); } // Acquire an exclusive lock - inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() - - private: - MutexType mutex_; - - // Catch the error of writing Mutex when intending MutexLock. - Mutex(Mutex *ignored); - - Mutex(const Mutex&) = delete; - Mutex& operator=(const Mutex&) = delete; -}; - +#include <pthread.h> +#include <stdlib.h> +typedef pthread_rwlock_t MutexType; +#else +#include <mutex> +typedef std::mutex MutexType; +#endif + +namespace re2 { + +class Mutex { + public: + inline Mutex(); + inline ~Mutex(); + inline void Lock(); // Block if needed until free then acquire exclusively + inline void Unlock(); // Release a lock acquired via Lock() + // Note that on systems that don't support read-write locks, these may + // be implemented as synonyms to Lock() and Unlock(). So you can use + // these for efficiency, but don't use them anyplace where being able + // to do shared reads is necessary to avoid deadlock. + inline void ReaderLock(); // Block until free or shared then acquire a share + inline void ReaderUnlock(); // Release a read share of this Mutex + inline void WriterLock() { Lock(); } // Acquire an exclusive lock + inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() + + private: + MutexType mutex_; + + // Catch the error of writing Mutex when intending MutexLock. + Mutex(Mutex *ignored); + + Mutex(const Mutex&) = delete; + Mutex& operator=(const Mutex&) = delete; +}; + #if defined(MUTEX_IS_WIN32_SRWLOCK) - + Mutex::Mutex() : mutex_(SRWLOCK_INIT) { } Mutex::~Mutex() { } void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); } @@ -75,74 +75,74 @@ void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); } #elif defined(MUTEX_IS_PTHREAD_RWLOCK) -#define SAFE_PTHREAD(fncall) \ - do { \ - if ((fncall) != 0) abort(); \ - } while (0) - -Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); } -Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); } -void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); } -void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } -void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); } -void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } - -#undef SAFE_PTHREAD - -#else - -Mutex::Mutex() { } -Mutex::~Mutex() { } -void Mutex::Lock() { mutex_.lock(); } -void Mutex::Unlock() { mutex_.unlock(); } -void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex. -void Mutex::ReaderUnlock() { Unlock(); } - -#endif - -// -------------------------------------------------------------------------- -// Some helper classes - -// MutexLock(mu) acquires mu when constructed and releases it when destroyed. -class MutexLock { - public: - explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } - ~MutexLock() { mu_->Unlock(); } - private: - Mutex * const mu_; - - MutexLock(const MutexLock&) = delete; - MutexLock& operator=(const MutexLock&) = delete; -}; - -// ReaderMutexLock and WriterMutexLock do the same, for rwlocks -class ReaderMutexLock { - public: - explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } - ~ReaderMutexLock() { mu_->ReaderUnlock(); } - private: - Mutex * const mu_; - - ReaderMutexLock(const ReaderMutexLock&) = delete; - ReaderMutexLock& operator=(const ReaderMutexLock&) = delete; -}; - -class WriterMutexLock { - public: - explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } - ~WriterMutexLock() { mu_->WriterUnlock(); } - private: - Mutex * const mu_; - - WriterMutexLock(const WriterMutexLock&) = delete; - WriterMutexLock& operator=(const WriterMutexLock&) = delete; -}; - -// Catch bug where variable name is omitted, e.g. MutexLock (&mu); -#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name") -#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name") -#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name") - -} // namespace re2 - -#endif // UTIL_MUTEX_H_ +#define SAFE_PTHREAD(fncall) \ + do { \ + if ((fncall) != 0) abort(); \ + } while (0) + +Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); } +Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } +void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); } +void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } + +#undef SAFE_PTHREAD + +#else + +Mutex::Mutex() { } +Mutex::~Mutex() { } +void Mutex::Lock() { mutex_.lock(); } +void Mutex::Unlock() { mutex_.unlock(); } +void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex. +void Mutex::ReaderUnlock() { Unlock(); } + +#endif + +// -------------------------------------------------------------------------- +// Some helper classes + +// MutexLock(mu) acquires mu when constructed and releases it when destroyed. +class MutexLock { + public: + explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } + ~MutexLock() { mu_->Unlock(); } + private: + Mutex * const mu_; + + MutexLock(const MutexLock&) = delete; + MutexLock& operator=(const MutexLock&) = delete; +}; + +// ReaderMutexLock and WriterMutexLock do the same, for rwlocks +class ReaderMutexLock { + public: + explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } + ~ReaderMutexLock() { mu_->ReaderUnlock(); } + private: + Mutex * const mu_; + + ReaderMutexLock(const ReaderMutexLock&) = delete; + ReaderMutexLock& operator=(const ReaderMutexLock&) = delete; +}; + +class WriterMutexLock { + public: + explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } + ~WriterMutexLock() { mu_->WriterUnlock(); } + private: + Mutex * const mu_; + + WriterMutexLock(const WriterMutexLock&) = delete; + WriterMutexLock& operator=(const WriterMutexLock&) = delete; +}; + +// Catch bug where variable name is omitted, e.g. MutexLock (&mu); +#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name") +#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name") +#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name") + +} // namespace re2 + +#endif // UTIL_MUTEX_H_ diff --git a/contrib/libs/re2/util/rune.cc b/contrib/libs/re2/util/rune.cc index 4f625ea380..d3066d2789 100644 --- a/contrib/libs/re2/util/rune.cc +++ b/contrib/libs/re2/util/rune.cc @@ -11,10 +11,10 @@ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ - + #include <stdarg.h> #include <string.h> - + #include "util/utf.h" namespace re2 { @@ -135,7 +135,7 @@ runetochar(char *str, const Rune *rune) */ c = *rune; if(c <= Rune1) { - str[0] = static_cast<char>(c); + str[0] = static_cast<char>(c); return 1; } @@ -144,7 +144,7 @@ runetochar(char *str, const Rune *rune) * 0080-07FF => T2 Tx */ if(c <= Rune2) { - str[0] = T2 | static_cast<char>(c >> 1*Bitx); + str[0] = T2 | static_cast<char>(c >> 1*Bitx); str[1] = Tx | (c & Maskx); return 2; } @@ -163,9 +163,9 @@ runetochar(char *str, const Rune *rune) * 0800-FFFF => T3 Tx Tx */ if (c <= Rune3) { - str[0] = T3 | static_cast<char>(c >> 2*Bitx); + str[0] = T3 | static_cast<char>(c >> 2*Bitx); str[1] = Tx | ((c >> 1*Bitx) & Maskx); - str[2] = Tx | (c & Maskx); + str[2] = Tx | (c & Maskx); return 3; } @@ -173,7 +173,7 @@ runetochar(char *str, const Rune *rune) * four character sequence (21-bit value) * 10000-1FFFFF => T4 Tx Tx Tx */ - str[0] = T4 | static_cast<char>(c >> 3*Bitx); + str[0] = T4 | static_cast<char>(c >> 3*Bitx); str[1] = Tx | ((c >> 2*Bitx) & Maskx); str[2] = Tx | ((c >> 1*Bitx) & Maskx); str[3] = Tx | (c & Maskx); diff --git a/contrib/libs/re2/util/strutil.cc b/contrib/libs/re2/util/strutil.cc index fb7e6b1b0c..475216a7e6 100644 --- a/contrib/libs/re2/util/strutil.cc +++ b/contrib/libs/re2/util/strutil.cc @@ -2,16 +2,16 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -#include <stdarg.h> -#include <stdio.h> - -#include "util/strutil.h" - -#ifdef _WIN32 -#define snprintf _snprintf -#define vsnprintf _vsnprintf -#endif - +#include <stdarg.h> +#include <stdio.h> + +#include "util/strutil.h" + +#ifdef _WIN32 +#define snprintf _snprintf +#define vsnprintf _vsnprintf +#endif + namespace re2 { // ---------------------------------------------------------------------- @@ -19,16 +19,16 @@ namespace re2 { // Copies 'src' to 'dest', escaping dangerous characters using // C-style escape sequences. 'src' and 'dest' should not overlap. // Returns the number of bytes written to 'dest' (not including the \0) -// or (size_t)-1 if there was insufficient space. +// or (size_t)-1 if there was insufficient space. // ---------------------------------------------------------------------- -static size_t CEscapeString(const char* src, size_t src_len, - char* dest, size_t dest_len) { +static size_t CEscapeString(const char* src, size_t src_len, + char* dest, size_t dest_len) { const char* src_end = src + src_len; - size_t used = 0; + size_t used = 0; for (; src < src_end; src++) { - if (dest_len - used < 2) // space for two-character escape - return (size_t)-1; + if (dest_len - used < 2) // space for two-character escape + return (size_t)-1; unsigned char c = *src; switch (c) { @@ -43,9 +43,9 @@ static size_t CEscapeString(const char* src, size_t src_len, // digit then that digit must be escaped too to prevent it being // interpreted as part of the character code by C. if (c < ' ' || c > '~') { - if (dest_len - used < 5) // space for four-character escape + \0 - return (size_t)-1; - snprintf(dest + used, 5, "\\%03o", c); + if (dest_len - used < 5) // space for four-character escape + \0 + return (size_t)-1; + snprintf(dest + used, 5, "\\%03o", c); used += 4; } else { dest[used++] = c; break; @@ -54,7 +54,7 @@ static size_t CEscapeString(const char* src, size_t src_len, } if (dest_len - used < 1) // make sure that there is room for \0 - return (size_t)-1; + return (size_t)-1; dest[used] = '\0'; // doesn't count towards return value though return used; @@ -66,10 +66,10 @@ static size_t CEscapeString(const char* src, size_t src_len, // C-style escape sequences. 'src' and 'dest' should not overlap. // ---------------------------------------------------------------------- std::string CEscape(const StringPiece& src) { - const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion - char* dest = new char[dest_len]; - const size_t used = CEscapeString(src.data(), src.size(), - dest, dest_len); + const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion + char* dest = new char[dest_len]; + const size_t used = CEscapeString(src.data(), src.size(), + dest, dest_len); std::string s = std::string(dest, used); delete[] dest; return s; @@ -93,57 +93,57 @@ void PrefixSuccessor(std::string* prefix) { } static void StringAppendV(std::string* dst, const char* format, va_list ap) { - // First try with a small fixed size buffer - char space[1024]; - - // It's possible for methods that use a va_list to invalidate - // the data in it upon use. The fix is to make a copy - // of the structure before using it and use that copy instead. - va_list backup_ap; - va_copy(backup_ap, ap); - int result = vsnprintf(space, sizeof(space), format, backup_ap); - va_end(backup_ap); - - if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) { - // It fit - dst->append(space, result); - return; - } - - // Repeatedly increase buffer size until it fits - int length = sizeof(space); - while (true) { - if (result < 0) { - // Older behavior: just try doubling the buffer size - length *= 2; - } else { - // We need exactly "result+1" characters - length = result+1; - } - char* buf = new char[length]; - - // Restore the va_list before we use it again - va_copy(backup_ap, ap); - result = vsnprintf(buf, length, format, backup_ap); - va_end(backup_ap); - - if ((result >= 0) && (result < length)) { - // It fit - dst->append(buf, result); - delete[] buf; - return; - } - delete[] buf; - } -} - + // First try with a small fixed size buffer + char space[1024]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, sizeof(space), format, backup_ap); + va_end(backup_ap); + + if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) { + // It fit + dst->append(space, result); + return; + } + + // Repeatedly increase buffer size until it fits + int length = sizeof(space); + while (true) { + if (result < 0) { + // Older behavior: just try doubling the buffer size + length *= 2; + } else { + // We need exactly "result+1" characters + length = result+1; + } + char* buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + result = vsnprintf(buf, length, format, backup_ap); + va_end(backup_ap); + + if ((result >= 0) && (result < length)) { + // It fit + dst->append(buf, result); + delete[] buf; + return; + } + delete[] buf; + } +} + std::string StringPrintf(const char* format, ...) { - va_list ap; - va_start(ap, format); + va_list ap; + va_start(ap, format); std::string result; - StringAppendV(&result, format, ap); - va_end(ap); - return result; -} - + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + } // namespace re2 diff --git a/contrib/libs/re2/util/strutil.h b/contrib/libs/re2/util/strutil.h index a69908a0dd..a8109c847f 100644 --- a/contrib/libs/re2/util/strutil.h +++ b/contrib/libs/re2/util/strutil.h @@ -1,21 +1,21 @@ -// Copyright 2016 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_STRUTIL_H_ -#define UTIL_STRUTIL_H_ - -#include <string> - -#include "re2/stringpiece.h" -#include "util/util.h" - -namespace re2 { - +// Copyright 2016 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_STRUTIL_H_ +#define UTIL_STRUTIL_H_ + +#include <string> + +#include "re2/stringpiece.h" +#include "util/util.h" + +namespace re2 { + std::string CEscape(const StringPiece& src); void PrefixSuccessor(std::string* prefix); std::string StringPrintf(const char* format, ...); + +} // namespace re2 -} // namespace re2 - -#endif // UTIL_STRUTIL_H_ +#endif // UTIL_STRUTIL_H_ diff --git a/contrib/libs/re2/util/utf.h b/contrib/libs/re2/util/utf.h index 85b4297239..b61561389f 100644 --- a/contrib/libs/re2/util/utf.h +++ b/contrib/libs/re2/util/utf.h @@ -15,9 +15,9 @@ * in name space re2. */ -#ifndef UTIL_UTF_H_ -#define UTIL_UTF_H_ - +#ifndef UTIL_UTF_H_ +#define UTIL_UTF_H_ + #include <stdint.h> namespace re2 { @@ -41,4 +41,4 @@ char* utfrune(const char*, Rune); } // namespace re2 -#endif // UTIL_UTF_H_ +#endif // UTIL_UTF_H_ diff --git a/contrib/libs/re2/util/util.h b/contrib/libs/re2/util/util.h index 56e46c1a33..cfc30316bc 100644 --- a/contrib/libs/re2/util/util.h +++ b/contrib/libs/re2/util/util.h @@ -1,12 +1,12 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef UTIL_UTIL_H_ -#define UTIL_UTIL_H_ - +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_UTIL_H_ +#define UTIL_UTIL_H_ + #define arraysize(array) (sizeof(array)/sizeof((array)[0])) - + #ifndef ATTRIBUTE_NORETURN #if defined(__GNUC__) #define ATTRIBUTE_NORETURN __attribute__((noreturn)) @@ -16,7 +16,7 @@ #define ATTRIBUTE_NORETURN #endif #endif - + #ifndef ATTRIBUTE_UNUSED #if defined(__GNUC__) #define ATTRIBUTE_UNUSED __attribute__((unused)) @@ -25,18 +25,18 @@ #endif #endif -#ifndef FALLTHROUGH_INTENDED +#ifndef FALLTHROUGH_INTENDED #if defined(__clang__) #define FALLTHROUGH_INTENDED [[clang::fallthrough]] #elif defined(__GNUC__) && __GNUC__ >= 7 #define FALLTHROUGH_INTENDED [[gnu::fallthrough]] #else #define FALLTHROUGH_INTENDED do {} while (0) +#endif #endif -#endif - -#ifndef NO_THREAD_SAFETY_ANALYSIS -#define NO_THREAD_SAFETY_ANALYSIS -#endif - -#endif // UTIL_UTIL_H_ + +#ifndef NO_THREAD_SAFETY_ANALYSIS +#define NO_THREAD_SAFETY_ANALYSIS +#endif + +#endif // UTIL_UTIL_H_ diff --git a/contrib/libs/re2/ya.make b/contrib/libs/re2/ya.make index 8072de2eb2..47a1c3e62d 100644 --- a/contrib/libs/re2/ya.make +++ b/contrib/libs/re2/ya.make @@ -29,24 +29,24 @@ IF (WITH_VALGRIND) ENDIF() SRCS( - re2/bitstate.cc - re2/compile.cc - re2/dfa.cc - re2/filtered_re2.cc - re2/mimics_pcre.cc - re2/nfa.cc - re2/onepass.cc - re2/parse.cc - re2/perl_groups.cc - re2/prefilter.cc - re2/prefilter_tree.cc - re2/prog.cc - re2/re2.cc - re2/regexp.cc - re2/set.cc - re2/simplify.cc + re2/bitstate.cc + re2/compile.cc + re2/dfa.cc + re2/filtered_re2.cc + re2/mimics_pcre.cc + re2/nfa.cc + re2/onepass.cc + re2/parse.cc + re2/perl_groups.cc + re2/prefilter.cc + re2/prefilter_tree.cc + re2/prog.cc + re2/re2.cc + re2/regexp.cc + re2/set.cc + re2/simplify.cc re2/stringpiece.cc - re2/tostring.cc + re2/tostring.cc re2/unicode_casefold.cc re2/unicode_groups.cc util/rune.cc |