diff options
author | axc <axc@yandex-team.ru> | 2022-02-10 16:47:35 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:35 +0300 |
commit | 1f5217043ad70f25dc35e75b3bd261a1e23d045e (patch) | |
tree | 11bf68c1fa5272d3d3446cbd5a0ff96ed9d75788 /contrib/libs/pire | |
parent | 69505a07cbb096113e85aa02e7d136cac4aa826c (diff) | |
download | ydb-1f5217043ad70f25dc35e75b3bd261a1e23d045e.tar.gz |
Restoring authorship annotation for <axc@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/pire')
-rw-r--r-- | contrib/libs/pire/pire/platform.h | 176 | ||||
-rw-r--r-- | contrib/libs/pire/pire/re_lexer.cpp | 426 | ||||
-rw-r--r-- | contrib/libs/pire/pire/re_parser.y | 212 |
3 files changed, 407 insertions, 407 deletions
diff --git a/contrib/libs/pire/pire/platform.h b/contrib/libs/pire/pire/platform.h index 54ded6b387..69680ad88f 100644 --- a/contrib/libs/pire/pire/platform.h +++ b/contrib/libs/pire/pire/platform.h @@ -74,19 +74,19 @@ typedef i32 ssize_t; inline int snprintf(char *str, size_t size, const char *format, ...) { - va_list argptr; - va_start(argptr, format); - int i = _vsnprintf(str, size-1, format, argptr); - va_end(argptr); - - // A workaround for some bug - if (i < 0) { - str[size - 1] = '\x00'; - i = (int)size; - } else if (i < (int)size) { - str[i] = '\x00'; - } - return i; + va_list argptr; + va_start(argptr, format); + int i = _vsnprintf(str, size-1, format, argptr); + va_end(argptr); + + // A workaround for some bug + if (i < 0) { + str[size - 1] = '\x00'; + i = (int)size; + } else if (i < (int)size) { + str[i] = '\x00'; + } + return i; } } @@ -95,40 +95,40 @@ inline int snprintf(char *str, size_t size, const char *format, ...) namespace Pire { namespace Impl { -// A portable way to define a constant like `(size_t)0101010101010101ull' without any warnings. -template<unsigned Pos, unsigned char Byte> -struct DoGenerateConst { - static const size_t Value = DoGenerateConst<Pos-1, Byte>::Value << 8 | (size_t) Byte; -}; - -template<unsigned char Byte> -struct DoGenerateConst<0, Byte> { - static const size_t Value = 0; -}; - -template<unsigned char Byte> -struct GenerateConst { - static const size_t Value = DoGenerateConst<sizeof(size_t), Byte>::Value; -}; - - +// A portable way to define a constant like `(size_t)0101010101010101ull' without any warnings. +template<unsigned Pos, unsigned char Byte> +struct DoGenerateConst { + static const size_t Value = DoGenerateConst<Pos-1, Byte>::Value << 8 | (size_t) Byte; +}; + +template<unsigned char Byte> +struct DoGenerateConst<0, Byte> { + static const size_t Value = 0; +}; + +template<unsigned char Byte> +struct GenerateConst { + static const size_t Value = DoGenerateConst<sizeof(size_t), Byte>::Value; +}; + + // Common implementation of mask comparison logic suitable for // any instruction set struct BasicInstructionSet { - typedef size_t Vector; + typedef size_t Vector; - // Check bytes in the chunk against bytes in the mask - static inline Vector CheckBytes(Vector mask, Vector chunk) - { - const size_t mask0x01 = GenerateConst<0x01>::Value; - const size_t mask0x80 = GenerateConst<0x80>::Value; - size_t mc = chunk ^ mask; - return ((mc - mask0x01) & ~mc & mask0x80); - } + // Check bytes in the chunk against bytes in the mask + static inline Vector CheckBytes(Vector mask, Vector chunk) + { + const size_t mask0x01 = GenerateConst<0x01>::Value; + const size_t mask0x80 = GenerateConst<0x80>::Value; + size_t mc = chunk ^ mask; + return ((mc - mask0x01) & ~mc & mask0x80); + } - static inline Vector Or(Vector mask1, Vector mask2) { return (mask1 | mask2); } + static inline Vector Or(Vector mask1, Vector mask2) { return (mask1 | mask2); } - static inline bool IsAnySet(Vector mask) { return (mask != 0); } + static inline bool IsAnySet(Vector mask) { return (mask != 0); } }; }} @@ -141,22 +141,22 @@ namespace Impl { // SSE2-optimized mask comparison logic struct AvailSSE2 { - typedef __m128i Vector; - - static inline Vector CheckBytes(Vector mask, Vector chunk) - { - return _mm_cmpeq_epi8(mask, chunk); - } - - static inline Vector Or(Vector mask1, Vector mask2) - { - return _mm_or_si128(mask1, mask2); - } - - static inline bool IsAnySet(Vector mask) - { - return _mm_movemask_epi8(mask); - } + typedef __m128i Vector; + + static inline Vector CheckBytes(Vector mask, Vector chunk) + { + return _mm_cmpeq_epi8(mask, chunk); + } + + static inline Vector Or(Vector mask1, Vector mask2) + { + return _mm_or_si128(mask1, mask2); + } + + static inline bool IsAnySet(Vector mask) + { + return _mm_movemask_epi8(mask); + } }; typedef AvailSSE2 AvailInstructionSet; @@ -173,27 +173,27 @@ namespace Impl { // MMX-optimized mask comparison logic struct AvailMMX { - typedef __m64 Vector; - - static inline Vector CheckBytes(Vector mask, Vector chunk) - { - return _mm_cmpeq_pi8(mask, chunk); - } - - static inline Vector Or(Vector mask1, Vector mask2) - { - return _mm_or_si64(mask1, mask2); - } - - static inline bool IsAnySet(Vector mask) - { - union { - Vector mmxMask; - ui64 ui64Mask; - }; - mmxMask = mask; - return ui64Mask; - } + typedef __m64 Vector; + + static inline Vector CheckBytes(Vector mask, Vector chunk) + { + return _mm_cmpeq_pi8(mask, chunk); + } + + static inline Vector Or(Vector mask1, Vector mask2) + { + return _mm_or_si64(mask1, mask2); + } + + static inline bool IsAnySet(Vector mask) + { + union { + Vector mmxMask; + ui64 ui64Mask; + }; + mmxMask = mask; + return ui64Mask; + } }; typedef AvailMMX AvailInstructionSet; @@ -234,25 +234,25 @@ template <size_t Size> struct MaxWordSizeHelper; // Maximum size of SSE register is 128 bit on x86 and x86_64 template <> struct MaxWordSizeHelper<16> { - struct MaxSizeWord { - char val[16]; - }; + struct MaxSizeWord { + char val[16]; + }; }; typedef MaxWordSizeHelper<16>::MaxSizeWord MaxSizeWord; // MaxSizeWord size should be a multiple of size_t size and a multipe of Word size PIRE_STATIC_ASSERT( - (sizeof(MaxSizeWord) % sizeof(size_t) == 0) && - (sizeof(MaxSizeWord) % sizeof(Word) == 0)); + (sizeof(MaxSizeWord) % sizeof(size_t) == 0) && + (sizeof(MaxSizeWord) % sizeof(Word) == 0)); inline size_t FillSizeT(char c) { - size_t w = c; - w &= 0x0ff; - for (size_t i = 8; i != sizeof(size_t)*8; i <<= 1) - w = (w << i) | w; - return w; + size_t w = c; + w &= 0x0ff; + for (size_t i = 8; i != sizeof(size_t)*8; i <<= 1) + w = (w << i) | w; + return w; } }} diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp index 132fbeb039..afb194e437 100644 --- a/contrib/libs/pire/pire/re_lexer.cpp +++ b/contrib/libs/pire/pire/re_lexer.cpp @@ -29,15 +29,15 @@ #include <contrib/libs/pire/pire/stub/singleton.h> #include "fsm.h" -#include "re_lexer.h" -#include "re_parser.h" +#include "re_lexer.h" +#include "re_parser.h" #include "read_unicode.h" namespace Pire { namespace Impl { - int yre_parse(Pire::Lexer& lexer); + int yre_parse(Pire::Lexer& lexer); } Term Term::Character(wchar32 c) { Term::CharacterRange cr; cr.first.insert(Term::String(1, c)); cr.second = false; return Term(TokenTypes::Letters, cr); } @@ -50,51 +50,51 @@ Lexer::~Lexer() = default; wchar32 Lexer::GetChar() { - if (m_input.empty()) - return End; - else if (m_input.front() == '\\') { - m_input.pop_front(); - if (m_input.empty()) - Error("Regexp must not end with a backslash"); - wchar32 ch = m_input.front(); - m_input.pop_front(); - return Control | ch; - } else { - wchar32 ch = m_input.front(); - m_input.pop_front(); - return ch; - } + if (m_input.empty()) + return End; + else if (m_input.front() == '\\') { + m_input.pop_front(); + if (m_input.empty()) + Error("Regexp must not end with a backslash"); + wchar32 ch = m_input.front(); + m_input.pop_front(); + return Control | ch; + } else { + wchar32 ch = m_input.front(); + m_input.pop_front(); + return ch; + } } wchar32 Lexer::PeekChar() { - if (m_input.empty()) - return End; - else - return m_input.front(); + if (m_input.empty()) + return End; + else + return m_input.front(); } void Lexer::UngetChar(wchar32 c) { - if (c != End) - m_input.push_front(c); + if (c != End) + m_input.push_front(c); } namespace { class CompareFeaturesByPriority: public ybinary_function<const Feature::Ptr&, const Feature::Ptr&, bool> { - public: + public: bool operator()(const Feature::Ptr& a, const Feature::Ptr& b) const - { - return a->Priority() < b->Priority(); - } - }; + { + return a->Priority() < b->Priority(); + } + }; } Lexer& Lexer::AddFeature(Feature::Ptr& feature) { - feature->m_lexer = this; + feature->m_lexer = this; m_features.insert(LowerBound(m_features.begin(), m_features.end(), feature, CompareFeaturesByPriority()), std::move(feature)); - return *this; + return *this; } Lexer& Lexer::AddFeature(Feature::Ptr&& feature) @@ -106,107 +106,107 @@ Lexer& Lexer::AddFeature(Feature::Ptr&& feature) Term Lexer::DoLex() { - static const char* controls = "|().*+?^$\\"; - for (;;) { - UngetChar(GetChar()); - wchar32 ch = PeekChar(); - if (ch == End) - return Term(TokenTypes::End); + static const char* controls = "|().*+?^$\\"; + for (;;) { + UngetChar(GetChar()); + wchar32 ch = PeekChar(); + if (ch == End) + return Term(TokenTypes::End); for (auto&& i : m_features) { if (i->Accepts(ch)) { Term ret = i->Lex(); - if (ret.Type()) - return ret; - } - } - ch = GetChar(); - - if (ch == '|') - return Term(TokenTypes::Or); - else if (ch == '(') { - return Term(TokenTypes::Open); - } else if (ch == ')') - return Term(TokenTypes::Close); - else if (ch == '.') - return Term::Dot(); - else if (ch == '*') - return Term::Repetition(0, Inf); - else if (ch == '+') - return Term::Repetition(1, Inf); - else if (ch == '?') - return Term::Repetition(0, 1); - else if (ch == '^') - return Term::BeginMark(); - else if (ch == '$') - return Term::EndMark(); - else if ((ch & ControlMask) == Control && strchr(controls, ch & ~ControlMask)) - return Term::Character(ch & ~ControlMask); - else - return Term::Character(ch); - } + if (ret.Type()) + return ret; + } + } + ch = GetChar(); + + if (ch == '|') + return Term(TokenTypes::Or); + else if (ch == '(') { + return Term(TokenTypes::Open); + } else if (ch == ')') + return Term(TokenTypes::Close); + else if (ch == '.') + return Term::Dot(); + else if (ch == '*') + return Term::Repetition(0, Inf); + else if (ch == '+') + return Term::Repetition(1, Inf); + else if (ch == '?') + return Term::Repetition(0, 1); + else if (ch == '^') + return Term::BeginMark(); + else if (ch == '$') + return Term::EndMark(); + else if ((ch & ControlMask) == Control && strchr(controls, ch & ~ControlMask)) + return Term::Character(ch & ~ControlMask); + else + return Term::Character(ch); + } } Term Lexer::Lex() { - Term t = DoLex(); + Term t = DoLex(); for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i) - (*i)->Alter(t); + (*i)->Alter(t); - if (t.Value().IsA<Term::CharacterRange>()) { + if (t.Value().IsA<Term::CharacterRange>()) { const auto& chars = t.Value().As<Term::CharacterRange>(); - //std::cerr << "lex: type " << t.type() << "; chars = { " << join(chars.first.begin(), chars.first.end(), ", ") << " }" << std::endl; + //std::cerr << "lex: type " << t.type() << "; chars = { " << join(chars.first.begin(), chars.first.end(), ", ") << " }" << std::endl; for (auto&& i : chars.first) for (auto&& j : i) if ((j & ControlMask) == Control) - Error("Control character in tokens sequence"); - } - - int type = t.Type(); - if (type == TokenTypes::Letters) - type = YRE_LETTERS; - else if (type == TokenTypes::Count) - type = YRE_COUNT; - else if (type == TokenTypes::Dot) - type = YRE_DOT; - else if (type == TokenTypes::Open) - type = '('; - else if (type == TokenTypes::Close) - type = ')'; - else if (type == TokenTypes::Or) - type = '|'; - else if (type == TokenTypes::And) - type = YRE_AND; - else if (type == TokenTypes::Not) - type = YRE_NOT; - else if (type == TokenTypes::BeginMark) - type = '^'; - else if (type == TokenTypes::EndMark) - type = '$'; - else if (type == TokenTypes::End) - type = 0; - return Term(type, t.Value()); + Error("Control character in tokens sequence"); + } + + int type = t.Type(); + if (type == TokenTypes::Letters) + type = YRE_LETTERS; + else if (type == TokenTypes::Count) + type = YRE_COUNT; + else if (type == TokenTypes::Dot) + type = YRE_DOT; + else if (type == TokenTypes::Open) + type = '('; + else if (type == TokenTypes::Close) + type = ')'; + else if (type == TokenTypes::Or) + type = '|'; + else if (type == TokenTypes::And) + type = YRE_AND; + else if (type == TokenTypes::Not) + type = YRE_NOT; + else if (type == TokenTypes::BeginMark) + type = '^'; + else if (type == TokenTypes::EndMark) + type = '$'; + else if (type == TokenTypes::End) + type = 0; + return Term(type, t.Value()); } void Lexer::Parenthesized(Fsm& fsm) { for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i) - (*i)->Parenthesized(fsm); + (*i)->Parenthesized(fsm); } wchar32 Feature::CorrectChar(wchar32 c, const char* controls) { - bool ctrl = (strchr(controls, c & 0xFF) != 0); - if ((c & ControlMask) == Control && ctrl) - return c & ~ControlMask; - if (c <= 0xFF && ctrl) - return c | Control; - return c; + bool ctrl = (strchr(controls, c & 0xFF) != 0); + if ((c & ControlMask) == Control && ctrl) + return c & ~ControlMask; + if (c <= 0xFF && ctrl) + return c | Control; + return c; } namespace { class EnableUnicodeSequencesImpl : public UnicodeReader { - public: + public: bool Accepts(wchar32 c) const { return c == (Control | 'x'); } @@ -218,27 +218,27 @@ namespace { class CharacterRangeReader: public UnicodeReader { public: - bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); } - - Term Lex() - { - static const char* controls = "^[]-\\"; - static const char* controls2 = "*+{}()$?.&~"; - wchar32 ch = CorrectChar(GetChar(), controls); - if (ch == '[' || ch == ']') - return Term::Character(ch); - - Term::CharacterRange cs; - ch = CorrectChar(GetChar(), controls); - if (ch == (Control | '^')) { - cs.second = true; - ch = CorrectChar(GetChar(), controls); - } + bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); } + + Term Lex() + { + static const char* controls = "^[]-\\"; + static const char* controls2 = "*+{}()$?.&~"; + wchar32 ch = CorrectChar(GetChar(), controls); + if (ch == '[' || ch == ']') + return Term::Character(ch); + + Term::CharacterRange cs; + ch = CorrectChar(GetChar(), controls); + if (ch == (Control | '^')) { + cs.second = true; + ch = CorrectChar(GetChar(), controls); + } bool firstUnicode; wchar32 unicodeSymbol = 0; - for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) { + for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) { if (ch == (Control | 'x')) { UngetChar(ch); firstUnicode = true; @@ -248,7 +248,7 @@ namespace { } if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) { - GetChar(); + GetChar(); wchar32 current = GetChar(); bool secondUnicode = (current == (Control | 'x')); @@ -265,104 +265,104 @@ namespace { } for (ch = begin; ch <= end; ++ch) { - cs.first.insert(Term::String(1, ch)); + cs.first.insert(Term::String(1, ch)); } } else if (ch == (Control | '-')) { - cs.first.insert(Term::String(1, '-')); + cs.first.insert(Term::String(1, '-')); } else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) { - cs.first.insert(Term::String(1, ch & ~ControlMask)); + cs.first.insert(Term::String(1, ch & ~ControlMask)); } else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) { cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch)); } else { - Error("Wrong character in range"); + Error("Wrong character in range"); } - } - if (ch == End) - Error("Unexpected end of pattern"); - - return Term(TokenTypes::Letters, cs); - } - }; - - class RepetitionCountReader: public Feature { - public: - bool Accepts(wchar32 c) const { return c == '{' || c == (Control | '{') || c == (Control | '}'); } - - Term Lex() - { - wchar32 ch = GetChar(); - if (ch == (Control | '{') || ch == (Control | '}')) - return Term::Character(ch & ~ControlMask); - ch = GetChar(); - int lower = 0, upper = 0; - - if (!is_digit(ch)) - Error("Wrong repetition count"); - - for (; is_digit(ch); ch = GetChar()) - lower = lower * 10 + (ch - '0'); - if (ch == '}') - return Term::Repetition(lower, lower); - else if (ch != ',') - Error("Wrong repetition count"); - - ch = GetChar(); - if (ch == '}') - return Term::Repetition(lower, Inf); - else if (!is_digit(ch)) - Error("Wrong repetition count"); - for (; is_digit(ch); ch = GetChar()) - upper = upper * 10 + (ch - '0'); - - if (ch != '}') - Error("Wrong repetition count"); - return Term::Repetition(lower, upper); - } - }; - - class CaseInsensitiveImpl: public Feature { - public: - void Alter(Term& t) - { - if (t.Value().IsA<Term::CharacterRange>()) { - typedef Term::CharacterRange::first_type CharSet; - const CharSet& old = t.Value().As<Term::CharacterRange>().first; - CharSet altered; + } + if (ch == End) + Error("Unexpected end of pattern"); + + return Term(TokenTypes::Letters, cs); + } + }; + + class RepetitionCountReader: public Feature { + public: + bool Accepts(wchar32 c) const { return c == '{' || c == (Control | '{') || c == (Control | '}'); } + + Term Lex() + { + wchar32 ch = GetChar(); + if (ch == (Control | '{') || ch == (Control | '}')) + return Term::Character(ch & ~ControlMask); + ch = GetChar(); + int lower = 0, upper = 0; + + if (!is_digit(ch)) + Error("Wrong repetition count"); + + for (; is_digit(ch); ch = GetChar()) + lower = lower * 10 + (ch - '0'); + if (ch == '}') + return Term::Repetition(lower, lower); + else if (ch != ',') + Error("Wrong repetition count"); + + ch = GetChar(); + if (ch == '}') + return Term::Repetition(lower, Inf); + else if (!is_digit(ch)) + Error("Wrong repetition count"); + for (; is_digit(ch); ch = GetChar()) + upper = upper * 10 + (ch - '0'); + + if (ch != '}') + Error("Wrong repetition count"); + return Term::Repetition(lower, upper); + } + }; + + class CaseInsensitiveImpl: public Feature { + public: + void Alter(Term& t) + { + if (t.Value().IsA<Term::CharacterRange>()) { + typedef Term::CharacterRange::first_type CharSet; + const CharSet& old = t.Value().As<Term::CharacterRange>().first; + CharSet altered; for (auto&& i : old) { if (i.size() == 1) { altered.insert(Term::String(1, to_upper(i[0]))); altered.insert(Term::String(1, to_lower(i[0]))); - } else + } else altered.insert(i); - } - t = Term(t.Type(), Term::CharacterRange(altered, t.Value().As<Term::CharacterRange>().second)); - } - } - }; - class AndNotSupportImpl: public Feature { - public: - bool Accepts(wchar32 c) const - { - return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~'); - } - - Term Lex() - { - wchar32 ch = GetChar(); - if (ch == (Control | '&') || ch == (Control | '~')) - return Term::Character(ch & ~ControlMask); - else if (ch == '&') - return Term(TokenTypes::And); - else if (ch == '~') - return Term(TokenTypes::Not); - else { - Error("Pire::AndNotSupport::Lex(): strange input character"); - return Term(0); // Make compiler happy - } - } - }; + } + t = Term(t.Type(), Term::CharacterRange(altered, t.Value().As<Term::CharacterRange>().second)); + } + } + }; + class AndNotSupportImpl: public Feature { + public: + bool Accepts(wchar32 c) const + { + return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~'); + } + + Term Lex() + { + wchar32 ch = GetChar(); + if (ch == (Control | '&') || ch == (Control | '~')) + return Term::Character(ch & ~ControlMask); + else if (ch == '&') + return Term(TokenTypes::And); + else if (ch == '~') + return Term(TokenTypes::Not); + else { + Error("Pire::AndNotSupport::Lex(): strange input character"); + return Term(0); // Make compiler happy + } + } + }; } namespace Features { @@ -375,18 +375,18 @@ void Lexer::InstallDefaultFeatures() { AddFeature(Feature::Ptr(new CharacterRangeReader)); AddFeature(Feature::Ptr(new RepetitionCountReader)); - AddFeature(Features::CharClasses()); + AddFeature(Features::CharClasses()); AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl)); } Fsm Lexer::Parse() { - if (!Impl::yre_parse(*this)) - return m_retval.As<Fsm>(); - else { - Error("Syntax error in regexp"); - return Fsm(); // Make compiler happy - } + if (!Impl::yre_parse(*this)) + return m_retval.As<Fsm>(); + else { + Error("Syntax error in regexp"); + return Fsm(); // Make compiler happy + } } } diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y index dbad88e287..39de0a92f0 100644 --- a/contrib/libs/pire/pire/re_parser.y +++ b/contrib/libs/pire/pire/re_parser.y @@ -52,16 +52,16 @@ using Pire::Fsm; using Pire::Encoding; int yylex(YYSTYPE*, Lexer&); -void yyerror(Pire::Lexer&, const char*); +void yyerror(Pire::Lexer&, const char*); Fsm& ConvertToFSM(const Encoding& encoding, Any* any); void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr); %} -%parse-param { Pire::Lexer& rlex } -%lex-param { Pire::Lexer& rlex } -%pure-parser +%parse-param { Pire::Lexer& rlex } +%lex-param { Pire::Lexer& rlex } +%pure-parser // Terminal declarations %term YRE_LETTERS @@ -75,83 +75,83 @@ void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& c %% regexp - : alternative - { - ConvertToFSM(rlex.Encoding(), $1); - DoSwap(rlex.Retval(), *$1); - delete $1; + : alternative + { + ConvertToFSM(rlex.Encoding(), $1); + DoSwap(rlex.Retval(), *$1); + delete $1; $$ = nullptr; - } - ; + } + ; alternative - : conjunction + : conjunction | alternative '|' conjunction { ConvertToFSM(rlex.Encoding(), ($$ = $1)) |= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; } - ; + ; conjunction - : negation + : negation | conjunction YRE_AND negation { ConvertToFSM(rlex.Encoding(), ($$ = $1)) &= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; } - ; + ; negation - : concatenation + : concatenation | YRE_NOT concatenation { ConvertToFSM(rlex.Encoding(), ($$ = $2)).Complement(); delete $1; } - ; + ; concatenation - : { $$ = new Any(Fsm()); } - | concatenation iteration - { - Fsm& a = ConvertToFSM(rlex.Encoding(), ($$ = $1)); - if ($2->IsA<Term::CharacterRange>() && !$2->As<Term::CharacterRange>().second) - AppendRange(rlex.Encoding(), a, $2->As<Term::CharacterRange>()); - else if ($2->IsA<Term::DotTag>()) - rlex.Encoding().AppendDot(a); - else - a += ConvertToFSM(rlex.Encoding(), $2); - delete $2; - } - ; + : { $$ = new Any(Fsm()); } + | concatenation iteration + { + Fsm& a = ConvertToFSM(rlex.Encoding(), ($$ = $1)); + if ($2->IsA<Term::CharacterRange>() && !$2->As<Term::CharacterRange>().second) + AppendRange(rlex.Encoding(), a, $2->As<Term::CharacterRange>()); + else if ($2->IsA<Term::DotTag>()) + rlex.Encoding().AppendDot(a); + else + a += ConvertToFSM(rlex.Encoding(), $2); + delete $2; + } + ; iteration - : term - | term YRE_COUNT - { - Fsm& orig = ConvertToFSM(rlex.Encoding(), $1); - $$ = new Any(orig); - Fsm& cur = $$->As<Fsm>(); - const Term::RepetitionCount& repc = $2->As<Term::RepetitionCount>(); - - - if (repc.first == 0 && repc.second == 1) { - Fsm empty; - cur |= empty; - } else if (repc.first == 0 && repc.second == Inf) { - cur.Iterate(); - } else if (repc.first == 1 && repc.second == Inf) { - cur += *cur; - } else { - cur *= repc.first; - if (repc.second == Inf) { - cur += *orig; - } else if (repc.second != repc.first) { - cur += (orig | Fsm()) * (repc.second - repc.first); - } - } + : term + | term YRE_COUNT + { + Fsm& orig = ConvertToFSM(rlex.Encoding(), $1); + $$ = new Any(orig); + Fsm& cur = $$->As<Fsm>(); + const Term::RepetitionCount& repc = $2->As<Term::RepetitionCount>(); + + + if (repc.first == 0 && repc.second == 1) { + Fsm empty; + cur |= empty; + } else if (repc.first == 0 && repc.second == Inf) { + cur.Iterate(); + } else if (repc.first == 1 && repc.second == Inf) { + cur += *cur; + } else { + cur *= repc.first; + if (repc.second == Inf) { + cur += *orig; + } else if (repc.second != repc.first) { + cur += (orig | Fsm()) * (repc.second - repc.first); + } + } rlex.Parenthesized($$->As<Fsm>()); - delete $1; - delete $2; - } - ; + delete $1; + delete $2; + } + ; term - : YRE_LETTERS - | YRE_DOT - | '^' - | '$' + : YRE_LETTERS + | YRE_DOT + | '^' + | '$' | '(' alternative ')' { $$ = $2; rlex.Parenthesized($$->As<Fsm>()); delete $1; delete $3; } - ; + ; %% @@ -181,60 +181,60 @@ void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& c TVector<ystring> strings; for (auto&& i : cr.first) { - ystring s; + ystring s; for (auto&& j : i) { ystring c = encoding.ToLocal(j); - if (c.empty()) { - s.clear(); - break; - } else + if (c.empty()) { + s.clear(); + break; + } else s += encoding.ToLocal(j); - } - if (!s.empty()) - strings.push_back(s); - } - if (strings.empty()) - // Strings accepted by this FSM are not representable in the current encoding. - // Hence, FSM will accept nothing, and we simply can clear it. - a = Fsm::MakeFalse(); - else - a.AppendStrings(strings); + } + if (!s.empty()) + strings.push_back(s); + } + if (strings.empty()) + // Strings accepted by this FSM are not representable in the current encoding. + // Hence, FSM will accept nothing, and we simply can clear it. + a = Fsm::MakeFalse(); + else + a.AppendStrings(strings); } Fsm& ConvertToFSM(const Encoding& encoding, Any* any) { - if (any->IsA<Fsm>()) - return any->As<Fsm>(); - - Any ret = Fsm(); - Fsm& a = ret.As<Fsm>(); - - if (any->IsA<Term::DotTag>()) { - encoding.AppendDot(a); - } else if (any->IsA<Term::BeginTag>()) { - a.AppendSpecial(BeginMark); - } else if (any->IsA<Term::EndTag>()) { - a.AppendSpecial(EndMark); - } else { - Term::CharacterRange cr = any->As<Term::CharacterRange>(); - AppendRange(encoding, a, cr); - if (cr.second) { - Fsm x; - encoding.AppendDot(x); - x.Complement(); - a |= x; - a.Complement(); - a.RemoveDeadEnds(); - } - } - any->Swap(ret); - return a; + if (any->IsA<Fsm>()) + return any->As<Fsm>(); + + Any ret = Fsm(); + Fsm& a = ret.As<Fsm>(); + + if (any->IsA<Term::DotTag>()) { + encoding.AppendDot(a); + } else if (any->IsA<Term::BeginTag>()) { + a.AppendSpecial(BeginMark); + } else if (any->IsA<Term::EndTag>()) { + a.AppendSpecial(EndMark); + } else { + Term::CharacterRange cr = any->As<Term::CharacterRange>(); + AppendRange(encoding, a, cr); + if (cr.second) { + Fsm x; + encoding.AppendDot(x); + x.Complement(); + a |= x; + a.Complement(); + a.RemoveDeadEnds(); + } + } + any->Swap(ret); + return a; } } namespace Pire { - namespace Impl { + namespace Impl { int yre_parse(Pire::Lexer& rlex) { int rc = yyparse(rlex); @@ -243,5 +243,5 @@ namespace Pire { throw Error(rlex.GetError()); return rc; } - } + } } |