aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/pire
diff options
context:
space:
mode:
authoraxc <axc@yandex-team.ru>2022-02-10 16:47:35 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:35 +0300
commit1f5217043ad70f25dc35e75b3bd261a1e23d045e (patch)
tree11bf68c1fa5272d3d3446cbd5a0ff96ed9d75788 /contrib/libs/pire
parent69505a07cbb096113e85aa02e7d136cac4aa826c (diff)
downloadydb-1f5217043ad70f25dc35e75b3bd261a1e23d045e.tar.gz
Restoring authorship annotation for <axc@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/pire')
-rw-r--r--contrib/libs/pire/pire/platform.h176
-rw-r--r--contrib/libs/pire/pire/re_lexer.cpp426
-rw-r--r--contrib/libs/pire/pire/re_parser.y212
3 files changed, 407 insertions, 407 deletions
diff --git a/contrib/libs/pire/pire/platform.h b/contrib/libs/pire/pire/platform.h
index 54ded6b387..69680ad88f 100644
--- a/contrib/libs/pire/pire/platform.h
+++ b/contrib/libs/pire/pire/platform.h
@@ -74,19 +74,19 @@ typedef i32 ssize_t;
inline int snprintf(char *str, size_t size, const char *format, ...)
{
- va_list argptr;
- va_start(argptr, format);
- int i = _vsnprintf(str, size-1, format, argptr);
- va_end(argptr);
-
- // A workaround for some bug
- if (i < 0) {
- str[size - 1] = '\x00';
- i = (int)size;
- } else if (i < (int)size) {
- str[i] = '\x00';
- }
- return i;
+ va_list argptr;
+ va_start(argptr, format);
+ int i = _vsnprintf(str, size-1, format, argptr);
+ va_end(argptr);
+
+ // A workaround for some bug
+ if (i < 0) {
+ str[size - 1] = '\x00';
+ i = (int)size;
+ } else if (i < (int)size) {
+ str[i] = '\x00';
+ }
+ return i;
}
}
@@ -95,40 +95,40 @@ inline int snprintf(char *str, size_t size, const char *format, ...)
namespace Pire {
namespace Impl {
-// A portable way to define a constant like `(size_t)0101010101010101ull' without any warnings.
-template<unsigned Pos, unsigned char Byte>
-struct DoGenerateConst {
- static const size_t Value = DoGenerateConst<Pos-1, Byte>::Value << 8 | (size_t) Byte;
-};
-
-template<unsigned char Byte>
-struct DoGenerateConst<0, Byte> {
- static const size_t Value = 0;
-};
-
-template<unsigned char Byte>
-struct GenerateConst {
- static const size_t Value = DoGenerateConst<sizeof(size_t), Byte>::Value;
-};
-
-
+// A portable way to define a constant like `(size_t)0101010101010101ull' without any warnings.
+template<unsigned Pos, unsigned char Byte>
+struct DoGenerateConst {
+ static const size_t Value = DoGenerateConst<Pos-1, Byte>::Value << 8 | (size_t) Byte;
+};
+
+template<unsigned char Byte>
+struct DoGenerateConst<0, Byte> {
+ static const size_t Value = 0;
+};
+
+template<unsigned char Byte>
+struct GenerateConst {
+ static const size_t Value = DoGenerateConst<sizeof(size_t), Byte>::Value;
+};
+
+
// Common implementation of mask comparison logic suitable for
// any instruction set
struct BasicInstructionSet {
- typedef size_t Vector;
+ typedef size_t Vector;
- // Check bytes in the chunk against bytes in the mask
- static inline Vector CheckBytes(Vector mask, Vector chunk)
- {
- const size_t mask0x01 = GenerateConst<0x01>::Value;
- const size_t mask0x80 = GenerateConst<0x80>::Value;
- size_t mc = chunk ^ mask;
- return ((mc - mask0x01) & ~mc & mask0x80);
- }
+ // Check bytes in the chunk against bytes in the mask
+ static inline Vector CheckBytes(Vector mask, Vector chunk)
+ {
+ const size_t mask0x01 = GenerateConst<0x01>::Value;
+ const size_t mask0x80 = GenerateConst<0x80>::Value;
+ size_t mc = chunk ^ mask;
+ return ((mc - mask0x01) & ~mc & mask0x80);
+ }
- static inline Vector Or(Vector mask1, Vector mask2) { return (mask1 | mask2); }
+ static inline Vector Or(Vector mask1, Vector mask2) { return (mask1 | mask2); }
- static inline bool IsAnySet(Vector mask) { return (mask != 0); }
+ static inline bool IsAnySet(Vector mask) { return (mask != 0); }
};
}}
@@ -141,22 +141,22 @@ namespace Impl {
// SSE2-optimized mask comparison logic
struct AvailSSE2 {
- typedef __m128i Vector;
-
- static inline Vector CheckBytes(Vector mask, Vector chunk)
- {
- return _mm_cmpeq_epi8(mask, chunk);
- }
-
- static inline Vector Or(Vector mask1, Vector mask2)
- {
- return _mm_or_si128(mask1, mask2);
- }
-
- static inline bool IsAnySet(Vector mask)
- {
- return _mm_movemask_epi8(mask);
- }
+ typedef __m128i Vector;
+
+ static inline Vector CheckBytes(Vector mask, Vector chunk)
+ {
+ return _mm_cmpeq_epi8(mask, chunk);
+ }
+
+ static inline Vector Or(Vector mask1, Vector mask2)
+ {
+ return _mm_or_si128(mask1, mask2);
+ }
+
+ static inline bool IsAnySet(Vector mask)
+ {
+ return _mm_movemask_epi8(mask);
+ }
};
typedef AvailSSE2 AvailInstructionSet;
@@ -173,27 +173,27 @@ namespace Impl {
// MMX-optimized mask comparison logic
struct AvailMMX {
- typedef __m64 Vector;
-
- static inline Vector CheckBytes(Vector mask, Vector chunk)
- {
- return _mm_cmpeq_pi8(mask, chunk);
- }
-
- static inline Vector Or(Vector mask1, Vector mask2)
- {
- return _mm_or_si64(mask1, mask2);
- }
-
- static inline bool IsAnySet(Vector mask)
- {
- union {
- Vector mmxMask;
- ui64 ui64Mask;
- };
- mmxMask = mask;
- return ui64Mask;
- }
+ typedef __m64 Vector;
+
+ static inline Vector CheckBytes(Vector mask, Vector chunk)
+ {
+ return _mm_cmpeq_pi8(mask, chunk);
+ }
+
+ static inline Vector Or(Vector mask1, Vector mask2)
+ {
+ return _mm_or_si64(mask1, mask2);
+ }
+
+ static inline bool IsAnySet(Vector mask)
+ {
+ union {
+ Vector mmxMask;
+ ui64 ui64Mask;
+ };
+ mmxMask = mask;
+ return ui64Mask;
+ }
};
typedef AvailMMX AvailInstructionSet;
@@ -234,25 +234,25 @@ template <size_t Size> struct MaxWordSizeHelper;
// Maximum size of SSE register is 128 bit on x86 and x86_64
template <>
struct MaxWordSizeHelper<16> {
- struct MaxSizeWord {
- char val[16];
- };
+ struct MaxSizeWord {
+ char val[16];
+ };
};
typedef MaxWordSizeHelper<16>::MaxSizeWord MaxSizeWord;
// MaxSizeWord size should be a multiple of size_t size and a multipe of Word size
PIRE_STATIC_ASSERT(
- (sizeof(MaxSizeWord) % sizeof(size_t) == 0) &&
- (sizeof(MaxSizeWord) % sizeof(Word) == 0));
+ (sizeof(MaxSizeWord) % sizeof(size_t) == 0) &&
+ (sizeof(MaxSizeWord) % sizeof(Word) == 0));
inline size_t FillSizeT(char c)
{
- size_t w = c;
- w &= 0x0ff;
- for (size_t i = 8; i != sizeof(size_t)*8; i <<= 1)
- w = (w << i) | w;
- return w;
+ size_t w = c;
+ w &= 0x0ff;
+ for (size_t i = 8; i != sizeof(size_t)*8; i <<= 1)
+ w = (w << i) | w;
+ return w;
}
}}
diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp
index 132fbeb039..afb194e437 100644
--- a/contrib/libs/pire/pire/re_lexer.cpp
+++ b/contrib/libs/pire/pire/re_lexer.cpp
@@ -29,15 +29,15 @@
#include <contrib/libs/pire/pire/stub/singleton.h>
#include "fsm.h"
-#include "re_lexer.h"
-#include "re_parser.h"
+#include "re_lexer.h"
+#include "re_parser.h"
#include "read_unicode.h"
namespace Pire {
namespace Impl {
- int yre_parse(Pire::Lexer& lexer);
+ int yre_parse(Pire::Lexer& lexer);
}
Term Term::Character(wchar32 c) { Term::CharacterRange cr; cr.first.insert(Term::String(1, c)); cr.second = false; return Term(TokenTypes::Letters, cr); }
@@ -50,51 +50,51 @@ Lexer::~Lexer() = default;
wchar32 Lexer::GetChar()
{
- if (m_input.empty())
- return End;
- else if (m_input.front() == '\\') {
- m_input.pop_front();
- if (m_input.empty())
- Error("Regexp must not end with a backslash");
- wchar32 ch = m_input.front();
- m_input.pop_front();
- return Control | ch;
- } else {
- wchar32 ch = m_input.front();
- m_input.pop_front();
- return ch;
- }
+ if (m_input.empty())
+ return End;
+ else if (m_input.front() == '\\') {
+ m_input.pop_front();
+ if (m_input.empty())
+ Error("Regexp must not end with a backslash");
+ wchar32 ch = m_input.front();
+ m_input.pop_front();
+ return Control | ch;
+ } else {
+ wchar32 ch = m_input.front();
+ m_input.pop_front();
+ return ch;
+ }
}
wchar32 Lexer::PeekChar()
{
- if (m_input.empty())
- return End;
- else
- return m_input.front();
+ if (m_input.empty())
+ return End;
+ else
+ return m_input.front();
}
void Lexer::UngetChar(wchar32 c)
{
- if (c != End)
- m_input.push_front(c);
+ if (c != End)
+ m_input.push_front(c);
}
namespace {
class CompareFeaturesByPriority: public ybinary_function<const Feature::Ptr&, const Feature::Ptr&, bool> {
- public:
+ public:
bool operator()(const Feature::Ptr& a, const Feature::Ptr& b) const
- {
- return a->Priority() < b->Priority();
- }
- };
+ {
+ return a->Priority() < b->Priority();
+ }
+ };
}
Lexer& Lexer::AddFeature(Feature::Ptr& feature)
{
- feature->m_lexer = this;
+ feature->m_lexer = this;
m_features.insert(LowerBound(m_features.begin(), m_features.end(), feature, CompareFeaturesByPriority()), std::move(feature));
- return *this;
+ return *this;
}
Lexer& Lexer::AddFeature(Feature::Ptr&& feature)
@@ -106,107 +106,107 @@ Lexer& Lexer::AddFeature(Feature::Ptr&& feature)
Term Lexer::DoLex()
{
- static const char* controls = "|().*+?^$\\";
- for (;;) {
- UngetChar(GetChar());
- wchar32 ch = PeekChar();
- if (ch == End)
- return Term(TokenTypes::End);
+ static const char* controls = "|().*+?^$\\";
+ for (;;) {
+ UngetChar(GetChar());
+ wchar32 ch = PeekChar();
+ if (ch == End)
+ return Term(TokenTypes::End);
for (auto&& i : m_features) {
if (i->Accepts(ch)) {
Term ret = i->Lex();
- if (ret.Type())
- return ret;
- }
- }
- ch = GetChar();
-
- if (ch == '|')
- return Term(TokenTypes::Or);
- else if (ch == '(') {
- return Term(TokenTypes::Open);
- } else if (ch == ')')
- return Term(TokenTypes::Close);
- else if (ch == '.')
- return Term::Dot();
- else if (ch == '*')
- return Term::Repetition(0, Inf);
- else if (ch == '+')
- return Term::Repetition(1, Inf);
- else if (ch == '?')
- return Term::Repetition(0, 1);
- else if (ch == '^')
- return Term::BeginMark();
- else if (ch == '$')
- return Term::EndMark();
- else if ((ch & ControlMask) == Control && strchr(controls, ch & ~ControlMask))
- return Term::Character(ch & ~ControlMask);
- else
- return Term::Character(ch);
- }
+ if (ret.Type())
+ return ret;
+ }
+ }
+ ch = GetChar();
+
+ if (ch == '|')
+ return Term(TokenTypes::Or);
+ else if (ch == '(') {
+ return Term(TokenTypes::Open);
+ } else if (ch == ')')
+ return Term(TokenTypes::Close);
+ else if (ch == '.')
+ return Term::Dot();
+ else if (ch == '*')
+ return Term::Repetition(0, Inf);
+ else if (ch == '+')
+ return Term::Repetition(1, Inf);
+ else if (ch == '?')
+ return Term::Repetition(0, 1);
+ else if (ch == '^')
+ return Term::BeginMark();
+ else if (ch == '$')
+ return Term::EndMark();
+ else if ((ch & ControlMask) == Control && strchr(controls, ch & ~ControlMask))
+ return Term::Character(ch & ~ControlMask);
+ else
+ return Term::Character(ch);
+ }
}
Term Lexer::Lex()
{
- Term t = DoLex();
+ Term t = DoLex();
for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i)
- (*i)->Alter(t);
+ (*i)->Alter(t);
- if (t.Value().IsA<Term::CharacterRange>()) {
+ if (t.Value().IsA<Term::CharacterRange>()) {
const auto& chars = t.Value().As<Term::CharacterRange>();
- //std::cerr << "lex: type " << t.type() << "; chars = { " << join(chars.first.begin(), chars.first.end(), ", ") << " }" << std::endl;
+ //std::cerr << "lex: type " << t.type() << "; chars = { " << join(chars.first.begin(), chars.first.end(), ", ") << " }" << std::endl;
for (auto&& i : chars.first)
for (auto&& j : i)
if ((j & ControlMask) == Control)
- Error("Control character in tokens sequence");
- }
-
- int type = t.Type();
- if (type == TokenTypes::Letters)
- type = YRE_LETTERS;
- else if (type == TokenTypes::Count)
- type = YRE_COUNT;
- else if (type == TokenTypes::Dot)
- type = YRE_DOT;
- else if (type == TokenTypes::Open)
- type = '(';
- else if (type == TokenTypes::Close)
- type = ')';
- else if (type == TokenTypes::Or)
- type = '|';
- else if (type == TokenTypes::And)
- type = YRE_AND;
- else if (type == TokenTypes::Not)
- type = YRE_NOT;
- else if (type == TokenTypes::BeginMark)
- type = '^';
- else if (type == TokenTypes::EndMark)
- type = '$';
- else if (type == TokenTypes::End)
- type = 0;
- return Term(type, t.Value());
+ Error("Control character in tokens sequence");
+ }
+
+ int type = t.Type();
+ if (type == TokenTypes::Letters)
+ type = YRE_LETTERS;
+ else if (type == TokenTypes::Count)
+ type = YRE_COUNT;
+ else if (type == TokenTypes::Dot)
+ type = YRE_DOT;
+ else if (type == TokenTypes::Open)
+ type = '(';
+ else if (type == TokenTypes::Close)
+ type = ')';
+ else if (type == TokenTypes::Or)
+ type = '|';
+ else if (type == TokenTypes::And)
+ type = YRE_AND;
+ else if (type == TokenTypes::Not)
+ type = YRE_NOT;
+ else if (type == TokenTypes::BeginMark)
+ type = '^';
+ else if (type == TokenTypes::EndMark)
+ type = '$';
+ else if (type == TokenTypes::End)
+ type = 0;
+ return Term(type, t.Value());
}
void Lexer::Parenthesized(Fsm& fsm)
{
for (auto i = m_features.rbegin(), ie = m_features.rend(); i != ie; ++i)
- (*i)->Parenthesized(fsm);
+ (*i)->Parenthesized(fsm);
}
wchar32 Feature::CorrectChar(wchar32 c, const char* controls)
{
- bool ctrl = (strchr(controls, c & 0xFF) != 0);
- if ((c & ControlMask) == Control && ctrl)
- return c & ~ControlMask;
- if (c <= 0xFF && ctrl)
- return c | Control;
- return c;
+ bool ctrl = (strchr(controls, c & 0xFF) != 0);
+ if ((c & ControlMask) == Control && ctrl)
+ return c & ~ControlMask;
+ if (c <= 0xFF && ctrl)
+ return c | Control;
+ return c;
}
namespace {
class EnableUnicodeSequencesImpl : public UnicodeReader {
- public:
+ public:
bool Accepts(wchar32 c) const {
return c == (Control | 'x');
}
@@ -218,27 +218,27 @@ namespace {
class CharacterRangeReader: public UnicodeReader {
public:
- bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); }
-
- Term Lex()
- {
- static const char* controls = "^[]-\\";
- static const char* controls2 = "*+{}()$?.&~";
- wchar32 ch = CorrectChar(GetChar(), controls);
- if (ch == '[' || ch == ']')
- return Term::Character(ch);
-
- Term::CharacterRange cs;
- ch = CorrectChar(GetChar(), controls);
- if (ch == (Control | '^')) {
- cs.second = true;
- ch = CorrectChar(GetChar(), controls);
- }
+ bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); }
+
+ Term Lex()
+ {
+ static const char* controls = "^[]-\\";
+ static const char* controls2 = "*+{}()$?.&~";
+ wchar32 ch = CorrectChar(GetChar(), controls);
+ if (ch == '[' || ch == ']')
+ return Term::Character(ch);
+
+ Term::CharacterRange cs;
+ ch = CorrectChar(GetChar(), controls);
+ if (ch == (Control | '^')) {
+ cs.second = true;
+ ch = CorrectChar(GetChar(), controls);
+ }
bool firstUnicode;
wchar32 unicodeSymbol = 0;
- for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) {
+ for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) {
if (ch == (Control | 'x')) {
UngetChar(ch);
firstUnicode = true;
@@ -248,7 +248,7 @@ namespace {
}
if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) {
- GetChar();
+ GetChar();
wchar32 current = GetChar();
bool secondUnicode = (current == (Control | 'x'));
@@ -265,104 +265,104 @@ namespace {
}
for (ch = begin; ch <= end; ++ch) {
- cs.first.insert(Term::String(1, ch));
+ cs.first.insert(Term::String(1, ch));
}
} else if (ch == (Control | '-')) {
- cs.first.insert(Term::String(1, '-'));
+ cs.first.insert(Term::String(1, '-'));
}
else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) {
- cs.first.insert(Term::String(1, ch & ~ControlMask));
+ cs.first.insert(Term::String(1, ch & ~ControlMask));
}
else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) {
cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch));
} else {
- Error("Wrong character in range");
+ Error("Wrong character in range");
}
- }
- if (ch == End)
- Error("Unexpected end of pattern");
-
- return Term(TokenTypes::Letters, cs);
- }
- };
-
- class RepetitionCountReader: public Feature {
- public:
- bool Accepts(wchar32 c) const { return c == '{' || c == (Control | '{') || c == (Control | '}'); }
-
- Term Lex()
- {
- wchar32 ch = GetChar();
- if (ch == (Control | '{') || ch == (Control | '}'))
- return Term::Character(ch & ~ControlMask);
- ch = GetChar();
- int lower = 0, upper = 0;
-
- if (!is_digit(ch))
- Error("Wrong repetition count");
-
- for (; is_digit(ch); ch = GetChar())
- lower = lower * 10 + (ch - '0');
- if (ch == '}')
- return Term::Repetition(lower, lower);
- else if (ch != ',')
- Error("Wrong repetition count");
-
- ch = GetChar();
- if (ch == '}')
- return Term::Repetition(lower, Inf);
- else if (!is_digit(ch))
- Error("Wrong repetition count");
- for (; is_digit(ch); ch = GetChar())
- upper = upper * 10 + (ch - '0');
-
- if (ch != '}')
- Error("Wrong repetition count");
- return Term::Repetition(lower, upper);
- }
- };
-
- class CaseInsensitiveImpl: public Feature {
- public:
- void Alter(Term& t)
- {
- if (t.Value().IsA<Term::CharacterRange>()) {
- typedef Term::CharacterRange::first_type CharSet;
- const CharSet& old = t.Value().As<Term::CharacterRange>().first;
- CharSet altered;
+ }
+ if (ch == End)
+ Error("Unexpected end of pattern");
+
+ return Term(TokenTypes::Letters, cs);
+ }
+ };
+
+ class RepetitionCountReader: public Feature {
+ public:
+ bool Accepts(wchar32 c) const { return c == '{' || c == (Control | '{') || c == (Control | '}'); }
+
+ Term Lex()
+ {
+ wchar32 ch = GetChar();
+ if (ch == (Control | '{') || ch == (Control | '}'))
+ return Term::Character(ch & ~ControlMask);
+ ch = GetChar();
+ int lower = 0, upper = 0;
+
+ if (!is_digit(ch))
+ Error("Wrong repetition count");
+
+ for (; is_digit(ch); ch = GetChar())
+ lower = lower * 10 + (ch - '0');
+ if (ch == '}')
+ return Term::Repetition(lower, lower);
+ else if (ch != ',')
+ Error("Wrong repetition count");
+
+ ch = GetChar();
+ if (ch == '}')
+ return Term::Repetition(lower, Inf);
+ else if (!is_digit(ch))
+ Error("Wrong repetition count");
+ for (; is_digit(ch); ch = GetChar())
+ upper = upper * 10 + (ch - '0');
+
+ if (ch != '}')
+ Error("Wrong repetition count");
+ return Term::Repetition(lower, upper);
+ }
+ };
+
+ class CaseInsensitiveImpl: public Feature {
+ public:
+ void Alter(Term& t)
+ {
+ if (t.Value().IsA<Term::CharacterRange>()) {
+ typedef Term::CharacterRange::first_type CharSet;
+ const CharSet& old = t.Value().As<Term::CharacterRange>().first;
+ CharSet altered;
for (auto&& i : old) {
if (i.size() == 1) {
altered.insert(Term::String(1, to_upper(i[0])));
altered.insert(Term::String(1, to_lower(i[0])));
- } else
+ } else
altered.insert(i);
- }
- t = Term(t.Type(), Term::CharacterRange(altered, t.Value().As<Term::CharacterRange>().second));
- }
- }
- };
- class AndNotSupportImpl: public Feature {
- public:
- bool Accepts(wchar32 c) const
- {
- return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~');
- }
-
- Term Lex()
- {
- wchar32 ch = GetChar();
- if (ch == (Control | '&') || ch == (Control | '~'))
- return Term::Character(ch & ~ControlMask);
- else if (ch == '&')
- return Term(TokenTypes::And);
- else if (ch == '~')
- return Term(TokenTypes::Not);
- else {
- Error("Pire::AndNotSupport::Lex(): strange input character");
- return Term(0); // Make compiler happy
- }
- }
- };
+ }
+ t = Term(t.Type(), Term::CharacterRange(altered, t.Value().As<Term::CharacterRange>().second));
+ }
+ }
+ };
+ class AndNotSupportImpl: public Feature {
+ public:
+ bool Accepts(wchar32 c) const
+ {
+ return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~');
+ }
+
+ Term Lex()
+ {
+ wchar32 ch = GetChar();
+ if (ch == (Control | '&') || ch == (Control | '~'))
+ return Term::Character(ch & ~ControlMask);
+ else if (ch == '&')
+ return Term(TokenTypes::And);
+ else if (ch == '~')
+ return Term(TokenTypes::Not);
+ else {
+ Error("Pire::AndNotSupport::Lex(): strange input character");
+ return Term(0); // Make compiler happy
+ }
+ }
+ };
}
namespace Features {
@@ -375,18 +375,18 @@ void Lexer::InstallDefaultFeatures()
{
AddFeature(Feature::Ptr(new CharacterRangeReader));
AddFeature(Feature::Ptr(new RepetitionCountReader));
- AddFeature(Features::CharClasses());
+ AddFeature(Features::CharClasses());
AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl));
}
Fsm Lexer::Parse()
{
- if (!Impl::yre_parse(*this))
- return m_retval.As<Fsm>();
- else {
- Error("Syntax error in regexp");
- return Fsm(); // Make compiler happy
- }
+ if (!Impl::yre_parse(*this))
+ return m_retval.As<Fsm>();
+ else {
+ Error("Syntax error in regexp");
+ return Fsm(); // Make compiler happy
+ }
}
}
diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y
index dbad88e287..39de0a92f0 100644
--- a/contrib/libs/pire/pire/re_parser.y
+++ b/contrib/libs/pire/pire/re_parser.y
@@ -52,16 +52,16 @@ using Pire::Fsm;
using Pire::Encoding;
int yylex(YYSTYPE*, Lexer&);
-void yyerror(Pire::Lexer&, const char*);
+void yyerror(Pire::Lexer&, const char*);
Fsm& ConvertToFSM(const Encoding& encoding, Any* any);
void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr);
%}
-%parse-param { Pire::Lexer& rlex }
-%lex-param { Pire::Lexer& rlex }
-%pure-parser
+%parse-param { Pire::Lexer& rlex }
+%lex-param { Pire::Lexer& rlex }
+%pure-parser
// Terminal declarations
%term YRE_LETTERS
@@ -75,83 +75,83 @@ void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& c
%%
regexp
- : alternative
- {
- ConvertToFSM(rlex.Encoding(), $1);
- DoSwap(rlex.Retval(), *$1);
- delete $1;
+ : alternative
+ {
+ ConvertToFSM(rlex.Encoding(), $1);
+ DoSwap(rlex.Retval(), *$1);
+ delete $1;
$$ = nullptr;
- }
- ;
+ }
+ ;
alternative
- : conjunction
+ : conjunction
| alternative '|' conjunction { ConvertToFSM(rlex.Encoding(), ($$ = $1)) |= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; }
- ;
+ ;
conjunction
- : negation
+ : negation
| conjunction YRE_AND negation { ConvertToFSM(rlex.Encoding(), ($$ = $1)) &= ConvertToFSM(rlex.Encoding(), $3); delete $2; delete $3; }
- ;
+ ;
negation
- : concatenation
+ : concatenation
| YRE_NOT concatenation { ConvertToFSM(rlex.Encoding(), ($$ = $2)).Complement(); delete $1; }
- ;
+ ;
concatenation
- : { $$ = new Any(Fsm()); }
- | concatenation iteration
- {
- Fsm& a = ConvertToFSM(rlex.Encoding(), ($$ = $1));
- if ($2->IsA<Term::CharacterRange>() && !$2->As<Term::CharacterRange>().second)
- AppendRange(rlex.Encoding(), a, $2->As<Term::CharacterRange>());
- else if ($2->IsA<Term::DotTag>())
- rlex.Encoding().AppendDot(a);
- else
- a += ConvertToFSM(rlex.Encoding(), $2);
- delete $2;
- }
- ;
+ : { $$ = new Any(Fsm()); }
+ | concatenation iteration
+ {
+ Fsm& a = ConvertToFSM(rlex.Encoding(), ($$ = $1));
+ if ($2->IsA<Term::CharacterRange>() && !$2->As<Term::CharacterRange>().second)
+ AppendRange(rlex.Encoding(), a, $2->As<Term::CharacterRange>());
+ else if ($2->IsA<Term::DotTag>())
+ rlex.Encoding().AppendDot(a);
+ else
+ a += ConvertToFSM(rlex.Encoding(), $2);
+ delete $2;
+ }
+ ;
iteration
- : term
- | term YRE_COUNT
- {
- Fsm& orig = ConvertToFSM(rlex.Encoding(), $1);
- $$ = new Any(orig);
- Fsm& cur = $$->As<Fsm>();
- const Term::RepetitionCount& repc = $2->As<Term::RepetitionCount>();
-
-
- if (repc.first == 0 && repc.second == 1) {
- Fsm empty;
- cur |= empty;
- } else if (repc.first == 0 && repc.second == Inf) {
- cur.Iterate();
- } else if (repc.first == 1 && repc.second == Inf) {
- cur += *cur;
- } else {
- cur *= repc.first;
- if (repc.second == Inf) {
- cur += *orig;
- } else if (repc.second != repc.first) {
- cur += (orig | Fsm()) * (repc.second - repc.first);
- }
- }
+ : term
+ | term YRE_COUNT
+ {
+ Fsm& orig = ConvertToFSM(rlex.Encoding(), $1);
+ $$ = new Any(orig);
+ Fsm& cur = $$->As<Fsm>();
+ const Term::RepetitionCount& repc = $2->As<Term::RepetitionCount>();
+
+
+ if (repc.first == 0 && repc.second == 1) {
+ Fsm empty;
+ cur |= empty;
+ } else if (repc.first == 0 && repc.second == Inf) {
+ cur.Iterate();
+ } else if (repc.first == 1 && repc.second == Inf) {
+ cur += *cur;
+ } else {
+ cur *= repc.first;
+ if (repc.second == Inf) {
+ cur += *orig;
+ } else if (repc.second != repc.first) {
+ cur += (orig | Fsm()) * (repc.second - repc.first);
+ }
+ }
rlex.Parenthesized($$->As<Fsm>());
- delete $1;
- delete $2;
- }
- ;
+ delete $1;
+ delete $2;
+ }
+ ;
term
- : YRE_LETTERS
- | YRE_DOT
- | '^'
- | '$'
+ : YRE_LETTERS
+ | YRE_DOT
+ | '^'
+ | '$'
| '(' alternative ')' { $$ = $2; rlex.Parenthesized($$->As<Fsm>()); delete $1; delete $3; }
- ;
+ ;
%%
@@ -181,60 +181,60 @@ void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& c
TVector<ystring> strings;
for (auto&& i : cr.first) {
- ystring s;
+ ystring s;
for (auto&& j : i) {
ystring c = encoding.ToLocal(j);
- if (c.empty()) {
- s.clear();
- break;
- } else
+ if (c.empty()) {
+ s.clear();
+ break;
+ } else
s += encoding.ToLocal(j);
- }
- if (!s.empty())
- strings.push_back(s);
- }
- if (strings.empty())
- // Strings accepted by this FSM are not representable in the current encoding.
- // Hence, FSM will accept nothing, and we simply can clear it.
- a = Fsm::MakeFalse();
- else
- a.AppendStrings(strings);
+ }
+ if (!s.empty())
+ strings.push_back(s);
+ }
+ if (strings.empty())
+ // Strings accepted by this FSM are not representable in the current encoding.
+ // Hence, FSM will accept nothing, and we simply can clear it.
+ a = Fsm::MakeFalse();
+ else
+ a.AppendStrings(strings);
}
Fsm& ConvertToFSM(const Encoding& encoding, Any* any)
{
- if (any->IsA<Fsm>())
- return any->As<Fsm>();
-
- Any ret = Fsm();
- Fsm& a = ret.As<Fsm>();
-
- if (any->IsA<Term::DotTag>()) {
- encoding.AppendDot(a);
- } else if (any->IsA<Term::BeginTag>()) {
- a.AppendSpecial(BeginMark);
- } else if (any->IsA<Term::EndTag>()) {
- a.AppendSpecial(EndMark);
- } else {
- Term::CharacterRange cr = any->As<Term::CharacterRange>();
- AppendRange(encoding, a, cr);
- if (cr.second) {
- Fsm x;
- encoding.AppendDot(x);
- x.Complement();
- a |= x;
- a.Complement();
- a.RemoveDeadEnds();
- }
- }
- any->Swap(ret);
- return a;
+ if (any->IsA<Fsm>())
+ return any->As<Fsm>();
+
+ Any ret = Fsm();
+ Fsm& a = ret.As<Fsm>();
+
+ if (any->IsA<Term::DotTag>()) {
+ encoding.AppendDot(a);
+ } else if (any->IsA<Term::BeginTag>()) {
+ a.AppendSpecial(BeginMark);
+ } else if (any->IsA<Term::EndTag>()) {
+ a.AppendSpecial(EndMark);
+ } else {
+ Term::CharacterRange cr = any->As<Term::CharacterRange>();
+ AppendRange(encoding, a, cr);
+ if (cr.second) {
+ Fsm x;
+ encoding.AppendDot(x);
+ x.Complement();
+ a |= x;
+ a.Complement();
+ a.RemoveDeadEnds();
+ }
+ }
+ any->Swap(ret);
+ return a;
}
}
namespace Pire {
- namespace Impl {
+ namespace Impl {
int yre_parse(Pire::Lexer& rlex)
{
int rc = yyparse(rlex);
@@ -243,5 +243,5 @@ namespace Pire {
throw Error(rlex.GetError());
return rc;
}
- }
+ }
}