diff options
author | karina-usm <karina-usm@yandex-team.ru> | 2022-02-10 16:48:05 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:05 +0300 |
commit | 3305cedaf9e392ab24e4b7dd6072976748ce60bf (patch) | |
tree | b222e5ac2e2e98872661c51ccceee5da0d291e13 /contrib | |
parent | 62517661cde7aa7c93efe0281ec48eeb70ea420c (diff) | |
download | ydb-3305cedaf9e392ab24e4b7dd6072976748ce60bf.tar.gz |
Restoring authorship annotation for <karina-usm@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/libs/pire/pire/approx_matching.cpp | 188 | ||||
-rw-r--r-- | contrib/libs/pire/pire/approx_matching.h | 56 | ||||
-rw-r--r-- | contrib/libs/pire/pire/extra/capture.h | 14 | ||||
-rw-r--r-- | contrib/libs/pire/pire/fsm.h | 10 | ||||
-rw-r--r-- | contrib/libs/pire/pire/re_lexer.cpp | 112 | ||||
-rw-r--r-- | contrib/libs/pire/pire/read_unicode.cpp | 166 | ||||
-rw-r--r-- | contrib/libs/pire/pire/read_unicode.h | 80 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/loaded.h | 10 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/multi.h | 10 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/simple.h | 12 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/slow.h | 34 |
11 files changed, 346 insertions, 346 deletions
diff --git a/contrib/libs/pire/pire/approx_matching.cpp b/contrib/libs/pire/pire/approx_matching.cpp index 8c393b39e0..23f74ca01d 100644 --- a/contrib/libs/pire/pire/approx_matching.cpp +++ b/contrib/libs/pire/pire/approx_matching.cpp @@ -1,94 +1,94 @@ -/* - * approx_matching.cpp -- implementation of CreateApproxFsm function - * - * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include "approx_matching.h" - -namespace Pire { - Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) { - Fsm approxFsm = regexp; - - TVector<TSet<Char>> outgoingLettersTable(regexp.Size()); - for (size_t state = 0; state < regexp.Size(); ++state) { - outgoingLettersTable[state] = regexp.OutgoingLetters(state); - } - - TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size()); - for (size_t state = 0; state < regexp.Size(); ++state) { - for (Char letter : outgoingLettersTable[state]) { - destinationsTable[state][letter] = regexp.Destinations(state, letter); - } - } - - for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) { - approxFsm.Import(regexp); - const auto shift = fsmIdx * regexp.Size(); - - for (size_t state = 0; state < regexp.Size(); ++state) { - for (Char letter : outgoingLettersTable[state]) { - for (size_t to : destinationsTable[state][letter]) { - for (Char ch = 0; ch < MaxChar; ++ch) { - if (!approxFsm.Connected(state + shift, to + shift, ch)) { - approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch); - } - } - - approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon); - } - - for (Char ch = 0; ch < MaxChar; ++ch) { - approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch); - } - } - - if (regexp.IsFinal(state)) { - approxFsm.SetFinal(state + shift + regexp.Size(), true); - } - } - } - - size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0; - for (size_t state = 0; state < maxState; ++state) { - size_t currentDist = state / regexp.Size(); - size_t intState = state % regexp.Size(); - - for (Char firstLetter : outgoingLettersTable[intState]) { - for (size_t firstDest : destinationsTable[intState][firstLetter]) { - for (Char secondLetter : outgoingLettersTable[firstDest]) { - for (size_t secondDest : destinationsTable[firstDest][secondLetter]) { - if (secondDest != intState || firstDest != intState) { - approxFsm.Resize(approxFsm.Size() + 1); - - size_t to = secondDest + (currentDist + 1) * regexp.Size(); - size_t middle = approxFsm.Size() - 1; - - approxFsm.Connect(state, middle, secondLetter); - approxFsm.Connect(middle, to, firstLetter); - } - } - } - } - } - } - - return approxFsm; - } -} +/* + * approx_matching.cpp -- implementation of CreateApproxFsm function + * + * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include "approx_matching.h" + +namespace Pire { + Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) { + Fsm approxFsm = regexp; + + TVector<TSet<Char>> outgoingLettersTable(regexp.Size()); + for (size_t state = 0; state < regexp.Size(); ++state) { + outgoingLettersTable[state] = regexp.OutgoingLetters(state); + } + + TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size()); + for (size_t state = 0; state < regexp.Size(); ++state) { + for (Char letter : outgoingLettersTable[state]) { + destinationsTable[state][letter] = regexp.Destinations(state, letter); + } + } + + for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) { + approxFsm.Import(regexp); + const auto shift = fsmIdx * regexp.Size(); + + for (size_t state = 0; state < regexp.Size(); ++state) { + for (Char letter : outgoingLettersTable[state]) { + for (size_t to : destinationsTable[state][letter]) { + for (Char ch = 0; ch < MaxChar; ++ch) { + if (!approxFsm.Connected(state + shift, to + shift, ch)) { + approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch); + } + } + + approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon); + } + + for (Char ch = 0; ch < MaxChar; ++ch) { + approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch); + } + } + + if (regexp.IsFinal(state)) { + approxFsm.SetFinal(state + shift + regexp.Size(), true); + } + } + } + + size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0; + for (size_t state = 0; state < maxState; ++state) { + size_t currentDist = state / regexp.Size(); + size_t intState = state % regexp.Size(); + + for (Char firstLetter : outgoingLettersTable[intState]) { + for (size_t firstDest : destinationsTable[intState][firstLetter]) { + for (Char secondLetter : outgoingLettersTable[firstDest]) { + for (size_t secondDest : destinationsTable[firstDest][secondLetter]) { + if (secondDest != intState || firstDest != intState) { + approxFsm.Resize(approxFsm.Size() + 1); + + size_t to = secondDest + (currentDist + 1) * regexp.Size(); + size_t middle = approxFsm.Size() - 1; + + approxFsm.Connect(state, middle, secondLetter); + approxFsm.Connect(middle, to, firstLetter); + } + } + } + } + } + } + + return approxFsm; + } +} diff --git a/contrib/libs/pire/pire/approx_matching.h b/contrib/libs/pire/pire/approx_matching.h index 2b2568d96b..fc2a9fd61c 100644 --- a/contrib/libs/pire/pire/approx_matching.h +++ b/contrib/libs/pire/pire/approx_matching.h @@ -1,28 +1,28 @@ -/* - * approx_matching.h -- function for creating fsm which matches words - * within a levenshtein distance - * - * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include "fsm.h" - -namespace Pire { - Fsm CreateApproxFsm(const Fsm& regexp, size_t distance); -} +/* + * approx_matching.h -- function for creating fsm which matches words + * within a levenshtein distance + * + * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include "fsm.h" + +namespace Pire { + Fsm CreateApproxFsm(const Fsm& regexp, size_t distance); +} diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h index 7bca334eac..8399914a67 100644 --- a/contrib/libs/pire/pire/extra/capture.h +++ b/contrib/libs/pire/pire/extra/capture.h @@ -25,7 +25,7 @@ #define PIRE_EXTRA_CAPTURE_H -#include <contrib/libs/pire/pire/approx_matching.h> +#include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/scanners/loaded.h> #include <contrib/libs/pire/pire/scanners/multi.h> #include <contrib/libs/pire/pire/scanners/slow.h> @@ -139,11 +139,11 @@ public: CapturingScanner() {} CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {} - explicit CapturingScanner(Fsm& fsm, size_t distance = 0) + explicit CapturingScanner(Fsm& fsm, size_t distance = 0) { - if (distance) { - fsm = CreateApproxFsm(fsm, distance); - } + if (distance) { + fsm = CreateApproxFsm(fsm, distance); + } fsm.Canonize(); Init(fsm.Size(), fsm.Letters(), fsm.Initial()); BuildScanner(fsm, *this); @@ -576,8 +576,8 @@ public: { } - SlowCapturingScanner(Fsm& fsm, size_t distance = 0) - : SlowScanner(fsm, true, false, distance) + SlowCapturingScanner(Fsm& fsm, size_t distance = 0) + : SlowScanner(fsm, true, false, distance) { } }; diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h index 348e6b6216..4dad06ca06 100644 --- a/contrib/libs/pire/pire/fsm.h +++ b/contrib/libs/pire/pire/fsm.h @@ -115,9 +115,9 @@ namespace Pire { /// Determines and minimizes the FSM if neccessary. Returns *this. Fsm& Canonize(size_t maxSize = 0); - + template<class Scanner> - Scanner Compile(size_t distance = 0); + Scanner Compile(size_t distance = 0); void DumpState(yostream& s, size_t state) const; void DumpTo(yostream& s, const ystring& name = "") const; @@ -270,11 +270,11 @@ namespace Pire { r.FinishBuild(); } - + template<class Scanner> - inline Scanner Fsm::Compile(size_t distance) + inline Scanner Fsm::Compile(size_t distance) { - return Scanner(*this, distance); + return Scanner(*this, distance); } yostream& operator << (yostream&, const Fsm&); diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp index dbae421f16..132fbeb039 100644 --- a/contrib/libs/pire/pire/re_lexer.cpp +++ b/contrib/libs/pire/pire/re_lexer.cpp @@ -11,7 +11,7 @@ * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * Pire is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -28,12 +28,12 @@ #include <contrib/libs/pire/pire/stub/utf8.h> #include <contrib/libs/pire/pire/stub/singleton.h> -#include "fsm.h" +#include "fsm.h" #include "re_lexer.h" #include "re_parser.h" -#include "read_unicode.h" +#include "read_unicode.h" + - namespace Pire { namespace Impl { @@ -161,7 +161,7 @@ Term Lexer::Lex() if ((j & ControlMask) == Control) Error("Control character in tokens sequence"); } - + int type = t.Type(); if (type == TokenTypes::Letters) type = YRE_LETTERS; @@ -205,19 +205,19 @@ wchar32 Feature::CorrectChar(wchar32 c, const char* controls) } namespace { - class EnableUnicodeSequencesImpl : public UnicodeReader { + class EnableUnicodeSequencesImpl : public UnicodeReader { + public: + bool Accepts(wchar32 c) const { + return c == (Control | 'x'); + } + + Term Lex() { + return Term::Character(ReadUnicodeCharacter()); + } + }; + + class CharacterRangeReader: public UnicodeReader { public: - bool Accepts(wchar32 c) const { - return c == (Control | 'x'); - } - - Term Lex() { - return Term::Character(ReadUnicodeCharacter()); - } - }; - - class CharacterRangeReader: public UnicodeReader { - public: bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); } Term Lex() @@ -235,49 +235,49 @@ namespace { ch = CorrectChar(GetChar(), controls); } - bool firstUnicode; - wchar32 unicodeSymbol = 0; - + bool firstUnicode; + wchar32 unicodeSymbol = 0; + for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) { - if (ch == (Control | 'x')) { - UngetChar(ch); - firstUnicode = true; - unicodeSymbol = ReadUnicodeCharacter(); - } else { - firstUnicode = false; - } - - if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) { + if (ch == (Control | 'x')) { + UngetChar(ch); + firstUnicode = true; + unicodeSymbol = ReadUnicodeCharacter(); + } else { + firstUnicode = false; + } + + if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) { GetChar(); - wchar32 current = GetChar(); - - bool secondUnicode = (current == (Control | 'x')); - - wchar32 begin = (firstUnicode) ? unicodeSymbol : ch; - wchar32 end; - if (secondUnicode) { - UngetChar(current); - end = ReadUnicodeCharacter(); - } else { - end = CorrectChar(current, controls); - if ((end & ControlMask) == Control) - Error("Wrong character range"); - } - - for (ch = begin; ch <= end; ++ch) { + wchar32 current = GetChar(); + + bool secondUnicode = (current == (Control | 'x')); + + wchar32 begin = (firstUnicode) ? unicodeSymbol : ch; + wchar32 end; + if (secondUnicode) { + UngetChar(current); + end = ReadUnicodeCharacter(); + } else { + end = CorrectChar(current, controls); + if ((end & ControlMask) == Control) + Error("Wrong character range"); + } + + for (ch = begin; ch <= end; ++ch) { cs.first.insert(Term::String(1, ch)); - } - } else if (ch == (Control | '-')) { + } + } else if (ch == (Control | '-')) { cs.first.insert(Term::String(1, '-')); - } - else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) { + } + else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) { cs.first.insert(Term::String(1, ch & ~ControlMask)); - } - else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) { - cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch)); - } else { + } + else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) { + cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch)); + } else { Error("Wrong character in range"); - } + } } if (ch == End) Error("Unexpected end of pattern"); @@ -347,7 +347,7 @@ namespace { { return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~'); } - + Term Lex() { wchar32 ch = GetChar(); @@ -376,7 +376,7 @@ void Lexer::InstallDefaultFeatures() AddFeature(Feature::Ptr(new CharacterRangeReader)); AddFeature(Feature::Ptr(new RepetitionCountReader)); AddFeature(Features::CharClasses()); - AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl)); + AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl)); } Fsm Lexer::Parse() diff --git a/contrib/libs/pire/pire/read_unicode.cpp b/contrib/libs/pire/pire/read_unicode.cpp index e167cf5cca..5b21e4eb28 100644 --- a/contrib/libs/pire/pire/read_unicode.cpp +++ b/contrib/libs/pire/pire/read_unicode.cpp @@ -1,83 +1,83 @@ -/* - * read_unicode.cpp -- implementation of the UnicodeReader. - * - * Copyright (c) 2019 YANDEX LLC - * Author: Karina Usmanova <usmanova.karin@yandex.ru> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include "read_unicode.h" - -#include <contrib/libs/pire/pire/re_lexer.h> - -namespace Pire { - wchar32 UnicodeReader::ReadUnicodeCharacter() { - ystring hexStr; - GetChar(); - wchar32 ch = PeekChar(); - - if (ch == '{') { - GetChar(); - hexStr = ReadHexDigit( - [](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); }); - ch = GetChar(); - if (ch != '}') { - Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\""); - } - } else { - hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; }); - if (hexStr.size() != 2) { - Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols"); - } - } - return HexToDec(hexStr); - } - - bool UnicodeReader::IsHexDigit(wchar32 ch) { - return ch < 256 && std::isxdigit(ch) != 0; - } - - ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) { - ystring result; - wchar32 ch = GetChar(); - while (!shouldStop(ch, result.size())) { - if (!IsHexDigit(ch)) { - Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number"); - } - result.push_back(ch); - ch = GetChar(); - } - UngetChar(ch); - return result; - } - - wchar32 UnicodeReader::HexToDec(const ystring &hexStr) { - wchar32 converted; - try { - converted = std::stoul(hexStr, 0, 16); - } catch (std::out_of_range &) { - converted = MAX_UNICODE + 1; - } - if (converted > MAX_UNICODE) { - Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large"); - } - return converted; - } -} - - +/* + * read_unicode.cpp -- implementation of the UnicodeReader. + * + * Copyright (c) 2019 YANDEX LLC + * Author: Karina Usmanova <usmanova.karin@yandex.ru> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include "read_unicode.h" + +#include <contrib/libs/pire/pire/re_lexer.h> + +namespace Pire { + wchar32 UnicodeReader::ReadUnicodeCharacter() { + ystring hexStr; + GetChar(); + wchar32 ch = PeekChar(); + + if (ch == '{') { + GetChar(); + hexStr = ReadHexDigit( + [](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); }); + ch = GetChar(); + if (ch != '}') { + Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\""); + } + } else { + hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; }); + if (hexStr.size() != 2) { + Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols"); + } + } + return HexToDec(hexStr); + } + + bool UnicodeReader::IsHexDigit(wchar32 ch) { + return ch < 256 && std::isxdigit(ch) != 0; + } + + ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) { + ystring result; + wchar32 ch = GetChar(); + while (!shouldStop(ch, result.size())) { + if (!IsHexDigit(ch)) { + Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number"); + } + result.push_back(ch); + ch = GetChar(); + } + UngetChar(ch); + return result; + } + + wchar32 UnicodeReader::HexToDec(const ystring &hexStr) { + wchar32 converted; + try { + converted = std::stoul(hexStr, 0, 16); + } catch (std::out_of_range &) { + converted = MAX_UNICODE + 1; + } + if (converted > MAX_UNICODE) { + Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large"); + } + return converted; + } +} + + diff --git a/contrib/libs/pire/pire/read_unicode.h b/contrib/libs/pire/pire/read_unicode.h index f0705c14aa..107545e5a1 100644 --- a/contrib/libs/pire/pire/read_unicode.h +++ b/contrib/libs/pire/pire/read_unicode.h @@ -1,40 +1,40 @@ -/* - * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences. - * - * Copyright (c) 2019 YANDEX LLC - * Author: Karina Usmanova <usmanova.karin@yandex.ru> - * - * This file is part of Pire, the Perl Incompatible - * Regular Expressions library. - * - * Pire is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Pire is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * You should have received a copy of the GNU Lesser Public License - * along with Pire. If not, see <http://www.gnu.org/licenses>. - */ - - -#include <contrib/libs/pire/pire/re_lexer.h> - -namespace Pire { - class UnicodeReader : public Feature { - public: - wchar32 ReadUnicodeCharacter(); - - private: - static const wchar32 MAX_UNICODE = 0x10FFFF; - - bool IsHexDigit(wchar32 ch); - ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop); - wchar32 HexToDec(const ystring& hexStr); - }; -} - - +/* + * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences. + * + * Copyright (c) 2019 YANDEX LLC + * Author: Karina Usmanova <usmanova.karin@yandex.ru> + * + * This file is part of Pire, the Perl Incompatible + * Regular Expressions library. + * + * Pire is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Pire is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * You should have received a copy of the GNU Lesser Public License + * along with Pire. If not, see <http://www.gnu.org/licenses>. + */ + + +#include <contrib/libs/pire/pire/re_lexer.h> + +namespace Pire { + class UnicodeReader : public Feature { + public: + wchar32 ReadUnicodeCharacter(); + + private: + static const wchar32 MAX_UNICODE = 0x10FFFF; + + bool IsHexDigit(wchar32 ch); + ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop); + wchar32 HexToDec(const ystring& hexStr); + }; +} + + diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h index 24ded64a68..120dc403b7 100644 --- a/contrib/libs/pire/pire/scanners/loaded.h +++ b/contrib/libs/pire/pire/scanners/loaded.h @@ -26,7 +26,7 @@ #include <string.h> -#include <contrib/libs/pire/pire/approx_matching.h> +#include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/partition.h> @@ -245,11 +245,11 @@ protected: virtual ~LoadedScanner(); private: - explicit LoadedScanner(Fsm& fsm, size_t distance = 0) + explicit LoadedScanner(Fsm& fsm, size_t distance = 0) { - if (distance) { - fsm = CreateApproxFsm(fsm, distance); - } + if (distance) { + fsm = CreateApproxFsm(fsm, distance); + } fsm.Canonize(); Init(fsm.Size(), fsm.Letters(), fsm.Initial()); BuildScanner(fsm, *this); diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h index b6cdceaa32..29679e416e 100644 --- a/contrib/libs/pire/pire/scanners/multi.h +++ b/contrib/libs/pire/pire/scanners/multi.h @@ -26,7 +26,7 @@ #include <cstring> #include <string.h> -#include <contrib/libs/pire/pire/approx_matching.h> +#include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/partition.h> #include <contrib/libs/pire/pire/run.h> @@ -121,11 +121,11 @@ public: Scanner() { Alias(Null()); } - explicit Scanner(Fsm& fsm, size_t distance = 0) + explicit Scanner(Fsm& fsm, size_t distance = 0) { - if (distance) { - fsm = CreateApproxFsm(fsm, distance); - } + if (distance) { + fsm = CreateApproxFsm(fsm, distance); + } fsm.Canonize(); Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1); BuildScanner(fsm, *this); diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h index 6874e1f2a3..ef959aeed1 100644 --- a/contrib/libs/pire/pire/scanners/simple.h +++ b/contrib/libs/pire/pire/scanners/simple.h @@ -24,7 +24,7 @@ #ifndef PIRE_SCANNERS_SIMPLE_H #define PIRE_SCANNERS_SIMPLE_H -#include <contrib/libs/pire/pire/approx_matching.h> +#include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/stub/stl.h> #include <contrib/libs/pire/pire/stub/defaults.h> #include <contrib/libs/pire/pire/stub/saveload.h> @@ -49,7 +49,7 @@ public: SimpleScanner() { Alias(Null()); } - explicit SimpleScanner(Fsm& fsm, size_t distance = 0); + explicit SimpleScanner(Fsm& fsm, size_t distance = 0); size_t Size() const { return m.statesCount; } bool Empty() const { return m_transitions == Null().m_transitions; } @@ -229,11 +229,11 @@ protected: } }; -inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) +inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) { - if (distance) { - fsm = CreateApproxFsm(fsm, distance); - } + if (distance) { + fsm = CreateApproxFsm(fsm, distance); + } fsm.Canonize(); m.statesCount = fsm.Size(); diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h index 8f1e4ca4d0..6adfcb8c1d 100644 --- a/contrib/libs/pire/pire/scanners/slow.h +++ b/contrib/libs/pire/pire/scanners/slow.h @@ -24,7 +24,7 @@ #ifndef PIRE_SCANNERS_SLOW_H #define PIRE_SCANNERS_SLOW_H -#include <contrib/libs/pire/pire/approx_matching.h> +#include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/partition.h> #include <contrib/libs/pire/pire/vbitset.h> #include <contrib/libs/pire/pire/fsm.h> @@ -250,12 +250,12 @@ public: } } - explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) + explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) : need_actions(needActions) { - if (distance) { - fsm = CreateApproxFsm(fsm, distance); - } + if (distance) { + fsm = CreateApproxFsm(fsm, distance); + } if (removeEpsilons) fsm.RemoveEpsilons(); fsm.Sparse(!removeEpsilons); @@ -357,7 +357,7 @@ private: bool need_actions; TVector<TVector<Action>> m_actionsvec; - static const SlowScanner& Null(); + static const SlowScanner& Null(); template<class T> void alloc(T*& p, size_t size) { @@ -416,17 +416,17 @@ private: friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&); }; -template<> -inline SlowScanner Fsm::Compile(size_t distance) { - return SlowScanner(*this, false, true, distance); -} - -inline const SlowScanner& SlowScanner::Null() -{ - static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>(); - return n; -} - +template<> +inline SlowScanner Fsm::Compile(size_t distance) { + return SlowScanner(*this, false, true, distance); +} + +inline const SlowScanner& SlowScanner::Null() +{ + static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>(); + return n; +} + #ifndef PIRE_DEBUG /// A specialization of Run(), since its state is much heavier than other ones /// and we thus want to avoid copying states. |