aboutsummaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
authorkarina-usm <karina-usm@yandex-team.ru>2022-02-10 16:48:05 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:48:05 +0300
commit3305cedaf9e392ab24e4b7dd6072976748ce60bf (patch)
treeb222e5ac2e2e98872661c51ccceee5da0d291e13 /contrib
parent62517661cde7aa7c93efe0281ec48eeb70ea420c (diff)
downloadydb-3305cedaf9e392ab24e4b7dd6072976748ce60bf.tar.gz
Restoring authorship annotation for <karina-usm@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib')
-rw-r--r--contrib/libs/pire/pire/approx_matching.cpp188
-rw-r--r--contrib/libs/pire/pire/approx_matching.h56
-rw-r--r--contrib/libs/pire/pire/extra/capture.h14
-rw-r--r--contrib/libs/pire/pire/fsm.h10
-rw-r--r--contrib/libs/pire/pire/re_lexer.cpp112
-rw-r--r--contrib/libs/pire/pire/read_unicode.cpp166
-rw-r--r--contrib/libs/pire/pire/read_unicode.h80
-rw-r--r--contrib/libs/pire/pire/scanners/loaded.h10
-rw-r--r--contrib/libs/pire/pire/scanners/multi.h10
-rw-r--r--contrib/libs/pire/pire/scanners/simple.h12
-rw-r--r--contrib/libs/pire/pire/scanners/slow.h34
11 files changed, 346 insertions, 346 deletions
diff --git a/contrib/libs/pire/pire/approx_matching.cpp b/contrib/libs/pire/pire/approx_matching.cpp
index 8c393b39e0..23f74ca01d 100644
--- a/contrib/libs/pire/pire/approx_matching.cpp
+++ b/contrib/libs/pire/pire/approx_matching.cpp
@@ -1,94 +1,94 @@
-/*
- * approx_matching.cpp -- implementation of CreateApproxFsm function
- *
- * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include "approx_matching.h"
-
-namespace Pire {
- Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) {
- Fsm approxFsm = regexp;
-
- TVector<TSet<Char>> outgoingLettersTable(regexp.Size());
- for (size_t state = 0; state < regexp.Size(); ++state) {
- outgoingLettersTable[state] = regexp.OutgoingLetters(state);
- }
-
- TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size());
- for (size_t state = 0; state < regexp.Size(); ++state) {
- for (Char letter : outgoingLettersTable[state]) {
- destinationsTable[state][letter] = regexp.Destinations(state, letter);
- }
- }
-
- for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) {
- approxFsm.Import(regexp);
- const auto shift = fsmIdx * regexp.Size();
-
- for (size_t state = 0; state < regexp.Size(); ++state) {
- for (Char letter : outgoingLettersTable[state]) {
- for (size_t to : destinationsTable[state][letter]) {
- for (Char ch = 0; ch < MaxChar; ++ch) {
- if (!approxFsm.Connected(state + shift, to + shift, ch)) {
- approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch);
- }
- }
-
- approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon);
- }
-
- for (Char ch = 0; ch < MaxChar; ++ch) {
- approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch);
- }
- }
-
- if (regexp.IsFinal(state)) {
- approxFsm.SetFinal(state + shift + regexp.Size(), true);
- }
- }
- }
-
- size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0;
- for (size_t state = 0; state < maxState; ++state) {
- size_t currentDist = state / regexp.Size();
- size_t intState = state % regexp.Size();
-
- for (Char firstLetter : outgoingLettersTable[intState]) {
- for (size_t firstDest : destinationsTable[intState][firstLetter]) {
- for (Char secondLetter : outgoingLettersTable[firstDest]) {
- for (size_t secondDest : destinationsTable[firstDest][secondLetter]) {
- if (secondDest != intState || firstDest != intState) {
- approxFsm.Resize(approxFsm.Size() + 1);
-
- size_t to = secondDest + (currentDist + 1) * regexp.Size();
- size_t middle = approxFsm.Size() - 1;
-
- approxFsm.Connect(state, middle, secondLetter);
- approxFsm.Connect(middle, to, firstLetter);
- }
- }
- }
- }
- }
- }
-
- return approxFsm;
- }
-}
+/*
+ * approx_matching.cpp -- implementation of CreateApproxFsm function
+ *
+ * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include "approx_matching.h"
+
+namespace Pire {
+ Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) {
+ Fsm approxFsm = regexp;
+
+ TVector<TSet<Char>> outgoingLettersTable(regexp.Size());
+ for (size_t state = 0; state < regexp.Size(); ++state) {
+ outgoingLettersTable[state] = regexp.OutgoingLetters(state);
+ }
+
+ TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size());
+ for (size_t state = 0; state < regexp.Size(); ++state) {
+ for (Char letter : outgoingLettersTable[state]) {
+ destinationsTable[state][letter] = regexp.Destinations(state, letter);
+ }
+ }
+
+ for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) {
+ approxFsm.Import(regexp);
+ const auto shift = fsmIdx * regexp.Size();
+
+ for (size_t state = 0; state < regexp.Size(); ++state) {
+ for (Char letter : outgoingLettersTable[state]) {
+ for (size_t to : destinationsTable[state][letter]) {
+ for (Char ch = 0; ch < MaxChar; ++ch) {
+ if (!approxFsm.Connected(state + shift, to + shift, ch)) {
+ approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch);
+ }
+ }
+
+ approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon);
+ }
+
+ for (Char ch = 0; ch < MaxChar; ++ch) {
+ approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch);
+ }
+ }
+
+ if (regexp.IsFinal(state)) {
+ approxFsm.SetFinal(state + shift + regexp.Size(), true);
+ }
+ }
+ }
+
+ size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0;
+ for (size_t state = 0; state < maxState; ++state) {
+ size_t currentDist = state / regexp.Size();
+ size_t intState = state % regexp.Size();
+
+ for (Char firstLetter : outgoingLettersTable[intState]) {
+ for (size_t firstDest : destinationsTable[intState][firstLetter]) {
+ for (Char secondLetter : outgoingLettersTable[firstDest]) {
+ for (size_t secondDest : destinationsTable[firstDest][secondLetter]) {
+ if (secondDest != intState || firstDest != intState) {
+ approxFsm.Resize(approxFsm.Size() + 1);
+
+ size_t to = secondDest + (currentDist + 1) * regexp.Size();
+ size_t middle = approxFsm.Size() - 1;
+
+ approxFsm.Connect(state, middle, secondLetter);
+ approxFsm.Connect(middle, to, firstLetter);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return approxFsm;
+ }
+}
diff --git a/contrib/libs/pire/pire/approx_matching.h b/contrib/libs/pire/pire/approx_matching.h
index 2b2568d96b..fc2a9fd61c 100644
--- a/contrib/libs/pire/pire/approx_matching.h
+++ b/contrib/libs/pire/pire/approx_matching.h
@@ -1,28 +1,28 @@
-/*
- * approx_matching.h -- function for creating fsm which matches words
- * within a levenshtein distance
- *
- * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include "fsm.h"
-
-namespace Pire {
- Fsm CreateApproxFsm(const Fsm& regexp, size_t distance);
-}
+/*
+ * approx_matching.h -- function for creating fsm which matches words
+ * within a levenshtein distance
+ *
+ * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include "fsm.h"
+
+namespace Pire {
+ Fsm CreateApproxFsm(const Fsm& regexp, size_t distance);
+}
diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h
index 7bca334eac..8399914a67 100644
--- a/contrib/libs/pire/pire/extra/capture.h
+++ b/contrib/libs/pire/pire/extra/capture.h
@@ -25,7 +25,7 @@
#define PIRE_EXTRA_CAPTURE_H
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/scanners/loaded.h>
#include <contrib/libs/pire/pire/scanners/multi.h>
#include <contrib/libs/pire/pire/scanners/slow.h>
@@ -139,11 +139,11 @@ public:
CapturingScanner() {}
CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {}
- explicit CapturingScanner(Fsm& fsm, size_t distance = 0)
+ explicit CapturingScanner(Fsm& fsm, size_t distance = 0)
{
- if (distance) {
- fsm = CreateApproxFsm(fsm, distance);
- }
+ if (distance) {
+ fsm = CreateApproxFsm(fsm, distance);
+ }
fsm.Canonize();
Init(fsm.Size(), fsm.Letters(), fsm.Initial());
BuildScanner(fsm, *this);
@@ -576,8 +576,8 @@ public:
{
}
- SlowCapturingScanner(Fsm& fsm, size_t distance = 0)
- : SlowScanner(fsm, true, false, distance)
+ SlowCapturingScanner(Fsm& fsm, size_t distance = 0)
+ : SlowScanner(fsm, true, false, distance)
{
}
};
diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h
index 348e6b6216..4dad06ca06 100644
--- a/contrib/libs/pire/pire/fsm.h
+++ b/contrib/libs/pire/pire/fsm.h
@@ -115,9 +115,9 @@ namespace Pire {
/// Determines and minimizes the FSM if neccessary. Returns *this.
Fsm& Canonize(size_t maxSize = 0);
-
+
template<class Scanner>
- Scanner Compile(size_t distance = 0);
+ Scanner Compile(size_t distance = 0);
void DumpState(yostream& s, size_t state) const;
void DumpTo(yostream& s, const ystring& name = "") const;
@@ -270,11 +270,11 @@ namespace Pire {
r.FinishBuild();
}
-
+
template<class Scanner>
- inline Scanner Fsm::Compile(size_t distance)
+ inline Scanner Fsm::Compile(size_t distance)
{
- return Scanner(*this, distance);
+ return Scanner(*this, distance);
}
yostream& operator << (yostream&, const Fsm&);
diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp
index dbae421f16..132fbeb039 100644
--- a/contrib/libs/pire/pire/re_lexer.cpp
+++ b/contrib/libs/pire/pire/re_lexer.cpp
@@ -11,7 +11,7 @@
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* Pire is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@@ -28,12 +28,12 @@
#include <contrib/libs/pire/pire/stub/utf8.h>
#include <contrib/libs/pire/pire/stub/singleton.h>
-#include "fsm.h"
+#include "fsm.h"
#include "re_lexer.h"
#include "re_parser.h"
-#include "read_unicode.h"
+#include "read_unicode.h"
+
-
namespace Pire {
namespace Impl {
@@ -161,7 +161,7 @@ Term Lexer::Lex()
if ((j & ControlMask) == Control)
Error("Control character in tokens sequence");
}
-
+
int type = t.Type();
if (type == TokenTypes::Letters)
type = YRE_LETTERS;
@@ -205,19 +205,19 @@ wchar32 Feature::CorrectChar(wchar32 c, const char* controls)
}
namespace {
- class EnableUnicodeSequencesImpl : public UnicodeReader {
+ class EnableUnicodeSequencesImpl : public UnicodeReader {
+ public:
+ bool Accepts(wchar32 c) const {
+ return c == (Control | 'x');
+ }
+
+ Term Lex() {
+ return Term::Character(ReadUnicodeCharacter());
+ }
+ };
+
+ class CharacterRangeReader: public UnicodeReader {
public:
- bool Accepts(wchar32 c) const {
- return c == (Control | 'x');
- }
-
- Term Lex() {
- return Term::Character(ReadUnicodeCharacter());
- }
- };
-
- class CharacterRangeReader: public UnicodeReader {
- public:
bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); }
Term Lex()
@@ -235,49 +235,49 @@ namespace {
ch = CorrectChar(GetChar(), controls);
}
- bool firstUnicode;
- wchar32 unicodeSymbol = 0;
-
+ bool firstUnicode;
+ wchar32 unicodeSymbol = 0;
+
for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) {
- if (ch == (Control | 'x')) {
- UngetChar(ch);
- firstUnicode = true;
- unicodeSymbol = ReadUnicodeCharacter();
- } else {
- firstUnicode = false;
- }
-
- if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) {
+ if (ch == (Control | 'x')) {
+ UngetChar(ch);
+ firstUnicode = true;
+ unicodeSymbol = ReadUnicodeCharacter();
+ } else {
+ firstUnicode = false;
+ }
+
+ if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) {
GetChar();
- wchar32 current = GetChar();
-
- bool secondUnicode = (current == (Control | 'x'));
-
- wchar32 begin = (firstUnicode) ? unicodeSymbol : ch;
- wchar32 end;
- if (secondUnicode) {
- UngetChar(current);
- end = ReadUnicodeCharacter();
- } else {
- end = CorrectChar(current, controls);
- if ((end & ControlMask) == Control)
- Error("Wrong character range");
- }
-
- for (ch = begin; ch <= end; ++ch) {
+ wchar32 current = GetChar();
+
+ bool secondUnicode = (current == (Control | 'x'));
+
+ wchar32 begin = (firstUnicode) ? unicodeSymbol : ch;
+ wchar32 end;
+ if (secondUnicode) {
+ UngetChar(current);
+ end = ReadUnicodeCharacter();
+ } else {
+ end = CorrectChar(current, controls);
+ if ((end & ControlMask) == Control)
+ Error("Wrong character range");
+ }
+
+ for (ch = begin; ch <= end; ++ch) {
cs.first.insert(Term::String(1, ch));
- }
- } else if (ch == (Control | '-')) {
+ }
+ } else if (ch == (Control | '-')) {
cs.first.insert(Term::String(1, '-'));
- }
- else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) {
+ }
+ else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) {
cs.first.insert(Term::String(1, ch & ~ControlMask));
- }
- else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) {
- cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch));
- } else {
+ }
+ else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) {
+ cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch));
+ } else {
Error("Wrong character in range");
- }
+ }
}
if (ch == End)
Error("Unexpected end of pattern");
@@ -347,7 +347,7 @@ namespace {
{
return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~');
}
-
+
Term Lex()
{
wchar32 ch = GetChar();
@@ -376,7 +376,7 @@ void Lexer::InstallDefaultFeatures()
AddFeature(Feature::Ptr(new CharacterRangeReader));
AddFeature(Feature::Ptr(new RepetitionCountReader));
AddFeature(Features::CharClasses());
- AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl));
+ AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl));
}
Fsm Lexer::Parse()
diff --git a/contrib/libs/pire/pire/read_unicode.cpp b/contrib/libs/pire/pire/read_unicode.cpp
index e167cf5cca..5b21e4eb28 100644
--- a/contrib/libs/pire/pire/read_unicode.cpp
+++ b/contrib/libs/pire/pire/read_unicode.cpp
@@ -1,83 +1,83 @@
-/*
- * read_unicode.cpp -- implementation of the UnicodeReader.
- *
- * Copyright (c) 2019 YANDEX LLC
- * Author: Karina Usmanova <usmanova.karin@yandex.ru>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include "read_unicode.h"
-
-#include <contrib/libs/pire/pire/re_lexer.h>
-
-namespace Pire {
- wchar32 UnicodeReader::ReadUnicodeCharacter() {
- ystring hexStr;
- GetChar();
- wchar32 ch = PeekChar();
-
- if (ch == '{') {
- GetChar();
- hexStr = ReadHexDigit(
- [](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); });
- ch = GetChar();
- if (ch != '}') {
- Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\"");
- }
- } else {
- hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; });
- if (hexStr.size() != 2) {
- Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols");
- }
- }
- return HexToDec(hexStr);
- }
-
- bool UnicodeReader::IsHexDigit(wchar32 ch) {
- return ch < 256 && std::isxdigit(ch) != 0;
- }
-
- ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) {
- ystring result;
- wchar32 ch = GetChar();
- while (!shouldStop(ch, result.size())) {
- if (!IsHexDigit(ch)) {
- Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number");
- }
- result.push_back(ch);
- ch = GetChar();
- }
- UngetChar(ch);
- return result;
- }
-
- wchar32 UnicodeReader::HexToDec(const ystring &hexStr) {
- wchar32 converted;
- try {
- converted = std::stoul(hexStr, 0, 16);
- } catch (std::out_of_range &) {
- converted = MAX_UNICODE + 1;
- }
- if (converted > MAX_UNICODE) {
- Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large");
- }
- return converted;
- }
-}
-
-
+/*
+ * read_unicode.cpp -- implementation of the UnicodeReader.
+ *
+ * Copyright (c) 2019 YANDEX LLC
+ * Author: Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include "read_unicode.h"
+
+#include <contrib/libs/pire/pire/re_lexer.h>
+
+namespace Pire {
+ wchar32 UnicodeReader::ReadUnicodeCharacter() {
+ ystring hexStr;
+ GetChar();
+ wchar32 ch = PeekChar();
+
+ if (ch == '{') {
+ GetChar();
+ hexStr = ReadHexDigit(
+ [](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); });
+ ch = GetChar();
+ if (ch != '}') {
+ Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\"");
+ }
+ } else {
+ hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; });
+ if (hexStr.size() != 2) {
+ Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols");
+ }
+ }
+ return HexToDec(hexStr);
+ }
+
+ bool UnicodeReader::IsHexDigit(wchar32 ch) {
+ return ch < 256 && std::isxdigit(ch) != 0;
+ }
+
+ ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) {
+ ystring result;
+ wchar32 ch = GetChar();
+ while (!shouldStop(ch, result.size())) {
+ if (!IsHexDigit(ch)) {
+ Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number");
+ }
+ result.push_back(ch);
+ ch = GetChar();
+ }
+ UngetChar(ch);
+ return result;
+ }
+
+ wchar32 UnicodeReader::HexToDec(const ystring &hexStr) {
+ wchar32 converted;
+ try {
+ converted = std::stoul(hexStr, 0, 16);
+ } catch (std::out_of_range &) {
+ converted = MAX_UNICODE + 1;
+ }
+ if (converted > MAX_UNICODE) {
+ Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large");
+ }
+ return converted;
+ }
+}
+
+
diff --git a/contrib/libs/pire/pire/read_unicode.h b/contrib/libs/pire/pire/read_unicode.h
index f0705c14aa..107545e5a1 100644
--- a/contrib/libs/pire/pire/read_unicode.h
+++ b/contrib/libs/pire/pire/read_unicode.h
@@ -1,40 +1,40 @@
-/*
- * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences.
- *
- * Copyright (c) 2019 YANDEX LLC
- * Author: Karina Usmanova <usmanova.karin@yandex.ru>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire. If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <contrib/libs/pire/pire/re_lexer.h>
-
-namespace Pire {
- class UnicodeReader : public Feature {
- public:
- wchar32 ReadUnicodeCharacter();
-
- private:
- static const wchar32 MAX_UNICODE = 0x10FFFF;
-
- bool IsHexDigit(wchar32 ch);
- ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop);
- wchar32 HexToDec(const ystring& hexStr);
- };
-}
-
-
+/*
+ * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences.
+ *
+ * Copyright (c) 2019 YANDEX LLC
+ * Author: Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire. If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <contrib/libs/pire/pire/re_lexer.h>
+
+namespace Pire {
+ class UnicodeReader : public Feature {
+ public:
+ wchar32 ReadUnicodeCharacter();
+
+ private:
+ static const wchar32 MAX_UNICODE = 0x10FFFF;
+
+ bool IsHexDigit(wchar32 ch);
+ ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop);
+ wchar32 HexToDec(const ystring& hexStr);
+ };
+}
+
+
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index 24ded64a68..120dc403b7 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -26,7 +26,7 @@
#include <string.h>
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/partition.h>
@@ -245,11 +245,11 @@ protected:
virtual ~LoadedScanner();
private:
- explicit LoadedScanner(Fsm& fsm, size_t distance = 0)
+ explicit LoadedScanner(Fsm& fsm, size_t distance = 0)
{
- if (distance) {
- fsm = CreateApproxFsm(fsm, distance);
- }
+ if (distance) {
+ fsm = CreateApproxFsm(fsm, distance);
+ }
fsm.Canonize();
Init(fsm.Size(), fsm.Letters(), fsm.Initial());
BuildScanner(fsm, *this);
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index b6cdceaa32..29679e416e 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -26,7 +26,7 @@
#include <cstring>
#include <string.h>
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/partition.h>
#include <contrib/libs/pire/pire/run.h>
@@ -121,11 +121,11 @@ public:
Scanner() { Alias(Null()); }
- explicit Scanner(Fsm& fsm, size_t distance = 0)
+ explicit Scanner(Fsm& fsm, size_t distance = 0)
{
- if (distance) {
- fsm = CreateApproxFsm(fsm, distance);
- }
+ if (distance) {
+ fsm = CreateApproxFsm(fsm, distance);
+ }
fsm.Canonize();
Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1);
BuildScanner(fsm, *this);
diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h
index 6874e1f2a3..ef959aeed1 100644
--- a/contrib/libs/pire/pire/scanners/simple.h
+++ b/contrib/libs/pire/pire/scanners/simple.h
@@ -24,7 +24,7 @@
#ifndef PIRE_SCANNERS_SIMPLE_H
#define PIRE_SCANNERS_SIMPLE_H
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/stub/stl.h>
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/stub/saveload.h>
@@ -49,7 +49,7 @@ public:
SimpleScanner() { Alias(Null()); }
- explicit SimpleScanner(Fsm& fsm, size_t distance = 0);
+ explicit SimpleScanner(Fsm& fsm, size_t distance = 0);
size_t Size() const { return m.statesCount; }
bool Empty() const { return m_transitions == Null().m_transitions; }
@@ -229,11 +229,11 @@ protected:
}
};
-inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
+inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
{
- if (distance) {
- fsm = CreateApproxFsm(fsm, distance);
- }
+ if (distance) {
+ fsm = CreateApproxFsm(fsm, distance);
+ }
fsm.Canonize();
m.statesCount = fsm.Size();
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 8f1e4ca4d0..6adfcb8c1d 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -24,7 +24,7 @@
#ifndef PIRE_SCANNERS_SLOW_H
#define PIRE_SCANNERS_SLOW_H
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/partition.h>
#include <contrib/libs/pire/pire/vbitset.h>
#include <contrib/libs/pire/pire/fsm.h>
@@ -250,12 +250,12 @@ public:
}
}
- explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
+ explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
: need_actions(needActions)
{
- if (distance) {
- fsm = CreateApproxFsm(fsm, distance);
- }
+ if (distance) {
+ fsm = CreateApproxFsm(fsm, distance);
+ }
if (removeEpsilons)
fsm.RemoveEpsilons();
fsm.Sparse(!removeEpsilons);
@@ -357,7 +357,7 @@ private:
bool need_actions;
TVector<TVector<Action>> m_actionsvec;
- static const SlowScanner& Null();
+ static const SlowScanner& Null();
template<class T> void alloc(T*& p, size_t size)
{
@@ -416,17 +416,17 @@ private:
friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&);
};
-template<>
-inline SlowScanner Fsm::Compile(size_t distance) {
- return SlowScanner(*this, false, true, distance);
-}
-
-inline const SlowScanner& SlowScanner::Null()
-{
- static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>();
- return n;
-}
-
+template<>
+inline SlowScanner Fsm::Compile(size_t distance) {
+ return SlowScanner(*this, false, true, distance);
+}
+
+inline const SlowScanner& SlowScanner::Null()
+{
+ static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>();
+ return n;
+}
+
#ifndef PIRE_DEBUG
/// A specialization of Run(), since its state is much heavier than other ones
/// and we thus want to avoid copying states.