Restoring authorship annotation for <karina-usm@yandex-team.ru>. Commit 2 of 2.

author: karina-usm <karina-usm@yandex-team.ru> 2022-02-10 16:48:05 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:48:05 +0300
commit: 3305cedaf9e392ab24e4b7dd6072976748ce60bf (patch)
tree: b222e5ac2e2e98872661c51ccceee5da0d291e13 /contrib
parent: 62517661cde7aa7c93efe0281ec48eeb70ea420c (diff)
download: ydb-3305cedaf9e392ab24e4b7dd6072976748ce60bf.tar.gz
11 files changed, 346 insertions, 346 deletions
diff --git a/contrib/libs/pire/pire/approx_matching.cpp b/contrib/libs/pire/pire/approx_matching.cpp
index 8c393b39e0..23f74ca01d 100644
--- a/contrib/libs/pire/pire/approx_matching.cpp
+++ b/contrib/libs/pire/pire/approx_matching.cpp
@@ -1,94 +1,94 @@
-/* 
- * approx_matching.cpp -- implementation of CreateApproxFsm function 
- * 
- * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru> 
- * 
- * This file is part of Pire, the Perl Incompatible 
- * Regular Expressions library. 
- * 
- * Pire is free software: you can redistribute it and/or modify 
- * it under the terms of the GNU Lesser Public License as published by 
- * the Free Software Foundation, either version 3 of the License, or 
- * (at your option) any later version. 
- * 
- * Pire is distributed in the hope that it will be useful, 
- * but WITHOUT ANY WARRANTY; without even the implied warranty of 
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
- * GNU Lesser Public License for more details. 
- * You should have received a copy of the GNU Lesser Public License 
- * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
- */ 
- 
- 
-#include "approx_matching.h" 
- 
-namespace Pire { 
-	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) { 
-		Fsm approxFsm = regexp; 
- 
-		TVector<TSet<Char>> outgoingLettersTable(regexp.Size()); 
-		for (size_t state = 0; state < regexp.Size(); ++state) { 
-			outgoingLettersTable[state] = regexp.OutgoingLetters(state); 
-		} 
- 
-		TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size()); 
-		for (size_t state = 0; state < regexp.Size(); ++state) { 
-			for (Char letter : outgoingLettersTable[state]) { 
-				destinationsTable[state][letter] = regexp.Destinations(state, letter); 
-			} 
-		} 
- 
-		for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) { 
-			approxFsm.Import(regexp); 
-			const auto shift = fsmIdx * regexp.Size(); 
- 
-			for (size_t state = 0; state < regexp.Size(); ++state) { 
-				for (Char letter : outgoingLettersTable[state]) { 
-					for (size_t to : destinationsTable[state][letter]) { 
-						for (Char ch = 0; ch < MaxChar; ++ch) { 
-							if (!approxFsm.Connected(state + shift, to + shift, ch)) { 
-								approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch); 
-							} 
-						} 
- 
-						approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon); 
-					} 
- 
-					for (Char ch = 0; ch < MaxChar; ++ch) { 
-						approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch); 
-					} 
-				} 
- 
-				if (regexp.IsFinal(state)) { 
-					approxFsm.SetFinal(state + shift + regexp.Size(), true); 
-				} 
-			} 
-		} 
- 
-		size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0; 
-		for (size_t state = 0; state < maxState; ++state) { 
-			size_t currentDist = state / regexp.Size(); 
-			size_t intState = state % regexp.Size(); 
- 
-			for (Char firstLetter : outgoingLettersTable[intState]) { 
-				for (size_t firstDest : destinationsTable[intState][firstLetter]) { 
-					for (Char secondLetter : outgoingLettersTable[firstDest]) { 
-						for (size_t secondDest : destinationsTable[firstDest][secondLetter]) { 
-							if (secondDest != intState || firstDest != intState) { 
-								approxFsm.Resize(approxFsm.Size() + 1); 
- 
-								size_t to = secondDest + (currentDist + 1) * regexp.Size(); 
-								size_t middle = approxFsm.Size() - 1; 
- 
-								approxFsm.Connect(state, middle, secondLetter); 
-								approxFsm.Connect(middle, to, firstLetter); 
-							} 
-						} 
-					} 
-				} 
-			} 
-		} 
- 
-		return approxFsm; 
-	} 
-} 
+/*
+ * approx_matching.cpp -- implementation of CreateApproxFsm function
+ *
+ * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include "approx_matching.h"
+
+namespace Pire {
+	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) {
+		Fsm approxFsm = regexp;
+
+		TVector<TSet<Char>> outgoingLettersTable(regexp.Size());
+		for (size_t state = 0; state < regexp.Size(); ++state) {
+			outgoingLettersTable[state] = regexp.OutgoingLetters(state);
+		}
+
+		TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size());
+		for (size_t state = 0; state < regexp.Size(); ++state) {
+			for (Char letter : outgoingLettersTable[state]) {
+				destinationsTable[state][letter] = regexp.Destinations(state, letter);
+			}
+		}
+
+		for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) {
+			approxFsm.Import(regexp);
+			const auto shift = fsmIdx * regexp.Size();
+
+			for (size_t state = 0; state < regexp.Size(); ++state) {
+				for (Char letter : outgoingLettersTable[state]) {
+					for (size_t to : destinationsTable[state][letter]) {
+						for (Char ch = 0; ch < MaxChar; ++ch) {
+							if (!approxFsm.Connected(state + shift, to + shift, ch)) {
+								approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch);
+							}
+						}
+
+						approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon);
+					}
+
+					for (Char ch = 0; ch < MaxChar; ++ch) {
+						approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch);
+					}
+				}
+
+				if (regexp.IsFinal(state)) {
+					approxFsm.SetFinal(state + shift + regexp.Size(), true);
+				}
+			}
+		}
+
+		size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0;
+		for (size_t state = 0; state < maxState; ++state) {
+			size_t currentDist = state / regexp.Size();
+			size_t intState = state % regexp.Size();
+
+			for (Char firstLetter : outgoingLettersTable[intState]) {
+				for (size_t firstDest : destinationsTable[intState][firstLetter]) {
+					for (Char secondLetter : outgoingLettersTable[firstDest]) {
+						for (size_t secondDest : destinationsTable[firstDest][secondLetter]) {
+							if (secondDest != intState || firstDest != intState) {
+								approxFsm.Resize(approxFsm.Size() + 1);
+
+								size_t to = secondDest + (currentDist + 1) * regexp.Size();
+								size_t middle = approxFsm.Size() - 1;
+
+								approxFsm.Connect(state, middle, secondLetter);
+								approxFsm.Connect(middle, to, firstLetter);
+							}
+						}
+					}
+				}
+			}
+		}
+
+		return approxFsm;
+	}
+}
diff --git a/contrib/libs/pire/pire/approx_matching.h b/contrib/libs/pire/pire/approx_matching.h
index 2b2568d96b..fc2a9fd61c 100644
--- a/contrib/libs/pire/pire/approx_matching.h
+++ b/contrib/libs/pire/pire/approx_matching.h
@@ -1,28 +1,28 @@
-/* 
- * approx_matching.h -- function for creating fsm which matches words 
- *                      within a levenshtein distance 
- * 
- * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru> 
- * 
- * This file is part of Pire, the Perl Incompatible 
- * Regular Expressions library. 
- * 
- * Pire is free software: you can redistribute it and/or modify 
- * it under the terms of the GNU Lesser Public License as published by 
- * the Free Software Foundation, either version 3 of the License, or 
- * (at your option) any later version. 
- * 
- * Pire is distributed in the hope that it will be useful, 
- * but WITHOUT ANY WARRANTY; without even the implied warranty of 
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
- * GNU Lesser Public License for more details. 
- * You should have received a copy of the GNU Lesser Public License 
- * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
- */ 
- 
- 
-#include "fsm.h" 
- 
-namespace Pire { 
-	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance); 
-} 
+/*
+ * approx_matching.h -- function for creating fsm which matches words
+ *                      within a levenshtein distance
+ *
+ * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include "fsm.h"
+
+namespace Pire {
+	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance);
+}
diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h
index 7bca334eac..8399914a67 100644
--- a/contrib/libs/pire/pire/extra/capture.h
+++ b/contrib/libs/pire/pire/extra/capture.h
@@ -25,7 +25,7 @@
 #define PIRE_EXTRA_CAPTURE_H
 
 
-#include <contrib/libs/pire/pire/approx_matching.h> 
+#include <contrib/libs/pire/pire/approx_matching.h>
 #include <contrib/libs/pire/pire/scanners/loaded.h>
 #include <contrib/libs/pire/pire/scanners/multi.h>
 #include <contrib/libs/pire/pire/scanners/slow.h>
@@ -139,11 +139,11 @@ public:
 
 	CapturingScanner() {}
 	CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {}
-	explicit CapturingScanner(Fsm& fsm, size_t distance = 0) 
+	explicit CapturingScanner(Fsm& fsm, size_t distance = 0)
 	{
-		if (distance) { 
-			fsm = CreateApproxFsm(fsm, distance); 
-		} 
+		if (distance) {
+			fsm = CreateApproxFsm(fsm, distance);
+		}
 		fsm.Canonize();
 		Init(fsm.Size(), fsm.Letters(), fsm.Initial());
 		BuildScanner(fsm, *this);
@@ -576,8 +576,8 @@ public:
 	{
 	}
 
-	SlowCapturingScanner(Fsm& fsm, size_t distance = 0) 
-		: SlowScanner(fsm, true, false, distance) 
+	SlowCapturingScanner(Fsm& fsm, size_t distance = 0)
+		: SlowScanner(fsm, true, false, distance)
 	{
 	}
 };
diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h
index 348e6b6216..4dad06ca06 100644
--- a/contrib/libs/pire/pire/fsm.h
+++ b/contrib/libs/pire/pire/fsm.h
@@ -115,9 +115,9 @@ namespace Pire {
 
 		/// Determines and minimizes the FSM if neccessary. Returns *this.
 		Fsm& Canonize(size_t maxSize = 0);
- 
+
 		template<class Scanner>
-		Scanner Compile(size_t distance = 0); 
+		Scanner Compile(size_t distance = 0);
 
 		void DumpState(yostream& s, size_t state) const;
 		void DumpTo(yostream& s, const ystring& name = "") const;
@@ -270,11 +270,11 @@ namespace Pire {
 		
 		r.FinishBuild();
 	}
- 
+
 	template<class Scanner>
-	inline Scanner Fsm::Compile(size_t distance) 
+	inline Scanner Fsm::Compile(size_t distance)
 	{
-		return Scanner(*this, distance); 
+		return Scanner(*this, distance);
 	}
 
 	yostream& operator << (yostream&, const Fsm&);
diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp
index dbae421f16..132fbeb039 100644
--- a/contrib/libs/pire/pire/re_lexer.cpp
+++ b/contrib/libs/pire/pire/re_lexer.cpp
@@ -11,7 +11,7 @@
  * it under the terms of the GNU Lesser Public License as published by
  * the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
- * 
+ *
  * Pire is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@@ -28,12 +28,12 @@
 #include <contrib/libs/pire/pire/stub/utf8.h>
 #include <contrib/libs/pire/pire/stub/singleton.h>
 
-#include "fsm.h" 
+#include "fsm.h"
 #include "re_lexer.h"
 #include "re_parser.h"
-#include "read_unicode.h" 
+#include "read_unicode.h"
+
 
- 
 namespace Pire {
 
 namespace Impl {
@@ -161,7 +161,7 @@ Term Lexer::Lex()
                 if ((j & ControlMask) == Control)
                     Error("Control character in tokens sequence");
     }
- 
+
     int type = t.Type();
     if (type == TokenTypes::Letters)
         type = YRE_LETTERS;
@@ -205,19 +205,19 @@ wchar32 Feature::CorrectChar(wchar32 c, const char* controls)
 }
 
 namespace {
-    class EnableUnicodeSequencesImpl : public UnicodeReader { 
+    class EnableUnicodeSequencesImpl : public UnicodeReader {
+    public:
+        bool Accepts(wchar32 c) const {
+            return c == (Control | 'x');
+        }
+
+        Term Lex() {
+            return Term::Character(ReadUnicodeCharacter());
+        }
+    };
+
+    class CharacterRangeReader: public UnicodeReader {
     public:
-        bool Accepts(wchar32 c) const { 
-            return c == (Control | 'x'); 
-        } 
- 
-        Term Lex() { 
-            return Term::Character(ReadUnicodeCharacter()); 
-        } 
-    }; 
- 
-    class CharacterRangeReader: public UnicodeReader { 
-    public: 
         bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); }
 
         Term Lex()
@@ -235,49 +235,49 @@ namespace {
                 ch = CorrectChar(GetChar(), controls);
             }
 
-            bool firstUnicode; 
-            wchar32 unicodeSymbol = 0; 
- 
+            bool firstUnicode;
+            wchar32 unicodeSymbol = 0;
+
             for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) {
-                if (ch == (Control | 'x')) { 
-                    UngetChar(ch); 
-					firstUnicode = true; 
-					unicodeSymbol = ReadUnicodeCharacter(); 
-                } else { 
-                    firstUnicode = false; 
-                } 
- 
-                if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) { 
+                if (ch == (Control | 'x')) {
+                    UngetChar(ch);
+					firstUnicode = true;
+					unicodeSymbol = ReadUnicodeCharacter();
+                } else {
+                    firstUnicode = false;
+                }
+
+                if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) {
                     GetChar();
-                    wchar32 current = GetChar(); 
- 
-                    bool secondUnicode = (current == (Control | 'x')); 
- 
-                    wchar32 begin = (firstUnicode) ? unicodeSymbol : ch; 
-                    wchar32 end; 
-                    if (secondUnicode) { 
-                        UngetChar(current); 
-                        end = ReadUnicodeCharacter(); 
-                    } else { 
-                        end = CorrectChar(current, controls); 
-                        if ((end & ControlMask) == Control) 
-                            Error("Wrong character range"); 
-                    } 
- 
-                    for (ch = begin; ch <= end; ++ch) { 
+                    wchar32 current = GetChar();
+
+                    bool secondUnicode = (current == (Control | 'x'));
+
+                    wchar32 begin = (firstUnicode) ? unicodeSymbol : ch;
+                    wchar32 end;
+                    if (secondUnicode) {
+                        UngetChar(current);
+                        end = ReadUnicodeCharacter();
+                    } else {
+                        end = CorrectChar(current, controls);
+                        if ((end & ControlMask) == Control)
+                            Error("Wrong character range");
+                    }
+
+                    for (ch = begin; ch <= end; ++ch) {
                         cs.first.insert(Term::String(1, ch));
-                    } 
-                } else if (ch == (Control | '-')) { 
+                    }
+                } else if (ch == (Control | '-')) {
                     cs.first.insert(Term::String(1, '-'));
-                } 
-                else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) { 
+                }
+                else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) {
                     cs.first.insert(Term::String(1, ch & ~ControlMask));
-                } 
-                else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) { 
-                    cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch)); 
-                } else { 
+                }
+                else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) {
+                    cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch));
+                } else {
                     Error("Wrong character in range");
-                } 
+                }
             }
             if (ch == End)
                 Error("Unexpected end of pattern");
@@ -347,7 +347,7 @@ namespace {
         {
             return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~');
         }
- 
+
         Term Lex()
         {
             wchar32 ch = GetChar();
@@ -376,7 +376,7 @@ void Lexer::InstallDefaultFeatures()
     AddFeature(Feature::Ptr(new CharacterRangeReader));
     AddFeature(Feature::Ptr(new RepetitionCountReader));
     AddFeature(Features::CharClasses());
-    AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl)); 
+    AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl));
 }
 
 Fsm Lexer::Parse()
diff --git a/contrib/libs/pire/pire/read_unicode.cpp b/contrib/libs/pire/pire/read_unicode.cpp
index e167cf5cca..5b21e4eb28 100644
--- a/contrib/libs/pire/pire/read_unicode.cpp
+++ b/contrib/libs/pire/pire/read_unicode.cpp
@@ -1,83 +1,83 @@
-/* 
- * read_unicode.cpp -- implementation of the UnicodeReader. 
- * 
- * Copyright (c) 2019 YANDEX LLC 
- * Author: Karina Usmanova <usmanova.karin@yandex.ru> 
- * 
- * This file is part of Pire, the Perl Incompatible 
- * Regular Expressions library. 
- * 
- * Pire is free software: you can redistribute it and/or modify 
- * it under the terms of the GNU Lesser Public License as published by 
- * the Free Software Foundation, either version 3 of the License, or 
- * (at your option) any later version. 
- * 
- * Pire is distributed in the hope that it will be useful, 
- * but WITHOUT ANY WARRANTY; without even the implied warranty of 
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
- * GNU Lesser Public License for more details. 
- * You should have received a copy of the GNU Lesser Public License 
- * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
- */ 
- 
- 
-#include "read_unicode.h" 
- 
-#include <contrib/libs/pire/pire/re_lexer.h> 
- 
-namespace Pire { 
-	wchar32 UnicodeReader::ReadUnicodeCharacter() { 
-		ystring hexStr; 
-		GetChar(); 
-		wchar32 ch = PeekChar(); 
- 
-		if (ch == '{') { 
-			GetChar(); 
-			hexStr = ReadHexDigit( 
-					[](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); }); 
-			ch = GetChar(); 
-			if (ch != '}') { 
-				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\""); 
-			} 
-		} else { 
-			hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; }); 
-			if (hexStr.size() != 2) { 
-				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols"); 
-			} 
-		} 
-		return HexToDec(hexStr); 
-	} 
- 
-	bool UnicodeReader::IsHexDigit(wchar32 ch) { 
-		return ch < 256 && std::isxdigit(ch) != 0; 
-	} 
- 
-	ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) { 
-		ystring result; 
-		wchar32 ch = GetChar(); 
-		while (!shouldStop(ch, result.size())) { 
-			if (!IsHexDigit(ch)) { 
-				Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number"); 
-			} 
-			result.push_back(ch); 
-			ch = GetChar(); 
-		} 
-		UngetChar(ch); 
-		return result; 
-	} 
- 
-	wchar32 UnicodeReader::HexToDec(const ystring &hexStr) { 
-		wchar32 converted; 
-		try { 
-			converted = std::stoul(hexStr, 0, 16); 
-		} catch (std::out_of_range &) { 
-			converted = MAX_UNICODE + 1; 
-		} 
-		if (converted > MAX_UNICODE) { 
-			Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large"); 
-		} 
-		return converted; 
-	} 
-} 
- 
- 
+/*
+ * read_unicode.cpp -- implementation of the UnicodeReader.
+ *
+ * Copyright (c) 2019 YANDEX LLC
+ * Author: Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include "read_unicode.h"
+
+#include <contrib/libs/pire/pire/re_lexer.h>
+
+namespace Pire {
+	wchar32 UnicodeReader::ReadUnicodeCharacter() {
+		ystring hexStr;
+		GetChar();
+		wchar32 ch = PeekChar();
+
+		if (ch == '{') {
+			GetChar();
+			hexStr = ReadHexDigit(
+					[](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); });
+			ch = GetChar();
+			if (ch != '}') {
+				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\"");
+			}
+		} else {
+			hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; });
+			if (hexStr.size() != 2) {
+				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols");
+			}
+		}
+		return HexToDec(hexStr);
+	}
+
+	bool UnicodeReader::IsHexDigit(wchar32 ch) {
+		return ch < 256 && std::isxdigit(ch) != 0;
+	}
+
+	ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) {
+		ystring result;
+		wchar32 ch = GetChar();
+		while (!shouldStop(ch, result.size())) {
+			if (!IsHexDigit(ch)) {
+				Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number");
+			}
+			result.push_back(ch);
+			ch = GetChar();
+		}
+		UngetChar(ch);
+		return result;
+	}
+
+	wchar32 UnicodeReader::HexToDec(const ystring &hexStr) {
+		wchar32 converted;
+		try {
+			converted = std::stoul(hexStr, 0, 16);
+		} catch (std::out_of_range &) {
+			converted = MAX_UNICODE + 1;
+		}
+		if (converted > MAX_UNICODE) {
+			Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large");
+		}
+		return converted;
+	}
+}
+
+
diff --git a/contrib/libs/pire/pire/read_unicode.h b/contrib/libs/pire/pire/read_unicode.h
index f0705c14aa..107545e5a1 100644
--- a/contrib/libs/pire/pire/read_unicode.h
+++ b/contrib/libs/pire/pire/read_unicode.h
@@ -1,40 +1,40 @@
-/* 
- * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences. 
- * 
- * Copyright (c) 2019 YANDEX LLC 
- * Author: Karina Usmanova <usmanova.karin@yandex.ru> 
- * 
- * This file is part of Pire, the Perl Incompatible 
- * Regular Expressions library. 
- * 
- * Pire is free software: you can redistribute it and/or modify 
- * it under the terms of the GNU Lesser Public License as published by 
- * the Free Software Foundation, either version 3 of the License, or 
- * (at your option) any later version. 
- * 
- * Pire is distributed in the hope that it will be useful, 
- * but WITHOUT ANY WARRANTY; without even the implied warranty of 
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
- * GNU Lesser Public License for more details. 
- * You should have received a copy of the GNU Lesser Public License 
- * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
- */ 
- 
- 
-#include <contrib/libs/pire/pire/re_lexer.h> 
- 
-namespace Pire { 
-	class UnicodeReader : public Feature { 
-	public: 
-		wchar32 ReadUnicodeCharacter(); 
- 
-	private: 
-		static const wchar32 MAX_UNICODE = 0x10FFFF; 
- 
-		bool IsHexDigit(wchar32 ch); 
-		ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop); 
-		wchar32 HexToDec(const ystring& hexStr); 
-	}; 
-} 
- 
- 
+/*
+ * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences.
+ *
+ * Copyright (c) 2019 YANDEX LLC
+ * Author: Karina Usmanova <usmanova.karin@yandex.ru>
+ *
+ * This file is part of Pire, the Perl Incompatible
+ * Regular Expressions library.
+ *
+ * Pire is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pire is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser Public License for more details.
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>.
+ */
+
+
+#include <contrib/libs/pire/pire/re_lexer.h>
+
+namespace Pire {
+	class UnicodeReader : public Feature {
+	public:
+		wchar32 ReadUnicodeCharacter();
+
+	private:
+		static const wchar32 MAX_UNICODE = 0x10FFFF;
+
+		bool IsHexDigit(wchar32 ch);
+		ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop);
+		wchar32 HexToDec(const ystring& hexStr);
+	};
+}
+
+
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index 24ded64a68..120dc403b7 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -26,7 +26,7 @@
 
 #include <string.h>
 
-#include <contrib/libs/pire/pire/approx_matching.h> 
+#include <contrib/libs/pire/pire/approx_matching.h>
 #include <contrib/libs/pire/pire/fsm.h>
 #include <contrib/libs/pire/pire/partition.h>
 
@@ -245,11 +245,11 @@ protected:
 	virtual ~LoadedScanner();
 
 private:
-	explicit LoadedScanner(Fsm& fsm, size_t distance = 0) 
+	explicit LoadedScanner(Fsm& fsm, size_t distance = 0)
 	{
-		if (distance) { 
-			fsm = CreateApproxFsm(fsm, distance); 
-		} 
+		if (distance) {
+			fsm = CreateApproxFsm(fsm, distance);
+		}
 		fsm.Canonize();
 		Init(fsm.Size(), fsm.Letters(), fsm.Initial());
 		BuildScanner(fsm, *this);
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index b6cdceaa32..29679e416e 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -26,7 +26,7 @@
 
 #include <cstring>
 #include <string.h>
-#include <contrib/libs/pire/pire/approx_matching.h> 
+#include <contrib/libs/pire/pire/approx_matching.h>
 #include <contrib/libs/pire/pire/fsm.h>
 #include <contrib/libs/pire/pire/partition.h>
 #include <contrib/libs/pire/pire/run.h>
@@ -121,11 +121,11 @@ public:
 
 	Scanner() { Alias(Null()); }
 
-	explicit Scanner(Fsm& fsm, size_t distance = 0) 
+	explicit Scanner(Fsm& fsm, size_t distance = 0)
 	{
-		if (distance) { 
-			fsm = CreateApproxFsm(fsm, distance); 
-		} 
+		if (distance) {
+			fsm = CreateApproxFsm(fsm, distance);
+		}
 		fsm.Canonize();
 		Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1);
 		BuildScanner(fsm, *this);
diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h
index 6874e1f2a3..ef959aeed1 100644
--- a/contrib/libs/pire/pire/scanners/simple.h
+++ b/contrib/libs/pire/pire/scanners/simple.h
@@ -24,7 +24,7 @@
 #ifndef PIRE_SCANNERS_SIMPLE_H
 #define PIRE_SCANNERS_SIMPLE_H
 
-#include <contrib/libs/pire/pire/approx_matching.h> 
+#include <contrib/libs/pire/pire/approx_matching.h>
 #include <contrib/libs/pire/pire/stub/stl.h>
 #include <contrib/libs/pire/pire/stub/defaults.h>
 #include <contrib/libs/pire/pire/stub/saveload.h>
@@ -49,7 +49,7 @@ public:
 
 	SimpleScanner()	{ Alias(Null()); }
 
-	explicit SimpleScanner(Fsm& fsm, size_t distance = 0); 
+	explicit SimpleScanner(Fsm& fsm, size_t distance = 0);
 
 	size_t Size() const { return m.statesCount; }
 	bool Empty() const { return m_transitions == Null().m_transitions; }
@@ -229,11 +229,11 @@ protected:
 	}
 
 };
-inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) 
+inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
 {
-	if (distance) { 
-		fsm = CreateApproxFsm(fsm, distance); 
-	} 
+	if (distance) {
+		fsm = CreateApproxFsm(fsm, distance);
+	}
 	fsm.Canonize();
 
 	m.statesCount = fsm.Size();
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 8f1e4ca4d0..6adfcb8c1d 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -24,7 +24,7 @@
 #ifndef PIRE_SCANNERS_SLOW_H
 #define PIRE_SCANNERS_SLOW_H
 
-#include <contrib/libs/pire/pire/approx_matching.h> 
+#include <contrib/libs/pire/pire/approx_matching.h>
 #include <contrib/libs/pire/pire/partition.h>
 #include <contrib/libs/pire/pire/vbitset.h>
 #include <contrib/libs/pire/pire/fsm.h>
@@ -250,12 +250,12 @@ public:
 		}
 	}
 
-	explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) 
+	explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
 		: need_actions(needActions)
 	{
-		if (distance) { 
-			fsm = CreateApproxFsm(fsm, distance); 
-		} 
+		if (distance) {
+			fsm = CreateApproxFsm(fsm, distance);
+		}
 		if (removeEpsilons)
 			fsm.RemoveEpsilons();
 		fsm.Sparse(!removeEpsilons);
@@ -357,7 +357,7 @@ private:
 
 	bool need_actions;
 	TVector<TVector<Action>> m_actionsvec;
-	static const SlowScanner& Null(); 
+	static const SlowScanner& Null();
 
 	template<class T> void alloc(T*& p, size_t size)
 	{
@@ -416,17 +416,17 @@ private:
 	friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&);
 };
 
-template<> 
-inline SlowScanner Fsm::Compile(size_t distance) { 
-	return SlowScanner(*this, false, true, distance); 
-} 
- 
-inline const SlowScanner& SlowScanner::Null() 
-{ 
-	static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>(); 
-	return n; 
-} 
- 
+template<>
+inline SlowScanner Fsm::Compile(size_t distance) {
+	return SlowScanner(*this, false, true, distance);
+}
+
+inline const SlowScanner& SlowScanner::Null()
+{
+	static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>();
+	return n;
+}
+
 #ifndef PIRE_DEBUG
 /// A specialization of Run(), since its state is much heavier than other ones
 /// and we thus want to avoid copying states.
author	karina-usm <karina-usm@yandex-team.ru>	2022-02-10 16:48:05 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:48:05 +0300
commit	3305cedaf9e392ab24e4b7dd6072976748ce60bf (patch)
tree	b222e5ac2e2e98872661c51ccceee5da0d291e13 /contrib
parent	62517661cde7aa7c93efe0281ec48eeb70ea420c (diff)
download	ydb-3305cedaf9e392ab24e4b7dd6072976748ce60bf.tar.gz