Restoring authorship annotation for <[email protected]>. Commit 1 of 2.

author: karina-usm <[email protected]> 2022-02-10 16:48:05 +0300
committer: Daniil Cherednik <[email protected]> 2022-02-10 16:48:05 +0300
commit: 62517661cde7aa7c93efe0281ec48eeb70ea420c (patch)
tree: 066f34bb401d85fa43842442fb0d888ffb2a305f
parent: 5f8a2ce7b1dc3b3e1fae197610f189e7ed1d5723 (diff)
14 files changed, 355 insertions, 355 deletions
diff --git a/contrib/libs/pire/pire/approx_matching.cpp b/contrib/libs/pire/pire/approx_matching.cpp
index 23f74ca01df..8c393b39e05 100644
--- a/contrib/libs/pire/pire/approx_matching.cpp
+++ b/contrib/libs/pire/pire/approx_matching.cpp
@@ -1,94 +1,94 @@
-/*
- * approx_matching.cpp -- implementation of CreateApproxFsm function
- *
- * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <[email protected]>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire.  If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include "approx_matching.h"
-
-namespace Pire {
-	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) {
-		Fsm approxFsm = regexp;
-
-		TVector<TSet<Char>> outgoingLettersTable(regexp.Size());
-		for (size_t state = 0; state < regexp.Size(); ++state) {
-			outgoingLettersTable[state] = regexp.OutgoingLetters(state);
-		}
-
-		TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size());
-		for (size_t state = 0; state < regexp.Size(); ++state) {
-			for (Char letter : outgoingLettersTable[state]) {
-				destinationsTable[state][letter] = regexp.Destinations(state, letter);
-			}
-		}
-
-		for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) {
-			approxFsm.Import(regexp);
-			const auto shift = fsmIdx * regexp.Size();
-
-			for (size_t state = 0; state < regexp.Size(); ++state) {
-				for (Char letter : outgoingLettersTable[state]) {
-					for (size_t to : destinationsTable[state][letter]) {
-						for (Char ch = 0; ch < MaxChar; ++ch) {
-							if (!approxFsm.Connected(state + shift, to + shift, ch)) {
-								approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch);
-							}
-						}
-
-						approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon);
-					}
-
-					for (Char ch = 0; ch < MaxChar; ++ch) {
-						approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch);
-					}
-				}
-
-				if (regexp.IsFinal(state)) {
-					approxFsm.SetFinal(state + shift + regexp.Size(), true);
-				}
-			}
-		}
-
-		size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0;
-		for (size_t state = 0; state < maxState; ++state) {
-			size_t currentDist = state / regexp.Size();
-			size_t intState = state % regexp.Size();
-
-			for (Char firstLetter : outgoingLettersTable[intState]) {
-				for (size_t firstDest : destinationsTable[intState][firstLetter]) {
-					for (Char secondLetter : outgoingLettersTable[firstDest]) {
-						for (size_t secondDest : destinationsTable[firstDest][secondLetter]) {
-							if (secondDest != intState || firstDest != intState) {
-								approxFsm.Resize(approxFsm.Size() + 1);
-
-								size_t to = secondDest + (currentDist + 1) * regexp.Size();
-								size_t middle = approxFsm.Size() - 1;
-
-								approxFsm.Connect(state, middle, secondLetter);
-								approxFsm.Connect(middle, to, firstLetter);
-							}
-						}
-					}
-				}
-			}
-		}
-
-		return approxFsm;
-	}
-}
+/* 
+ * approx_matching.cpp -- implementation of CreateApproxFsm function 
+ * 
+ * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <[email protected]> 
+ * 
+ * This file is part of Pire, the Perl Incompatible 
+ * Regular Expressions library. 
+ * 
+ * Pire is free software: you can redistribute it and/or modify 
+ * it under the terms of the GNU Lesser Public License as published by 
+ * the Free Software Foundation, either version 3 of the License, or 
+ * (at your option) any later version. 
+ * 
+ * Pire is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
+ * GNU Lesser Public License for more details. 
+ * You should have received a copy of the GNU Lesser Public License 
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
+ */ 
+ 
+ 
+#include "approx_matching.h" 
+ 
+namespace Pire { 
+	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance) { 
+		Fsm approxFsm = regexp; 
+ 
+		TVector<TSet<Char>> outgoingLettersTable(regexp.Size()); 
+		for (size_t state = 0; state < regexp.Size(); ++state) { 
+			outgoingLettersTable[state] = regexp.OutgoingLetters(state); 
+		} 
+ 
+		TVector<TMap<Char, Fsm::StatesSet>> destinationsTable(regexp.Size()); 
+		for (size_t state = 0; state < regexp.Size(); ++state) { 
+			for (Char letter : outgoingLettersTable[state]) { 
+				destinationsTable[state][letter] = regexp.Destinations(state, letter); 
+			} 
+		} 
+ 
+		for (size_t fsmIdx = 0; fsmIdx < distance; ++fsmIdx) { 
+			approxFsm.Import(regexp); 
+			const auto shift = fsmIdx * regexp.Size(); 
+ 
+			for (size_t state = 0; state < regexp.Size(); ++state) { 
+				for (Char letter : outgoingLettersTable[state]) { 
+					for (size_t to : destinationsTable[state][letter]) { 
+						for (Char ch = 0; ch < MaxChar; ++ch) { 
+							if (!approxFsm.Connected(state + shift, to + shift, ch)) { 
+								approxFsm.Connect(state + shift, to + shift + regexp.Size(), ch); 
+							} 
+						} 
+ 
+						approxFsm.Connect(state + shift, to + shift + regexp.Size(), Epsilon); 
+					} 
+ 
+					for (Char ch = 0; ch < MaxChar; ++ch) { 
+						approxFsm.Connect(state + shift, state + shift + regexp.Size(), ch); 
+					} 
+				} 
+ 
+				if (regexp.IsFinal(state)) { 
+					approxFsm.SetFinal(state + shift + regexp.Size(), true); 
+				} 
+			} 
+		} 
+ 
+		size_t maxState = (distance > 0) ? approxFsm.Size() - regexp.Size() : 0; 
+		for (size_t state = 0; state < maxState; ++state) { 
+			size_t currentDist = state / regexp.Size(); 
+			size_t intState = state % regexp.Size(); 
+ 
+			for (Char firstLetter : outgoingLettersTable[intState]) { 
+				for (size_t firstDest : destinationsTable[intState][firstLetter]) { 
+					for (Char secondLetter : outgoingLettersTable[firstDest]) { 
+						for (size_t secondDest : destinationsTable[firstDest][secondLetter]) { 
+							if (secondDest != intState || firstDest != intState) { 
+								approxFsm.Resize(approxFsm.Size() + 1); 
+ 
+								size_t to = secondDest + (currentDist + 1) * regexp.Size(); 
+								size_t middle = approxFsm.Size() - 1; 
+ 
+								approxFsm.Connect(state, middle, secondLetter); 
+								approxFsm.Connect(middle, to, firstLetter); 
+							} 
+						} 
+					} 
+				} 
+			} 
+		} 
+ 
+		return approxFsm; 
+	} 
+} 
diff --git a/contrib/libs/pire/pire/approx_matching.h b/contrib/libs/pire/pire/approx_matching.h
index fc2a9fd61c1..2b2568d96ba 100644
--- a/contrib/libs/pire/pire/approx_matching.h
+++ b/contrib/libs/pire/pire/approx_matching.h
@@ -1,28 +1,28 @@
-/*
- * approx_matching.h -- function for creating fsm which matches words
- *                      within a levenshtein distance
- *
- * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <[email protected]>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire.  If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include "fsm.h"
-
-namespace Pire {
-	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance);
-}
+/* 
+ * approx_matching.h -- function for creating fsm which matches words 
+ *                      within a levenshtein distance 
+ * 
+ * Copyright (c) 2019 YANDEX LLC, Karina Usmanova <[email protected]> 
+ * 
+ * This file is part of Pire, the Perl Incompatible 
+ * Regular Expressions library. 
+ * 
+ * Pire is free software: you can redistribute it and/or modify 
+ * it under the terms of the GNU Lesser Public License as published by 
+ * the Free Software Foundation, either version 3 of the License, or 
+ * (at your option) any later version. 
+ * 
+ * Pire is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
+ * GNU Lesser Public License for more details. 
+ * You should have received a copy of the GNU Lesser Public License 
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
+ */ 
+ 
+ 
+#include "fsm.h" 
+ 
+namespace Pire { 
+	Fsm CreateApproxFsm(const Fsm& regexp, size_t distance); 
+} 
diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h
index 8399914a67f..7bca334eacb 100644
--- a/contrib/libs/pire/pire/extra/capture.h
+++ b/contrib/libs/pire/pire/extra/capture.h
@@ -25,7 +25,7 @@
 #define PIRE_EXTRA_CAPTURE_H
 
 
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h> 
 #include <contrib/libs/pire/pire/scanners/loaded.h>
 #include <contrib/libs/pire/pire/scanners/multi.h>
 #include <contrib/libs/pire/pire/scanners/slow.h>
@@ -139,11 +139,11 @@ public:
 
 	CapturingScanner() {}
 	CapturingScanner(const CapturingScanner& s): LoadedScanner(s) {}
-	explicit CapturingScanner(Fsm& fsm, size_t distance = 0)
+	explicit CapturingScanner(Fsm& fsm, size_t distance = 0) 
 	{
-		if (distance) {
-			fsm = CreateApproxFsm(fsm, distance);
-		}
+		if (distance) { 
+			fsm = CreateApproxFsm(fsm, distance); 
+		} 
 		fsm.Canonize();
 		Init(fsm.Size(), fsm.Letters(), fsm.Initial());
 		BuildScanner(fsm, *this);
@@ -576,8 +576,8 @@ public:
 	{
 	}
 
-	SlowCapturingScanner(Fsm& fsm, size_t distance = 0)
-		: SlowScanner(fsm, true, false, distance)
+	SlowCapturingScanner(Fsm& fsm, size_t distance = 0) 
+		: SlowScanner(fsm, true, false, distance) 
 	{
 	}
 };
diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h
index 4dad06ca065..348e6b62168 100644
--- a/contrib/libs/pire/pire/fsm.h
+++ b/contrib/libs/pire/pire/fsm.h
@@ -115,9 +115,9 @@ namespace Pire {
 
 		/// Determines and minimizes the FSM if neccessary. Returns *this.
 		Fsm& Canonize(size_t maxSize = 0);
-
+ 
 		template<class Scanner>
-		Scanner Compile(size_t distance = 0);
+		Scanner Compile(size_t distance = 0); 
 
 		void DumpState(yostream& s, size_t state) const;
 		void DumpTo(yostream& s, const ystring& name = "") const;
@@ -270,11 +270,11 @@ namespace Pire {
 		
 		r.FinishBuild();
 	}
-
+ 
 	template<class Scanner>
-	inline Scanner Fsm::Compile(size_t distance)
+	inline Scanner Fsm::Compile(size_t distance) 
 	{
-		return Scanner(*this, distance);
+		return Scanner(*this, distance); 
 	}
 
 	yostream& operator << (yostream&, const Fsm&);
diff --git a/contrib/libs/pire/pire/re_lexer.cpp b/contrib/libs/pire/pire/re_lexer.cpp
index 132fbeb0399..dbae421f160 100644
--- a/contrib/libs/pire/pire/re_lexer.cpp
+++ b/contrib/libs/pire/pire/re_lexer.cpp
@@ -11,7 +11,7 @@
  * it under the terms of the GNU Lesser Public License as published by
  * the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
- *
+ * 
  * Pire is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@@ -28,12 +28,12 @@
 #include <contrib/libs/pire/pire/stub/utf8.h>
 #include <contrib/libs/pire/pire/stub/singleton.h>
 
-#include "fsm.h"
+#include "fsm.h" 
 #include "re_lexer.h"
 #include "re_parser.h"
-#include "read_unicode.h"
-
+#include "read_unicode.h" 
 
+ 
 namespace Pire {
 
 namespace Impl {
@@ -161,7 +161,7 @@ Term Lexer::Lex()
                 if ((j & ControlMask) == Control)
                     Error("Control character in tokens sequence");
     }
-
+ 
     int type = t.Type();
     if (type == TokenTypes::Letters)
         type = YRE_LETTERS;
@@ -205,19 +205,19 @@ wchar32 Feature::CorrectChar(wchar32 c, const char* controls)
 }
 
 namespace {
-    class EnableUnicodeSequencesImpl : public UnicodeReader {
-    public:
-        bool Accepts(wchar32 c) const {
-            return c == (Control | 'x');
-        }
-
-        Term Lex() {
-            return Term::Character(ReadUnicodeCharacter());
-        }
-    };
-
-    class CharacterRangeReader: public UnicodeReader {
+    class EnableUnicodeSequencesImpl : public UnicodeReader { 
     public:
+        bool Accepts(wchar32 c) const { 
+            return c == (Control | 'x'); 
+        } 
+ 
+        Term Lex() { 
+            return Term::Character(ReadUnicodeCharacter()); 
+        } 
+    }; 
+ 
+    class CharacterRangeReader: public UnicodeReader { 
+    public: 
         bool Accepts(wchar32 c) const { return c == '[' || c == (Control | '[') || c == (Control | ']'); }
 
         Term Lex()
@@ -235,49 +235,49 @@ namespace {
                 ch = CorrectChar(GetChar(), controls);
             }
 
-            bool firstUnicode;
-            wchar32 unicodeSymbol = 0;
-
+            bool firstUnicode; 
+            wchar32 unicodeSymbol = 0; 
+ 
             for (; ch != End && ch != (Control | ']'); ch = CorrectChar(GetChar(), controls)) {
-                if (ch == (Control | 'x')) {
-                    UngetChar(ch);
-					firstUnicode = true;
-					unicodeSymbol = ReadUnicodeCharacter();
-                } else {
-                    firstUnicode = false;
-                }
-
-                if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) {
+                if (ch == (Control | 'x')) { 
+                    UngetChar(ch); 
+					firstUnicode = true; 
+					unicodeSymbol = ReadUnicodeCharacter(); 
+                } else { 
+                    firstUnicode = false; 
+                } 
+ 
+                if (((ch & ControlMask) != Control || firstUnicode) && CorrectChar(PeekChar(), controls) == (Control | '-')) { 
                     GetChar();
-                    wchar32 current = GetChar();
-
-                    bool secondUnicode = (current == (Control | 'x'));
-
-                    wchar32 begin = (firstUnicode) ? unicodeSymbol : ch;
-                    wchar32 end;
-                    if (secondUnicode) {
-                        UngetChar(current);
-                        end = ReadUnicodeCharacter();
-                    } else {
-                        end = CorrectChar(current, controls);
-                        if ((end & ControlMask) == Control)
-                            Error("Wrong character range");
-                    }
-
-                    for (ch = begin; ch <= end; ++ch) {
+                    wchar32 current = GetChar(); 
+ 
+                    bool secondUnicode = (current == (Control | 'x')); 
+ 
+                    wchar32 begin = (firstUnicode) ? unicodeSymbol : ch; 
+                    wchar32 end; 
+                    if (secondUnicode) { 
+                        UngetChar(current); 
+                        end = ReadUnicodeCharacter(); 
+                    } else { 
+                        end = CorrectChar(current, controls); 
+                        if ((end & ControlMask) == Control) 
+                            Error("Wrong character range"); 
+                    } 
+ 
+                    for (ch = begin; ch <= end; ++ch) { 
                         cs.first.insert(Term::String(1, ch));
-                    }
-                } else if (ch == (Control | '-')) {
+                    } 
+                } else if (ch == (Control | '-')) { 
                     cs.first.insert(Term::String(1, '-'));
-                }
-                else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) {
+                } 
+                else if ((ch & ControlMask) == Control && (strchr(controls2, ch & ~ControlMask) || strchr(controls, ch & ~ControlMask))) { 
                     cs.first.insert(Term::String(1, ch & ~ControlMask));
-                }
-                else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) {
-                    cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch));
-                } else {
+                } 
+                else if ((ch & ControlMask) != Control || !strchr(controls, ch & ~ControlMask)) { 
+                    cs.first.insert(Term::String(1, (firstUnicode) ? unicodeSymbol : ch)); 
+                } else { 
                     Error("Wrong character in range");
-                }
+                } 
             }
             if (ch == End)
                 Error("Unexpected end of pattern");
@@ -347,7 +347,7 @@ namespace {
         {
             return c == '&' || c == '~' || c == (Control | '&') || c == (Control | '~');
         }
-
+ 
         Term Lex()
         {
             wchar32 ch = GetChar();
@@ -376,7 +376,7 @@ void Lexer::InstallDefaultFeatures()
     AddFeature(Feature::Ptr(new CharacterRangeReader));
     AddFeature(Feature::Ptr(new RepetitionCountReader));
     AddFeature(Features::CharClasses());
-    AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl));
+    AddFeature(Feature::Ptr(new EnableUnicodeSequencesImpl)); 
 }
 
 Fsm Lexer::Parse()
diff --git a/contrib/libs/pire/pire/read_unicode.cpp b/contrib/libs/pire/pire/read_unicode.cpp
index 5b21e4eb285..e167cf5ccaa 100644
--- a/contrib/libs/pire/pire/read_unicode.cpp
+++ b/contrib/libs/pire/pire/read_unicode.cpp
@@ -1,83 +1,83 @@
-/*
- * read_unicode.cpp -- implementation of the UnicodeReader.
- *
- * Copyright (c) 2019 YANDEX LLC
- * Author: Karina Usmanova <[email protected]>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire.  If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include "read_unicode.h"
-
-#include <contrib/libs/pire/pire/re_lexer.h>
-
-namespace Pire {
-	wchar32 UnicodeReader::ReadUnicodeCharacter() {
-		ystring hexStr;
-		GetChar();
-		wchar32 ch = PeekChar();
-
-		if (ch == '{') {
-			GetChar();
-			hexStr = ReadHexDigit(
-					[](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); });
-			ch = GetChar();
-			if (ch != '}') {
-				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\"");
-			}
-		} else {
-			hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; });
-			if (hexStr.size() != 2) {
-				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols");
-			}
-		}
-		return HexToDec(hexStr);
-	}
-
-	bool UnicodeReader::IsHexDigit(wchar32 ch) {
-		return ch < 256 && std::isxdigit(ch) != 0;
-	}
-
-	ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) {
-		ystring result;
-		wchar32 ch = GetChar();
-		while (!shouldStop(ch, result.size())) {
-			if (!IsHexDigit(ch)) {
-				Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number");
-			}
-			result.push_back(ch);
-			ch = GetChar();
-		}
-		UngetChar(ch);
-		return result;
-	}
-
-	wchar32 UnicodeReader::HexToDec(const ystring &hexStr) {
-		wchar32 converted;
-		try {
-			converted = std::stoul(hexStr, 0, 16);
-		} catch (std::out_of_range &) {
-			converted = MAX_UNICODE + 1;
-		}
-		if (converted > MAX_UNICODE) {
-			Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large");
-		}
-		return converted;
-	}
-}
-
-
+/* 
+ * read_unicode.cpp -- implementation of the UnicodeReader. 
+ * 
+ * Copyright (c) 2019 YANDEX LLC 
+ * Author: Karina Usmanova <[email protected]> 
+ * 
+ * This file is part of Pire, the Perl Incompatible 
+ * Regular Expressions library. 
+ * 
+ * Pire is free software: you can redistribute it and/or modify 
+ * it under the terms of the GNU Lesser Public License as published by 
+ * the Free Software Foundation, either version 3 of the License, or 
+ * (at your option) any later version. 
+ * 
+ * Pire is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
+ * GNU Lesser Public License for more details. 
+ * You should have received a copy of the GNU Lesser Public License 
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
+ */ 
+ 
+ 
+#include "read_unicode.h" 
+ 
+#include <contrib/libs/pire/pire/re_lexer.h> 
+ 
+namespace Pire { 
+	wchar32 UnicodeReader::ReadUnicodeCharacter() { 
+		ystring hexStr; 
+		GetChar(); 
+		wchar32 ch = PeekChar(); 
+ 
+		if (ch == '{') { 
+			GetChar(); 
+			hexStr = ReadHexDigit( 
+					[](wchar32 ch, size_t numAdded) -> bool { return ch == End || (numAdded != 0 && ch == '}'); }); 
+			ch = GetChar(); 
+			if (ch != '}') { 
+				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x{...\" sequence should be closed by \"}\""); 
+			} 
+		} else { 
+			hexStr = ReadHexDigit([](wchar32, size_t numAdded) -> bool { return numAdded == 2; }); 
+			if (hexStr.size() != 2) { 
+				Error("Pire::UnicodeReader::ReadUnicodeCharacter(): \"\\x...\" sequence should contain two symbols"); 
+			} 
+		} 
+		return HexToDec(hexStr); 
+	} 
+ 
+	bool UnicodeReader::IsHexDigit(wchar32 ch) { 
+		return ch < 256 && std::isxdigit(ch) != 0; 
+	} 
+ 
+	ystring UnicodeReader::ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop) { 
+		ystring result; 
+		wchar32 ch = GetChar(); 
+		while (!shouldStop(ch, result.size())) { 
+			if (!IsHexDigit(ch)) { 
+				Error("Pire::UnicodeReader::ReadHexDigit(): \"\\x...\" sequence contains non-valid hex number"); 
+			} 
+			result.push_back(ch); 
+			ch = GetChar(); 
+		} 
+		UngetChar(ch); 
+		return result; 
+	} 
+ 
+	wchar32 UnicodeReader::HexToDec(const ystring &hexStr) { 
+		wchar32 converted; 
+		try { 
+			converted = std::stoul(hexStr, 0, 16); 
+		} catch (std::out_of_range &) { 
+			converted = MAX_UNICODE + 1; 
+		} 
+		if (converted > MAX_UNICODE) { 
+			Error("Pire::UnicodeReader::HexToDec(): hex number in \"\\x...\" sequence is too large"); 
+		} 
+		return converted; 
+	} 
+} 
+ 
+ 
diff --git a/contrib/libs/pire/pire/read_unicode.h b/contrib/libs/pire/pire/read_unicode.h
index 107545e5a18..f0705c14aab 100644
--- a/contrib/libs/pire/pire/read_unicode.h
+++ b/contrib/libs/pire/pire/read_unicode.h
@@ -1,40 +1,40 @@
-/*
- * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences.
- *
- * Copyright (c) 2019 YANDEX LLC
- * Author: Karina Usmanova <[email protected]>
- *
- * This file is part of Pire, the Perl Incompatible
- * Regular Expressions library.
- *
- * Pire is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Pire is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser Public License for more details.
- * You should have received a copy of the GNU Lesser Public License
- * along with Pire.  If not, see <http://www.gnu.org/licenses>.
- */
-
-
-#include <contrib/libs/pire/pire/re_lexer.h>
-
-namespace Pire {
-	class UnicodeReader : public Feature {
-	public:
-		wchar32 ReadUnicodeCharacter();
-
-	private:
-		static const wchar32 MAX_UNICODE = 0x10FFFF;
-
-		bool IsHexDigit(wchar32 ch);
-		ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop);
-		wchar32 HexToDec(const ystring& hexStr);
-	};
-}
-
-
+/* 
+ * read_unicode.h -- declaration of the UnicodeReader class, helper for UnicodeRange and EnableUnicodeSequences. 
+ * 
+ * Copyright (c) 2019 YANDEX LLC 
+ * Author: Karina Usmanova <[email protected]> 
+ * 
+ * This file is part of Pire, the Perl Incompatible 
+ * Regular Expressions library. 
+ * 
+ * Pire is free software: you can redistribute it and/or modify 
+ * it under the terms of the GNU Lesser Public License as published by 
+ * the Free Software Foundation, either version 3 of the License, or 
+ * (at your option) any later version. 
+ * 
+ * Pire is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
+ * GNU Lesser Public License for more details. 
+ * You should have received a copy of the GNU Lesser Public License 
+ * along with Pire.  If not, see <http://www.gnu.org/licenses>. 
+ */ 
+ 
+ 
+#include <contrib/libs/pire/pire/re_lexer.h> 
+ 
+namespace Pire { 
+	class UnicodeReader : public Feature { 
+	public: 
+		wchar32 ReadUnicodeCharacter(); 
+ 
+	private: 
+		static const wchar32 MAX_UNICODE = 0x10FFFF; 
+ 
+		bool IsHexDigit(wchar32 ch); 
+		ystring ReadHexDigit(std::function<bool(wchar32, size_t)> shouldStop); 
+		wchar32 HexToDec(const ystring& hexStr); 
+	}; 
+} 
+ 
+ 
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index 120dc403b75..24ded64a68c 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -26,7 +26,7 @@
 
 #include <string.h>
 
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h> 
 #include <contrib/libs/pire/pire/fsm.h>
 #include <contrib/libs/pire/pire/partition.h>
 
@@ -245,11 +245,11 @@ protected:
 	virtual ~LoadedScanner();
 
 private:
-	explicit LoadedScanner(Fsm& fsm, size_t distance = 0)
+	explicit LoadedScanner(Fsm& fsm, size_t distance = 0) 
 	{
-		if (distance) {
-			fsm = CreateApproxFsm(fsm, distance);
-		}
+		if (distance) { 
+			fsm = CreateApproxFsm(fsm, distance); 
+		} 
 		fsm.Canonize();
 		Init(fsm.Size(), fsm.Letters(), fsm.Initial());
 		BuildScanner(fsm, *this);
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index 29679e416ed..b6cdceaa327 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -26,7 +26,7 @@
 
 #include <cstring>
 #include <string.h>
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h> 
 #include <contrib/libs/pire/pire/fsm.h>
 #include <contrib/libs/pire/pire/partition.h>
 #include <contrib/libs/pire/pire/run.h>
@@ -121,11 +121,11 @@ public:
 
 	Scanner() { Alias(Null()); }
 
-	explicit Scanner(Fsm& fsm, size_t distance = 0)
+	explicit Scanner(Fsm& fsm, size_t distance = 0) 
 	{
-		if (distance) {
-			fsm = CreateApproxFsm(fsm, distance);
-		}
+		if (distance) { 
+			fsm = CreateApproxFsm(fsm, distance); 
+		} 
 		fsm.Canonize();
 		Init(fsm.Size(), fsm.Letters(), fsm.Finals().size(), fsm.Initial(), 1);
 		BuildScanner(fsm, *this);
diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h
index ef959aeed13..6874e1f2a30 100644
--- a/contrib/libs/pire/pire/scanners/simple.h
+++ b/contrib/libs/pire/pire/scanners/simple.h
@@ -24,7 +24,7 @@
 #ifndef PIRE_SCANNERS_SIMPLE_H
 #define PIRE_SCANNERS_SIMPLE_H
 
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h> 
 #include <contrib/libs/pire/pire/stub/stl.h>
 #include <contrib/libs/pire/pire/stub/defaults.h>
 #include <contrib/libs/pire/pire/stub/saveload.h>
@@ -49,7 +49,7 @@ public:
 
 	SimpleScanner()	{ Alias(Null()); }
 
-	explicit SimpleScanner(Fsm& fsm, size_t distance = 0);
+	explicit SimpleScanner(Fsm& fsm, size_t distance = 0); 
 
 	size_t Size() const { return m.statesCount; }
 	bool Empty() const { return m_transitions == Null().m_transitions; }
@@ -229,11 +229,11 @@ protected:
 	}
 
 };
-inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance)
+inline SimpleScanner::SimpleScanner(Fsm& fsm, size_t distance) 
 {
-	if (distance) {
-		fsm = CreateApproxFsm(fsm, distance);
-	}
+	if (distance) { 
+		fsm = CreateApproxFsm(fsm, distance); 
+	} 
 	fsm.Canonize();
 
 	m.statesCount = fsm.Size();
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 6adfcb8c1d0..8f1e4ca4d0c 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -24,7 +24,7 @@
 #ifndef PIRE_SCANNERS_SLOW_H
 #define PIRE_SCANNERS_SLOW_H
 
-#include <contrib/libs/pire/pire/approx_matching.h>
+#include <contrib/libs/pire/pire/approx_matching.h> 
 #include <contrib/libs/pire/pire/partition.h>
 #include <contrib/libs/pire/pire/vbitset.h>
 #include <contrib/libs/pire/pire/fsm.h>
@@ -250,12 +250,12 @@ public:
 		}
 	}
 
-	explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
+	explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) 
 		: need_actions(needActions)
 	{
-		if (distance) {
-			fsm = CreateApproxFsm(fsm, distance);
-		}
+		if (distance) { 
+			fsm = CreateApproxFsm(fsm, distance); 
+		} 
 		if (removeEpsilons)
 			fsm.RemoveEpsilons();
 		fsm.Sparse(!removeEpsilons);
@@ -357,7 +357,7 @@ private:
 
 	bool need_actions;
 	TVector<TVector<Action>> m_actionsvec;
-	static const SlowScanner& Null();
+	static const SlowScanner& Null(); 
 
 	template<class T> void alloc(T*& p, size_t size)
 	{
@@ -416,17 +416,17 @@ private:
 	friend void BuildScanner<SlowScanner>(const Fsm&, SlowScanner&);
 };
 
-template<>
-inline SlowScanner Fsm::Compile(size_t distance) {
-	return SlowScanner(*this, false, true, distance);
-}
-
-inline const SlowScanner& SlowScanner::Null()
-{
-	static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>();
-	return n;
-}
-
+template<> 
+inline SlowScanner Fsm::Compile(size_t distance) { 
+	return SlowScanner(*this, false, true, distance); 
+} 
+ 
+inline const SlowScanner& SlowScanner::Null() 
+{ 
+	static const SlowScanner n = Fsm::MakeFalse().Compile<SlowScanner>(); 
+	return n; 
+} 
+ 
 #ifndef PIRE_DEBUG
 /// A specialization of Run(), since its state is much heavier than other ones
 /// and we thus want to avoid copying states.
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index e7206de9ad4..fd7ac68a2cf 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -37,11 +37,11 @@ Y_UNIT_TEST_SUITE(TRegExp) {
         UNIT_ASSERT(!TMatcher(TFsm("qw", TFsm::TOptions().SetCaseInsensitive(false))).Match("Qw").Final());
     }
 
-    Y_UNIT_TEST(UnicodeCase) {
-        UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final());
-        UNIT_ASSERT(!TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(false))).Match("Ab").Final());
-    }
-
+    Y_UNIT_TEST(UnicodeCase) { 
+        UNIT_ASSERT(TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(true))).Match("Ab").Final()); 
+        UNIT_ASSERT(!TMatcher(TFsm("\\x{61}\\x{62}", TFsm::TOptions().SetCaseInsensitive(false))).Match("Ab").Final()); 
+    } 
+ 
     Y_UNIT_TEST(Utf) {
         NRegExp::TFsmBase::TOptions opts;
         opts.Charset = CODES_UTF8;
diff --git a/library/cpp/regex/pire/ut/ya.make b/library/cpp/regex/pire/ut/ya.make
index 8776695f405..bf068415862 100644
--- a/library/cpp/regex/pire/ut/ya.make
+++ b/library/cpp/regex/pire/ut/ya.make
@@ -30,9 +30,9 @@ SRCS(
     count_ut.cpp
     glyph_ut.cpp
     easy_ut.cpp
-    read_unicode_ut.cpp
+    read_unicode_ut.cpp 
     regexp_ut.cpp
-    approx_matching_ut.cpp
+    approx_matching_ut.cpp 
 )
 
 SIZE(MEDIUM)
diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make
index c857e6d18bc..88afde3c61d 100644
--- a/library/cpp/regex/pire/ya.make
+++ b/library/cpp/regex/pire/ya.make
@@ -24,9 +24,9 @@ SRCS(
     extra/glyphs.cpp
     re_lexer.cpp
     re_parser.y
-    read_unicode.cpp
+    read_unicode.cpp 
     extraencodings.cpp
-    approx_matching.cpp
+    approx_matching.cpp 
     half_final_fsm.cpp
     minimize.h
 )
author	karina-usm <[email protected]>	2022-02-10 16:48:05 +0300
committer	Daniil Cherednik <[email protected]>	2022-02-10 16:48:05 +0300
commit	62517661cde7aa7c93efe0281ec48eeb70ea420c (patch)
tree	066f34bb401d85fa43842442fb0d888ffb2a305f
parent	5f8a2ce7b1dc3b3e1fae197610f189e7ed1d5723 (diff)