about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/regex
diff options
context:
space:
mode:
author: smikler <smikler@yandex-team.ru>, 2022-02-10 16:49:32 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru>, 2022-02-10 16:49:32 +0300
commit: e4f0fd4ab53ca40eb91e750cf3e7f76c21e930db (patch)
tree: afee3c8173a0960bf439959f26e7624d1212e11a /library/cpp/regex
parent: 1503061b80644305b2e6dd1327b57118e35ebd31 (diff)
download: ydb-e4f0fd4ab53ca40eb91e750cf3e7f76c21e930db.tar.gz
Restoring authorship annotation for <smikler@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- library/cpp/regex/pire/pcre2pire.cpp 210
-rw-r--r-- library/cpp/regex/pire/pcre2pire.h 30
-rw-r--r-- library/cpp/regex/pire/ut/regexp_ut.cpp 20
-rw-r--r-- library/cpp/regex/pire/ya.make 2
4 files changed, 131 insertions, 131 deletions
diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp
index f788beb85f..bb5c79634d 100644
--- a/library/cpp/regex/pire/pcre2pire.cpp
+++ b/library/cpp/regex/pire/pcre2pire.cpp
@@ -1,110 +1,110 @@
-#include "pcre2pire.h"
-#include <util/generic/vector.h>
-#include <util/generic/yexception.h>
-
+#include "pcre2pire.h"
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+
TString Pcre2Pire(const TString& src) {
TVector<char> result;
result.reserve(src.size() + 1);
-
- enum EState {
- S_SIMPLE,
- S_SLASH,
- S_BRACE,
- S_EXPECT_Q,
- S_QUESTION,
- S_P,
- S_COMMA,
- S_IN,
- };
-
- EState state = S_SIMPLE;
-
+
+ enum EState {
+ S_SIMPLE,
+ S_SLASH,
+ S_BRACE,
+ S_EXPECT_Q,
+ S_QUESTION,
+ S_P,
+ S_COMMA,
+ S_IN,
+ };
+
+ EState state = S_SIMPLE;
+
for (ui32 i = 0; i < src.size(); ++i) {
- const char c = src[i];
-
- switch (state) {
- case S_SIMPLE:
- if (c == '\\') {
- state = S_SLASH;
- } else if (c == '(') {
- state = S_BRACE;
- } else if (c == '*' || c == '?') {
- state = S_EXPECT_Q;
- result.push_back(c);
- } else {
+ const char c = src[i];
+
+ switch (state) {
+ case S_SIMPLE:
+ if (c == '\\') {
+ state = S_SLASH;
+ } else if (c == '(') {
+ state = S_BRACE;
+ } else if (c == '*' || c == '?') {
+ state = S_EXPECT_Q;
+ result.push_back(c);
+ } else {
if (c == ')' && result.size() > 0 && result.back() == '(') {
- // eliminating "()"
- result.pop_back();
- } else {
- result.push_back(c);
- }
- }
- break;
- case S_SLASH:
- state = S_SIMPLE;
- if (c == ':' || c == '=' || c == '#' || c == '&') {
- result.push_back(c);
- } else {
- result.push_back('\\');
- --i;
- }
- break;
- case S_BRACE:
- if (c == '?') {
- state = S_QUESTION;
- } else {
- state = S_COMMA;
- --i;
- }
- break;
- case S_EXPECT_Q:
- state = S_SIMPLE;
- if (c != '?') {
- --i;
- }
- break;
- case S_QUESTION:
- if (c == 'P') {
- state = S_P;
- } else if (c == ':' || c == '=') {
- state = S_COMMA;
- } else {
- ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!";
- }
- break;
- case S_P:
- if (c == '<') {
- state = S_IN;
- } else {
- ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!";
- }
- break;
- case S_IN:
- if (c == '>') {
- state = S_COMMA;
- } else {
- // nothing to do
- }
- break;
- case S_COMMA:
- state = S_SIMPLE;
- if (c == ')') {
- // nothing to do
- } else {
- result.push_back('(');
- --i;
- }
- break;
- default:
- ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!";
- }
- }
-
- if (state != S_SIMPLE && state != S_EXPECT_Q) {
- ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!";
- }
-
- result.push_back('\0');
-
- return &result[0];
-}
+ // eliminating "()"
+ result.pop_back();
+ } else {
+ result.push_back(c);
+ }
+ }
+ break;
+ case S_SLASH:
+ state = S_SIMPLE;
+ if (c == ':' || c == '=' || c == '#' || c == '&') {
+ result.push_back(c);
+ } else {
+ result.push_back('\\');
+ --i;
+ }
+ break;
+ case S_BRACE:
+ if (c == '?') {
+ state = S_QUESTION;
+ } else {
+ state = S_COMMA;
+ --i;
+ }
+ break;
+ case S_EXPECT_Q:
+ state = S_SIMPLE;
+ if (c != '?') {
+ --i;
+ }
+ break;
+ case S_QUESTION:
+ if (c == 'P') {
+ state = S_P;
+ } else if (c == ':' || c == '=') {
+ state = S_COMMA;
+ } else {
+ ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!";
+ }
+ break;
+ case S_P:
+ if (c == '<') {
+ state = S_IN;
+ } else {
+ ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!";
+ }
+ break;
+ case S_IN:
+ if (c == '>') {
+ state = S_COMMA;
+ } else {
+ // nothing to do
+ }
+ break;
+ case S_COMMA:
+ state = S_SIMPLE;
+ if (c == ')') {
+ // nothing to do
+ } else {
+ result.push_back('(');
+ --i;
+ }
+ break;
+ default:
+ ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!";
+ }
+ }
+
+ if (state != S_SIMPLE && state != S_EXPECT_Q) {
+ ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!";
+ }
+
+ result.push_back('\0');
+
+ return &result[0];
+}
diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h
index 46e45b9193..9a52e1c70f 100644
--- a/library/cpp/regex/pire/pcre2pire.h
+++ b/library/cpp/regex/pire/pcre2pire.h
@@ -1,19 +1,19 @@
#pragma once
-// Author: smikler@yandex-team.ru
-
+// Author: smikler@yandex-team.ru
+
#include <util/generic/string.h>
-
-/* Converts pcre regular expression to pire compatible format:
- * - replaces "\\#" with "#"
- * - replaces "\\=" with "="
- * - replaces "\\:" with ":"
- * - removes "?P<...>"
- * - removes "?:"
- * - removes "()" recursively
- * - replaces "??" with "?"
- * - replaces "*?" with "*"
- * NOTE:
- * - Not fully tested!
- */
+
+/* Converts pcre regular expression to pire compatible format:
+ * - replaces "\\#" with "#"
+ * - replaces "\\=" with "="
+ * - replaces "\\:" with ":"
+ * - removes "?P<...>"
+ * - removes "?:"
+ * - removes "()" recursively
+ * - replaces "??" with "?"
+ * - replaces "*?" with "*"
+ * NOTE:
+ * - Not fully tested!
+ */
TString Pcre2Pire(const TString& src);
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index e7206de9ad..00211a6f3c 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -131,7 +131,7 @@ Y_UNIT_TEST_SUITE(TRegExp) {
UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)"));
}
-
+
Y_UNIT_TEST(Capture5) {
TCapturingFsm fsm("away\\.php\\?to=http:([^\"])+\"");
TSearcher searcher(fsm);
@@ -306,13 +306,13 @@ Y_UNIT_TEST_SUITE(TRegExp) {
}
Y_UNIT_TEST(Pcre2PireTest) {
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)*?fake"), "(fake)*fake");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>fake)"), "(fake)");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("fake\\#"), "fake#");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>)fake"), "fake");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), "");
- UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))");
- }
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)*?fake"), "(fake)*fake");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>fake)"), "(fake)");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("fake\\#"), "fake#");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>)fake"), "fake");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), "");
+ UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))");
+ }
}
diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make
index c857e6d18b..cc42ecc7f9 100644
--- a/library/cpp/regex/pire/ya.make
+++ b/library/cpp/regex/pire/ya.make
@@ -12,7 +12,7 @@ CFLAGS(-DPIRE_NO_CONFIG)
SRCDIR(contrib/libs/pire/pire)
SRCS(
- pcre2pire.cpp
+ pcre2pire.cpp
classes.cpp
encoding.cpp
fsm.cpp