diff options
author | smikler <smikler@yandex-team.ru> | 2022-02-10 16:49:32 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:32 +0300 |
commit | e4f0fd4ab53ca40eb91e750cf3e7f76c21e930db (patch) | |
tree | afee3c8173a0960bf439959f26e7624d1212e11a /library/cpp/regex | |
parent | 1503061b80644305b2e6dd1327b57118e35ebd31 (diff) | |
download | ydb-e4f0fd4ab53ca40eb91e750cf3e7f76c21e930db.tar.gz |
Restoring authorship annotation for <smikler@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.cpp | 210 | ||||
-rw-r--r-- | library/cpp/regex/pire/pcre2pire.h | 30 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 20 | ||||
-rw-r--r-- | library/cpp/regex/pire/ya.make | 2 |
4 files changed, 131 insertions, 131 deletions
diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp index f788beb85f..bb5c79634d 100644 --- a/library/cpp/regex/pire/pcre2pire.cpp +++ b/library/cpp/regex/pire/pcre2pire.cpp @@ -1,110 +1,110 @@ -#include "pcre2pire.h" -#include <util/generic/vector.h> -#include <util/generic/yexception.h> - +#include "pcre2pire.h" +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + TString Pcre2Pire(const TString& src) { TVector<char> result; result.reserve(src.size() + 1); - - enum EState { - S_SIMPLE, - S_SLASH, - S_BRACE, - S_EXPECT_Q, - S_QUESTION, - S_P, - S_COMMA, - S_IN, - }; - - EState state = S_SIMPLE; - + + enum EState { + S_SIMPLE, + S_SLASH, + S_BRACE, + S_EXPECT_Q, + S_QUESTION, + S_P, + S_COMMA, + S_IN, + }; + + EState state = S_SIMPLE; + for (ui32 i = 0; i < src.size(); ++i) { - const char c = src[i]; - - switch (state) { - case S_SIMPLE: - if (c == '\\') { - state = S_SLASH; - } else if (c == '(') { - state = S_BRACE; - } else if (c == '*' || c == '?') { - state = S_EXPECT_Q; - result.push_back(c); - } else { + const char c = src[i]; + + switch (state) { + case S_SIMPLE: + if (c == '\\') { + state = S_SLASH; + } else if (c == '(') { + state = S_BRACE; + } else if (c == '*' || c == '?') { + state = S_EXPECT_Q; + result.push_back(c); + } else { if (c == ')' && result.size() > 0 && result.back() == '(') { - // eliminating "()" - result.pop_back(); - } else { - result.push_back(c); - } - } - break; - case S_SLASH: - state = S_SIMPLE; - if (c == ':' || c == '=' || c == '#' || c == '&') { - result.push_back(c); - } else { - result.push_back('\\'); - --i; - } - break; - case S_BRACE: - if (c == '?') { - state = S_QUESTION; - } else { - state = S_COMMA; - --i; - } - break; - case S_EXPECT_Q: - state = S_SIMPLE; - if (c != '?') { - --i; - } - break; - case S_QUESTION: - if (c == 'P') { - state = S_P; - } else if (c == ':' || c == '=') { - state = S_COMMA; - } else { - ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; - } - break; - case S_P: - if (c == '<') { - state = S_IN; - } else { - ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; - } - break; - case S_IN: - if (c == '>') { - state = S_COMMA; - } else { - // nothing to do - } - break; - case S_COMMA: - state = S_SIMPLE; - if (c == ')') { - // nothing to do - } else { - result.push_back('('); - --i; - } - break; - default: - ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!"; - } - } - - if (state != S_SIMPLE && state != S_EXPECT_Q) { - ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!"; - } - - result.push_back('\0'); - - return &result[0]; -} + // eliminating "()" + result.pop_back(); + } else { + result.push_back(c); + } + } + break; + case S_SLASH: + state = S_SIMPLE; + if (c == ':' || c == '=' || c == '#' || c == '&') { + result.push_back(c); + } else { + result.push_back('\\'); + --i; + } + break; + case S_BRACE: + if (c == '?') { + state = S_QUESTION; + } else { + state = S_COMMA; + --i; + } + break; + case S_EXPECT_Q: + state = S_SIMPLE; + if (c != '?') { + --i; + } + break; + case S_QUESTION: + if (c == 'P') { + state = S_P; + } else if (c == ':' || c == '=') { + state = S_COMMA; + } else { + ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; + } + break; + case S_P: + if (c == '<') { + state = S_IN; + } else { + ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; + } + break; + case S_IN: + if (c == '>') { + state = S_COMMA; + } else { + // nothing to do + } + break; + case S_COMMA: + state = S_SIMPLE; + if (c == ')') { + // nothing to do + } else { + result.push_back('('); + --i; + } + break; + default: + ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!"; + } + } + + if (state != S_SIMPLE && state != S_EXPECT_Q) { + ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!"; + } + + result.push_back('\0'); + + return &result[0]; +} diff --git a/library/cpp/regex/pire/pcre2pire.h b/library/cpp/regex/pire/pcre2pire.h index 46e45b9193..9a52e1c70f 100644 --- a/library/cpp/regex/pire/pcre2pire.h +++ b/library/cpp/regex/pire/pcre2pire.h @@ -1,19 +1,19 @@ #pragma once -// Author: smikler@yandex-team.ru - +// Author: smikler@yandex-team.ru + #include <util/generic/string.h> - -/* Converts pcre regular expression to pire compatible format: - * - replaces "\\#" with "#" - * - replaces "\\=" with "=" - * - replaces "\\:" with ":" - * - removes "?P<...>" - * - removes "?:" - * - removes "()" recursively - * - replaces "??" with "?" - * - replaces "*?" with "*" - * NOTE: - * - Not fully tested! - */ + +/* Converts pcre regular expression to pire compatible format: + * - replaces "\\#" with "#" + * - replaces "\\=" with "=" + * - replaces "\\:" with ":" + * - removes "?P<...>" + * - removes "?:" + * - removes "()" recursively + * - replaces "??" with "?" + * - replaces "*?" with "*" + * NOTE: + * - Not fully tested! + */ TString Pcre2Pire(const TString& src); diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index e7206de9ad..00211a6f3c 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -131,7 +131,7 @@ Y_UNIT_TEST_SUITE(TRegExp) { UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)")); } - + Y_UNIT_TEST(Capture5) { TCapturingFsm fsm("away\\.php\\?to=http:([^\"])+\""); TSearcher searcher(fsm); @@ -306,13 +306,13 @@ Y_UNIT_TEST_SUITE(TRegExp) { } Y_UNIT_TEST(Pcre2PireTest) { - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)*?fake"), "(fake)*fake"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>fake)"), "(fake)"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("fake\\#"), "fake#"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>)fake"), "fake"); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), ""); - UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))"); - } + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)*?fake"), "(fake)*fake"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>fake)"), "(fake)"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("fake\\#"), "fake#"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?P<field>)fake"), "fake"); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?P<field1>)(?P<field2>))"), ""); + UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:(?:fake))"), "((fake))"); + } } diff --git a/library/cpp/regex/pire/ya.make b/library/cpp/regex/pire/ya.make index c857e6d18b..cc42ecc7f9 100644 --- a/library/cpp/regex/pire/ya.make +++ b/library/cpp/regex/pire/ya.make @@ -12,7 +12,7 @@ CFLAGS(-DPIRE_NO_CONFIG) SRCDIR(contrib/libs/pire/pire) SRCS( - pcre2pire.cpp + pcre2pire.cpp classes.cpp encoding.cpp fsm.cpp |