aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/pire/pcre2pire.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/regex/pire/pcre2pire.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/regex/pire/pcre2pire.cpp')
-rw-r--r--library/cpp/regex/pire/pcre2pire.cpp110
1 files changed, 110 insertions, 0 deletions
diff --git a/library/cpp/regex/pire/pcre2pire.cpp b/library/cpp/regex/pire/pcre2pire.cpp
new file mode 100644
index 0000000000..f788beb85f
--- /dev/null
+++ b/library/cpp/regex/pire/pcre2pire.cpp
@@ -0,0 +1,110 @@
+#include "pcre2pire.h"
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+
+// Rewrites a PCRE-style pattern into a form without the PCRE-only constructs
+// handled below. The input is scanned once by a small state machine which:
+//   - turns the group openers "(?:", "(?=" and "(?P<name>" into a plain "(";
+//   - drops a group entirely when nothing was emitted between its "(" and ")";
+//   - removes a '?' that directly follows '*', '+' or '?' (PCRE lazy-quantifier
+//     suffix);
+//   - emits "\:", "\=", "\#" and "\&" without the backslash;
+//   - copies any other escape "\x" verbatim as one unit, so the escaped
+//     character is never interpreted as a group opener or a quantifier.
+//
+// Character classes ("[...]") get no special treatment: their contents are
+// scanned like any other part of the pattern.
+//
+// Throws yexception when "(?" is followed by anything other than ':', '=' or
+// "P<", and when the pattern ends in the middle of a construct (trailing lone
+// backslash, unfinished group opener).
+TString Pcre2Pire(const TString& src) {
+    TVector<char> result;
+    result.reserve(src.size());
+
+    // Size of `result` right after the most recent "\x" pair was appended.
+    // A trailing '(' located below this mark belongs to an escape and must not
+    // be treated as the opener of an empty group.
+    size_t escapedEnd = 0;
+
+    enum EState {
+        S_SIMPLE,   // ordinary text
+        S_SLASH,    // just saw a backslash
+        S_BRACE,    // just saw an unescaped '('
+        S_EXPECT_Q, // just emitted a quantifier; a following '?' is dropped
+        S_QUESTION, // inside "(?"
+        S_P,        // inside "(?P"
+        S_COMMA,    // group opener fully read, '(' not emitted yet
+        S_IN,       // inside the "<name>" of "(?P<name>"
+    };
+
+    EState state = S_SIMPLE;
+
+    size_t i = 0;
+    while (i < src.size()) {
+        const char c = src[i];
+        // Cleared by states that only change state and want the same
+        // character to be looked at again in the new state.
+        bool consumed = true;
+
+        switch (state) {
+            case S_SIMPLE:
+                if (c == '\\') {
+                    state = S_SLASH;
+                } else if (c == '(') {
+                    state = S_BRACE;
+                } else if (c == '*' || c == '+' || c == '?') {
+                    state = S_EXPECT_Q;
+                    result.push_back(c);
+                } else if (c == ')' && result.size() > escapedEnd && result.back() == '(') {
+                    // eliminating "()"
+                    result.pop_back();
+                } else {
+                    result.push_back(c);
+                }
+                break;
+            case S_SLASH:
+                state = S_SIMPLE;
+                if (c == ':' || c == '=' || c == '#' || c == '&') {
+                    // These are emitted without the backslash.
+                    result.push_back(c);
+                } else {
+                    // Keep the escape intact and consume the escaped character
+                    // here, so that "\(", "\*", "\?" or "\\" are not re-read
+                    // as syntax.
+                    result.push_back('\\');
+                    result.push_back(c);
+                    escapedEnd = result.size();
+                }
+                break;
+            case S_BRACE:
+                if (c == '?') {
+                    state = S_QUESTION;
+                } else {
+                    state = S_COMMA;
+                    consumed = false;
+                }
+                break;
+            case S_EXPECT_Q:
+                state = S_SIMPLE;
+                if (c != '?') {
+                    consumed = false;
+                }
+                break;
+            case S_QUESTION:
+                if (c == 'P') {
+                    state = S_P;
+                } else if (c == ':' || c == '=') {
+                    state = S_COMMA;
+                } else {
+                    ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!";
+                }
+                break;
+            case S_P:
+                if (c == '<') {
+                    state = S_IN;
+                } else {
+                    ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!";
+                }
+                break;
+            case S_IN:
+                // Skip the group name up to and including the closing '>'.
+                if (c == '>') {
+                    state = S_COMMA;
+                }
+                break;
+            case S_COMMA:
+                state = S_SIMPLE;
+                if (c != ')') {
+                    // The group has content: emit its '(' now and re-read c.
+                    result.push_back('(');
+                    consumed = false;
+                }
+                // Otherwise the group is empty: neither '(' nor ')' is emitted.
+                break;
+            default:
+                ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!";
+        }
+
+        if (consumed) {
+            ++i;
+        }
+    }
+
+    if (state != S_SIMPLE && state != S_EXPECT_Q) {
+        ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!";
+    }
+
+    if (result.empty()) {
+        return TString();
+    }
+
+    // Construct from pointer + length so that an embedded '\0' does not
+    // truncate the converted pattern.
+    return TString(result.data(), result.size());
+}