aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/pire/pcre2pire.cpp
blob: bb5c79634debc759f9014cc8ec0ee717de7e5b08 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include "pcre2pire.h" 
#include <util/generic/vector.h> 
#include <util/generic/yexception.h> 
 
TString Pcre2Pire(const TString& src) {
    TVector<char> result;
    result.reserve(src.size() + 1);
 
    enum EState { 
        S_SIMPLE, 
        S_SLASH, 
        S_BRACE, 
        S_EXPECT_Q, 
        S_QUESTION, 
        S_P, 
        S_COMMA, 
        S_IN, 
    }; 
 
    EState state = S_SIMPLE; 
 
    for (ui32 i = 0; i < src.size(); ++i) {
        const char c = src[i]; 
 
        switch (state) { 
            case S_SIMPLE: 
                if (c == '\\') { 
                    state = S_SLASH; 
                } else if (c == '(') { 
                    state = S_BRACE; 
                } else if (c == '*' || c == '?') { 
                    state = S_EXPECT_Q; 
                    result.push_back(c); 
                } else { 
                    if (c == ')' && result.size() > 0 && result.back() == '(') {
                        // eliminating "()" 
                        result.pop_back(); 
                    } else { 
                        result.push_back(c); 
                    } 
                } 
                break; 
            case S_SLASH: 
                state = S_SIMPLE; 
                if (c == ':' || c == '=' || c == '#' || c == '&') { 
                    result.push_back(c); 
                } else { 
                    result.push_back('\\'); 
                    --i; 
                } 
                break; 
            case S_BRACE: 
                if (c == '?') { 
                    state = S_QUESTION; 
                } else { 
                    state = S_COMMA; 
                    --i; 
                } 
                break; 
            case S_EXPECT_Q: 
                state = S_SIMPLE; 
                if (c != '?') { 
                    --i; 
                } 
                break; 
            case S_QUESTION: 
                if (c == 'P') { 
                    state = S_P; 
                } else if (c == ':' || c == '=') { 
                    state = S_COMMA; 
                } else { 
                    ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; 
                } 
                break; 
            case S_P: 
                if (c == '<') { 
                    state = S_IN; 
                } else { 
                    ythrow yexception() << "Pcre to pire convertaion failed: unexpected symbol '" << c << "' at posiotion " << i << "!"; 
                } 
                break; 
            case S_IN: 
                if (c == '>') { 
                    state = S_COMMA; 
                } else { 
                    // nothing to do 
                } 
                break; 
            case S_COMMA: 
                state = S_SIMPLE; 
                if (c == ')') { 
                    // nothing to do 
                } else { 
                    result.push_back('('); 
                    --i; 
                } 
                break; 
            default: 
                ythrow yexception() << "Pcre to pire convertaion failed: unexpected automata state!"; 
        } 
    } 
 
    if (state != S_SIMPLE && state != S_EXPECT_Q) { 
        ythrow yexception() << "Pcre to pire convertaion failed: unexpected end of expression!"; 
    } 
 
    result.push_back('\0'); 
 
    return &result[0]; 
}