1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
#include <library/cpp/unicode/set/unicode_set_lexer.h>
#include <util/generic/yexception.h>
namespace NUnicode {
namespace NPrivate {
// Ragel specification of the tokenizer for the unicode set syntax.
//
// Building blocks defined in the machine:
//   id       - one Ragel alpha character followed by any number of alnum
//              characters or underscores
//   escape   - a single escape character: a percent sign or a backslash
//   category - an id enclosed in colons
//   xdigitN  - exactly N hexadecimal digits (N is 8, 4 or 2)
//   symbol   - any single character
// The three error actions are attached to these building blocks through Ragel
// local-error embeddings and report the failure by throwing yexception.
// NOTE(review): the exact states in which each error action fires follow from
// the embedding operators used; confirm against the Ragel guide before changing them.
//
// main is a Ragel scanner. Its pattern actions are expanded where GetToken
// places the generated exec code, so every return below leaves GetToken with
// the token type produced by YieldToken. The single-character fallback pattern
// is listed last.
%%{
machine unicode_set_lexer;
alphtype unsigned short;
action IncorrectCategoryError {
// Error action embedded into the category expression.
throw yexception() << "incorrect category";
}
action IncorrectEscapedCodepointError {
// Error action shared by the fixed-width hexadecimal digit expressions.
throw yexception() << "incorrect escaped codepoint";
}
action IncorrectQuotedPairError {
// Error action embedded into the symbol expression used after an escape character.
throw yexception() << "incorrect quoted pair";
}
id = alpha (alnum | '_')*;
escape = [%\\];
category = (':' id ':') <>^IncorrectCategoryError;
xdigit8 = xdigit{8} @^IncorrectEscapedCodepointError;
xdigit4 = xdigit{4} @^IncorrectEscapedCodepointError;
xdigit2 = xdigit{2} @^IncorrectEscapedCodepointError;
symbol = any @^IncorrectQuotedPairError;
main := |*
'^' => {
// Payload-free token.
return YieldToken(USTT_NEGATION);
};
'-' => {
// Payload-free token.
return YieldToken(USTT_RANGE);
};
'[' => {
// Payload-free token.
return YieldToken(USTT_LBRACKET);
};
']' => {
// Payload-free token.
return YieldToken(USTT_RBRACKET);
};
category => {
// Payload is the id between the colons: the leading colon is skipped and
// both colons are excluded from the length.
return YieldToken(USTT_CATEGORY, ts + 1, te - ts -2);
};
escape 'U' xdigit8 => {
// Payload is the 8 hexadecimal digits after the escape character and the letter U.
return YieldToken(USTT_CODEPOINT32, ts + 2, 8);
};
escape 'u' xdigit4 => {
// Payload is the 4 hexadecimal digits after the escape character and the letter u.
return YieldToken(USTT_CODEPOINT16, ts + 2, 4);
};
escape 'x' xdigit2 => {
// Payload is the 2 hexadecimal digits after the escape character and the letter x.
return YieldToken(USTT_CODEPOINT8, ts + 2, 2);
};
escape symbol => {
// Payload is the single character that follows the escape character.
return YieldToken(USTT_QUOTED_PAIR, *(ts + 1));
};
any => {
// Fallback: payload is the matched character itself.
return YieldToken(USTT_SYMBOL, *ts);
};
*|;
}%%
// Ragel emits the static data of the unicode_set_lexer machine here. The
// anonymous namespace keeps that generated data local to this translation unit.
namespace {
%% write data;
}
// Sets the lexer up to scan the given wide-character buffer from its first
// character.
//
// p and pe delimit the input taken from Data, and eof is set equal to pe, so
// the whole buffer is presented to the scanner as one final chunk.
// NOTE(review): eof(pe) and p(Data.data()) read members initialized earlier in
// this list; this relies on the member declaration order in the class, which
// is not visible here.
TUnicodeSetLexer::TUnicodeSetLexer(const TWtringBuf& data)
    : Data(data)
    , cs(0)
    , act(0)
    , ts(nullptr)
    , te(nullptr)
    , p(Data.data())
    , pe(Data.data() + Data.size())
    , eof(pe)
    , UseLast(false)
{
    // Generated initialization of the machine state variables.
    %% write init;
}
// Returns the type of the next token; the token itself is left in LastToken.
//
// When UseLast is set (it is raised outside this function), the type of the
// already stored LastToken is reported once more and the input is not touched.
// Otherwise the generated scanner runs; each matched pattern returns from
// inside it through YieldToken. USTT_EOS is reported when the scanner finishes
// without yielding a token.
EUnicodeSetTokenType TUnicodeSetLexer::GetToken() {
    if (UseLast) {
        // Serve the stored token exactly once, then fall back to scanning.
        const EUnicodeSetTokenType replayedType = LastToken.Type;
        UseLast = false;
        return replayedType;
    }

    // Generated scanner code; pattern actions return directly from here.
    %% write exec;

    // No pattern action returned, so there is nothing left to tokenize.
    return YieldToken(USTT_EOS);
}
// Stores a payload-free token of the given type in LastToken and returns that
// type, so scanner actions can write it as a single return statement.
EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type) {
// Update the scanner position before leaving; the generated code that would
// normally follow a pattern action does not run because the caller returns.
Reset();
LastToken = TUnicodeSetToken(type);
return type;
}
// Stores a token of the given type carrying one character in LastToken and
// returns the type. The character is passed by value, so it stays valid after
// Reset clears the token start pointer it was read through.
EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, wchar16 symbol) {
// Update the scanner position before leaving; see Reset.
Reset();
LastToken = TUnicodeSetToken(type, symbol);
return type;
}
// Stores a token of the given type carrying the character range that starts at
// dataBegin and spans dataSize characters, and returns the type.
// Scanner actions pass a pointer derived from the token start; Reset changes
// only the lexer pointers, so dataBegin still refers to the same characters.
// NOTE(review): whether TUnicodeSetToken copies the range or keeps a view into
// the input is not visible here.
EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, const wchar16* dataBegin, size_t dataSize) {
// Update the scanner position before leaving; see Reset.
Reset();
LastToken = TUnicodeSetToken(type, dataBegin, dataSize);
return type;
}
// Prepares the scanner variables for the next GetToken call after a token has
// been yielded.
//
// Scanner actions leave GetToken with a return, so the read position is moved
// to the end of the matched token by hand and the token boundaries are cleared.
//
// te can be null when no token is in flight: GetToken's end-of-stream path
// calls YieldToken, and te has already been cleared by the previous Reset or
// by initialization. In that case p is left where it is. Overwriting it with
// null would make a later GetToken call scan from a null pointer on a
// non-empty buffer; keeping p lets such a call report end of stream again.
void TUnicodeSetLexer::Reset() {
    if (te != nullptr) {
        // Continue right after the token that was just yielded.
        p = te;
    }
    ts = nullptr;
    te = nullptr;
}
} // NPrivate
} // NUnicode
|