#include "sql_highlighter.h"
#include <yql/essentials/sql/v1/lexer/regex/lexer.h>
#include <contrib/libs/re2/re2/re2.h>
#include <util/generic/deque.h>
#include <util/generic/maybe.h>
namespace NSQLHighlight {
using NSQLTranslationV1::Compile;
using NSQLTranslationV1::IGenericLexer;
using NSQLTranslationV1::TGenericLexerGrammar;
using NSQLTranslationV1::TGenericToken;
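
    // Single-character token names used to register each highlighting
    // unit kind in the generic lexer grammar.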
    THashMap<EUnitKind, TString> NamesByUnitKind = [] {
        THashMap<EUnitKind, TString> names;
        names[EUnitKind::Keyword] = "K";
        names[EUnitKind::Punctuation] = "P";
        names[EUnitKind::QuotedIdentifier] = "Q";
        names[EUnitKind::BindParamterIdentifier] = "B";
        names[EUnitKind::TypeIdentifier] = "T";
        names[EUnitKind::FunctionIdentifier] = "F";
        names[EUnitKind::Identifier] = "I";
        names[EUnitKind::Literal] = "L";
        names[EUnitKind::StringLiteral] = "S";
        names[EUnitKind::Comment] = "C";
        names[EUnitKind::Whitespace] = "W";
        names[EUnitKind::Error] = TGenericToken::Error;
        return names;
    }();
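
    // Reverse mapping: recovers the unit kind from a generic lexer token
    // name; Y_ENSURE guards against two kinds sharing the same name.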
    THashMap<TString, EUnitKind> UnitKindsByName = [] {
        THashMap<TString, EUnitKind> kinds;
        for (const auto& [kind, name] : NamesByUnitKind) {
            Y_ENSURE(!kinds.contains(name));
            kinds[name] = kind;
        }
        return kinds;
    }();
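
    // Builds a generic lexer grammar from the highlighting units, preferring
    // the ANSI-specific patterns when `ansi` is set and wrapping the single
    // comment pattern with ANSICommentMatcher in ANSI mode.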
    TGenericLexerGrammar ToGenericLexerGrammar(const THighlighting& highlighting, bool ansi) {
        using NSQLTranslationV1::ANSICommentMatcher;

        TGenericLexerGrammar grammar;
        for (const auto& unit : highlighting.Units) {
            const auto* patterns = &unit.Patterns;
            if (!unit.PatternsANSI.Empty() && ansi) {
                patterns = unit.PatternsANSI.Get();
            }

            const auto& name = NamesByUnitKind.at(unit.Kind);

            if (unit.Kind == EUnitKind::Comment && ansi) {
                Y_ENSURE(unit.Patterns.size() == 1);
                auto matcher = Compile(name, unit.Patterns[0]);
                grammar.emplace_back(ANSICommentMatcher(name, std::move(matcher)));
            }

            for (const auto& pattern : *patterns) {
                grammar.emplace_back(Compile(name, pattern));
            }
        }
        return grammar;
    }
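
    // Adapts a generic lexer to the IHighlighter interface: translates each
    // generic token into a highlight token and drops the trailing EOF token.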
    class THighlighter: public IHighlighter {
    public:
        explicit THighlighter(NSQLTranslationV1::IGenericLexer::TPtr lexer)
            : Lexer_(std::move(lexer))
        {
        }

        bool Tokenize(TStringBuf text, const TTokenCallback& onNext, size_t maxErrors) const override {
            const auto onNextToken = [&](NSQLTranslationV1::TGenericToken&& token) {
                if (token.Name == "EOF") {
                    return;
                }

                onNext({
                    .Kind = UnitKindsByName.at(token.Name),
                    .Begin = token.Begin,
                    .Length = token.Content.size(),
                });
            };

            return Lexer_->Tokenize(text, onNextToken, maxErrors);
        }

    private:
        NSQLTranslationV1::IGenericLexer::TPtr Lexer_;
    };
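
    // Keeps a default and an ANSI highlighter and picks one per query: the
    // "--!ansi_lexer" pragma at the start of the text selects the ANSI dialect.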
    class TCombinedHighlighter: public IHighlighter {
    public:
        explicit TCombinedHighlighter(const THighlighting& highlighting)
            : LexerDefault_(NSQLTranslationV1::MakeGenericLexer(
                  ToGenericLexerGrammar(highlighting, /* ansi = */ false)))
            , LexerANSI_(NSQLTranslationV1::MakeGenericLexer(
                  ToGenericLexerGrammar(highlighting, /* ansi = */ true)))
        {
        }

        bool Tokenize(TStringBuf text, const TTokenCallback& onNext, size_t maxErrors) const override {
            return Alt(text).Tokenize(text, onNext, maxErrors);
        }

    private:
        const IHighlighter& Alt(TStringBuf text) const {
            if (text.After('-').StartsWith("-!ansi_lexer")) {
                return LexerANSI_;
            }
            return LexerDefault_;
        }

        THighlighter LexerDefault_;
        THighlighter LexerANSI_;
    };
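
    // Convenience helper: collects all tokens produced for `text` into a vector.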
    TVector<TToken> Tokenize(IHighlighter& highlighter, TStringBuf text) {
        TVector<TToken> tokens;
        highlighter.Tokenize(text, [&](TToken&& token) {
            tokens.emplace_back(std::move(token));
        });
        return tokens;
    }
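
    // Public factory for the combined (default + ANSI) highlighter.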
    IHighlighter::TPtr MakeHighlighter(const THighlighting& highlighting) {
        return IHighlighter::TPtr(new TCombinedHighlighter(highlighting));
    }

} // namespace NSQLHighlight