blob: cde028cc599852703f21bde09a1fcd9bd0fad555 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
#pragma once
#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/maybe.h>
#include <util/generic/ylimits.h>
#include <functional>
namespace NSQLTranslationV1 {
// A single token as reported by a generic lexer.
// Name and Content are TStringBuf views, i.e. they do not own their data;
// the storage they point into must outlive the token.
struct TGenericToken {
// Literal "<ERROR>".
// NOTE(review): presumably used as the Name of a token covering input that
// no rule recognized — confirm against the lexer implementation.
static constexpr const char* Error = "<ERROR>";
// Token kind. NOTE(review): presumably the TokenName of the rule that
// matched, or Error — confirm.
TStringBuf Name;
// Token text. NOTE(review): presumably a slice of the tokenized input — confirm.
TStringBuf Content;
// Position of the token, measured in bytes (not characters).
// NOTE(review): presumably an offset from the start of the input text — confirm.
size_t Begin = 0; // In bytes
};
// Abstract lexer interface.
// Derives from TThrRefBase so that instances can be held through the
// intrusive smart pointer alias TPtr.
class IGenericLexer: public TThrRefBase {
public:
// Intrusive pointer type used to pass lexers around.
using TPtr = TIntrusivePtr<IGenericLexer>;
// Callback type that receives a token by rvalue reference, so the callee
// may move from it.
using TTokenCallback = std::function<void(TGenericToken&& token)>;
// Largest representable size_t; the default value of maxErrors in Tokenize.
static constexpr size_t MaxErrorsLimit = Max<size_t>();
virtual ~IGenericLexer() = default;
// Tokenizes `text`. The method is const, so it can be called on a const lexer.
//   text      - input to tokenize (non-owning view).
//   onNext    - token callback. NOTE(review): presumably invoked once per
//               produced token, in input order — confirm with the implementation.
//   maxErrors - error budget; defaults to MaxErrorsLimit.
//               NOTE(review): presumably tokenization stops once this many
//               errors have been seen — confirm.
// Returns a bool. NOTE(review): presumably true when no errors occurred —
// confirm with the implementation.
virtual bool Tokenize(
TStringBuf text,
const TTokenCallback& onNext,
size_t maxErrors = IGenericLexer::MaxErrorsLimit) const = 0;
};
// Matching function for one token kind: takes a view named `prefix` and
// returns an optional view.
// NOTE(review): presumably `prefix` is the not-yet-consumed remainder of the
// input, and the result is the matched leading part of it (empty TMaybe when
// the rule does not match) — confirm against Compile / the lexer.
using TTokenMatcher = std::function<TMaybe<TStringBuf>(TStringBuf prefix)>;
// One grammar rule: a token name paired with the matcher that recognizes it.
struct TTokenRule {
// Name of the token kind (owned string).
// NOTE(review): presumably what ends up in TGenericToken::Name — confirm.
TString TokenName;
// Matcher used to recognize this token kind.
TTokenMatcher Match;
};
// A grammar is a sequence of token rules.
// NOTE(review): rule order may define matching priority — confirm against
// the lexer implementation.
using TGenericLexerGrammar = TVector<TTokenRule>;
// Description of a regular expression that Compile turns into a TTokenMatcher.
struct TRegexPattern {
// Main pattern text.
TString Body;
// Additional pattern, empty by default.
// NOTE(review): presumably must match right after Body without being part
// of the returned token (a lookahead) — confirm against Compile.
TString After = "";
// Case-insensitivity flag; off by default.
bool IsCaseInsensitive = false;
};
// Builds a TTokenMatcher from a regex description.
// Defined outside this header.
// NOTE(review): the regex dialect and the behavior on an invalid pattern are
// not visible here — check the implementation.
TTokenMatcher Compile(const TRegexPattern& regex);
// Creates a lexer for the given grammar and returns it as an intrusive pointer.
// The grammar is taken by value, so callers may std::move it in to avoid a copy.
// Defined outside this header.
IGenericLexer::TPtr MakeGenericLexer(TGenericLexerGrammar grammar);
// Convenience wrapper that returns tokens as a vector instead of through a callback.
// NOTE(review): presumably calls lexer->Tokenize(text, ...) and collects every
// reported token; error handling is not visible here — confirm.
// The returned tokens hold TStringBuf views, so the storage they refer to
// must stay alive while the tokens are used.
// NOTE(review): `lexer` is taken by non-const lvalue reference, so temporaries
// and const pointers cannot be passed; a const reference looks sufficient,
// but changing it requires updating the out-of-view definition as well.
TVector<TGenericToken> Tokenize(IGenericLexer::TPtr& lexer, TStringBuf text);
} // namespace NSQLTranslationV1
|