// Origin path: root/yql/essentials/sql/v1/lexer/regex/generic.h
// Blob: 52d1498106cfe148659bcd38e9789f5de18a2211
#pragma once

#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/maybe.h>
#include <util/generic/ylimits.h>

#include <functional>

namespace NSQLTranslationV1 {

// A single token as reported by a generic lexer: a name, the token text
// and a byte offset.
struct TGenericToken {
    // Reserved name constant for error tokens.
    // NOTE(review): presumably stored in `Name` for input that no matcher
    // recognizes -- confirm against the lexer implementation.
    static constexpr const char* Error = "<ERROR>";

    // Name of the token (owned string).
    TString Name;
    // Text of the token. TStringBuf is a non-owning view, so the buffer it
    // refers to must outlive the token.
    // NOTE(review): presumably a slice of the text passed to the lexer -- confirm.
    TStringBuf Content;
    // Offset of the token, measured in bytes; defaults to 0.
    // NOTE(review): presumably relative to the start of the tokenized text -- confirm.
    size_t Begin = 0; // In bytes
};

// Abstract lexer interface. Derives from TThrRefBase so that instances can be
// held through the intrusive smart pointer alias TPtr.
class IGenericLexer: public TThrRefBase {
public:
    // Intrusive smart pointer to a lexer.
    using TPtr = TIntrusivePtr<IGenericLexer>;
    // Callback that receives each produced token by rvalue reference, so the
    // callee may move from it.
    using TTokenCallback = std::function<void(TGenericToken&& token)>;

    // Default value of `maxErrors`: the largest value representable in size_t.
    static constexpr size_t MaxErrorsLimit = Max<size_t>();

    virtual ~IGenericLexer() = default;

    // Tokenizes `text`, handing tokens to `onNext`.
    //
    // text      - input to tokenize (non-owning view).
    // onNext    - callback receiving the tokens.
    // maxErrors - error budget; defaults to MaxErrorsLimit.
    //
    // NOTE(review): the meaning of the bool result is not visible in this
    // header; presumably it reports whether tokenization finished without
    // exceeding `maxErrors` -- confirm against the implementation.
    // The method is const, so it cannot modify non-mutable lexer state.
    virtual bool Tokenize(
        TStringBuf text,
        const TTokenCallback& onNext,
        size_t maxErrors = IGenericLexer::MaxErrorsLimit) const = 0;
};

// A token matcher: a callable that takes a text view named `prefix` and
// returns an optional token.
// NOTE(review): presumably it yields an empty TMaybe when no token matches at
// the start of `prefix` -- confirm against Compile().
using TTokenMatcher = std::function<TMaybe<TGenericToken>(TStringBuf prefix)>;

// A lexer grammar is a sequence of token matchers.
// NOTE(review): whether the order of matchers affects which token wins is not
// visible in this header -- confirm against MakeGenericLexer().
using TGenericLexerGrammar = TVector<TTokenMatcher>;

// Description of a regular-expression based token pattern.
struct TRegexPattern {
    // Main pattern text; has no default and is expected to be set by the user.
    TString Body;
    // Additional pattern text, empty by default.
    // NOTE(review): presumably constrains what must follow `Body` -- confirm
    // against Compile().
    TString After = "";
    // Additional pattern text, empty by default.
    // NOTE(review): presumably constrains what must precede `Body` -- confirm
    // against Compile().
    TString Before = "";
    // When true the pattern is flagged as case-insensitive; false by default.
    bool IsCaseInsensitive = false;
};

// Builds a token matcher from a regex pattern description.
// name  - taken by value; NOTE(review): presumably becomes TGenericToken::Name
//         of the tokens produced by the returned matcher -- confirm.
// regex - pattern description to compile.
TTokenMatcher Compile(TString name, const TRegexPattern& regex);
// Combines several pattern descriptions into a single one.
// NOTE(review): the merge rule (e.g. alternation of bodies, handling of
// differing After/Before/IsCaseInsensitive values, empty input) is not visible
// in this header -- confirm against the implementation.
TRegexPattern Merged(TVector<TRegexPattern> patterns);

// Creates a lexer from the given grammar (taken by value) and returns it as an
// intrusive pointer to the IGenericLexer interface.
IGenericLexer::TPtr MakeGenericLexer(TGenericLexerGrammar grammar);

// Convenience helper returning tokens of `text` as a vector instead of
// delivering them through a callback.
// `lexer` is a non-const lvalue reference, so a temporary pointer cannot be
// passed directly.
// TGenericToken::Content is a non-owning view; NOTE(review): the returned
// tokens presumably refer into `text`, which must then outlive them -- confirm.
// NOTE(review): error handling (throwing vs. emitting error tokens) is not
// visible in this header -- confirm against the implementation.
TVector<TGenericToken> Tokenize(IGenericLexer::TPtr& lexer, TStringBuf text);

} // namespace NSQLTranslationV1