summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/ide/pure_ast/parser.cpp
blob: 4b9fd1e8d59a2662ad960ba06ec18ccc40914b5d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include "parser.h"

#include "parse_tree.h"

#include <yql/essentials/parser/antlr_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
#include <yql/essentials/parser/antlr_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>

#include <util/system/yassert.h>
#include <util/charset/utf8.h>
#include <util/string/builder.h>

namespace NSQLPureAST {

namespace {

// Error strategy that customizes antlr4::DefaultErrorStrategy in exactly one
// place: singleTokenDeletion() unconditionally returns nullptr, whatever the
// recognizer state is.
// NOTE(review): in the ANTLR runtime a null result from this hook presumably
// means "no token was deleted", i.e. this recovery step is switched off —
// confirm against the DefaultErrorStrategy documentation.
class TErrorStrategy: public antlr4::DefaultErrorStrategy {
public:
    // The recognizer argument is intentionally ignored.
    antlr4::Token* singleTokenDeletion(antlr4::Parser*) override {
        return nullptr;
    }
};

template <bool IsAnsiLexer>
class TParser: public IParser {
public:
    using TLexer = std::conditional_t<
        IsAnsiLexer,
        NALAAnsiAntlr4::SQLv1Antlr4Lexer,
        NALADefaultAntlr4::SQLv1Antlr4Lexer>;

    TParser()
        : Chars_()
        , Lexer_(&Chars_)
        , Tokens_(&Lexer_)
        , Parser_(&Tokens_)
    {
        Lexer_.removeErrorListeners();
        Parser_.removeErrorListeners();
        Parser_.setErrorHandler(std::make_shared<TErrorStrategy>());
    }

    TParseTree Parse(TStringBuf text) override {
        SQLv1::Sql_queryContext* sqlQuery = ParseText(text);
        Y_ENSURE(sqlQuery);

#ifdef YQL_DEBUG_GLOBAL_ANALYSIS
        Cerr << DebugDisplay(Tokens_) << Endl;
        Cerr << DebugDisplay(sqlQuery) << Endl;
#endif

        return {
            .Text = text,
            .Tokens = &Tokens_,
            .Parser = &Parser_,
            .SqlQuery = sqlQuery,
        };
    }

private:
    SQLv1::Sql_queryContext* ParseText(TStringBuf text) {
        Chars_.load(text.Data(), text.Size(), /* lenient = */ false);
        Lexer_.reset();
        Tokens_.setTokenSource(&Lexer_);
        Parser_.reset();
        return Parser_.sql_query();
    }

    TString DebugDisplay(antlr4::CommonTokenStream& tokens) {
        TStringBuilder sb;
        for (size_t i = 0; i < tokens.size(); ++i) {
            sb << DebugDisplay(tokens.get(i)) << '\n';
        }
        return sb;
    }

    TString DebugDisplay(const antlr4::Token* token) {
        return TStringBuilder()
               << token->getStartIndex()
               << "\t"
               << token->getStopIndex()
               << "\t"
               << Parser_.getVocabulary().getSymbolicName(token->getType());
    }

    TString DebugDisplay(antlr4::tree::ParseTree* tree) {
        return tree->toStringTree(&Parser_, /*pretty=*/true);
    }

    antlr4::ANTLRInputStream Chars_;
    TLexer Lexer_;
    antlr4::CommonTokenStream Tokens_;
    SQLv1 Parser_;
};

} // namespace

// Factory for IParser implementations.
//
// isAnsiLexer - true yields a TParser<true>, false yields a TParser<false>;
//               the flag is forwarded as TParser's IsAnsiLexer template
//               argument.
//
// Returns the newly created parser, allocated with MakeHolder and converted
// to IParser::TPtr.
IParser::TPtr MakeParser(bool isAnsiLexer) {
    // The two branches produce different concrete types, so each one returns
    // separately and converts to IParser::TPtr on its own.
    if (!isAnsiLexer) {
        return MakeHolder<TParser<false>>();
    }
    return MakeHolder<TParser<true>>();
}

} // namespace NSQLPureAST