aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
blob: 47a94f53ed0a9b3db8084957598e0f1b3827a75c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "regex.h"

#include <library/cpp/testing/unittest/registar.h>

#include <contrib/libs/re2/re2/re2.h>

using namespace NSQLTranslationV1;

namespace {
    auto grammar = NSQLReflect::LoadLexerGrammar();
    auto defaultRegexes = MakeRegexByOtherNameMap(grammar, /* ansi = */ false);
    auto ansiRegexes = MakeRegexByOtherNameMap(grammar, /* ansi = */ true);

    void CheckRegex(bool ansi, const TStringBuf name, const TStringBuf expected) {
        const auto& regexes = ansi ? ansiRegexes : defaultRegexes;
        const TString regex = regexes.at(name);

        const RE2 re2(regex);
        Y_ENSURE(re2.ok(), re2.error());

        UNIT_ASSERT_VALUES_EQUAL(regex, expected);
    }

} // namespace

Y_UNIT_TEST_SUITE(SqlRegexTests) {
    // Every CheckRegex-based case pins the exact regex text stored under one
    // name, for either the default or the ANSI mode.

    Y_UNIT_TEST(StringValue) {
        const TStringBuf expected =
            R"(((((\'([^'\\]|(\\(.|\n)))*\'))|((\"([^"\\]|(\\(.|\n)))*\"))|((\@\@(.|\n)*?\@\@)+\@?))([sS]|[uU]|[yY]|[jJ]|[pP]([tT]|[bB]|[vV])?)?))";
        CheckRegex(/* ansi = */ false, "STRING_VALUE", expected);
    }

    Y_UNIT_TEST(AnsiStringValue) {
        const TStringBuf expected =
            R"(((((\'([^']|(\'\'))*\'))|((\"([^"]|(\"\"))*\"))|((\@\@(.|\n)*?\@\@)+\@?))([sS]|[uU]|[yY]|[jJ]|[pP]([tT]|[bB]|[vV])?)?))";
        CheckRegex(/* ansi = */ true, "STRING_VALUE", expected);
    }

    Y_UNIT_TEST(IdPlain) {
        const TStringBuf expected =
            R"(([a-z]|[A-Z]|_)([a-z]|[A-Z]|_|[0-9])*)";
        CheckRegex(/* ansi = */ false, "ID_PLAIN", expected);
    }

    Y_UNIT_TEST(IdQuoted) {
        const TStringBuf expected =
            R"(\`(\\(.|\n)|\`\`|[^`\\])*\`)";
        CheckRegex(/* ansi = */ false, "ID_QUOTED", expected);
    }

    Y_UNIT_TEST(Digits) {
        const TStringBuf expected =
            R"(([0-9]+)|(0[xX]([0-9]|[a-f]|[A-F])+)|(0[oO][0-8]+)|(0[bB](0|1)+))";
        CheckRegex(/* ansi = */ false, "DIGITS", expected);
    }

    Y_UNIT_TEST(Real) {
        const TStringBuf expected =
            R"((([0-9]+)\.[0-9]*([eE](\+|\-)?([0-9]+))?|([0-9]+)([eE](\+|\-)?([0-9]+)))([fF]|[pP]([fF](4|8)|[nN])?)?)";
        CheckRegex(/* ansi = */ false, "REAL", expected);
    }

    Y_UNIT_TEST(Ws) {
        const TStringBuf expected =
            R"(( |\r|\t|\n))";
        CheckRegex(/* ansi = */ false, "WS", expected);
    }

    Y_UNIT_TEST(Comment) {
        const TStringBuf expected =
            R"(((\/\*(.|\n)*?\*\/)|(\-\-[^\n\r]*(\r\n?|\n|$))))";
        CheckRegex(/* ansi = */ false, "COMMENT", expected);
    }

    Y_UNIT_TEST(AnsiCommentSameAsDefault) {
        // The ANSI entry is expected to equal the default one
        // because of the recursive definition.
        UNIT_ASSERT_VALUES_EQUAL(ansiRegexes.at("COMMENT"), defaultRegexes.at("COMMENT"));
    }

} // Y_UNIT_TEST_SUITE(SqlRegexTests)