diff options
author | vityaman <vityaman.dev@yandex.ru> | 2025-03-28 18:29:24 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-03-28 18:50:04 +0300 |
commit | 60b99f11bcb2386c2a1c36ffd2e96e69d0105dac (patch) | |
tree | 08c15d732484c6accf16658b09ed8f07286a9338 /yql/essentials/sql/v1/lexer/regex/regex_ut.cpp | |
parent | 1e214be59cbf130bee433c422b42f16148e5acff (diff) | |
download | ydb-60b99f11bcb2386c2a1c36ffd2e96e69d0105dac.tar.gz |
YQL-19616 Convert YQL lexer grammar to regexes
- [x] Parse YQL grammar to extract lexer grammar into `TLexerGrammar`.
- [x] Translate `TLexerGrammar` into regexes.
- [x] Implement a regex-based lexer, `TRegexLexer`, to test the validity of the generated regexes.
- [x] Test on `Default` syntax mode.
- [x] Test on `ANSI` syntax mode.
---
- Related to https://github.com/ydb-platform/ydb/issues/15129
- Requirement for https://github.com/ytsaurus/ytsaurus/pull/1112
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1127
commit_hash:03ffffe81cdafe7f93a4d3fd9a3212fe67f1c72d
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/regex_ut.cpp')
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/regex_ut.cpp | 90 |
1 file changed, 90 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp new file mode 100644 index 00000000000..47a94f53ed0 --- /dev/null +++ b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp @@ -0,0 +1,90 @@ +#include "regex.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <contrib/libs/re2/re2/re2.h> + +using namespace NSQLTranslationV1; + +namespace { + auto grammar = NSQLReflect::LoadLexerGrammar(); + auto defaultRegexes = MakeRegexByOtherNameMap(grammar, /* ansi = */ false); + auto ansiRegexes = MakeRegexByOtherNameMap(grammar, /* ansi = */ true); + + void CheckRegex(bool ansi, const TStringBuf name, const TStringBuf expected) { + const auto& regexes = ansi ? ansiRegexes : defaultRegexes; + const TString regex = regexes.at(name); + + const RE2 re2(regex); + Y_ENSURE(re2.ok(), re2.error()); + + UNIT_ASSERT_VALUES_EQUAL(regex, expected); + } + +} // namespace + +Y_UNIT_TEST_SUITE(SqlRegexTests) { + Y_UNIT_TEST(StringValue) { + CheckRegex( + /* ansi = */ false, + "STRING_VALUE", + R"(((((\'([^'\\]|(\\(.|\n)))*\'))|((\"([^"\\]|(\\(.|\n)))*\"))|((\@\@(.|\n)*?\@\@)+\@?))([sS]|[uU]|[yY]|[jJ]|[pP]([tT]|[bB]|[vV])?)?))"); + } + + Y_UNIT_TEST(AnsiStringValue) { + CheckRegex( + /* ansi = */ true, + "STRING_VALUE", + R"(((((\'([^']|(\'\'))*\'))|((\"([^"]|(\"\"))*\"))|((\@\@(.|\n)*?\@\@)+\@?))([sS]|[uU]|[yY]|[jJ]|[pP]([tT]|[bB]|[vV])?)?))"); + } + + Y_UNIT_TEST(IdPlain) { + CheckRegex( + /* ansi = */ false, + "ID_PLAIN", + R"(([a-z]|[A-Z]|_)([a-z]|[A-Z]|_|[0-9])*)"); + } + + Y_UNIT_TEST(IdQuoted) { + CheckRegex( + /* ansi = */ false, + "ID_QUOTED", + R"(\`(\\(.|\n)|\`\`|[^`\\])*\`)"); + } + + Y_UNIT_TEST(Digits) { + CheckRegex( + /* ansi = */ false, + "DIGITS", + R"(([0-9]+)|(0[xX]([0-9]|[a-f]|[A-F])+)|(0[oO][0-8]+)|(0[bB](0|1)+))"); + } + + Y_UNIT_TEST(Real) { + CheckRegex( + /* ansi = */ false, + "REAL", + R"((([0-9]+)\.[0-9]*([eE](\+|\-)?([0-9]+))?|([0-9]+)([eE](\+|\-)?([0-9]+)))([fF]|[pP]([fF](4|8)|[nN])?)?)"); + 
} + + Y_UNIT_TEST(Ws) { + CheckRegex( + /* ansi = */ false, + "WS", + R"(( |\r|\t|\n))"); + } + + Y_UNIT_TEST(Comment) { + CheckRegex( + /* ansi = */ false, + "COMMENT", + R"(((\/\*(.|\n)*?\*\/)|(\-\-[^\n\r]*(\r\n?|\n|$))))"); + } + + Y_UNIT_TEST(AnsiCommentSameAsDefault) { + // Because of recursive definition + UNIT_ASSERT_VALUES_EQUAL( + ansiRegexes.at("COMMENT"), + defaultRegexes.at("COMMENT")); + } + +} // Y_UNIT_TEST_SUITE(SqlRegexTests) |