aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
diff options
context:
space:
mode:
author: vityaman <vityaman.dev@yandex.ru> 2025-03-28 18:29:24 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2025-03-28 18:50:04 +0300
commit: 60b99f11bcb2386c2a1c36ffd2e96e69d0105dac (patch)
tree: 08c15d732484c6accf16658b09ed8f07286a9338 /yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
parent: 1e214be59cbf130bee433c422b42f16148e5acff (diff)
download: ydb-60b99f11bcb2386c2a1c36ffd2e96e69d0105dac.tar.gz
YQL-19616 Convert YQL lexer grammar to regexes
- [x] Parse the YQL grammar to extract the lexer grammar into `TLexerGrammar`.
- [x] Translate `TLexerGrammar` into regexes.
- [x] Implement a regex-based lexer, `TRegexLexer`, to test the validity of the generated regexes.
- [x] Test on `Default` syntax mode.
- [x] Test on `ANSI` syntax mode.
---
- Related to https://github.com/ydb-platform/ydb/issues/15129
- Requirement for https://github.com/ytsaurus/ytsaurus/pull/1112
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1127
commit_hash:03ffffe81cdafe7f93a4d3fd9a3212fe67f1c72d
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/regex_ut.cpp')
-rw-r--r--yql/essentials/sql/v1/lexer/regex/regex_ut.cpp90
1 files changed, 90 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
new file mode 100644
index 00000000000..47a94f53ed0
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
@@ -0,0 +1,90 @@
+#include "regex.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <contrib/libs/re2/re2/re2.h>
+
+using namespace NSQLTranslationV1;
+
+namespace {
+    // Lexer grammar and the regex tables built from it, one per `ansi` flag value.
+    auto grammar = NSQLReflect::LoadLexerGrammar();
+    auto defaultRegexes = MakeRegexByOtherNameMap(grammar, /* ansi = */ false);
+    auto ansiRegexes = MakeRegexByOtherNameMap(grammar, /* ansi = */ true);
+
+    // Fetches the regex stored under `name` in the table picked by `ansi`,
+    // ensures RE2 accepts it, then asserts it equals `expected` exactly.
+    void CheckRegex(bool ansi, const TStringBuf name, const TStringBuf expected) {
+        const auto& table = ansi ? ansiRegexes : defaultRegexes;
+        const TString regex = table.at(name);
+        const RE2 compiled(regex);
+        Y_ENSURE(compiled.ok(), compiled.error());
+        UNIT_ASSERT_VALUES_EQUAL(regex, expected);
+    }
+} // namespace
+
+Y_UNIT_TEST_SUITE(SqlRegexTests) {
+    Y_UNIT_TEST(StringValue) {
+        // Default mode: quoted bodies accept backslash escapes; @@...@@ blocks; optional suffix.
+        const TStringBuf expected =
+            R"(((((\'([^'\\]|(\\(.|\n)))*\'))|((\"([^"\\]|(\\(.|\n)))*\"))|((\@\@(.|\n)*?\@\@)+\@?))([sS]|[uU]|[yY]|[jJ]|[pP]([tT]|[bB]|[vV])?)?))";
+        CheckRegex(/* ansi = */ false, "STRING_VALUE", expected);
+    }
+
+    Y_UNIT_TEST(AnsiStringValue) {
+        // ANSI mode: a quote inside the body is written doubled; the pattern has no backslash escapes.
+        const TStringBuf expected =
+            R"(((((\'([^']|(\'\'))*\'))|((\"([^"]|(\"\"))*\"))|((\@\@(.|\n)*?\@\@)+\@?))([sS]|[uU]|[yY]|[jJ]|[pP]([tT]|[bB]|[vV])?)?))";
+        CheckRegex(/* ansi = */ true, "STRING_VALUE", expected);
+    }
+
+    Y_UNIT_TEST(IdPlain) {
+        // An ASCII letter or underscore, followed by letters, underscores, or digits.
+        const TStringBuf expected =
+            R"(([a-z]|[A-Z]|_)([a-z]|[A-Z]|_|[0-9])*)";
+        CheckRegex(/* ansi = */ false, "ID_PLAIN", expected);
+    }
+
+    Y_UNIT_TEST(IdQuoted) {
+        // Backtick-delimited; the body allows backslash escapes and doubled backticks.
+        const TStringBuf expected =
+            R"(\`(\\(.|\n)|\`\`|[^`\\])*\`)";
+        CheckRegex(/* ansi = */ false, "ID_QUOTED", expected);
+    }
+
+    Y_UNIT_TEST(Digits) {
+        // A plain digit run, or a digit run prefixed with 0x / 0o / 0b.
+        const TStringBuf expected =
+            R"(([0-9]+)|(0[xX]([0-9]|[a-f]|[A-F])+)|(0[oO][0-8]+)|(0[bB](0|1)+))";
+        CheckRegex(/* ansi = */ false, "DIGITS", expected);
+    }
+
+    Y_UNIT_TEST(Real) {
+        // Digits followed by '.' (exponent optional) or by an exponent, plus an optional suffix.
+        const TStringBuf expected =
+            R"((([0-9]+)\.[0-9]*([eE](\+|\-)?([0-9]+))?|([0-9]+)([eE](\+|\-)?([0-9]+)))([fF]|[pP]([fF](4|8)|[nN])?)?)";
+        CheckRegex(/* ansi = */ false, "REAL", expected);
+    }
+
+    Y_UNIT_TEST(Ws) {
+        // Exactly one of: space, CR, tab, LF.
+        const TStringBuf expected =
+            R"(( |\r|\t|\n))";
+        CheckRegex(/* ansi = */ false, "WS", expected);
+    }
+
+    Y_UNIT_TEST(Comment) {
+        // Either a /* ... */ block, or a `--` comment running to a line break or `$`.
+        const TStringBuf expected =
+            R"(((\/\*(.|\n)*?\*\/)|(\-\-[^\n\r]*(\r\n?|\n|$))))";
+        CheckRegex(/* ansi = */ false, "COMMENT", expected);
+    }
+
+    Y_UNIT_TEST(AnsiCommentSameAsDefault) {
+        // Because of the recursive definition, the COMMENT regex
+        // must come out the same in both syntax modes.
+        UNIT_ASSERT_VALUES_EQUAL(
+            ansiRegexes.at("COMMENT"), defaultRegexes.at("COMMENT"));
+    }
+
+} // Y_UNIT_TEST_SUITE(SqlRegexTests)