summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1/lexer/regex
diff options
context:
space:
mode:
author: vityaman <[email protected]> 2025-04-11 17:00:21 +0300
committer: robot-piglet <[email protected]> 2025-04-11 17:16:43 +0300
commit: 35a2668d661e1293630350bf07ad4d297ae338ee (patch)
tree: 584795bf9334ed7cd6b16e98b102bd92c118472f /yql/essentials/sql/v1/lexer/regex
parent: ffa276d35af2cc7ab17b850915674b60900c8b56 (diff)
YQL-19616 Fix lexer/regex STRING_VALUE and TSKIP recognition
- Related to https://github.com/ydb-platform/ydb/issues/15129
- Related to https://github.com/vityaman/ydb/issues/11

---

Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1201
commit_hash:53ef677a35649a6dc77d8c4269a8aceefcd15026
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex')
-rw-r--r-- yql/essentials/sql/v1/lexer/regex/lexer.cpp 19
1 file changed, 12 insertions, 7 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/lexer.cpp b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
index 9f96e444ac7..820cbebf235 100644
--- a/yql/essentials/sql/v1/lexer/regex/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
@@ -19,6 +19,7 @@ namespace NSQLTranslationV1 {
class TRegexLexer: public NSQLTranslation::ILexer {
static constexpr const char* CommentTokenName = "COMMENT";
+ static constexpr const char* StringValueName = "STRING_VALUE";
public:
TRegexLexer(
@@ -28,14 +29,17 @@ namespace NSQLTranslationV1 {
: Grammar_(std::move(grammar))
, Ansi_(ansi)
{
- RE2::Options custom;
- custom.set_longest_match(true);
-
for (const auto& [token, regex] : RegexByOtherName) {
+ RE2::Options custom;
+ if (token != CommentTokenName && token != StringValueName) {
+ custom.set_longest_match(true);
+ }
+
+ RE2* re2 = new RE2(regex, custom);
if (token == CommentTokenName) {
- CommentRegex_.Reset(new RE2(regex));
+ CommentRegex_.Reset(re2);
} else {
- OtherRegexes_.emplace_back(token, new RE2(regex, custom));
+ OtherRegexes_.emplace_back(token, re2);
}
}
}
@@ -112,8 +116,9 @@ namespace NSQLTranslationV1 {
bool MatchKeyword(const TStringBuf prefix, TParsedTokenList& matches) {
size_t count = 0;
for (const auto& keyword : Grammar_.KeywordNames) {
- const TStringBuf content = prefix.substr(0, keyword.length());
- if (AsciiEqualsIgnoreCase(content, NSQLReflect::TLexerGrammar::KeywordBlock(keyword))) {
+ const TStringBuf block = NSQLReflect::TLexerGrammar::KeywordBlock(keyword);
+ const TStringBuf content = prefix.substr(0, block.length());
+ if (AsciiEqualsIgnoreCase(content, block)) {
matches.emplace_back(keyword, TString(content));
count += 1;
}