diff options
author | robot-piglet <[email protected]> | 2025-04-14 12:57:31 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-04-14 13:17:30 +0300 |
commit | 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (patch) | |
tree | b42dabb1d603409ad86974f6737506341ffd9740 /yql/essentials/sql/v1/lexer/regex | |
parent | e11dfa5f63fe4c15672ffbe1e7974c39b6d2ac59 (diff) |
Intermediate changes
commit_hash:b6187f8eba6e8debc23f1928b2e44a396f3511ad
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex')
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/lexer.cpp | 10 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/regex.cpp | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/regex_ut.cpp | 2 |
3 files changed, 11 insertions, 3 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/lexer.cpp b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
index 820cbebf235..a1d96253bf7 100644
--- a/yql/essentials/sql/v1/lexer/regex/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
@@ -21,6 +21,8 @@ namespace NSQLTranslationV1 {
     static constexpr const char* CommentTokenName = "COMMENT";
     static constexpr const char* StringValueName = "STRING_VALUE";
 
+    static constexpr const TStringBuf Utf8BOM = "\xEF\xBB\xBF";
+
 public:
     TRegexLexer(
         bool ansi,
@@ -51,7 +53,13 @@ namespace NSQLTranslationV1 {
         NYql::TIssues& issues,
         size_t maxErrors) override {
         size_t errors = 0;
-        for (size_t pos = 0; pos < query.size();) {
+
+        size_t pos = 0;
+        if (query.StartsWith(Utf8BOM)) {
+            pos += Utf8BOM.size();
+        }
+
+        while (pos < query.size()) {
             TParsedToken matched = Match(TStringBuf(query, pos));
 
             if (matched.Name.empty() && maxErrors == errors) {
diff --git a/yql/essentials/sql/v1/lexer/regex/regex.cpp b/yql/essentials/sql/v1/lexer/regex/regex.cpp
index 937d21572fc..e634ff009a7 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/regex.cpp
@@ -135,7 +135,7 @@ namespace NSQLTranslationV1 {
         R"(\bEOF\b)", R"($)"));
 
     rules.emplace_back(RegexRewriteRule(
-        R"('\\u000C' \|)", ""));
+        R"('\\u000C' \|)", R"('\\f' |)"));
 }
 
 void Finalize(TString& text) {
diff --git a/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
index 8f22bda5886..e62bb0e609f 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
@@ -76,7 +76,7 @@ Y_UNIT_TEST_SUITE(SqlRegexTests) {
     CheckRegex(
         /* ansi = */ false,
         "WS",
-        R"(( |\r|\t|\n))");
+        R"(( |\r|\t|\f|\n))");
 }
 
 Y_UNIT_TEST(Comment) { |