summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1/lexer/regex
diff options
context:
space:
mode:
author robot-piglet <[email protected]> 2025-04-14 12:57:31 +0300
committer robot-piglet <[email protected]> 2025-04-14 13:17:30 +0300
commit 0a3f7c36d20990ff22167d8af3ea54798cdb9d73 (patch)
tree b42dabb1d603409ad86974f6737506341ffd9740 /yql/essentials/sql/v1/lexer/regex
parent e11dfa5f63fe4c15672ffbe1e7974c39b6d2ac59 (diff)
Intermediate changes
commit_hash:b6187f8eba6e8debc23f1928b2e44a396f3511ad
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex')
-rw-r--r-- yql/essentials/sql/v1/lexer/regex/lexer.cpp 10
-rw-r--r-- yql/essentials/sql/v1/lexer/regex/regex.cpp 2
-rw-r--r-- yql/essentials/sql/v1/lexer/regex/regex_ut.cpp 2
3 files changed, 11 insertions, 3 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/lexer.cpp b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
index 820cbebf235..a1d96253bf7 100644
--- a/yql/essentials/sql/v1/lexer/regex/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
@@ -21,6 +21,8 @@ namespace NSQLTranslationV1 {
static constexpr const char* CommentTokenName = "COMMENT";
static constexpr const char* StringValueName = "STRING_VALUE";
+ static constexpr const TStringBuf Utf8BOM = "\xEF\xBB\xBF";
+
public:
TRegexLexer(
bool ansi,
@@ -51,7 +53,13 @@ namespace NSQLTranslationV1 {
NYql::TIssues& issues,
size_t maxErrors) override {
size_t errors = 0;
- for (size_t pos = 0; pos < query.size();) {
+
+ size_t pos = 0;
+ if (query.StartsWith(Utf8BOM)) {
+ pos += Utf8BOM.size();
+ }
+
+ while (pos < query.size()) {
TParsedToken matched = Match(TStringBuf(query, pos));
if (matched.Name.empty() && maxErrors == errors) {
diff --git a/yql/essentials/sql/v1/lexer/regex/regex.cpp b/yql/essentials/sql/v1/lexer/regex/regex.cpp
index 937d21572fc..e634ff009a7 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/regex.cpp
@@ -135,7 +135,7 @@ namespace NSQLTranslationV1 {
R"(\bEOF\b)", R"($)"));
rules.emplace_back(RegexRewriteRule(
- R"('\\u000C' \|)", ""));
+ R"('\\u000C' \|)", R"('\\f' |)"));
}
void Finalize(TString& text) {
diff --git a/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
index 8f22bda5886..e62bb0e609f 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/regex_ut.cpp
@@ -76,7 +76,7 @@ Y_UNIT_TEST_SUITE(SqlRegexTests) {
CheckRegex(
/* ansi = */ false,
"WS",
- R"(( |\r|\t|\n))");
+ R"(( |\r|\t|\f|\n))");
}
Y_UNIT_TEST(Comment) {