Intermediate changes

commit_hash:e57b3e95787cc8037f200f1b6b6073e35403b27e
author: robot-piglet <[email protected]> 2025-04-01 01:12:58 +0300
committer: robot-piglet <[email protected]> 2025-04-01 01:23:36 +0300
commit: f93076bbe93dd6ebb8d75a930268d30839b9011a (patch)
tree: dbfc5b2bea8bf16b1599a69f0f721a2acdc5dac2 /yql/essentials/sql/v1/lexer/regex
parent: 2d512f78c593c3f4573742129c281d0fc5479de0 (diff)
3 files changed, 24 insertions, 23 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/lexer.cpp b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
index 1c8f2104a48..b0b5c2dad44 100644
--- a/yql/essentials/sql/v1/lexer/regex/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
@@ -10,6 +10,7 @@
 #include <util/generic/algorithm.h>
 #include <util/generic/string.h>
 #include <util/string/subst.h>
+#include <util/string/ascii.h>
 
 namespace NSQLTranslationV1 {
 
@@ -23,15 +24,15 @@ namespace NSQLTranslationV1 {
         TRegexLexer(
             bool ansi,
             NSQLReflect::TLexerGrammar grammar,
-            const THashMap<TString, TString>& RegexByOtherNameMap)
+            const TVector<std::tuple<TString, TString>>& RegexByOtherName)
             : Grammar_(std::move(grammar))
             , Ansi_(ansi)
         {
-            for (auto& [token, regex] : RegexByOtherNameMap) {
+            for (const auto& [token, regex] : RegexByOtherName) {
                 if (token == CommentTokenName) {
                     CommentRegex_.Reset(new RE2(regex));
                 } else {
-                    OtherRegexes_.emplace(std::move(token), std::move(regex));
+                    OtherRegexes_.emplace_back(token, new RE2(regex));
                 }
             }
         }
@@ -71,27 +72,27 @@ namespace NSQLTranslationV1 {
 
             size_t keywordCount = MatchKeyword(prefix, matches);
             MatchPunctuation(prefix, matches);
-            size_t otherCount = MatchRegex(prefix, matches);
+            MatchRegex(prefix, matches);
             MatchComment(prefix, matches);
 
-            auto max = MaxElementBy(matches, [](const TParsedToken& m) {
-                return m.Content.length();
-            });
-
-            if (max == std::end(matches)) {
+            if (matches.empty()) {
                 return {};
             }
 
+            auto maxLength = MaxElementBy(matches, [](const TParsedToken& m) {
+                                 return m.Content.length();
+                             })->Content.length();
+
+            auto max = FindIf(matches, [&](const TParsedToken& m) {
+                return m.Content.length() == maxLength;
+            });
+
             auto isMatched = [&](const TStringBuf name) {
                 return std::end(matches) != FindIf(matches, [&](const auto& m) {
                            return m.Name == name;
                        });
             };
 
-            Y_ENSURE(
-                otherCount <= 1 ||
-                (otherCount == 2 && isMatched("DIGITS") && isMatched("INTEGER_VALUE")));
-
             size_t conflicts = CountIf(matches, [&](const TParsedToken& m) {
                 return m.Content.length() == max->Content.length();
             });
@@ -108,7 +109,7 @@ namespace NSQLTranslationV1 {
         bool MatchKeyword(const TStringBuf prefix, TParsedTokenList& matches) {
             size_t count = 0;
             for (const auto& keyword : Grammar_.KeywordNames) {
-                if (prefix.substr(0, keyword.length()) == keyword) {
+                if (AsciiEqualsIgnoreCase(prefix.substr(0, keyword.length()), keyword)) {
                     matches.emplace_back(keyword, keyword);
                     count += 1;
                 }
@@ -131,7 +132,7 @@ namespace NSQLTranslationV1 {
         size_t MatchRegex(const TStringBuf prefix, TParsedTokenList& matches) {
             size_t count = 0;
             for (const auto& [token, regex] : OtherRegexes_) {
-                if (const TStringBuf match = TryMatchRegex(prefix, regex); !match.empty()) {
+                if (const TStringBuf match = TryMatchRegex(prefix, *regex); !match.empty()) {
                     matches.emplace_back(token, TString(match));
                     count += 1;
                 }
@@ -216,7 +217,7 @@ namespace NSQLTranslationV1 {
         }
 
         NSQLReflect::TLexerGrammar Grammar_;
-        THashMap<TString, RE2> OtherRegexes_;
+        TVector<std::tuple<TString, THolder<RE2>>> OtherRegexes_;
         THolder<RE2> CommentRegex_;
         bool Ansi_;
     };
@@ -228,19 +229,19 @@ namespace NSQLTranslationV1 {
             explicit TFactory(bool ansi)
                 : Ansi_(ansi)
                 , Grammar_(NSQLReflect::LoadLexerGrammar())
-                , RegexByOtherNameMap_(MakeRegexByOtherNameMap(Grammar_, Ansi_))
+                , RegexByOtherName_(MakeRegexByOtherName(Grammar_, Ansi_))
             {
             }
 
             NSQLTranslation::ILexer::TPtr MakeLexer() const override {
                 return NSQLTranslation::ILexer::TPtr(
-                    new TRegexLexer(Ansi_, Grammar_, RegexByOtherNameMap_));
+                    new TRegexLexer(Ansi_, Grammar_, RegexByOtherName_));
             }
 
         private:
             bool Ansi_;
             NSQLReflect::TLexerGrammar Grammar_;
-            THashMap<TString, TString> RegexByOtherNameMap_;
+            TVector<std::tuple<TString, TString>> RegexByOtherName_;
         };
 
     } // namespace
diff --git a/yql/essentials/sql/v1/lexer/regex/regex.cpp b/yql/essentials/sql/v1/lexer/regex/regex.cpp
index a8aca8a1318..937d21572fc 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/regex.cpp
@@ -227,12 +227,12 @@ namespace NSQLTranslationV1 {
         TRewriteRule UnwrapQuotedSpace_;
     };
 
-    THashMap<TString, TString> MakeRegexByOtherNameMap(const NSQLReflect::TLexerGrammar& grammar, bool ansi) {
+    TVector<std::tuple<TString, TString>> MakeRegexByOtherName(const NSQLReflect::TLexerGrammar& grammar, bool ansi) {
         TLexerGrammarToRegexTranslator translator(grammar, ansi);
 
-        THashMap<TString, TString> regexes;
+        TVector<std::tuple<TString, TString>> regexes;
         for (const auto& token : grammar.OtherNames) {
-            regexes.emplace(token, translator.ToRegex(token));
+            regexes.emplace_back(token, translator.ToRegex(token));
         }
         return regexes;
     }
diff --git a/yql/essentials/sql/v1/lexer/regex/regex.h b/yql/essentials/sql/v1/lexer/regex/regex.h
index 9e29c3df25b..1e9d92b6535 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex.h
+++ b/yql/essentials/sql/v1/lexer/regex/regex.h
@@ -8,7 +8,7 @@ namespace NSQLTranslationV1 {
 
     // Makes regexes only for tokens from OtherNames,
     // as keywords and punctuation are trivially matched.
-    THashMap<TString, TString> MakeRegexByOtherNameMap(
+    TVector<std::tuple<TString, TString>> MakeRegexByOtherName(
         const NSQLReflect::TLexerGrammar& grammar, bool ansi);
 
 } // namespace NSQLTranslationV1
author	robot-piglet <[email protected]>	2025-04-01 01:12:58 +0300
committer	robot-piglet <[email protected]>	2025-04-01 01:23:36 +0300
commit	f93076bbe93dd6ebb8d75a930268d30839b9011a (patch)
tree	dbfc5b2bea8bf16b1599a69f0f721a2acdc5dac2 /yql/essentials/sql/v1/lexer/regex
parent	2d512f78c593c3f4573742129c281d0fc5479de0 (diff)