diff options
author | robot-piglet <[email protected]> | 2025-04-01 01:12:58 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-04-01 01:23:36 +0300 |
commit | f93076bbe93dd6ebb8d75a930268d30839b9011a (patch) | |
tree | dbfc5b2bea8bf16b1599a69f0f721a2acdc5dac2 /yql/essentials/sql/v1/lexer/regex | |
parent | 2d512f78c593c3f4573742129c281d0fc5479de0 (diff) |
Intermediate changes
commit_hash:e57b3e95787cc8037f200f1b6b6073e35403b27e
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex')
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/lexer.cpp | 39 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/regex.cpp | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/regex.h | 2 |
3 files changed, 24 insertions, 23 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/lexer.cpp b/yql/essentials/sql/v1/lexer/regex/lexer.cpp index 1c8f2104a48..b0b5c2dad44 100644 --- a/yql/essentials/sql/v1/lexer/regex/lexer.cpp +++ b/yql/essentials/sql/v1/lexer/regex/lexer.cpp @@ -10,6 +10,7 @@ #include <util/generic/algorithm.h> #include <util/generic/string.h> #include <util/string/subst.h> +#include <util/string/ascii.h> namespace NSQLTranslationV1 { @@ -23,15 +24,15 @@ namespace NSQLTranslationV1 { TRegexLexer( bool ansi, NSQLReflect::TLexerGrammar grammar, - const THashMap<TString, TString>& RegexByOtherNameMap) + const TVector<std::tuple<TString, TString>>& RegexByOtherName) : Grammar_(std::move(grammar)) , Ansi_(ansi) { - for (auto& [token, regex] : RegexByOtherNameMap) { + for (const auto& [token, regex] : RegexByOtherName) { if (token == CommentTokenName) { CommentRegex_.Reset(new RE2(regex)); } else { - OtherRegexes_.emplace(std::move(token), std::move(regex)); + OtherRegexes_.emplace_back(token, new RE2(regex)); } } } @@ -71,27 +72,27 @@ namespace NSQLTranslationV1 { size_t keywordCount = MatchKeyword(prefix, matches); MatchPunctuation(prefix, matches); - size_t otherCount = MatchRegex(prefix, matches); + MatchRegex(prefix, matches); MatchComment(prefix, matches); - auto max = MaxElementBy(matches, [](const TParsedToken& m) { - return m.Content.length(); - }); - - if (max == std::end(matches)) { + if (matches.empty()) { return {}; } + auto maxLength = MaxElementBy(matches, [](const TParsedToken& m) { + return m.Content.length(); + })->Content.length(); + + auto max = FindIf(matches, [&](const TParsedToken& m) { + return m.Content.length() == maxLength; + }); + auto isMatched = [&](const TStringBuf name) { return std::end(matches) != FindIf(matches, [&](const auto& m) { return m.Name == name; }); }; - Y_ENSURE( - otherCount <= 1 || - (otherCount == 2 && isMatched("DIGITS") && isMatched("INTEGER_VALUE"))); - size_t conflicts = CountIf(matches, [&](const TParsedToken& m) { return m.Content.length() == max->Content.length(); }); @@ -108,7 +109,7 @@ namespace NSQLTranslationV1 { bool MatchKeyword(const TStringBuf prefix, TParsedTokenList& matches) { size_t count = 0; for (const auto& keyword : Grammar_.KeywordNames) { - if (prefix.substr(0, keyword.length()) == keyword) { + if (AsciiEqualsIgnoreCase(prefix.substr(0, keyword.length()), keyword)) { matches.emplace_back(keyword, keyword); count += 1; } @@ -131,7 +132,7 @@ namespace NSQLTranslationV1 { size_t MatchRegex(const TStringBuf prefix, TParsedTokenList& matches) { size_t count = 0; for (const auto& [token, regex] : OtherRegexes_) { - if (const TStringBuf match = TryMatchRegex(prefix, regex); !match.empty()) { + if (const TStringBuf match = TryMatchRegex(prefix, *regex); !match.empty()) { matches.emplace_back(token, TString(match)); count += 1; } @@ -216,7 +217,7 @@ namespace NSQLTranslationV1 { } NSQLReflect::TLexerGrammar Grammar_; - THashMap<TString, RE2> OtherRegexes_; + TVector<std::tuple<TString, THolder<RE2>>> OtherRegexes_; THolder<RE2> CommentRegex_; bool Ansi_; }; @@ -228,19 +229,19 @@ namespace NSQLTranslationV1 { explicit TFactory(bool ansi) : Ansi_(ansi) , Grammar_(NSQLReflect::LoadLexerGrammar()) - , RegexByOtherNameMap_(MakeRegexByOtherNameMap(Grammar_, Ansi_)) + , RegexByOtherName_(MakeRegexByOtherName(Grammar_, Ansi_)) { } NSQLTranslation::ILexer::TPtr MakeLexer() const override { return NSQLTranslation::ILexer::TPtr( - new TRegexLexer(Ansi_, Grammar_, RegexByOtherNameMap_)); + new TRegexLexer(Ansi_, Grammar_, RegexByOtherName_)); } private: bool Ansi_; NSQLReflect::TLexerGrammar Grammar_; - THashMap<TString, TString> RegexByOtherNameMap_; + TVector<std::tuple<TString, TString>> RegexByOtherName_; }; } // namespace diff --git a/yql/essentials/sql/v1/lexer/regex/regex.cpp b/yql/essentials/sql/v1/lexer/regex/regex.cpp index a8aca8a1318..937d21572fc 100644 --- a/yql/essentials/sql/v1/lexer/regex/regex.cpp +++ b/yql/essentials/sql/v1/lexer/regex/regex.cpp @@ -227,12 +227,12 @@ namespace NSQLTranslationV1 { TRewriteRule UnwrapQuotedSpace_; }; - THashMap<TString, TString> MakeRegexByOtherNameMap(const NSQLReflect::TLexerGrammar& grammar, bool ansi) { + TVector<std::tuple<TString, TString>> MakeRegexByOtherName(const NSQLReflect::TLexerGrammar& grammar, bool ansi) { TLexerGrammarToRegexTranslator translator(grammar, ansi); - THashMap<TString, TString> regexes; + TVector<std::tuple<TString, TString>> regexes; for (const auto& token : grammar.OtherNames) { - regexes.emplace(token, translator.ToRegex(token)); + regexes.emplace_back(token, translator.ToRegex(token)); } return regexes; } diff --git a/yql/essentials/sql/v1/lexer/regex/regex.h b/yql/essentials/sql/v1/lexer/regex/regex.h index 9e29c3df25b..1e9d92b6535 100644 --- a/yql/essentials/sql/v1/lexer/regex/regex.h +++ b/yql/essentials/sql/v1/lexer/regex/regex.h @@ -8,7 +8,7 @@ namespace NSQLTranslationV1 { // Makes regexes only for tokens from OtherNames, // as keywords and punctuation are trivially matched. - THashMap<TString, TString> MakeRegexByOtherNameMap( + TVector<std::tuple<TString, TString>> MakeRegexByOtherName( const NSQLReflect::TLexerGrammar& grammar, bool ansi); } // namespace NSQLTranslationV1 |