summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/lexer/regex
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2025-04-01 01:12:58 +0300
committerrobot-piglet <[email protected]>2025-04-01 01:23:36 +0300
commitf93076bbe93dd6ebb8d75a930268d30839b9011a (patch)
treedbfc5b2bea8bf16b1599a69f0f721a2acdc5dac2 /yql/essentials/sql/v1/lexer/regex
parent2d512f78c593c3f4573742129c281d0fc5479de0 (diff)
Intermediate changes
commit_hash:e57b3e95787cc8037f200f1b6b6073e35403b27e
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex')
-rw-r--r--yql/essentials/sql/v1/lexer/regex/lexer.cpp39
-rw-r--r--yql/essentials/sql/v1/lexer/regex/regex.cpp6
-rw-r--r--yql/essentials/sql/v1/lexer/regex/regex.h2
3 files changed, 24 insertions, 23 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/lexer.cpp b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
index 1c8f2104a48..b0b5c2dad44 100644
--- a/yql/essentials/sql/v1/lexer/regex/lexer.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/lexer.cpp
@@ -10,6 +10,7 @@
#include <util/generic/algorithm.h>
#include <util/generic/string.h>
#include <util/string/subst.h>
+#include <util/string/ascii.h>
namespace NSQLTranslationV1 {
@@ -23,15 +24,15 @@ namespace NSQLTranslationV1 {
TRegexLexer(
bool ansi,
NSQLReflect::TLexerGrammar grammar,
- const THashMap<TString, TString>& RegexByOtherNameMap)
+ const TVector<std::tuple<TString, TString>>& RegexByOtherName)
: Grammar_(std::move(grammar))
, Ansi_(ansi)
{
- for (auto& [token, regex] : RegexByOtherNameMap) {
+ for (const auto& [token, regex] : RegexByOtherName) {
if (token == CommentTokenName) {
CommentRegex_.Reset(new RE2(regex));
} else {
- OtherRegexes_.emplace(std::move(token), std::move(regex));
+ OtherRegexes_.emplace_back(token, new RE2(regex));
}
}
}
@@ -71,27 +72,27 @@ namespace NSQLTranslationV1 {
size_t keywordCount = MatchKeyword(prefix, matches);
MatchPunctuation(prefix, matches);
- size_t otherCount = MatchRegex(prefix, matches);
+ MatchRegex(prefix, matches);
MatchComment(prefix, matches);
- auto max = MaxElementBy(matches, [](const TParsedToken& m) {
- return m.Content.length();
- });
-
- if (max == std::end(matches)) {
+ if (matches.empty()) {
return {};
}
+ auto maxLength = MaxElementBy(matches, [](const TParsedToken& m) {
+ return m.Content.length();
+ })->Content.length();
+
+ auto max = FindIf(matches, [&](const TParsedToken& m) {
+ return m.Content.length() == maxLength;
+ });
+
auto isMatched = [&](const TStringBuf name) {
return std::end(matches) != FindIf(matches, [&](const auto& m) {
return m.Name == name;
});
};
- Y_ENSURE(
- otherCount <= 1 ||
- (otherCount == 2 && isMatched("DIGITS") && isMatched("INTEGER_VALUE")));
-
size_t conflicts = CountIf(matches, [&](const TParsedToken& m) {
return m.Content.length() == max->Content.length();
});
@@ -108,7 +109,7 @@ namespace NSQLTranslationV1 {
bool MatchKeyword(const TStringBuf prefix, TParsedTokenList& matches) {
size_t count = 0;
for (const auto& keyword : Grammar_.KeywordNames) {
- if (prefix.substr(0, keyword.length()) == keyword) {
+ if (AsciiEqualsIgnoreCase(prefix.substr(0, keyword.length()), keyword)) {
matches.emplace_back(keyword, keyword);
count += 1;
}
@@ -131,7 +132,7 @@ namespace NSQLTranslationV1 {
size_t MatchRegex(const TStringBuf prefix, TParsedTokenList& matches) {
size_t count = 0;
for (const auto& [token, regex] : OtherRegexes_) {
- if (const TStringBuf match = TryMatchRegex(prefix, regex); !match.empty()) {
+ if (const TStringBuf match = TryMatchRegex(prefix, *regex); !match.empty()) {
matches.emplace_back(token, TString(match));
count += 1;
}
@@ -216,7 +217,7 @@ namespace NSQLTranslationV1 {
}
NSQLReflect::TLexerGrammar Grammar_;
- THashMap<TString, RE2> OtherRegexes_;
+ TVector<std::tuple<TString, THolder<RE2>>> OtherRegexes_;
THolder<RE2> CommentRegex_;
bool Ansi_;
};
@@ -228,19 +229,19 @@ namespace NSQLTranslationV1 {
explicit TFactory(bool ansi)
: Ansi_(ansi)
, Grammar_(NSQLReflect::LoadLexerGrammar())
- , RegexByOtherNameMap_(MakeRegexByOtherNameMap(Grammar_, Ansi_))
+ , RegexByOtherName_(MakeRegexByOtherName(Grammar_, Ansi_))
{
}
NSQLTranslation::ILexer::TPtr MakeLexer() const override {
return NSQLTranslation::ILexer::TPtr(
- new TRegexLexer(Ansi_, Grammar_, RegexByOtherNameMap_));
+ new TRegexLexer(Ansi_, Grammar_, RegexByOtherName_));
}
private:
bool Ansi_;
NSQLReflect::TLexerGrammar Grammar_;
- THashMap<TString, TString> RegexByOtherNameMap_;
+ TVector<std::tuple<TString, TString>> RegexByOtherName_;
};
} // namespace
diff --git a/yql/essentials/sql/v1/lexer/regex/regex.cpp b/yql/essentials/sql/v1/lexer/regex/regex.cpp
index a8aca8a1318..937d21572fc 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/regex.cpp
@@ -227,12 +227,12 @@ namespace NSQLTranslationV1 {
TRewriteRule UnwrapQuotedSpace_;
};
- THashMap<TString, TString> MakeRegexByOtherNameMap(const NSQLReflect::TLexerGrammar& grammar, bool ansi) {
+ TVector<std::tuple<TString, TString>> MakeRegexByOtherName(const NSQLReflect::TLexerGrammar& grammar, bool ansi) {
TLexerGrammarToRegexTranslator translator(grammar, ansi);
- THashMap<TString, TString> regexes;
+ TVector<std::tuple<TString, TString>> regexes;
for (const auto& token : grammar.OtherNames) {
- regexes.emplace(token, translator.ToRegex(token));
+ regexes.emplace_back(token, translator.ToRegex(token));
}
return regexes;
}
diff --git a/yql/essentials/sql/v1/lexer/regex/regex.h b/yql/essentials/sql/v1/lexer/regex/regex.h
index 9e29c3df25b..1e9d92b6535 100644
--- a/yql/essentials/sql/v1/lexer/regex/regex.h
+++ b/yql/essentials/sql/v1/lexer/regex/regex.h
@@ -8,7 +8,7 @@ namespace NSQLTranslationV1 {
// Makes regexes only for tokens from OtherNames,
// as keywords and punctuation are trivially matched.
- THashMap<TString, TString> MakeRegexByOtherNameMap(
+ TVector<std::tuple<TString, TString>> MakeRegexByOtherName(
const NSQLReflect::TLexerGrammar& grammar, bool ansi);
} // namespace NSQLTranslationV1