diff options
author | vityaman <vityaman.dev@yandex.ru> | 2025-05-19 11:17:12 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-05-19 11:31:23 +0300 |
commit | 50dbbb6a1e90cf9d1da40a92d563b02712b00b9e (patch) | |
tree | c9c2952f8521851540e08338d093f2067a68fdb4 /yql/essentials/sql/v1/lexer/regex/generic.h | |
parent | 511e56c14b85e20b29e77f9da53d5bb29a3e996c (diff) | |
download | ydb-50dbbb6a1e90cf9d1da40a92d563b02712b00b9e.tar.gz |
YQL-19616: Fix TRegexLexer performance
Fix `TRegexLexer` performance. It is now only 2 times slower than the reference ANTLR implementation in Release mode, so merged regexes are 3 times faster than scan&compare.

---
- Related to `YQL-19616`
- Related to https://github.com/ydb-platform/ydb/issues/15129
- Related to https://github.com/vityaman/ydb/issues/42
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1278
commit_hash:1529f641172fea13f0d33fbfd06a4827c6efde01
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/generic.h')
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/generic.h | 14 |
1 file changed, 5 insertions, 9 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/generic.h b/yql/essentials/sql/v1/lexer/regex/generic.h index cde028cc599..efbac67315a 100644 --- a/yql/essentials/sql/v1/lexer/regex/generic.h +++ b/yql/essentials/sql/v1/lexer/regex/generic.h @@ -13,7 +13,7 @@ namespace NSQLTranslationV1 { struct TGenericToken { static constexpr const char* Error = "<ERROR>"; - TStringBuf Name; + TString Name; TStringBuf Content; size_t Begin = 0; // In bytes }; @@ -32,14 +32,9 @@ namespace NSQLTranslationV1 { size_t maxErrors = IGenericLexer::MaxErrorsLimit) const = 0; }; - using TTokenMatcher = std::function<TMaybe<TStringBuf>(TStringBuf prefix)>; + using TTokenMatcher = std::function<TMaybe<TGenericToken>(TStringBuf prefix)>; - struct TTokenRule { - TString TokenName; - TTokenMatcher Match; - }; - - using TGenericLexerGrammar = TVector<TTokenRule>; + using TGenericLexerGrammar = TVector<TTokenMatcher>; struct TRegexPattern { TString Body; @@ -47,7 +42,8 @@ namespace NSQLTranslationV1 { bool IsCaseInsensitive = false; }; - TTokenMatcher Compile(const TRegexPattern& regex); + TTokenMatcher Compile(TString name, const TRegexPattern& regex); + TRegexPattern Merged(TVector<TRegexPattern> patterns); IGenericLexer::TPtr MakeGenericLexer(TGenericLexerGrammar grammar); |