diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2025-05-12 13:53:24 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-05-12 14:05:50 +0300 |
commit | 7a941ebd252fd7442b4d1d34d31d72e971ad20bf (patch) | |
tree | 70c132d1b611697ad23b90cf35215b035f247ec0 /yql/essentials/sql/v1/lexer/regex/generic.h | |
parent | bf1279129bcf6c1b1001e39c39a13d80737898d3 (diff) | |
download | ydb-7a941ebd252fd7442b4d1d34d31d72e971ad20bf.tar.gz |
Intermediate changes
commit_hash:3a624a323006078de71f50747f7b2e8cadba7ccd
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/generic.h')
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/generic.h | 56 |
1 files changed, 56 insertions, 0 deletions
// Reconstructed from the new-file diff of yql/essentials/sql/v1/lexer/regex/generic.h
// (mode 100644, index 00000000000..cde028cc599).
#pragma once

#include <util/generic/ptr.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/maybe.h>
#include <util/generic/ylimits.h>

#include <functional>

// Declarations for a generic, rule-driven lexer: a grammar is an ordered list
// of named matchers, and a lexer built from it reports tokens via a callback.
// Only the interface lives here; every function below is defined elsewhere.
namespace NSQLTranslationV1 {

    // A single token reported by a generic lexer.
    //
    // Name and Content are TStringBuf views, so a token does not own its text.
    // NOTE(review): presumably Content points into the tokenized input and Name
    // into the grammar's TokenName storage; both would then have to outlive the
    // token - confirm against the implementation.
    struct TGenericToken {
        // Reserved token name.
        // NOTE(review): presumably used as Name for input that no rule
        // matched - confirm against the implementation.
        static constexpr const char* Error = "<ERROR>";

        TStringBuf Name;
        TStringBuf Content;
        size_t Begin = 0; // In bytes
    };

    // Abstract lexer interface. Instances are reference-counted through
    // TThrRefBase and are passed around as TIntrusivePtr (see TPtr).
    class IGenericLexer: public TThrRefBase {
    public:
        using TPtr = TIntrusivePtr<IGenericLexer>;

        // Callback invoked for each produced token; the token is handed over
        // as an rvalue, so the callee may move from it.
        using TTokenCallback = std::function<void(TGenericToken&& token)>;

        // Largest representable size_t; as the default for maxErrors it means
        // the error count is effectively unbounded.
        static constexpr size_t MaxErrorsLimit = Max<size_t>();

        virtual ~IGenericLexer() = default;

        // Tokenizes `text`, passing tokens to `onNext`.
        //
        // `maxErrors` bounds the number of errors.
        // NOTE(review): presumably tokenization stops once the bound is
        // reached, and the result is true when the input was tokenized without
        // errors - neither is visible from this header, confirm in the
        // implementation.
        //
        // The default argument sits on a virtual function: defaults are chosen
        // from the static type at the call site, so overriders should not
        // declare a different default.
        virtual bool Tokenize(
            TStringBuf text,
            const TTokenCallback& onNext,
            size_t maxErrors = IGenericLexer::MaxErrorsLimit) const = 0;
    };

    // Matcher for one token kind: receives `prefix` and returns either a
    // TStringBuf or an empty TMaybe.
    // NOTE(review): presumably `prefix` is the not-yet-consumed remainder of
    // the input and the result is the matched leading part of it - confirm.
    using TTokenMatcher = std::function<TMaybe<TStringBuf>(TStringBuf prefix)>;

    // Associates a token name with the matcher that recognizes it.
    struct TTokenRule {
        TString TokenName;
        TTokenMatcher Match;
    };

    // A grammar is a sequence of token rules.
    // NOTE(review): whether rule order affects which match wins is decided by
    // the lexer implementation, not visible here.
    using TGenericLexerGrammar = TVector<TTokenRule>;

    // Description of a regular expression to be turned into a TTokenMatcher.
    struct TRegexPattern {
        // Main pattern text.
        TString Body;
        // Additional pattern, empty by default.
        // NOTE(review): presumably a trailing context that must follow Body
        // without being part of the match - confirm in Compile().
        TString After = "";
        // Case-sensitive matching unless set to true.
        bool IsCaseInsensitive = false;
    };

    // Builds a token matcher from a regex pattern description.
    TTokenMatcher Compile(const TRegexPattern& regex);

    // Creates a lexer for `grammar`; the grammar is taken by value.
    IGenericLexer::TPtr MakeGenericLexer(TGenericLexerGrammar grammar);

    // Convenience wrapper returning the tokens of `text` as a vector.
    //
    // `lexer` is a non-const lvalue reference, so a temporary (for example the
    // direct result of MakeGenericLexer) cannot be passed; bind it to a
    // variable first. The returned tokens hold TStringBuf views, so the
    // buffers they refer to must stay alive while the tokens are used.
    TVector<TGenericToken> Tokenize(IGenericLexer::TPtr& lexer, TStringBuf text);

} // namespace NSQLTranslationV1