aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/lexer/regex/generic.h
diff options
context:
space:
mode:
author robot-piglet <robot-piglet@yandex-team.com> 2025-05-12 13:53:24 +0300
committer robot-piglet <robot-piglet@yandex-team.com> 2025-05-12 14:05:50 +0300
commit 7a941ebd252fd7442b4d1d34d31d72e971ad20bf (patch)
tree 70c132d1b611697ad23b90cf35215b035f247ec0 /yql/essentials/sql/v1/lexer/regex/generic.h
parent bf1279129bcf6c1b1001e39c39a13d80737898d3 (diff)
download ydb-7a941ebd252fd7442b4d1d34d31d72e971ad20bf.tar.gz
Intermediate changes
commit_hash:3a624a323006078de71f50747f7b2e8cadba7ccd
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/generic.h')
-rw-r--r--yql/essentials/sql/v1/lexer/regex/generic.h56
1 files changed, 56 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/generic.h b/yql/essentials/sql/v1/lexer/regex/generic.h
new file mode 100644
index 00000000000..cde028cc599
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/regex/generic.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/maybe.h>
+#include <util/generic/ylimits.h>
+
+#include <functional>
+
+namespace NSQLTranslationV1 {
+
+ struct TGenericToken { // A token described by its name, its content and its starting position.
+ static constexpr const char* Error = "<ERROR>"; // Name constant for error tokens; NOTE(review): presumably assigned to input no rule matched - confirm in the lexer implementation.
+
+ TStringBuf Name; // Token name, held as a non-owning view; presumably a rule's TokenName or Error - TODO confirm.
+ TStringBuf Content; // Token text, held as a non-owning view; presumably a slice of the tokenized input - TODO confirm lifetime against callers.
+ size_t Begin = 0; // In bytes; defaults to 0. Presumably the offset of Content within the input text - confirm.
+ };
+
+ class IGenericLexer: public TThrRefBase { // Abstract lexer interface; reference-counted through its TThrRefBase base.
+ public:
+ using TPtr = TIntrusivePtr<IGenericLexer>; // Intrusive smart pointer used to hold lexer instances.
+ using TTokenCallback = std::function<void(TGenericToken&& token)>; // Callback that receives a token as an rvalue.
+
+ static constexpr size_t MaxErrorsLimit = Max<size_t>(); // Maximum size_t value; used as the default for maxErrors in Tokenize.
+
+ virtual ~IGenericLexer() = default;
+ virtual bool Tokenize( // Pure virtual; implementations are expected to report tokens of `text` via `onNext`. NOTE(review): the meaning of the bool result is not visible here - presumably success, confirm.
+ TStringBuf text, // Input to tokenize.
+ const TTokenCallback& onNext, // Token sink; taken by const reference.
+ size_t maxErrors = IGenericLexer::MaxErrorsLimit) const = 0; // Presumably caps how many errors are tolerated - TODO confirm against implementations.
+ };
+
+ using TTokenMatcher = std::function<TMaybe<TStringBuf>(TStringBuf prefix)>; // Matcher callable: takes `prefix` and returns an optional TStringBuf; presumably the matched content, or an empty TMaybe on no match - confirm.
+
+ struct TTokenRule { // Pairs a token name with the matcher for that token.
+ TString TokenName; // Owned name of the token.
+ TTokenMatcher Match; // Matcher callable associated with TokenName.
+ };
+
+ using TGenericLexerGrammar = TVector<TTokenRule>; // A grammar is a vector of token rules; NOTE(review): whether rule order sets priority is not visible here - confirm.
+
+ struct TRegexPattern { // Description of a regex that Compile() turns into a TTokenMatcher.
+ TString Body; // Main pattern text.
+ TString After = ""; // Defaults to empty; NOTE(review): presumably a pattern that must follow Body (lookahead-like) - confirm in Compile().
+ bool IsCaseInsensitive = false; // Case-insensitivity flag; off by default.
+ };
+
+ TTokenMatcher Compile(const TRegexPattern& regex); // Builds a TTokenMatcher from the given pattern description; defined outside this header.
+
+ IGenericLexer::TPtr MakeGenericLexer(TGenericLexerGrammar grammar); // Factory: takes the grammar by value and returns a lexer as IGenericLexer::TPtr.
+
+ TVector<TGenericToken> Tokenize(IGenericLexer::TPtr& lexer, TStringBuf text); // Convenience overload returning tokens as a vector; tokens hold TStringBuf views, so presumably `text` must outlive the result - TODO confirm. NOTE(review): `lexer` is a non-const reference; consider const& if the definition does not reseat it.
+
+} // namespace NSQLTranslationV1