summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/lexer/regex/generic.cpp
diff options
context:
space:
mode:
authorvitya-smirnov <[email protected]>2025-08-01 13:53:31 +0300
committervitya-smirnov <[email protected]>2025-08-01 14:16:56 +0300
commit3b47f6e69ae8534f4595086c40faf14fb3a2661a (patch)
tree60414a19794fcfd2f168ffe44e5278c1d03dd671 /yql/essentials/sql/v1/lexer/regex/generic.cpp
parent0e455c45d67077af33f521df4382c3ca80be4e1b (diff)
YQL-19616: Generate YQLs syntax highlighting
- Support `Before` in core `TRegexPattern` and `IGenericLexer`. - Added `Name` and `Extension` to core `THighlighting`. - Added `THighlighting` for `YQLs` factory method. - Added `--language` option to `yql_highlight`. - Added `artifact` targets for `YQLs`. Yes, using the `NSQLTranslation::THighlighting` for `YQLs` is not correct, but it is much simpler than generalizing this infrastructure just for `YQLs`. So this is a trade-off between development time and clean code. Results: - JetBrains: https://nda.ya.ru/t/PXkZVE8m7H5wHS. - Vim: https://nda.ya.ru/t/Am-6ZHQa7H5wJi. - TextMate: https://nda.ya.ru/t/wH0YggAf7H5wKw. - yql_highlight: https://nda.ya.ru/t/3FaCm57q7H7QSF. commit_hash:f0e1abb8e7f1b083df531d761b357330bd514cb0
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/generic.cpp')
-rw-r--r--yql/essentials/sql/v1/lexer/regex/generic.cpp37
1 files changed, 26 insertions, 11 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/generic.cpp b/yql/essentials/sql/v1/lexer/regex/generic.cpp
index 926c50dde2c..3603a31f690 100644
--- a/yql/essentials/sql/v1/lexer/regex/generic.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/generic.cpp
@@ -41,16 +41,27 @@ namespace NSQLTranslationV1 {
}
while (pos < text.size() && errors < maxErrors) {
- TGenericToken matched = Match(TStringBuf(text, pos));
- matched.Begin = pos;
+ TMaybe<TGenericToken> prev;
+ TGenericToken next = Match(TStringBuf(text, pos));
- pos += matched.Content.size();
+ size_t skipped = next.Begin;
+ next.Begin = skipped + pos;
- if (matched.Name == TGenericToken::Error) {
+ if (skipped != 0) {
+ prev = Match(TStringBuf(text, pos, skipped));
+ prev->Begin = pos;
+ }
+
+ pos += skipped + next.Content.size();
+
+ if (next.Name == TGenericToken::Error) {
errors += 1;
}
- onNext(std::move(matched));
+ if (prev) {
+ onNext(std::move(*prev));
+ }
+ onNext(std::move(next));
}
if (errors == maxErrors) {
@@ -100,15 +111,18 @@ namespace NSQLTranslationV1 {
RE2::Options options;
options.set_case_sensitive(!regex.IsCaseInsensitive);
- return [bodyRe = MakeAtomicShared<RE2>(regex.Body, options),
+ return [beforeRe = MakeAtomicShared<RE2>(regex.Before, options),
+ bodyRe = MakeAtomicShared<RE2>(regex.Body, options),
afterRe = MakeAtomicShared<RE2>(regex.After, options),
name = std::move(name)](TStringBuf prefix) -> TMaybe<TGenericToken> {
- TMaybe<TStringBuf> body, after;
- if ((body = Match(prefix, *bodyRe)) &&
- (after = Match(prefix.Tail(body->size()), *afterRe))) {
+ TMaybe<TStringBuf> before, body, after;
+ if ((before = Match(prefix, *beforeRe)) &&
+ (body = Match(prefix.Tail(before->size()), *bodyRe)) &&
+ (after = Match(prefix.Tail(before->size() + body->size()), *afterRe))) {
return TGenericToken{
.Name = name,
.Content = *body,
+ .Begin = before->size(),
};
}
return Nothing();
@@ -120,8 +134,8 @@ namespace NSQLTranslationV1 {
const TRegexPattern& sample = patterns.back();
Y_ENSURE(AllOf(patterns, [&](const TRegexPattern& pattern) {
- return std::tie(pattern.After, pattern.IsCaseInsensitive) ==
- std::tie(sample.After, sample.IsCaseInsensitive);
+ return std::tie(pattern.After, pattern.Before, pattern.IsCaseInsensitive) ==
+ std::tie(sample.After, sample.Before, sample.IsCaseInsensitive);
}));
Sort(patterns, [](const TRegexPattern& lhs, const TRegexPattern& rhs) {
@@ -143,6 +157,7 @@ namespace NSQLTranslationV1 {
return TRegexPattern{
.Body = std::move(body),
.After = sample.After,
+ .Before = sample.Before,
.IsCaseInsensitive = sample.IsCaseInsensitive,
};
}