diff options
author | vitya-smirnov <[email protected]> | 2025-08-01 13:53:31 +0300 |
---|---|---|
committer | vitya-smirnov <[email protected]> | 2025-08-01 14:16:56 +0300 |
commit | 3b47f6e69ae8534f4595086c40faf14fb3a2661a (patch) | |
tree | 60414a19794fcfd2f168ffe44e5278c1d03dd671 /yql/essentials/sql/v1/lexer/regex/generic.cpp | |
parent | 0e455c45d67077af33f521df4382c3ca80be4e1b (diff) |
YQL-19616: Generate YQLs syntax highlighting
- Support `Before` in core `TRegexPattern` and `IGenericLexer`.
- Added `Name` and `Extension` to core `THighlighting`.
- Added a `THighlighting` factory method for `YQLs`.
- Added `--language` option to `yql_highlight`.
- Added `artifact` targets for `YQLs`.
Yes, using `NSQLTranslation::THighlighting` for
`YQLs` is not correct, but it is much simpler than generalizing
this infrastructure just for `YQLs`. So this is a
trade-off between development time and clean code.
Results:
- JetBrains: https://nda.ya.ru/t/PXkZVE8m7H5wHS.
- Vim: https://nda.ya.ru/t/Am-6ZHQa7H5wJi.
- TextMate: https://nda.ya.ru/t/wH0YggAf7H5wKw.
- yql_highlight: https://nda.ya.ru/t/3FaCm57q7H7QSF.
commit_hash:f0e1abb8e7f1b083df531d761b357330bd514cb0
Diffstat (limited to 'yql/essentials/sql/v1/lexer/regex/generic.cpp')
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/generic.cpp | 37 |
1 files changed, 26 insertions, 11 deletions
diff --git a/yql/essentials/sql/v1/lexer/regex/generic.cpp b/yql/essentials/sql/v1/lexer/regex/generic.cpp index 926c50dde2c..3603a31f690 100644 --- a/yql/essentials/sql/v1/lexer/regex/generic.cpp +++ b/yql/essentials/sql/v1/lexer/regex/generic.cpp @@ -41,16 +41,27 @@ namespace NSQLTranslationV1 { } while (pos < text.size() && errors < maxErrors) { - TGenericToken matched = Match(TStringBuf(text, pos)); - matched.Begin = pos; + TMaybe<TGenericToken> prev; + TGenericToken next = Match(TStringBuf(text, pos)); - pos += matched.Content.size(); + size_t skipped = next.Begin; + next.Begin = skipped + pos; - if (matched.Name == TGenericToken::Error) { + if (skipped != 0) { + prev = Match(TStringBuf(text, pos, skipped)); + prev->Begin = pos; + } + + pos += skipped + next.Content.size(); + + if (next.Name == TGenericToken::Error) { errors += 1; } - onNext(std::move(matched)); + if (prev) { + onNext(std::move(*prev)); + } + onNext(std::move(next)); } if (errors == maxErrors) { @@ -100,15 +111,18 @@ namespace NSQLTranslationV1 { RE2::Options options; options.set_case_sensitive(!regex.IsCaseInsensitive); - return [bodyRe = MakeAtomicShared<RE2>(regex.Body, options), + return [beforeRe = MakeAtomicShared<RE2>(regex.Before, options), + bodyRe = MakeAtomicShared<RE2>(regex.Body, options), afterRe = MakeAtomicShared<RE2>(regex.After, options), name = std::move(name)](TStringBuf prefix) -> TMaybe<TGenericToken> { - TMaybe<TStringBuf> body, after; - if ((body = Match(prefix, *bodyRe)) && - (after = Match(prefix.Tail(body->size()), *afterRe))) { + TMaybe<TStringBuf> before, body, after; + if ((before = Match(prefix, *beforeRe)) && + (body = Match(prefix.Tail(before->size()), *bodyRe)) && + (after = Match(prefix.Tail(before->size() + body->size()), *afterRe))) { return TGenericToken{ .Name = name, .Content = *body, + .Begin = before->size(), }; } return Nothing(); @@ -120,8 +134,8 @@ namespace NSQLTranslationV1 { const TRegexPattern& sample = patterns.back(); Y_ENSURE(AllOf(patterns, 
[&](const TRegexPattern& pattern) { - return std::tie(pattern.After, pattern.IsCaseInsensitive) == - std::tie(sample.After, sample.IsCaseInsensitive); + return std::tie(pattern.After, pattern.Before, pattern.IsCaseInsensitive) == + std::tie(sample.After, sample.Before, sample.IsCaseInsensitive); })); Sort(patterns, [](const TRegexPattern& lhs, const TRegexPattern& rhs) { @@ -143,6 +157,7 @@ namespace NSQLTranslationV1 { return TRegexPattern{ .Body = std::move(body), .After = sample.After, + .Before = sample.Before, .IsCaseInsensitive = sample.IsCaseInsensitive, }; } |