diff options
author | vitya-smirnov <[email protected]> | 2025-10-09 11:47:11 +0300 |
---|---|---|
committer | vitya-smirnov <[email protected]> | 2025-10-09 12:29:52 +0300 |
commit | 724570b69c83f07a4402c1e5de524d3a9724374f (patch) | |
tree | 6dbfccadcabc3c1e2b7565e39aa3107d5eaaebe8 | |
parent | 03e69055c041c7bd87765d093463ab1941dc30db (diff) |
YQL-20496: Improve YQL highlighting
commit_hash:edf72fd8386c41f858f677152612bd58680506a6
11 files changed, 125 insertions, 15 deletions
diff --git a/yql/essentials/sql/v1/highlight/sql_highlight.cpp b/yql/essentials/sql/v1/highlight/sql_highlight.cpp index 72dd095ec3f..3f17934cef4 100644 --- a/yql/essentials/sql/v1/highlight/sql_highlight.cpp +++ b/yql/essentials/sql/v1/highlight/sql_highlight.cpp @@ -86,7 +86,7 @@ TUnit MakeUnit<EUnitKind::QuotedIdentifier>(Syntax& s) { .Kind = EUnitKind::QuotedIdentifier, .Patterns = { {s.Get("ID_QUOTED")}, - }, + {s.Concat({"COMMAT", "ID_PLAIN"})}}, .IsPlain = false, }; } @@ -103,6 +103,30 @@ TUnit MakeUnit<EUnitKind::BindParameterIdentifier>(Syntax& s) { } template <> +TUnit MakeUnit<EUnitKind::OptionIdentifier>(Syntax& s) { + return { + .Kind = EUnitKind::OptionIdentifier, + .Patterns = { + { + .Body = TStringBuilder() + << s.Get("ID_PLAIN") << "(\\." << s.Get("ID_PLAIN") << ")?", + .Before = TStringBuilder() << "PRAGMA" << s.Get("WS"), + .IsCaseInsensitive = true, + }, + { + .Body = s.Get("ID_PLAIN"), + .Before = TStringBuilder() << "WITH" << s.Get("WS"), + .IsCaseInsensitive = true, + }, + { + .Body = s.Get("ID_PLAIN"), + .After = " ?" + s.Get("EQUALS"), + .IsCaseInsensitive = true, + }}, + }; +} + +template <> TUnit MakeUnit<EUnitKind::TypeIdentifier>(Syntax& s) { TVector<NSQLTranslationV1::TRegexPattern> types; NJson::TJsonValue json = NJson::ReadJsonFastTree(NResource::Find("types.json")); @@ -148,6 +172,8 @@ TUnit MakeUnit<EUnitKind::Literal>(Syntax& s) { {s.Get("REAL")}, {s.Get("INTEGER_VALUE")}, {s.Get("DIGITS")}, + {.Body = "TRUE", .IsCaseInsensitive = true}, + {.Body = "FALSE", .IsCaseInsensitive = true}, }, }; } @@ -218,13 +244,14 @@ THighlighting MakeHighlighting(const NSQLReflect::TLexerGrammar& grammar) { THighlighting h; h.Units.emplace_back(MakeUnit<EUnitKind::Comment>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Punctuation>(s)); + h.Units.emplace_back(MakeUnit<EUnitKind::OptionIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::FunctionIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::TypeIdentifier>(s)); + h.Units.emplace_back(MakeUnit<EUnitKind::Literal>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Keyword>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::QuotedIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::BindParameterIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Identifier>(s)); - h.Units.emplace_back(MakeUnit<EUnitKind::Literal>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::StringLiteral>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Whitespace>(s)); @@ -248,6 +275,9 @@ void Out<NSQLHighlight::EUnitKind>(IOutputStream& out, NSQLHighlight::EUnitKind case NSQLHighlight::EUnitKind::BindParameterIdentifier: out << "bind-parameter-identifier"; break; + case NSQLHighlight::EUnitKind::OptionIdentifier: + out << "option-identifier"; + break; case NSQLHighlight::EUnitKind::TypeIdentifier: out << "type-identifier"; break; diff --git a/yql/essentials/sql/v1/highlight/sql_highlight.h b/yql/essentials/sql/v1/highlight/sql_highlight.h index df11668bbf0..2e8fbc4a533 100644 --- a/yql/essentials/sql/v1/highlight/sql_highlight.h +++ b/yql/essentials/sql/v1/highlight/sql_highlight.h @@ -7,9 +7,6 @@ #include <util/generic/vector.h> #include <util/generic/map.h> -// TODO(vityaman): Migrate YDB to corrected version -#define BindParamterIdentifier BindParameterIdentifier // NOLINT - namespace NSQLHighlight { enum class EUnitKind { @@ -17,6 +14,7 @@ enum class EUnitKind { Punctuation, QuotedIdentifier, BindParameterIdentifier, + OptionIdentifier, TypeIdentifier, FunctionIdentifier, Identifier, diff --git a/yql/essentials/sql/v1/highlight/sql_highlighter.cpp b/yql/essentials/sql/v1/highlight/sql_highlighter.cpp index bbba982c6f5..4e6ab4fee58 100644 --- a/yql/essentials/sql/v1/highlight/sql_highlighter.cpp +++ b/yql/essentials/sql/v1/highlight/sql_highlighter.cpp @@ -20,6 +20,7 @@ THashMap<EUnitKind, TString> NamesByUnitKind = [] { names[EUnitKind::Punctuation] = "P"; names[EUnitKind::QuotedIdentifier] = "Q"; names[EUnitKind::BindParameterIdentifier] = "B"; + names[EUnitKind::OptionIdentifier] = "O"; names[EUnitKind::TypeIdentifier] = "T"; names[EUnitKind::FunctionIdentifier] = "F"; names[EUnitKind::Identifier] = "I"; diff --git a/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp b/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp index bff14bc2e38..a912a5de081 100644 --- a/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp +++ b/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp @@ -50,6 +50,8 @@ char ToChar(EUnitKind kind) { return 'Q'; case EUnitKind::BindParameterIdentifier: return 'B'; + case NSQLHighlight::EUnitKind::OptionIdentifier: + return 'O'; case EUnitKind::TypeIdentifier: return 'T'; case EUnitKind::FunctionIdentifier: diff --git a/yql/essentials/sql/v1/highlight/ut/suite.json b/yql/essentials/sql/v1/highlight/ut/suite.json index 624327e161f..b28bed4f8e2 100644 --- a/yql/essentials/sql/v1/highlight/ut/suite.json +++ b/yql/essentials/sql/v1/highlight/ut/suite.json @@ -23,6 +23,18 @@ ["-123", "P#LLL"], ["SELECT 123l AS `Int64`, 0b01u AS `Uint32`, 0xFFul AS `Uint64`, 0o7ut AS `Uint8`, 456s AS `Int16`, 1.2345f AS `Float`;", "KKKKKK#_#LLLL#_#KK#_#QQQQQQQ#P#_#LLLLL#_#KK#_#QQQQQQQQ#P#_#LLLLLL#_#KK#_#QQQQQQQQ#P#_#LLLLL#_#KK#_#QQQQQQQ#P#_#LLLL#_#KK#_#QQQQQQQ#P#_#LLLLLLL#_#KK#_#QQQQQQQ#P"] ], + "Boolean": [ + ["true", "LLLL"], + ["fAlSe", "LLLLL"] + ], + "Pragma": [ + ["PRAGMA Debug", "KKKKKK#_#OOOOO"], + ["PRAGMA yt.Pool", "KKKKKK#_#OOOOOOO"], + ["PRAGMA Warning('*')", "KKKKKK#_#OOOOOOO#P#SSS#P"] + ], + "Hint": [ + ["INSERT INTO t WITH TRUNCATE", "KKKKKK#_#KKKK#_#I#_#KKKK#_#OOOOOOOO"] + ], "Comment": [ ["- select", "P#_#KKKKKK"], ["select -- select", "KKKKKK#_#CCCCCCCCC"], @@ -91,6 +103,7 @@ ["`/cluster/database`", "QQQQQQQQQQQQQQQQQQQ"], ["`test`select", "QQQQQQ#KKKKKK"], ["`/cluster", "E#P#IIIIIII"], - ["`\uD83D\uDE00`", "QQQQQQ"] + ["`\uD83D\uDE00`", "QQQQQQ"], + ["@tmp", "QQQQ"] ] } diff --git a/yql/essentials/sql/v1/lexer/regex/generic.cpp b/yql/essentials/sql/v1/lexer/regex/generic.cpp index c27eec99b28..3815493c923 100644 --- a/yql/essentials/sql/v1/lexer/regex/generic.cpp +++ b/yql/essentials/sql/v1/lexer/regex/generic.cpp @@ -41,26 +41,25 @@ public: } while (pos < text.size() && errors < maxErrors) { - TMaybe<TGenericToken> prev; TGenericToken next = Match(TStringBuf(text, pos)); size_t skipped = next.Begin; next.Begin = skipped + pos; - if (skipped != 0) { - prev = Match(TStringBuf(text, pos, skipped)); - prev->Begin = pos; + const size_t matchPos = pos; + while (pos < matchPos + skipped) { + TGenericToken prev = Match(TStringBuf(text, pos, skipped)); + prev.Begin = pos; + pos += prev.Content.size(); + onNext(std::move(prev)); } - pos += skipped + next.Content.size(); + pos += next.Content.size(); if (next.Name == TGenericToken::Error) { errors += 1; } - if (prev) { - onNext(std::move(*prev)); - } onNext(std::move(next)); } @@ -81,7 +80,7 @@ private: TGenericToken Match(TStringBuf prefix) const { TMaybe<TGenericToken> max; Match(prefix, [&](TGenericToken&& token) { - if (max.Empty() || max->Content.size() < token.Content.size()) { + if (max.Empty() || (max->Begin + max->Content.size()) < (token.Begin + token.Content.size())) { max = std::move(token); } }); diff --git a/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp b/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp index 1a04c7d6b4d..41d6f3bcfc3 100644 --- a/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp +++ b/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp @@ -18,6 +18,8 @@ TString ToHighlightJSClass(EUnitKind kind) { return "symbol"; case EUnitKind::BindParameterIdentifier: return "variable"; + case EUnitKind::OptionIdentifier: + return "variable.constant"; case EUnitKind::TypeIdentifier: return "type"; case EUnitKind::FunctionIdentifier: diff --git a/yql/essentials/tools/yql_highlight/generator_monarch.cpp b/yql/essentials/tools/yql_highlight/generator_monarch.cpp index 8ad28e7d15d..25b2b1c8524 100644 --- a/yql/essentials/tools/yql_highlight/generator_monarch.cpp +++ b/yql/essentials/tools/yql_highlight/generator_monarch.cpp @@ -10,6 +10,19 @@ namespace NSQLHighlight { +template <std::invocable<const TUnit&, const NSQLTranslationV1::TRegexPattern&, size_t> Action> +void ForEachBeforablePattern(const THighlighting& highlighting, Action action) { + for (const TUnit& unit : highlighting.Units) { + size_t i = 0; + for (const NSQLTranslationV1::TRegexPattern& regex : unit.Patterns) { + if (!regex.Before.empty()) { + i += 1; + action(unit, regex, i); + } + } + } +} + TString ToMonarchRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { TStringBuilder regex; @@ -40,6 +53,8 @@ TString ToMonarchSelector(EUnitKind kind) { return "string.tablepath"; case EUnitKind::BindParameterIdentifier: return "variable"; + case EUnitKind::OptionIdentifier: + return "constant"; case EUnitKind::TypeIdentifier: return "keyword.type"; case EUnitKind::FunctionIdentifier: @@ -69,6 +84,8 @@ TString ToMonarchStateName(EUnitKind kind) { return "quotedIdentifier"; case EUnitKind::BindParameterIdentifier: return "bindParameterIdentifier"; + case EUnitKind::OptionIdentifier: + return "optionIdentifier"; case EUnitKind::TypeIdentifier: return "typeIdentifier"; case EUnitKind::FunctionIdentifier: @@ -136,6 +153,14 @@ NJson::TJsonValue ToMonarchMultiLineState(const TUnit& unit, bool ansi) { return json; } +NJson::TJsonValue ToMonarchBeforableState(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) { + NJson::TJsonValue json; + json.AppendValue(ToMonarchRegex(unit, pattern)); + json.AppendValue(ToMonarchSelector(unit.Kind)); + json.AppendValue("@pop"); + return NJson::TJsonArray({json}); +} + NJson::TJsonValue MonarchEmbeddedState() { return NJson::TJsonArray{{NJson::TJsonArray{ "([^@]|^)([@]{4})*[@]{2}([@]([^@]|$)|[^@]|$)", @@ -164,6 +189,15 @@ NJson::TJsonValue ToMonarchWhitespaceState(const THighlighting& highlighting) { }); }); + ForEachBeforablePattern(highlighting, [&](const auto& unit, const auto& pattern, auto i) { + // Note: Assume that before is always a keyword. + json.AppendValue(NJson::TJsonArray{ + pattern.Before, + ToMonarchSelector(EUnitKind::Keyword), + "@" + ToMonarchStateName(unit.Kind) + ToString(i), + }); + }); + return json; } @@ -183,6 +217,10 @@ NJson::TJsonValue ToMonarchRootState(const THighlighting& highlighting, bool ans } for (const NSQLTranslationV1::TRegexPattern& pattern : *patterns) { + if (!pattern.Before.empty()) { + continue; + } + TString regex = ToMonarchRegex(unit, pattern); json.AppendValue(NJson::TJsonArray{regex, group}); } @@ -212,6 +250,12 @@ void GenerateMonarch(IOutputStream& out, const THighlighting& highlighting, bool write_json(ToMonarchStateName(unit.Kind), ToMonarchMultiLineState(unit, ansi)); }); write_json("embedded", MonarchEmbeddedState()); + ForEachBeforablePattern(highlighting, [&](const auto& unit, const auto& pattern, auto i) { + write_json( + ToMonarchStateName(unit.Kind) + ToString(i), + ToMonarchBeforableState(unit, pattern)); + }); + buf.EndObject(); buf.EndObject(); diff --git a/yql/essentials/tools/yql_highlight/generator_textmate.cpp b/yql/essentials/tools/yql_highlight/generator_textmate.cpp index 3450715ecb6..11f6b86ea35 100644 --- a/yql/essentials/tools/yql_highlight/generator_textmate.cpp +++ b/yql/essentials/tools/yql_highlight/generator_textmate.cpp @@ -78,6 +78,8 @@ TString ToTextMateGroup(EUnitKind kind) { return "string.interpolated"; case EUnitKind::BindParameterIdentifier: return "variable.parameter"; + case EUnitKind::OptionIdentifier: + return "support.constant"; case EUnitKind::TypeIdentifier: return "entity.name.type"; case EUnitKind::FunctionIdentifier: diff --git a/yql/essentials/tools/yql_highlight/generator_vim.cpp b/yql/essentials/tools/yql_highlight/generator_vim.cpp index 4029844dc0d..a74cadf72bb 100644 --- a/yql/essentials/tools/yql_highlight/generator_vim.cpp +++ b/yql/essentials/tools/yql_highlight/generator_vim.cpp @@ -75,6 +75,8 @@ TString ToVimName(EUnitKind kind) { return "yqlQuotedIdentifier"; case EUnitKind::BindParameterIdentifier: return "yqlBindParameterIdentifier"; + case EUnitKind::OptionIdentifier: + return "yqlOptionIdentifier"; case EUnitKind::TypeIdentifier: return "yqlTypeIdentifier"; case EUnitKind::FunctionIdentifier: @@ -123,6 +125,8 @@ TVector<TStringBuf> ToVimGroups(EUnitKind kind) { return {"Special", "Underlined"}; case EUnitKind::BindParameterIdentifier: return {"Define"}; + case EUnitKind::OptionIdentifier: + return {"Keyword"}; case EUnitKind::TypeIdentifier: return {"Type"}; case EUnitKind::FunctionIdentifier: diff --git a/yql/essentials/tools/yql_highlight/ut/query.yql b/yql/essentials/tools/yql_highlight/ut/query.yql index 7fabaae9dee..64d6564f3b8 100644 --- a/yql/essentials/tools/yql_highlight/ut/query.yql +++ b/yql/essentials/tools/yql_highlight/ut/query.yql @@ -1,3 +1,10 @@ +USE plato; + +PRAGMA AutoCommit; +PRAGMA TablePathPrefix = 'home/yql'; +PRAGMA Warning('disable', '1101'); +PRAGMA yson.Strict; + SELECT 1; -- A single-line comment SELECT 1 select 1; /* Some SELECT 1 @@ -18,6 +25,14 @@ multiline with double at: @@@@ text@@; SELECT "foo"u, '[1;2]'y, @@{"a":null}@@j; +INSERT INTO @tmp WITH TRUNCATE; + +CREATE TABLE t (id Uint64 PRIMARY KEY (id)) WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_BY_SIZE = ENABLED, + AUTO_PARTITIONING_PARTITION_SIZE_MB = 512 +); + SELECT CAST(123l AS Int64) AS `Int64`, CAST(0b01u AS Uint32) AS `Uint32`, |