summary refs log tree commit diff stats
diff options
context:
space:
mode:
author vitya-smirnov <[email protected]> 2025-10-09 11:47:11 +0300
committer vitya-smirnov <[email protected]> 2025-10-09 12:29:52 +0300
commit 724570b69c83f07a4402c1e5de524d3a9724374f (patch)
tree 6dbfccadcabc3c1e2b7565e39aa3107d5eaaebe8
parent 03e69055c041c7bd87765d093463ab1941dc30db (diff)
YQL-20496: Improve YQL highlighting
commit_hash:edf72fd8386c41f858f677152612bd58680506a6
-rw-r--r-- yql/essentials/sql/v1/highlight/sql_highlight.cpp 34
-rw-r--r-- yql/essentials/sql/v1/highlight/sql_highlight.h 4
-rw-r--r-- yql/essentials/sql/v1/highlight/sql_highlighter.cpp 1
-rw-r--r-- yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp 2
-rw-r--r-- yql/essentials/sql/v1/highlight/ut/suite.json 15
-rw-r--r-- yql/essentials/sql/v1/lexer/regex/generic.cpp 17
-rw-r--r-- yql/essentials/tools/yql_highlight/generator_highlight_js.cpp 2
-rw-r--r-- yql/essentials/tools/yql_highlight/generator_monarch.cpp 44
-rw-r--r-- yql/essentials/tools/yql_highlight/generator_textmate.cpp 2
-rw-r--r-- yql/essentials/tools/yql_highlight/generator_vim.cpp 4
-rw-r--r-- yql/essentials/tools/yql_highlight/ut/query.yql 15
11 files changed, 125 insertions, 15 deletions
diff --git a/yql/essentials/sql/v1/highlight/sql_highlight.cpp b/yql/essentials/sql/v1/highlight/sql_highlight.cpp
index 72dd095ec3f..3f17934cef4 100644
--- a/yql/essentials/sql/v1/highlight/sql_highlight.cpp
+++ b/yql/essentials/sql/v1/highlight/sql_highlight.cpp
@@ -86,7 +86,7 @@ TUnit MakeUnit<EUnitKind::QuotedIdentifier>(Syntax& s) {
.Kind = EUnitKind::QuotedIdentifier,
.Patterns = {
{s.Get("ID_QUOTED")},
- },
+ {s.Concat({"COMMAT", "ID_PLAIN"})}},
.IsPlain = false,
};
}
@@ -103,6 +103,30 @@ TUnit MakeUnit<EUnitKind::BindParameterIdentifier>(Syntax& s) {
}
template <>
+TUnit MakeUnit<EUnitKind::OptionIdentifier>(Syntax& s) {
+ return {
+ .Kind = EUnitKind::OptionIdentifier,
+ .Patterns = {
+ {
+ .Body = TStringBuilder()
+ << s.Get("ID_PLAIN") << "(\\." << s.Get("ID_PLAIN") << ")?",
+ .Before = TStringBuilder() << "PRAGMA" << s.Get("WS"),
+ .IsCaseInsensitive = true,
+ },
+ {
+ .Body = s.Get("ID_PLAIN"),
+ .Before = TStringBuilder() << "WITH" << s.Get("WS"),
+ .IsCaseInsensitive = true,
+ },
+ {
+ .Body = s.Get("ID_PLAIN"),
+ .After = " ?" + s.Get("EQUALS"),
+ .IsCaseInsensitive = true,
+ }},
+ };
+}
+
+template <>
TUnit MakeUnit<EUnitKind::TypeIdentifier>(Syntax& s) {
TVector<NSQLTranslationV1::TRegexPattern> types;
NJson::TJsonValue json = NJson::ReadJsonFastTree(NResource::Find("types.json"));
@@ -148,6 +172,8 @@ TUnit MakeUnit<EUnitKind::Literal>(Syntax& s) {
{s.Get("REAL")},
{s.Get("INTEGER_VALUE")},
{s.Get("DIGITS")},
+ {.Body = "TRUE", .IsCaseInsensitive = true},
+ {.Body = "FALSE", .IsCaseInsensitive = true},
},
};
}
@@ -218,13 +244,14 @@ THighlighting MakeHighlighting(const NSQLReflect::TLexerGrammar& grammar) {
THighlighting h;
h.Units.emplace_back(MakeUnit<EUnitKind::Comment>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Punctuation>(s));
+ h.Units.emplace_back(MakeUnit<EUnitKind::OptionIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::FunctionIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::TypeIdentifier>(s));
+ h.Units.emplace_back(MakeUnit<EUnitKind::Literal>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Keyword>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::QuotedIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::BindParameterIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Identifier>(s));
- h.Units.emplace_back(MakeUnit<EUnitKind::Literal>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::StringLiteral>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Whitespace>(s));
@@ -248,6 +275,9 @@ void Out<NSQLHighlight::EUnitKind>(IOutputStream& out, NSQLHighlight::EUnitKind
case NSQLHighlight::EUnitKind::BindParameterIdentifier:
out << "bind-parameter-identifier";
break;
+ case NSQLHighlight::EUnitKind::OptionIdentifier:
+ out << "option-identifier";
+ break;
case NSQLHighlight::EUnitKind::TypeIdentifier:
out << "type-identifier";
break;
diff --git a/yql/essentials/sql/v1/highlight/sql_highlight.h b/yql/essentials/sql/v1/highlight/sql_highlight.h
index df11668bbf0..2e8fbc4a533 100644
--- a/yql/essentials/sql/v1/highlight/sql_highlight.h
+++ b/yql/essentials/sql/v1/highlight/sql_highlight.h
@@ -7,9 +7,6 @@
#include <util/generic/vector.h>
#include <util/generic/map.h>
-// TODO(vityaman): Migrate YDB to corrected version
-#define BindParamterIdentifier BindParameterIdentifier // NOLINT
-
namespace NSQLHighlight {
enum class EUnitKind {
@@ -17,6 +14,7 @@ enum class EUnitKind {
Punctuation,
QuotedIdentifier,
BindParameterIdentifier,
+ OptionIdentifier,
TypeIdentifier,
FunctionIdentifier,
Identifier,
diff --git a/yql/essentials/sql/v1/highlight/sql_highlighter.cpp b/yql/essentials/sql/v1/highlight/sql_highlighter.cpp
index bbba982c6f5..4e6ab4fee58 100644
--- a/yql/essentials/sql/v1/highlight/sql_highlighter.cpp
+++ b/yql/essentials/sql/v1/highlight/sql_highlighter.cpp
@@ -20,6 +20,7 @@ THashMap<EUnitKind, TString> NamesByUnitKind = [] {
names[EUnitKind::Punctuation] = "P";
names[EUnitKind::QuotedIdentifier] = "Q";
names[EUnitKind::BindParameterIdentifier] = "B";
+ names[EUnitKind::OptionIdentifier] = "O";
names[EUnitKind::TypeIdentifier] = "T";
names[EUnitKind::FunctionIdentifier] = "F";
names[EUnitKind::Identifier] = "I";
diff --git a/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp b/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp
index bff14bc2e38..a912a5de081 100644
--- a/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp
+++ b/yql/essentials/sql/v1/highlight/sql_highlighter_ut.cpp
@@ -50,6 +50,8 @@ char ToChar(EUnitKind kind) {
return 'Q';
case EUnitKind::BindParameterIdentifier:
return 'B';
+ case NSQLHighlight::EUnitKind::OptionIdentifier:
+ return 'O';
case EUnitKind::TypeIdentifier:
return 'T';
case EUnitKind::FunctionIdentifier:
diff --git a/yql/essentials/sql/v1/highlight/ut/suite.json b/yql/essentials/sql/v1/highlight/ut/suite.json
index 624327e161f..b28bed4f8e2 100644
--- a/yql/essentials/sql/v1/highlight/ut/suite.json
+++ b/yql/essentials/sql/v1/highlight/ut/suite.json
@@ -23,6 +23,18 @@
["-123", "P#LLL"],
["SELECT 123l AS `Int64`, 0b01u AS `Uint32`, 0xFFul AS `Uint64`, 0o7ut AS `Uint8`, 456s AS `Int16`, 1.2345f AS `Float`;", "KKKKKK#_#LLLL#_#KK#_#QQQQQQQ#P#_#LLLLL#_#KK#_#QQQQQQQQ#P#_#LLLLLL#_#KK#_#QQQQQQQQ#P#_#LLLLL#_#KK#_#QQQQQQQ#P#_#LLLL#_#KK#_#QQQQQQQ#P#_#LLLLLLL#_#KK#_#QQQQQQQ#P"]
],
+ "Boolean": [
+ ["true", "LLLL"],
+ ["fAlSe", "LLLLL"]
+ ],
+ "Pragma": [
+ ["PRAGMA Debug", "KKKKKK#_#OOOOO"],
+ ["PRAGMA yt.Pool", "KKKKKK#_#OOOOOOO"],
+ ["PRAGMA Warning('*')", "KKKKKK#_#OOOOOOO#P#SSS#P"]
+ ],
+ "Hint": [
+ ["INSERT INTO t WITH TRUNCATE", "KKKKKK#_#KKKK#_#I#_#KKKK#_#OOOOOOOO"]
+ ],
"Comment": [
["- select", "P#_#KKKKKK"],
["select -- select", "KKKKKK#_#CCCCCCCCC"],
@@ -91,6 +103,7 @@
["`/cluster/database`", "QQQQQQQQQQQQQQQQQQQ"],
["`test`select", "QQQQQQ#KKKKKK"],
["`/cluster", "E#P#IIIIIII"],
- ["`\uD83D\uDE00`", "QQQQQQ"]
+ ["`\uD83D\uDE00`", "QQQQQQ"],
+ ["@tmp", "QQQQ"]
]
}
diff --git a/yql/essentials/sql/v1/lexer/regex/generic.cpp b/yql/essentials/sql/v1/lexer/regex/generic.cpp
index c27eec99b28..3815493c923 100644
--- a/yql/essentials/sql/v1/lexer/regex/generic.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/generic.cpp
@@ -41,26 +41,25 @@ public:
}
while (pos < text.size() && errors < maxErrors) {
- TMaybe<TGenericToken> prev;
TGenericToken next = Match(TStringBuf(text, pos));
size_t skipped = next.Begin;
next.Begin = skipped + pos;
- if (skipped != 0) {
- prev = Match(TStringBuf(text, pos, skipped));
- prev->Begin = pos;
+ const size_t matchPos = pos;
+ while (pos < matchPos + skipped) {
+ TGenericToken prev = Match(TStringBuf(text, pos, skipped));
+ prev.Begin = pos;
+ pos += prev.Content.size();
+ onNext(std::move(prev));
}
- pos += skipped + next.Content.size();
+ pos += next.Content.size();
if (next.Name == TGenericToken::Error) {
errors += 1;
}
- if (prev) {
- onNext(std::move(*prev));
- }
onNext(std::move(next));
}
@@ -81,7 +80,7 @@ private:
TGenericToken Match(TStringBuf prefix) const {
TMaybe<TGenericToken> max;
Match(prefix, [&](TGenericToken&& token) {
- if (max.Empty() || max->Content.size() < token.Content.size()) {
+ if (max.Empty() || (max->Begin + max->Content.size()) < (token.Begin + token.Content.size())) {
max = std::move(token);
}
});
diff --git a/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp b/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp
index 1a04c7d6b4d..41d6f3bcfc3 100644
--- a/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp
+++ b/yql/essentials/tools/yql_highlight/generator_highlight_js.cpp
@@ -18,6 +18,8 @@ TString ToHighlightJSClass(EUnitKind kind) {
return "symbol";
case EUnitKind::BindParameterIdentifier:
return "variable";
+ case EUnitKind::OptionIdentifier:
+ return "variable.constant";
case EUnitKind::TypeIdentifier:
return "type";
case EUnitKind::FunctionIdentifier:
diff --git a/yql/essentials/tools/yql_highlight/generator_monarch.cpp b/yql/essentials/tools/yql_highlight/generator_monarch.cpp
index 8ad28e7d15d..25b2b1c8524 100644
--- a/yql/essentials/tools/yql_highlight/generator_monarch.cpp
+++ b/yql/essentials/tools/yql_highlight/generator_monarch.cpp
@@ -10,6 +10,19 @@
namespace NSQLHighlight {
+template <std::invocable<const TUnit&, const NSQLTranslationV1::TRegexPattern&, size_t> Action>
+void ForEachBeforablePattern(const THighlighting& highlighting, Action action) {
+ for (const TUnit& unit : highlighting.Units) {
+ size_t i = 0;
+ for (const NSQLTranslationV1::TRegexPattern& regex : unit.Patterns) {
+ if (!regex.Before.empty()) {
+ i += 1;
+ action(unit, regex, i);
+ }
+ }
+ }
+}
+
TString ToMonarchRegex(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) {
TStringBuilder regex;
@@ -40,6 +53,8 @@ TString ToMonarchSelector(EUnitKind kind) {
return "string.tablepath";
case EUnitKind::BindParameterIdentifier:
return "variable";
+ case EUnitKind::OptionIdentifier:
+ return "constant";
case EUnitKind::TypeIdentifier:
return "keyword.type";
case EUnitKind::FunctionIdentifier:
@@ -69,6 +84,8 @@ TString ToMonarchStateName(EUnitKind kind) {
return "quotedIdentifier";
case EUnitKind::BindParameterIdentifier:
return "bindParameterIdentifier";
+ case EUnitKind::OptionIdentifier:
+ return "optionIdentifier";
case EUnitKind::TypeIdentifier:
return "typeIdentifier";
case EUnitKind::FunctionIdentifier:
@@ -136,6 +153,14 @@ NJson::TJsonValue ToMonarchMultiLineState(const TUnit& unit, bool ansi) {
return json;
}
+NJson::TJsonValue ToMonarchBeforableState(const TUnit& unit, const NSQLTranslationV1::TRegexPattern& pattern) {
+ NJson::TJsonValue json;
+ json.AppendValue(ToMonarchRegex(unit, pattern));
+ json.AppendValue(ToMonarchSelector(unit.Kind));
+ json.AppendValue("@pop");
+ return NJson::TJsonArray({json});
+}
+
NJson::TJsonValue MonarchEmbeddedState() {
return NJson::TJsonArray{{NJson::TJsonArray{
"([^@]|^)([@]{4})*[@]{2}([@]([^@]|$)|[^@]|$)",
@@ -164,6 +189,15 @@ NJson::TJsonValue ToMonarchWhitespaceState(const THighlighting& highlighting) {
});
});
+ ForEachBeforablePattern(highlighting, [&](const auto& unit, const auto& pattern, auto i) {
+ // Note: Assume that before is always a keyword.
+ json.AppendValue(NJson::TJsonArray{
+ pattern.Before,
+ ToMonarchSelector(EUnitKind::Keyword),
+ "@" + ToMonarchStateName(unit.Kind) + ToString(i),
+ });
+ });
+
return json;
}
@@ -183,6 +217,10 @@ NJson::TJsonValue ToMonarchRootState(const THighlighting& highlighting, bool ans
}
for (const NSQLTranslationV1::TRegexPattern& pattern : *patterns) {
+ if (!pattern.Before.empty()) {
+ continue;
+ }
+
TString regex = ToMonarchRegex(unit, pattern);
json.AppendValue(NJson::TJsonArray{regex, group});
}
@@ -212,6 +250,12 @@ void GenerateMonarch(IOutputStream& out, const THighlighting& highlighting, bool
write_json(ToMonarchStateName(unit.Kind), ToMonarchMultiLineState(unit, ansi));
});
write_json("embedded", MonarchEmbeddedState());
+ ForEachBeforablePattern(highlighting, [&](const auto& unit, const auto& pattern, auto i) {
+ write_json(
+ ToMonarchStateName(unit.Kind) + ToString(i),
+ ToMonarchBeforableState(unit, pattern));
+ });
+
buf.EndObject();
buf.EndObject();
diff --git a/yql/essentials/tools/yql_highlight/generator_textmate.cpp b/yql/essentials/tools/yql_highlight/generator_textmate.cpp
index 3450715ecb6..11f6b86ea35 100644
--- a/yql/essentials/tools/yql_highlight/generator_textmate.cpp
+++ b/yql/essentials/tools/yql_highlight/generator_textmate.cpp
@@ -78,6 +78,8 @@ TString ToTextMateGroup(EUnitKind kind) {
return "string.interpolated";
case EUnitKind::BindParameterIdentifier:
return "variable.parameter";
+ case EUnitKind::OptionIdentifier:
+ return "support.constant";
case EUnitKind::TypeIdentifier:
return "entity.name.type";
case EUnitKind::FunctionIdentifier:
diff --git a/yql/essentials/tools/yql_highlight/generator_vim.cpp b/yql/essentials/tools/yql_highlight/generator_vim.cpp
index 4029844dc0d..a74cadf72bb 100644
--- a/yql/essentials/tools/yql_highlight/generator_vim.cpp
+++ b/yql/essentials/tools/yql_highlight/generator_vim.cpp
@@ -75,6 +75,8 @@ TString ToVimName(EUnitKind kind) {
return "yqlQuotedIdentifier";
case EUnitKind::BindParameterIdentifier:
return "yqlBindParameterIdentifier";
+ case EUnitKind::OptionIdentifier:
+ return "yqlOptionIdentifier";
case EUnitKind::TypeIdentifier:
return "yqlTypeIdentifier";
case EUnitKind::FunctionIdentifier:
@@ -123,6 +125,8 @@ TVector<TStringBuf> ToVimGroups(EUnitKind kind) {
return {"Special", "Underlined"};
case EUnitKind::BindParameterIdentifier:
return {"Define"};
+ case EUnitKind::OptionIdentifier:
+ return {"Keyword"};
case EUnitKind::TypeIdentifier:
return {"Type"};
case EUnitKind::FunctionIdentifier:
diff --git a/yql/essentials/tools/yql_highlight/ut/query.yql b/yql/essentials/tools/yql_highlight/ut/query.yql
index 7fabaae9dee..64d6564f3b8 100644
--- a/yql/essentials/tools/yql_highlight/ut/query.yql
+++ b/yql/essentials/tools/yql_highlight/ut/query.yql
@@ -1,3 +1,10 @@
+USE plato;
+
+PRAGMA AutoCommit;
+PRAGMA TablePathPrefix = 'home/yql';
+PRAGMA Warning('disable', '1101');
+PRAGMA yson.Strict;
+
SELECT 1; -- A single-line comment SELECT 1
select 1; /*
Some SELECT 1
@@ -18,6 +25,14 @@ multiline with double at: @@@@
text@@;
SELECT "foo"u, '[1;2]'y, @@{"a":null}@@j;
+INSERT INTO @tmp WITH TRUNCATE;
+
+CREATE TABLE t (id Uint64 PRIMARY KEY (id)) WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_BY_SIZE = ENABLED,
+ AUTO_PARTITIONING_PARTITION_SIZE_MB = 512
+);
+
SELECT
CAST(123l AS Int64) AS `Int64`,
CAST(0b01u AS Uint32) AS `Uint32`,