aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvitya-smirnov <vitya-smirnov@yandex-team.com>2025-07-15 18:32:56 +0300
committervitya-smirnov <vitya-smirnov@yandex-team.com>2025-07-15 18:59:24 +0300
commit9fa5e2a8b51a9c6072fc06bb8cd3ba0994e1cf7e (patch)
tree41ba4e41729cf88f32cb6ad492b25120b662a2ea
parent892100046de6ef219e524c90cdd95bc4e81a128a (diff)
downloadydb-9fa5e2a8b51a9c6072fc06bb8cd3ba0994e1cf7e.tar.gz
YQL-19616: Generate Vim SQL syntax highlighting
Introduced Vim syntax highlighting for YQL. This is a replacement for the existing, almost manually written configuration. It uses regexes generated from the original ANTLR4 grammar. For now, only the Default lexer mode is supported. commit_hash:85fa094593bd9d80373754a492b46ede1a50148d
-rw-r--r--yql/essentials/sql/v1/highlight/sql_highlight.cpp4
-rw-r--r--yql/essentials/sql/v1/highlight/ut/suite.json4
-rw-r--r--yql/essentials/sql/v1/lexer/regex/generic.cpp7
-rw-r--r--yql/essentials/tools/yql_highlight/generate_vim.cpp166
-rw-r--r--yql/essentials/tools/yql_highlight/generate_vim.h11
-rw-r--r--yql/essentials/tools/yql_highlight/ya.make1
-rw-r--r--yql/essentials/tools/yql_highlight/yql_highlight.cpp13
7 files changed, 200 insertions, 6 deletions
diff --git a/yql/essentials/sql/v1/highlight/sql_highlight.cpp b/yql/essentials/sql/v1/highlight/sql_highlight.cpp
index e35bb5fb736..ff5b06a77ca 100644
--- a/yql/essentials/sql/v1/highlight/sql_highlight.cpp
+++ b/yql/essentials/sql/v1/highlight/sql_highlight.cpp
@@ -227,16 +227,16 @@ namespace NSQLHighlight {
Syntax s = MakeSyntax(grammar);
THighlighting h;
+ h.Units.emplace_back(MakeUnit<EUnitKind::Comment>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Keyword>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Punctuation>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::QuotedIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::BindParamterIdentifier>(s));
- h.Units.emplace_back(MakeUnit<EUnitKind::TypeIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::FunctionIdentifier>(s));
+ h.Units.emplace_back(MakeUnit<EUnitKind::TypeIdentifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Identifier>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Literal>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::StringLiteral>(s));
- h.Units.emplace_back(MakeUnit<EUnitKind::Comment>(s));
h.Units.emplace_back(MakeUnit<EUnitKind::Whitespace>(s));
return h;
diff --git a/yql/essentials/sql/v1/highlight/ut/suite.json b/yql/essentials/sql/v1/highlight/ut/suite.json
index 04e961a6f91..26fb2f3f4f4 100644
--- a/yql/essentials/sql/v1/highlight/ut/suite.json
+++ b/yql/essentials/sql/v1/highlight/ut/suite.json
@@ -47,11 +47,11 @@
["SELECT id, alias from users", "KKKKKK#_#II#P#_#IIIII#_#KKKK#_#IIIII"],
["INSERT INTO users (id, alias) VALUES (12, \"tester\")", "KKKKKK#_#KKKK#_#IIIII#_#P#II#P#_#IIIII#P#_#KKKKKK#_#P#LL#P#_#SSSSSSSS#P"],
["SELECT 123467, \"HeLLo, {name}!\", (1 + (5 * 1 / 0)), MIN(identifier) FROM `local/test/space/table` JOIN test;", "KKKKKK#_#LLLLLL#P#_#SSSSSSSSSSSSSSSS#P#_#P#L#_#P#_#P#L#_#P#_#L#_#P#_#L#P#P#P#_#FFF#P#IIIIIIIIII#P#_#KKKK#_#QQQQQQQQQQQQQQQQQQQQQQQQ#_#KKKK#_#IIII#P"],
- ["SELECT Bool(phone) FROM customer", "KKKKKK#_#TTTT#P#IIIII#P#_#KKKK#_#IIIIIIII"]
+ ["SELECT Bool(phone) FROM customer", "KKKKKK#_#FFFF#P#IIIII#P#_#KKKK#_#IIIIIIII"]
],
"TypeIdentifier": [
["Bool", "TTTT"],
- ["Bool(value)", "TTTT#P#IIIII#P"]
+ ["Bool(value)", "FFFF#P#IIIII#P"]
],
"Identifier": [
["test", "IIII"]
diff --git a/yql/essentials/sql/v1/lexer/regex/generic.cpp b/yql/essentials/sql/v1/lexer/regex/generic.cpp
index 83ad5b4155d..926c50dde2c 100644
--- a/yql/essentials/sql/v1/lexer/regex/generic.cpp
+++ b/yql/essentials/sql/v1/lexer/regex/generic.cpp
@@ -130,7 +130,12 @@ namespace NSQLTranslationV1 {
TStringBuilder body;
for (const auto& pattern : patterns) {
- body << "(" << pattern.Body << ")|";
+ TString regex = pattern.Body;
+ if (pattern.Body.Contains('|')) {
+ regex.prepend('(');
+ regex.append(')');
+ }
+ body << regex << "|";
}
Y_ENSURE(body.back() == '|');
body.pop_back();
diff --git a/yql/essentials/tools/yql_highlight/generate_vim.cpp b/yql/essentials/tools/yql_highlight/generate_vim.cpp
new file mode 100644
index 00000000000..c569769b115
--- /dev/null
+++ b/yql/essentials/tools/yql_highlight/generate_vim.cpp
@@ -0,0 +1,166 @@
+#include "generate_vim.h"
+
+#include <contrib/libs/re2/re2/re2.h>
+
+#include <util/string/builder.h>
+
+#include <ranges>
+
+namespace NSQLHighlight {
+
+ namespace {
+
+ bool IsPlain(EUnitKind kind) {
+ return (kind != EUnitKind::Comment) &&
+ (kind != EUnitKind::StringLiteral) &&
+ (kind != EUnitKind::QuotedIdentifier) &&
+ (kind != EUnitKind::BindParamterIdentifier);
+ }
+
+ TString ToVim(TString regex) {
+ static RE2 LikelyUnquotedLParen(R"((^|[^\\])(\())");
+ static RE2 LikelyNonGreedyMatch(R"re((^|[^\\])(\*\?))re");
+
+ // Some capturing groups may be left as is, e.g. in the case of `\\\\(`,
+ // but that is okay, as the goal is just to meet the Vim limit.
+
+ Y_ENSURE(!regex.Contains(R"(\\*?)"), regex);
+
+ RE2::GlobalReplace(&regex, LikelyUnquotedLParen, R"(\1%()");
+ RE2::GlobalReplace(&regex, LikelyNonGreedyMatch, R"re(\1{-})re");
+
+ return regex;
+ }
+
+ TString ToVim(EUnitKind kind, const NSQLTranslationV1::TRegexPattern& pattern) {
+ TStringBuilder vim;
+
+ vim << R"(")";
+ vim << R"(\v)";
+
+ if (IsPlain(kind)) {
+ vim << R"(<)";
+ }
+
+ if (pattern.IsCaseInsensitive) {
+ vim << R"(\c)";
+ }
+
+ vim << "(" << ToVim(pattern.Body) << ")";
+
+ if (!pattern.After.empty()) {
+ vim << "(" << ToVim(pattern.After) << ")@=";
+ }
+
+ if (IsPlain(kind)) {
+ vim << R"(>)";
+ }
+
+ vim << R"(")";
+
+ return vim;
+ }
+
+ TString ToVimName(EUnitKind kind) {
+ switch (kind) {
+ case EUnitKind::Keyword:
+ return "yqlKeyword";
+ case EUnitKind::Punctuation:
+ return "yqlPunctuation";
+ case EUnitKind::QuotedIdentifier:
+ return "yqlQuotedIdentifier";
+ case EUnitKind::BindParamterIdentifier:
+ return "yqlBindParamterIdentifier";
+ case EUnitKind::TypeIdentifier:
+ return "yqlTypeIdentifier";
+ case EUnitKind::FunctionIdentifier:
+ return "yqlFunctionIdentifier";
+ case EUnitKind::Identifier:
+ return "yqlIdentifier";
+ case EUnitKind::Literal:
+ return "yqlLiteral";
+ case EUnitKind::StringLiteral:
+ return "yqlStringLiteral";
+ case EUnitKind::Comment:
+ return "yqlComment";
+ case EUnitKind::Whitespace:
+ return "yqlWhitespace";
+ case EUnitKind::Error:
+ return "yqlError";
+ }
+ }
+
+ void PrintRules(IOutputStream& out, const TUnit& unit) {
+ TString name = ToVimName(unit.Kind);
+ for (const NSQLTranslationV1::TRegexPattern& pattern : unit.Patterns) {
+ out << "syn match " << ToVimName(unit.Kind) << " "
+ << ToVim(unit.Kind, pattern) << '\n';
+ }
+ }
+
+ TVector<TStringBuf> ToVimGroups(EUnitKind kind) {
+ switch (kind) {
+ case EUnitKind::Keyword:
+ return {"Keyword"};
+ case EUnitKind::Punctuation:
+ return {"Operator"};
+ case EUnitKind::QuotedIdentifier:
+ return {"Special", "Underlined"};
+ case EUnitKind::BindParamterIdentifier:
+ return {"Identifier"};
+ case EUnitKind::TypeIdentifier:
+ return {"Type"};
+ case EUnitKind::FunctionIdentifier:
+ return {"Function"};
+ case EUnitKind::Identifier:
+ return {"Identifier"};
+ case EUnitKind::Literal:
+ return {"Number"};
+ case EUnitKind::StringLiteral:
+ return {"String"};
+ case EUnitKind::Comment:
+ return {"Comment"};
+ case EUnitKind::Whitespace:
+ return {};
+ case EUnitKind::Error:
+ return {};
+ }
+ }
+
+ bool IsIgnored(EUnitKind kind) {
+ return ToVimGroups(kind).empty();
+ }
+
+ } // namespace
+
+ void GenerateVim(IOutputStream& out, const THighlighting& highlighting) {
+ const auto units = std::ranges::reverse_view(highlighting.Units);
+
+ out << "if exists(\"b:current_syntax\")" << '\n';
+ out << " finish" << '\n';
+ out << "endif" << '\n';
+ out << '\n';
+
+ for (const TUnit& unit : units) {
+ if (IsIgnored(unit.Kind)) {
+ continue;
+ }
+
+ PrintRules(out, unit);
+ }
+
+ out << '\n';
+
+ for (const TUnit& unit : units) {
+ for (TStringBuf group : ToVimGroups(unit.Kind)) {
+ out << "highlight default link " << ToVimName(unit.Kind) << " " << group << '\n';
+ }
+ }
+
+ out << '\n';
+
+ out << "let b:current_syntax = \"yql\"" << '\n';
+ out.Flush();
+ }
+
+} // namespace NSQLHighlight
diff --git a/yql/essentials/tools/yql_highlight/generate_vim.h b/yql/essentials/tools/yql_highlight/generate_vim.h
new file mode 100644
index 00000000000..54d8e8b41eb
--- /dev/null
+++ b/yql/essentials/tools/yql_highlight/generate_vim.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <yql/essentials/sql/v1/highlight/sql_highlight.h>
+
+#include <util/stream/output.h>
+
+namespace NSQLHighlight {
+
+ void GenerateVim(IOutputStream& out, const THighlighting& highlighting);
+
+} // namespace NSQLHighlight
diff --git a/yql/essentials/tools/yql_highlight/ya.make b/yql/essentials/tools/yql_highlight/ya.make
index b9f5da4a808..75fb17eff28 100644
--- a/yql/essentials/tools/yql_highlight/ya.make
+++ b/yql/essentials/tools/yql_highlight/ya.make
@@ -8,6 +8,7 @@ PEERDIR(
)
SRCS(
+ generate_vim.cpp
yql_highlight.cpp
)
diff --git a/yql/essentials/tools/yql_highlight/yql_highlight.cpp b/yql/essentials/tools/yql_highlight/yql_highlight.cpp
index 01dd9efe343..5982c7406d4 100644
--- a/yql/essentials/tools/yql_highlight/yql_highlight.cpp
+++ b/yql/essentials/tools/yql_highlight/yql_highlight.cpp
@@ -1,3 +1,5 @@
+#include "generate_vim.h"
+
#include <yql/essentials/sql/v1/highlight/sql_highlight_json.h>
#include <yql/essentials/sql/v1/highlight/sql_highlight.h>
#include <yql/essentials/sql/v1/highlight/sql_highlighter.h>
@@ -17,6 +19,12 @@ int RunGenerateJSON() {
return 0;
}
+int RunGenerateVim() {
+ THighlighting highlighting = MakeHighlighting();
+ GenerateVim(Cout, highlighting);
+ return 0;
+}
+
int RunHighlighter() {
THashMap<EUnitKind, NColorizer::EAnsiCode> ColorByKind = {
{EUnitKind::Keyword, NColorizer::BLUE},
@@ -53,7 +61,7 @@ int Run(int argc, char* argv[]) {
NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
opts.AddLongOption('g', "generate", "generate a highlighting configuration")
.RequiredArgument("target")
- .Choices({"json"})
+ .Choices({"json", "vim"})
.StoreResult(&target);
opts.SetFreeArgsNum(0);
opts.AddHelpOption();
@@ -63,6 +71,9 @@ int Run(int argc, char* argv[]) {
if (target == "json") {
return RunGenerateJSON();
}
+ if (target == "vim") {
+ return RunGenerateVim();
+ }
Y_ABORT();
}
return RunHighlighter();