diff options
author | vitya-smirnov <vitya-smirnov@yandex-team.com> | 2025-07-15 18:32:56 +0300 |
---|---|---|
committer | vitya-smirnov <vitya-smirnov@yandex-team.com> | 2025-07-15 18:59:24 +0300 |
commit | 9fa5e2a8b51a9c6072fc06bb8cd3ba0994e1cf7e (patch) | |
tree | 41ba4e41729cf88f32cb6ad492b25120b662a2ea | |
parent | 892100046de6ef219e524c90cdd95bc4e81a128a (diff) | |
download | ydb-9fa5e2a8b51a9c6072fc06bb8cd3ba0994e1cf7e.tar.gz |
YQL-19616: Generate Vim SQL syntax highlighting
Introduced Vim syntax highlighting for YQL.
This is a replacement for the existing, almost
entirely hand-written configuration. It uses regexes
generated from the original ANTLR4 grammar.
Currently, only the Default lexer mode is supported.
commit_hash:85fa094593bd9d80373754a492b46ede1a50148d
-rw-r--r-- | yql/essentials/sql/v1/highlight/sql_highlight.cpp | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/highlight/ut/suite.json | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/lexer/regex/generic.cpp | 7 | ||||
-rw-r--r-- | yql/essentials/tools/yql_highlight/generate_vim.cpp | 166 | ||||
-rw-r--r-- | yql/essentials/tools/yql_highlight/generate_vim.h | 11 | ||||
-rw-r--r-- | yql/essentials/tools/yql_highlight/ya.make | 1 | ||||
-rw-r--r-- | yql/essentials/tools/yql_highlight/yql_highlight.cpp | 13 |
7 files changed, 200 insertions, 6 deletions
diff --git a/yql/essentials/sql/v1/highlight/sql_highlight.cpp b/yql/essentials/sql/v1/highlight/sql_highlight.cpp index e35bb5fb736..ff5b06a77ca 100644 --- a/yql/essentials/sql/v1/highlight/sql_highlight.cpp +++ b/yql/essentials/sql/v1/highlight/sql_highlight.cpp @@ -227,16 +227,16 @@ namespace NSQLHighlight { Syntax s = MakeSyntax(grammar); THighlighting h; + h.Units.emplace_back(MakeUnit<EUnitKind::Comment>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Keyword>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Punctuation>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::QuotedIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::BindParamterIdentifier>(s)); - h.Units.emplace_back(MakeUnit<EUnitKind::TypeIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::FunctionIdentifier>(s)); + h.Units.emplace_back(MakeUnit<EUnitKind::TypeIdentifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Identifier>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Literal>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::StringLiteral>(s)); - h.Units.emplace_back(MakeUnit<EUnitKind::Comment>(s)); h.Units.emplace_back(MakeUnit<EUnitKind::Whitespace>(s)); return h; diff --git a/yql/essentials/sql/v1/highlight/ut/suite.json b/yql/essentials/sql/v1/highlight/ut/suite.json index 04e961a6f91..26fb2f3f4f4 100644 --- a/yql/essentials/sql/v1/highlight/ut/suite.json +++ b/yql/essentials/sql/v1/highlight/ut/suite.json @@ -47,11 +47,11 @@ ["SELECT id, alias from users", "KKKKKK#_#II#P#_#IIIII#_#KKKK#_#IIIII"], ["INSERT INTO users (id, alias) VALUES (12, \"tester\")", "KKKKKK#_#KKKK#_#IIIII#_#P#II#P#_#IIIII#P#_#KKKKKK#_#P#LL#P#_#SSSSSSSS#P"], ["SELECT 123467, \"HeLLo, {name}!\", (1 + (5 * 1 / 0)), MIN(identifier) FROM `local/test/space/table` JOIN test;", "KKKKKK#_#LLLLLL#P#_#SSSSSSSSSSSSSSSS#P#_#P#L#_#P#_#P#L#_#P#_#L#_#P#_#L#P#P#P#_#FFF#P#IIIIIIIIII#P#_#KKKK#_#QQQQQQQQQQQQQQQQQQQQQQQQ#_#KKKK#_#IIII#P"], - ["SELECT Bool(phone) FROM customer", "KKKKKK#_#TTTT#P#IIIII#P#_#KKKK#_#IIIIIIII"] + ["SELECT 
Bool(phone) FROM customer", "KKKKKK#_#FFFF#P#IIIII#P#_#KKKK#_#IIIIIIII"] ], "TypeIdentifier": [ ["Bool", "TTTT"], - ["Bool(value)", "TTTT#P#IIIII#P"] + ["Bool(value)", "FFFF#P#IIIII#P"] ], "Identifier": [ ["test", "IIII"] diff --git a/yql/essentials/sql/v1/lexer/regex/generic.cpp b/yql/essentials/sql/v1/lexer/regex/generic.cpp index 83ad5b4155d..926c50dde2c 100644 --- a/yql/essentials/sql/v1/lexer/regex/generic.cpp +++ b/yql/essentials/sql/v1/lexer/regex/generic.cpp @@ -130,7 +130,12 @@ namespace NSQLTranslationV1 { TStringBuilder body; for (const auto& pattern : patterns) { - body << "(" << pattern.Body << ")|"; + TString regex = pattern.Body; + if (pattern.Body.Contains('|')) { + regex.prepend('('); + regex.append(')'); + } + body << regex << "|"; } Y_ENSURE(body.back() == '|'); body.pop_back(); diff --git a/yql/essentials/tools/yql_highlight/generate_vim.cpp b/yql/essentials/tools/yql_highlight/generate_vim.cpp new file mode 100644 index 00000000000..c569769b115 --- /dev/null +++ b/yql/essentials/tools/yql_highlight/generate_vim.cpp @@ -0,0 +1,166 @@ +#include "generate_vim.h" + +#include <contrib/libs/re2/re2/re2.h> + +#include <util/string/builder.h> + +#include <ranges> + +namespace NSQLHighlight { + + namespace { + + bool IsPlain(EUnitKind kind) { + return (kind != EUnitKind::Comment) && + (kind != EUnitKind::StringLiteral) && + (kind != EUnitKind::QuotedIdentifier) && + (kind != EUnitKind::BindParamterIdentifier); + } + + TString ToVim(TString regex) { + static RE2 LikelyUnquotedLParen(R"((^|[^\\])(\())"); + static RE2 LikelyNonGreedyMatch(R"re((^|[^\\])(\*\?))re"); + + // We can leave some capturing groups in case `\\\\(`, + // but it is okay as the goal is to meet the Vim limit. 
+ + Y_ENSURE(!regex.Contains(R"(\\*?)"), regex); + + RE2::GlobalReplace(&regex, LikelyUnquotedLParen, R"(\1%()"); + RE2::GlobalReplace(&regex, LikelyNonGreedyMatch, R"re(\1{-})re"); + + return regex; + } + + TString ToVim(EUnitKind kind, const NSQLTranslationV1::TRegexPattern& pattern) { + TStringBuilder vim; + + vim << R"(")"; + vim << R"(\v)"; + + if (IsPlain(kind)) { + vim << R"(<)"; + } + + if (pattern.IsCaseInsensitive) { + vim << R"(\c)"; + } + + vim << "(" << ToVim(pattern.Body) << ")"; + + if (!pattern.After.empty()) { + vim << "(" << ToVim(pattern.After) << ")@="; + } + + if (IsPlain(kind)) { + vim << R"(>)"; + } + + vim << R"(")"; + + return vim; + } + + TString ToVimName(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return "yqlKeyword"; + case EUnitKind::Punctuation: + return "yqlPunctuation"; + case EUnitKind::QuotedIdentifier: + return "yqlQuotedIdentifier"; + case EUnitKind::BindParamterIdentifier: + return "yqlBindParamterIdentifier"; + case EUnitKind::TypeIdentifier: + return "yqlTypeIdentifier"; + case EUnitKind::FunctionIdentifier: + return "yqlFunctionIdentifier"; + case EUnitKind::Identifier: + return "yqlIdentifier"; + case EUnitKind::Literal: + return "yqlLiteral"; + case EUnitKind::StringLiteral: + return "yqlStringLiteral"; + case EUnitKind::Comment: + return "yqlComment"; + case EUnitKind::Whitespace: + return "yqlWhitespace"; + case EUnitKind::Error: + return "yqlError"; + } + } + + void PrintRules(IOutputStream& out, const TUnit& unit) { + TString name = ToVimName(unit.Kind); + for (const NSQLTranslationV1::TRegexPattern& pattern : unit.Patterns) { + out << "syn match " << ToVimName(unit.Kind) << " " + << ToVim(unit.Kind, pattern) << '\n'; + } + } + + TVector<TStringBuf> ToVimGroups(EUnitKind kind) { + switch (kind) { + case EUnitKind::Keyword: + return {"Keyword"}; + case EUnitKind::Punctuation: + return {"Operator"}; + case EUnitKind::QuotedIdentifier: + return {"Special", "Underlined"}; + case 
EUnitKind::BindParamterIdentifier: + return {"Identifier"}; + case EUnitKind::TypeIdentifier: + return {"Type"}; + case EUnitKind::FunctionIdentifier: + return {"Function"}; + case EUnitKind::Identifier: + return {"Identifier"}; + case EUnitKind::Literal: + return {"Number"}; + case EUnitKind::StringLiteral: + return {"String"}; + case EUnitKind::Comment: + return {"Comment"}; + case EUnitKind::Whitespace: + return {}; + case EUnitKind::Error: + return {}; + } + } + + bool IsIgnored(EUnitKind kind) { + return ToVimGroups(kind).empty(); + } + + } // namespace + + void GenerateVim(IOutputStream& out, const THighlighting& highlighting) { + const auto units = std::ranges::reverse_view(highlighting.Units); + + out << "if exists(\"b:current_syntax\")" << '\n'; + out << " finish" << '\n'; + out << "endif" << '\n'; + out << '\n'; + + for (const TUnit& unit : units) { + if (IsIgnored(unit.Kind)) { + continue; + } + + PrintRules(out, unit); + } + + out << '\n'; + + for (const TUnit& unit : units) { + for (TStringBuf group : ToVimGroups(unit.Kind)) { + out << "highlight default link " << ToVimName(unit.Kind) << " " << group << '\n'; + } + } + + out << '\n'; + + out << "let b:current_syntax = \"yql\"" << '\n'; + out.Flush(); + } + +} // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/generate_vim.h b/yql/essentials/tools/yql_highlight/generate_vim.h new file mode 100644 index 00000000000..54d8e8b41eb --- /dev/null +++ b/yql/essentials/tools/yql_highlight/generate_vim.h @@ -0,0 +1,11 @@ +#pragma once + +#include <yql/essentials/sql/v1/highlight/sql_highlight.h> + +#include <util/stream/output.h> + +namespace NSQLHighlight { + + void GenerateVim(IOutputStream& out, const THighlighting& highlighting); + +} // namespace NSQLHighlight diff --git a/yql/essentials/tools/yql_highlight/ya.make b/yql/essentials/tools/yql_highlight/ya.make index b9f5da4a808..75fb17eff28 100644 --- a/yql/essentials/tools/yql_highlight/ya.make +++ 
b/yql/essentials/tools/yql_highlight/ya.make @@ -8,6 +8,7 @@ PEERDIR( ) SRCS( + generate_vim.cpp yql_highlight.cpp ) diff --git a/yql/essentials/tools/yql_highlight/yql_highlight.cpp b/yql/essentials/tools/yql_highlight/yql_highlight.cpp index 01dd9efe343..5982c7406d4 100644 --- a/yql/essentials/tools/yql_highlight/yql_highlight.cpp +++ b/yql/essentials/tools/yql_highlight/yql_highlight.cpp @@ -1,3 +1,5 @@ +#include "generate_vim.h" + #include <yql/essentials/sql/v1/highlight/sql_highlight_json.h> #include <yql/essentials/sql/v1/highlight/sql_highlight.h> #include <yql/essentials/sql/v1/highlight/sql_highlighter.h> @@ -17,6 +19,12 @@ int RunGenerateJSON() { return 0; } +int RunGenerateVim() { + THighlighting highlighting = MakeHighlighting(); + GenerateVim(Cout, highlighting); + return 0; +} + int RunHighlighter() { THashMap<EUnitKind, NColorizer::EAnsiCode> ColorByKind = { {EUnitKind::Keyword, NColorizer::BLUE}, @@ -53,7 +61,7 @@ int Run(int argc, char* argv[]) { NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default(); opts.AddLongOption('g', "generate", "generate a highlighting configuration") .RequiredArgument("target") - .Choices({"json"}) + .Choices({"json", "vim"}) .StoreResult(&target); opts.SetFreeArgsNum(0); opts.AddHelpOption(); @@ -63,6 +71,9 @@ int Run(int argc, char* argv[]) { if (target == "json") { return RunGenerateJSON(); } + if (target == "vim") { + return RunGenerateVim(); + } Y_ABORT(); } return RunHighlighter(); |