diff options
author | vityaman <vityaman.dev@yandex.ru> | 2025-04-23 15:04:48 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-04-23 15:27:38 +0300 |
commit | c222f6f103376934dd705e16bf9a3bad66e44365 (patch) | |
tree | 93b11233a96ce821d90b854bcc1ca550de5cd526 | |
parent | f3f3e173acccdd58c3b3932acaf9319f0511531f (diff) | |
download | ydb-c222f6f103376934dd705e16bf9a3bad66e44365.tar.gz |
YQL-19747 Detect a token at the caret position
When I tried to implement folder and object name completion at `ID_QUOTED`, I faced a problem: I could not detect that the cursor was inside an `ID_QUOTED` token, because `TCompletionInput::Text` was cut at `TCompletionInput::CursorPosition`. Therefore, for the input ``` SELECT * FROM `#` ``` the prefix was ``` SELECT * FROM `# ``` and the lexer failed.
While we actually want to tokenize the whole current statement, `C3` still needs to receive a prefix as input. I tried to tokenize the whole statement, but then on the input `SELECT Optional<#>` I got nothing, because `<>` is a single solid token in `SQL`. The only fix I found is to cut the query down to the prefix that ends at the cursor position.
BTW, the current implementation is not very efficient, as we tokenize the input multiple times; `SplitQueryToStatements` in particular seems heavy. In the future we will parse the whole input anyway, so we will need to design APIs that accept ready token streams (for statement splitting, for example), just so as not to do the work twice.

So I am introducing the following changes:
- [x] Select the whole current statement, not just prefix.
- [x] Find the token at the caret and output no candidates when the caret is inside `STRING_VALUE`, `DIGITS`, and so on.
- [x] Change the `C3` wrapper interface to take `TCompletionInput`, hiding the implementation detail that it runs on a cut prefix.
- [x] Use `#`-annotated queries in unit tests, where `#` marks the cursor position.
- [x] Detect `CaretTokenPosition` -- whether the caret is enclosed within a token or lies between two tokens.
- [x] Ensure that `maxErrors` in `ILexer::Tokenize` is positive. Just a tiny bugfix.
---
- Related to https://github.com/ytsaurus/ytsaurus/pull/1209
- Related to https://github.com/ydb-platform/ydb/issues/9056
- Related to https://github.com/vityaman/ydb/issues/14
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1225
commit_hash:a434b9888ec8a7356247d63d9f1420e256ae4fca
-rw-r--r-- | yql/essentials/parser/common/error.cpp | 1 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3i.h | 4 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/antlr4/c3t.h | 3 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/core/input.h | 12 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete.h | 6 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/sql_complete_ut.cpp | 228 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/local.cpp | 77 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/local.h | 2 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/token.cpp | 60 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/token.h | 24 | ||||
-rw-r--r-- | yql/essentials/sql/v1/complete/syntax/ya.make | 1 |
11 files changed, 303 insertions, 115 deletions
diff --git a/yql/essentials/parser/common/error.cpp b/yql/essentials/parser/common/error.cpp index 9954a037cb4..888af7d677e 100644 --- a/yql/essentials/parser/common/error.cpp +++ b/yql/essentials/parser/common/error.cpp @@ -6,6 +6,7 @@ namespace NAST { : MaxErrors(maxErrors) , NumErrors(0) { + Y_ENSURE(0 < MaxErrors); } IErrorCollector::~IErrorCollector() diff --git a/yql/essentials/sql/v1/complete/antlr4/c3i.h b/yql/essentials/sql/v1/complete/antlr4/c3i.h index 26c71868051..74c7805387f 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3i.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3i.h @@ -2,6 +2,8 @@ #include "defs.h" +#include <yql/essentials/sql/v1/complete/core/input.h> + #include <util/generic/fwd.h> #include <util/generic/string.h> #include <util/generic/vector.h> @@ -36,7 +38,7 @@ namespace NSQLComplete { std::unordered_set<TRuleId> PreferredRules; }; - virtual TC3Candidates Complete(TStringBuf prefix) = 0; + virtual TC3Candidates Complete(TCompletionInput input) = 0; virtual ~IC3Engine() = default; }; diff --git a/yql/essentials/sql/v1/complete/antlr4/c3t.h b/yql/essentials/sql/v1/complete/antlr4/c3t.h index 750da64229c..35b1f714fa7 100644 --- a/yql/essentials/sql/v1/complete/antlr4/c3t.h +++ b/yql/essentials/sql/v1/complete/antlr4/c3t.h @@ -40,7 +40,8 @@ namespace NSQLComplete { CompletionCore.preferredRules = std::move(config.PreferredRules); } - TC3Candidates Complete(TStringBuf prefix) override { + TC3Candidates Complete(TCompletionInput input) override { + auto prefix = input.Text.Head(input.CursorPosition); Assign(prefix); const auto caretTokenIndex = CaretTokenIndex(prefix); auto candidates = CompletionCore.collectCandidates(caretTokenIndex); diff --git a/yql/essentials/sql/v1/complete/core/input.h b/yql/essentials/sql/v1/complete/core/input.h new file mode 100644 index 00000000000..3bb609cbb22 --- /dev/null +++ b/yql/essentials/sql/v1/complete/core/input.h @@ -0,0 +1,12 @@ +#pragma once + +#include <util/generic/string.h> + +namespace 
NSQLComplete { + + struct TCompletionInput { + TStringBuf Text; + size_t CursorPosition = Text.length(); + }; + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/sql_complete.h b/yql/essentials/sql/v1/complete/sql_complete.h index 44bb98c3b21..faea272590a 100644 --- a/yql/essentials/sql/v1/complete/sql_complete.h +++ b/yql/essentials/sql/v1/complete/sql_complete.h @@ -1,5 +1,6 @@ #pragma once +#include <yql/essentials/sql/v1/complete/core/input.h> #include <yql/essentials/sql/v1/complete/name/name_service.h> #include <yql/essentials/sql/v1/lexer/lexer.h> @@ -8,11 +9,6 @@ namespace NSQLComplete { - struct TCompletionInput { - TStringBuf Text; - size_t CursorPosition = Text.length(); - }; - struct TCompletedToken { TStringBuf Content; size_t SourcePosition; diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index d14d7b85442..5ad2f87e867 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -11,6 +11,8 @@ #include <library/cpp/testing/unittest/registar.h> +#include <util/charset/utf8.h> + using namespace NSQLComplete; class TDummyException: public std::runtime_error { @@ -70,12 +72,30 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { return MakeSqlCompletionEngine(std::move(lexer), std::move(service)); } - TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TCompletionInput input) { - return engine->Complete(input).Candidates; + TCompletionInput SharpedInput(TString& text) { + constexpr char delim = '#'; + + size_t pos = text.find_first_of(delim); + if (pos == TString::npos) { + return { + .Text = text, + }; + } + + Y_ENSURE(!TStringBuf(text).Tail(pos + 1).Contains(delim)); + text.erase(std::begin(text) + pos); + return { + .Text = text, + .CursorPosition = pos, + }; + } + + TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TString sharped) { + return 
engine->Complete(SharpedInput(sharped)).Candidates; } - TVector<TCandidate> CompleteTop(size_t limit, ISqlCompletionEngine::TPtr& engine, TCompletionInput input) { - auto candidates = Complete(engine, input); + TVector<TCandidate> CompleteTop(size_t limit, ISqlCompletionEngine::TPtr& engine, TString sharped) { + auto candidates = Complete(engine, sharped); candidates.crop(limit); return candidates; } @@ -120,12 +140,12 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {""}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" "}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {";"}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"; "}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {" ; "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, ""), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, " "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, " "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, ";"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "; "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, " ; "), expected); } Y_UNIT_TEST(Alter) { @@ -146,7 +166,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"ALTER "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "ALTER "), expected); } Y_UNIT_TEST(Create) { @@ -169,7 +189,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "CREATE "), expected); } Y_UNIT_TEST(Delete) { @@ -178,7 +198,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DELETE "}), expected); + 
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "DELETE "), expected); } Y_UNIT_TEST(Drop) { @@ -198,7 +218,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"DROP "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "DROP "), expected); } Y_UNIT_TEST(Explain) { @@ -241,7 +261,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"EXPLAIN "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "EXPLAIN "), expected); } Y_UNIT_TEST(Grant) { @@ -266,7 +286,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"GRANT "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "GRANT "), expected); } Y_UNIT_TEST(Insert) { @@ -276,7 +296,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "INSERT "), expected); } Y_UNIT_TEST(Pragma) { @@ -349,7 +369,28 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT # FROM"), expected); + } + + Y_UNIT_TEST(SelectFrom) { + TVector<TCandidate> expected = { + {Keyword, "ANY"}, + {Keyword, "CALLABLE"}, + {Keyword, "DICT"}, + {Keyword, "ENUM"}, + {Keyword, "FLOW"}, + {Keyword, "LIST"}, + {Keyword, "OPTIONAL"}, + {Keyword, "RESOURCE"}, + {Keyword, "SET"}, + {Keyword, "STRUCT"}, + {Keyword, "TAGGED"}, + {Keyword, "TUPLE"}, + {Keyword, "VARIANT"}, + }; + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM "), expected); } Y_UNIT_TEST(SelectWhere) { @@ -387,7 +428,7 @@ 
Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT * FROM a WHERE "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT * FROM a WHERE "), expected); } Y_UNIT_TEST(Upsert) { @@ -397,7 +438,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"UPSERT "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "UPSERT "), expected); } Y_UNIT_TEST(TypeName) { @@ -420,9 +461,10 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"CREATE TABLE table (id "}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT CAST (1 AS "}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT OPTIONAL<"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "CREATE TABLE table (id "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT CAST (1 AS "), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT OPTIONAL<"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT OPTIONAL<#>"), expected); } Y_UNIT_TEST(TypeNameAsArgument) { @@ -431,13 +473,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { TVector<TCandidate> expected = { {TypeName, "Uint64"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT Nothing(Uint"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT Nothing(Uint"), expected); } { TVector<TCandidate> expected = { {Keyword, "OPTIONAL<"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT Nothing(Option"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT Nothing(Option"), expected); } } @@ -482,7 +524,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { TVector<TCandidate> expected = { {HintName, "XLOCK"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PROCESS my_table USING $udf(TableRows()) WITH "}), expected); + 
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "PROCESS my_table USING $udf(TableRows()) WITH "), expected); } { TVector<TCandidate> expected = { @@ -490,7 +532,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "SCHEMA"}, {HintName, "XLOCK"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"REDUCE my_table WITH "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "REDUCE my_table WITH "), expected); } { TVector<TCandidate> expected = { @@ -498,7 +540,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "SCHEMA"}, {HintName, "XLOCK"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT key FROM my_table WITH "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT key FROM my_table WITH "), expected); } } @@ -510,20 +552,46 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"INSERT INTO my_table WITH "}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "INSERT INTO my_table WITH "), expected); + } + + Y_UNIT_TEST(Enclosed) { + TVector<TCandidate> empty = {}; + + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT \"#\""), empty); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT `#`"), empty); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT 21#21"), empty); + + UNIT_ASSERT(FindPtr(Complete(engine, "SELECT `name`#"), TCandidate{Keyword, "FROM"}) != nullptr); + UNIT_ASSERT(FindPtr(Complete(engine, "SELECT #`name`"), TCandidate{FunctionName, "StartsWith("}) != nullptr); + + UNIT_ASSERT_GT_C(Complete(engine, "SELECT \"a\"#\"b\"").size(), 0, "Between tokens"); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "SELECT `a`#`b`"), empty, "Solid ID_QUOTED"); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "SELECT `a#\\`b`"), empty, "Solid ID_QUOTED"); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "SELECT `a\\#`b`"), empty, "Solid ID_QUOTED"); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "SELECT `a\\`#b`"), empty, "Solid 
ID_QUOTED"); + } + + Y_UNIT_TEST(SemiEnclosed) { + TVector<TCandidate> expected = {}; + + auto engine = MakeSqlCompletionEngineUT(); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT \""), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT `"), expected); } Y_UNIT_TEST(UTF8Wide) { auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"\xF0\x9F\x98\x8A"}).size(), 0); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"编码"}).size(), 0); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "\xF0\x9F\x98\x8A").size(), 0); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "编码").size(), 0); } Y_UNIT_TEST(WordBreak) { auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_GE(Complete(engine, {"SELECT ("}).size(), 29); - UNIT_ASSERT_GE(Complete(engine, {"SELECT (1)"}).size(), 30); - UNIT_ASSERT_GE(Complete(engine, {"SELECT 1;"}).size(), 35); + UNIT_ASSERT_GE(Complete(engine, "SELECT (").size(), 29); + UNIT_ASSERT_GE(Complete(engine, "SELECT (1)").size(), 30); + UNIT_ASSERT_GE(Complete(engine, "SELECT 1;").size(), 35); } Y_UNIT_TEST(Typing) { @@ -543,37 +611,69 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { } } + Y_UNIT_TEST(Tabbing) { + TString query = + "SELECT \n" + " 123467, \"Hello, {name}! 
编码\"}, \n" + " (1 + (5 * 1 / 0)), MIN(identifier), \n" + " Bool(field), Math::Sin(var) \n" + "FROM `local/test/space/table` JOIN test;"; + query += query + ";"; + query += query + ";"; + + auto engine = MakeSqlCompletionEngineUT(); + + const auto* begin = reinterpret_cast<const unsigned char*>(query.c_str()); + const auto* end = reinterpret_cast<const unsigned char*>(begin + query.size()); + const auto* ptr = begin; + + wchar32 rune; + while (ptr < end) { + Y_ENSURE(ReadUTF8CharAndAdvance(rune, ptr, end) == RECODE_OK); + auto completion = engine->Complete({ + .Text = query, + .CursorPosition = static_cast<size_t>(std::distance(begin, ptr)), + }); + Y_DO_NOT_OPTIMIZE_AWAY(completion); + } + } + Y_UNIT_TEST(CaseInsensitivity) { TVector<TCandidate> expected = { {Keyword, "SELECT"}, }; auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"se"}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"sE"}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"Se"}), expected); - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SE"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected); } Y_UNIT_TEST(InvalidStatementsRecovery) { auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_GE(Complete(engine, {"select select; "}).size(), 35); - UNIT_ASSERT_GE(Complete(engine, {"select select;"}).size(), 35); - UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, {"!;"}).size(), 0, "Lexer failing"); + UNIT_ASSERT_GE(Complete(engine, "select select; ").size(), 35); + UNIT_ASSERT_GE(Complete(engine, "select select;").size(), 35); + UNIT_ASSERT_GE(Complete(engine, "#;select select;").size(), 35); + UNIT_ASSERT_GE(Complete(engine, "# ;select select;").size(), 35); + UNIT_ASSERT_GE(Complete(engine, ";#;").size(), 35); + UNIT_ASSERT_GE(Complete(engine, 
"#;;").size(), 35); + UNIT_ASSERT_GE(Complete(engine, ";;#").size(), 35); + UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "!;").size(), 0, "Lexer failing"); } Y_UNIT_TEST(InvalidCursorPosition) { auto engine = MakeSqlCompletionEngineUT(); - UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"", 0})); - UNIT_ASSERT_EXCEPTION(Complete(engine, {"", 1}), yexception); + UNIT_ASSERT_NO_EXCEPTION(engine->Complete({"", 0})); + UNIT_ASSERT_EXCEPTION(engine->Complete({"", 1}), yexception); - UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"s", 0})); - UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"s", 1})); + UNIT_ASSERT_NO_EXCEPTION(engine->Complete({"s", 0})); + UNIT_ASSERT_NO_EXCEPTION(engine->Complete({"s", 1})); - UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"ы", 0})); - UNIT_ASSERT_EXCEPTION(Complete(engine, {"ы", 1}), yexception); - UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"ы", 2})); + UNIT_ASSERT_NO_EXCEPTION(engine->Complete({"ы", 0})); + UNIT_ASSERT_EXCEPTION(engine->Complete({"ы", 1}), yexception); + UNIT_ASSERT_NO_EXCEPTION(engine->Complete({"ы", 2})); } Y_UNIT_TEST(DefaultNameService) { @@ -590,7 +690,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {TypeName, "Unit"}, {TypeName, "Uint16"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT OPTIONAL<U"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT OPTIONAL<U"), expected); } { TVector<TCandidate> expected = { @@ -600,22 +700,22 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {PragmaName, "yson.CastToString"}, {PragmaName, "yson.DisableCastToString"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"PRAGMA yson"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "PRAGMA yson"), expected); } { TVector<TCandidate> expected = { {HintName, "IGNORE_TYPE_V3"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"REDUCE a WITH ig"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "REDUCE a WITH ig"), expected); } } Y_UNIT_TEST(OnFailingNameService) { auto service = MakeHolder<TFailingNameService>(); auto engine = 
MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(service)); - UNIT_ASSERT_EXCEPTION(Complete(engine, {""}), TDummyException); - UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT OPTIONAL<U"}), TDummyException); - UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT CAST (1 AS "}).size(), TDummyException); + UNIT_ASSERT_EXCEPTION(Complete(engine, ""), TDummyException); + UNIT_ASSERT_EXCEPTION(Complete(engine, "SELECT OPTIONAL<U"), TDummyException); + UNIT_ASSERT_EXCEPTION(Complete(engine, "SELECT CAST (1 AS ").size(), TDummyException); } Y_UNIT_TEST(OnSilentNameService) { @@ -623,8 +723,8 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { auto deadlined = MakeDeadlinedNameService(std::move(silent), TDuration::MilliSeconds(1)); auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(deadlined)); - UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT OPTIONAL<U"}), NThreading::TFutureException); - UNIT_ASSERT_EXCEPTION(Complete(engine, {"SELECT OPTIONAL<"}), NThreading::TFutureException); + UNIT_ASSERT_EXCEPTION(Complete(engine, "SELECT OPTIONAL<U"), NThreading::TFutureException); + UNIT_ASSERT_EXCEPTION(Complete(engine, "SELECT OPTIONAL<"), NThreading::TFutureException); } Y_UNIT_TEST(OnFallbackNameService) { @@ -636,9 +736,9 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { auto fallback = MakeFallbackNameService(std::move(primary), std::move(standby)); auto engine = MakeSqlCompletionEngine(MakePureLexerSupplier(), std::move(fallback)); - UNIT_ASSERT_GE(Complete(engine, {"SELECT CAST (1 AS U"}).size(), 6); - UNIT_ASSERT_GE(Complete(engine, {"SELECT CAST (1 AS "}).size(), 47); - UNIT_ASSERT_GE(Complete(engine, {"SELECT "}).size(), 55); + UNIT_ASSERT_GE(Complete(engine, "SELECT CAST (1 AS U").size(), 6); + UNIT_ASSERT_GE(Complete(engine, "SELECT CAST (1 AS ").size(), 47); + UNIT_ASSERT_GE(Complete(engine, "SELECT ").size(), 55); } Y_UNIT_TEST(NameNormalization) { @@ -687,14 +787,14 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "INSERT"}, {Keyword, "SELECT"}, }; - 
UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {""}), expected); + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, ""), expected); } { TVector<TCandidate> expected = { {PragmaName, "DefaultMemoryLimit"}, {PragmaName, "Annotations"}, }; - UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {"PRAGMA yt."}), expected); + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, "PRAGMA yt."), expected); } { TVector<TCandidate> expected = { @@ -705,7 +805,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {TypeName, "Int16"}, {TypeName, "Int8"}, }; - UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SELECT OPTIONAL<I"}), expected); + UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SELECT OPTIONAL<I"), expected); } { TVector<TCandidate> expected = { @@ -718,7 +818,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {FunctionName, "Math::Acos("}, {FunctionName, "Math::Asin("}, }; - UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {"SELECT m"}), expected); + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, "SELECT m"), expected); } { TVector<TCandidate> expected = { @@ -727,7 +827,7 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { {Keyword, "COLUMNS"}, {HintName, "FORCE_INFER_SCHEMA"}, }; - UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, {"SELECT * FROM a WITH "}), expected); + UNIT_ASSERT_VALUES_EQUAL(CompleteTop(expected.size(), engine, "SELECT * FROM a WITH "), expected); } } diff --git a/yql/essentials/sql/v1/complete/syntax/local.cpp b/yql/essentials/sql/v1/complete/syntax/local.cpp index 4b6fac094d5..5d6ca2f9760 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.cpp +++ b/yql/essentials/sql/v1/complete/syntax/local.cpp @@ -1,8 +1,9 @@ #include "local.h" #include "ansi.h" -#include "parser_call_stack.h" #include "grammar.h" +#include "parser_call_stack.h" +#include "token.h" #include <yql/essentials/sql/v1/complete/antlr4/c3i.h> #include <yql/essentials/sql/v1/complete/antlr4/c3t.h> @@ -55,14 +56,22 @@ namespace 
NSQLComplete { } TLocalSyntaxContext Analyze(TCompletionInput input) override { - TStringBuf prefix; - if (!GetC3Prefix(input, &prefix)) { + TCompletionInput statement; + if (!GetStatement(Lexer_, input, statement)) { return {}; } - auto candidates = C3.Complete(prefix); + auto candidates = C3.Complete(statement); - NSQLTranslation::TParsedTokenList tokens = Tokenized(prefix); + TParsedTokenList tokens; + TCaretTokenPosition caret; + if (!TokenizePrefix(statement, tokens, caret)) { + return {}; + } + + if (IsCaretEnslosed(tokens, caret)) { + return {}; + } return { .Keywords = SiftedKeywords(candidates), @@ -96,26 +105,6 @@ namespace NSQLComplete { return GetC3PreferredRules(); } - bool GetC3Prefix(TCompletionInput input, TStringBuf* prefix) { - *prefix = input.Text.Head(input.CursorPosition); - - TVector<TString> statements; - NYql::TIssues issues; - if (!NSQLTranslationV1::SplitQueryToStatements( - TString(*prefix) + (prefix->EndsWith(';') ? ";" : ""), Lexer_, - statements, issues, /* file = */ "", - /* areBlankSkipped = */ false)) { - return false; - } - - if (statements.empty()) { - return true; - } - - *prefix = prefix->Last(statements.back().size()); - return true; - } - TLocalSyntaxContext::TKeywords SiftedKeywords(const TC3Candidates& candidates) { const auto& vocabulary = Grammar->GetVocabulary(); const auto& keywordTokens = Grammar->GetKeywordTokens(); @@ -133,7 +122,7 @@ namespace NSQLComplete { } std::optional<TLocalSyntaxContext::TPragma> PragmaMatch( - const NSQLTranslation::TParsedTokenList& tokens, const TC3Candidates& candidates) { + const TParsedTokenList& tokens, const TC3Candidates& candidates) { if (!AnyOf(candidates.Rules, RuleAdapted(IsLikelyPragmaStack))) { return std::nullopt; } @@ -152,7 +141,7 @@ namespace NSQLComplete { } std::optional<TLocalSyntaxContext::TFunction> FunctionMatch( - const NSQLTranslation::TParsedTokenList& tokens, const TC3Candidates& candidates) { + const TParsedTokenList& tokens, const TC3Candidates& candidates) { if 
(!AnyOf(candidates.Rules, RuleAdapted(IsLikelyFunctionStack))) { return std::nullopt; } @@ -183,31 +172,33 @@ namespace NSQLComplete { }; } - NSQLTranslation::TParsedTokenList Tokenized(const TStringBuf text) { - NSQLTranslation::TParsedTokenList tokens; + bool TokenizePrefix(TCompletionInput input, TParsedTokenList& tokens, TCaretTokenPosition& caret) { NYql::TIssues issues; if (!NSQLTranslation::Tokenize( - *Lexer_, TString(text), /* queryName = */ "", - tokens, issues, /* maxErrors = */ 0)) { - return {}; + *Lexer_, TString(input.Text), /* queryName = */ "", + tokens, issues, /* maxErrors = */ 1)) { + return false; } + Y_ENSURE(!tokens.empty() && tokens.back().Name == "EOF"); tokens.pop_back(); - return tokens; + + caret = CaretTokenPosition(tokens, input.CursorPosition); + tokens.crop(caret.NextTokenIndex + 1); + return true; } - bool EndsWith( - const NSQLTranslation::TParsedTokenList& tokens, - const TVector<TStringBuf>& pattern) { - if (tokens.size() < pattern.size()) { + bool IsCaretEnslosed(const TParsedTokenList& tokens, TCaretTokenPosition caret) { + if (tokens.empty() || caret.PrevTokenIndex != caret.NextTokenIndex) { return false; } - for (yssize_t i = tokens.ysize() - 1, j = pattern.ysize() - 1; 0 <= j; --i, --j) { - if (!pattern[j].empty() && tokens[i].Name != pattern[j]) { - return false; - } - } - return true; + + const auto& token = tokens.back(); + return token.Name == "STRING_VALUE" || + token.Name == "ID_QUOTED" || + token.Name == "DIGIGTS" || + token.Name == "INTEGER_VALUE" || + token.Name == "REAL"; } const ISqlGrammar* Grammar; diff --git a/yql/essentials/sql/v1/complete/syntax/local.h b/yql/essentials/sql/v1/complete/syntax/local.h index 2b926e1296e..28bd283bab1 100644 --- a/yql/essentials/sql/v1/complete/syntax/local.h +++ b/yql/essentials/sql/v1/complete/syntax/local.h @@ -26,7 +26,7 @@ namespace NSQLComplete { TKeywords Keywords; std::optional<TPragma> Pragma; - bool IsTypeName; + bool IsTypeName = false; std::optional<TFunction> 
Function; std::optional<THint> Hint; }; diff --git a/yql/essentials/sql/v1/complete/syntax/token.cpp b/yql/essentials/sql/v1/complete/syntax/token.cpp new file mode 100644 index 00000000000..b8aee3211c6 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/token.cpp @@ -0,0 +1,60 @@ +#include "token.h" + +#include <yql/essentials/core/issue/yql_issue.h> +#include <yql/essentials/sql/v1/lexer/lexer.h> + +namespace NSQLComplete { + + bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output) { + TVector<TString> statements; + NYql::TIssues issues; + if (!NSQLTranslationV1::SplitQueryToStatements( + TString(input.Text) + ";", lexer, + statements, issues, /* file = */ "", + /* areBlankSkipped = */ false)) { + return false; + } + + size_t cursor = 0; + for (const auto& statement : statements) { + if (input.CursorPosition < cursor + statement.size()) { + output = { + .Text = input.Text.SubStr(cursor, statement.size()), + .CursorPosition = input.CursorPosition - cursor, + }; + return true; + } + cursor += statement.size(); + } + + output = input; + return true; + } + + TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition) { + size_t cursor = 0; + for (size_t i = 0; i < tokens.size(); ++i) { + const auto& content = tokens[i].Content; + cursor += content.size(); + if (cursorPosition < cursor) { + return {i, i}; + } else if (cursorPosition == cursor && IsWordBoundary(content.back())) { + return {i, i + 1}; + } + } + return {std::max(tokens.size(), static_cast<size_t>(1)) - 1, tokens.size()}; + } + + bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern) { + if (tokens.size() < pattern.size()) { + return false; + } + for (yssize_t i = tokens.ysize() - 1, j = pattern.ysize() - 1; 0 <= j; --i, --j) { + if (!pattern[j].empty() && tokens[i].Name != pattern[j]) { + return false; + } + } + return true; + } + +} // namespace NSQLComplete diff --git 
a/yql/essentials/sql/v1/complete/syntax/token.h b/yql/essentials/sql/v1/complete/syntax/token.h new file mode 100644 index 00000000000..d1e215285a9 --- /dev/null +++ b/yql/essentials/sql/v1/complete/syntax/token.h @@ -0,0 +1,24 @@ +#pragma once + +#include <yql/essentials/sql/v1/complete/core/input.h> +#include <yql/essentials/sql/v1/complete/text/word.h> + +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace NSQLComplete { + + using NSQLTranslation::TParsedTokenList; + + // `PrevTokenIndex` = `NextTokenIndex`, iff caret is enclosed + struct TCaretTokenPosition { + size_t PrevTokenIndex; + size_t NextTokenIndex; + }; + + bool GetStatement(NSQLTranslation::ILexer::TPtr& lexer, TCompletionInput input, TCompletionInput& output); + + TCaretTokenPosition CaretTokenPosition(const TParsedTokenList& tokens, size_t cursorPosition); + + bool EndsWith(const TParsedTokenList& tokens, const TVector<TStringBuf>& pattern); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/syntax/ya.make b/yql/essentials/sql/v1/complete/syntax/ya.make index 6631a0d9c1d..9e2e908454b 100644 --- a/yql/essentials/sql/v1/complete/syntax/ya.make +++ b/yql/essentials/sql/v1/complete/syntax/ya.make @@ -6,6 +6,7 @@ SRCS( grammar.cpp local.cpp parser_call_stack.cpp + token.cpp ) ADDINCL( |