diff options
author | vityaman <[email protected]> | 2025-04-30 12:58:42 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-04-30 13:22:03 +0300 |
commit | 92d6162267a49773e5f40d0a0130edf6ca27624b (patch) | |
tree | 0cb3ed3ba1de7d3b1cb153965eaa260f6788a892 /yql/essentials/sql | |
parent | e4b9d7d82d82678e9450fe7085a94149867613a5 (diff) |
YQL-19747 Introduce Union and split Static NameServices
Essentially, we need the `UnionNameService` to integrate dynamic name services into `sql/v1/complete`. This decorator queries multiple name services and merges their responses. We will use it to unite the static `NameService`s with the `SchemaNameService` (and the `ClusterNameService`, and so on).
It does not handle exceptions: if any child lookup fails, the whole request fails. Clients should supply robust children (a `SwallowingNameService` will be added later to return an empty `NameResponse` on errors, providing best-effort dynamic object loading).
`StaticNameService` was split into micro-`NameService`s :)
`NameConstraints` is extracted to provide name qualification via the `Qualified` and `Unqualified` methods. This is needed because, depending on the context, a `NameService` can return unqualified names (for example, on `PRAGMA yt.#`). Since the internal indexes for scanning and ranking are built on a sorted list of fully-qualified names, `Ranking` needs a way to obtain fully-qualified names, which it now does via `NameConstraints`.
This design also potentially lets us improve the internal indexes by partitioning them by namespace. The other option was to make `PragmaName` and `FunctionName` more structured by adding a separate field for the namespace, but it seems to me that this would force more parsing when converting index results into a `Name`. Anyway, this is an internal component, so it can be changed if needed. I still have doubts about this decision, because a structured `PragmaName { Namespace, Identifier }` seems cleaner, and there should be no noticeable overhead thanks to COW strings.
---
Pull Request resolved: https://github.com/ytsaurus/ytsaurus/pull/1252
commit_hash:e8a1fbb17e2ca3ffe72aafbea943a38624d74491
Diffstat (limited to 'yql/essentials/sql')
11 files changed, 328 insertions, 77 deletions
diff --git a/yql/essentials/sql/v1/complete/name/service/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/name_service.cpp new file mode 100644 index 00000000000..2738905e5a3 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/name_service.cpp @@ -0,0 +1,64 @@ +#include "name_service.h" + +namespace NSQLComplete { + + namespace { + + void SetPrefix(TString& name, const TStringBuf delimeter, const TNamespaced& namespaced) { + if (namespaced.Namespace.empty()) { + return; + } + + name.prepend(delimeter); + name.prepend(namespaced.Namespace); + } + + void FixPrefix(TString& name, const TStringBuf delimeter, const TNamespaced& namespaced) { + if (namespaced.Namespace.empty()) { + return; + } + + name.remove(0, namespaced.Namespace.size() + delimeter.size()); + } + + } // namespace + + TGenericName TNameConstraints::Qualified(TGenericName unqualified) const { + return std::visit([&](auto&& name) -> TGenericName { + using T = std::decay_t<decltype(name)>; + if constexpr (std::is_same_v<T, TPragmaName>) { + SetPrefix(name.Indentifier, ".", *Pragma); + } else if constexpr (std::is_same_v<T, TFunctionName>) { + SetPrefix(name.Indentifier, "::", *Function); + } + return name; + }, std::move(unqualified)); + } + + TGenericName TNameConstraints::Unqualified(TGenericName qualified) const { + return std::visit([&](auto&& name) -> TGenericName { + using T = std::decay_t<decltype(name)>; + if constexpr (std::is_same_v<T, TPragmaName>) { + FixPrefix(name.Indentifier, ".", *Pragma); + } else if constexpr (std::is_same_v<T, TFunctionName>) { + FixPrefix(name.Indentifier, "::", *Function); + } + return name; + }, std::move(qualified)); + } + + TVector<TGenericName> TNameConstraints::Qualified(TVector<TGenericName> unqualified) const { + for (auto& name : unqualified) { + name = Qualified(std::move(name)); + } + return unqualified; + } + + TVector<TGenericName> TNameConstraints::Unqualified(TVector<TGenericName> qualified) const { + for (auto& name : 
qualified) { + name = Unqualified(std::move(name)); + } + return qualified; + } + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/name_service.h b/yql/essentials/sql/v1/complete/name/service/name_service.h index d71b2518bd0..7386687c669 100644 --- a/yql/essentials/sql/v1/complete/name/service/name_service.h +++ b/yql/essentials/sql/v1/complete/name/service/name_service.h @@ -10,7 +10,7 @@ namespace NSQLComplete { - using NThreading::TFuture; + using NThreading::TFuture; // TODO(YQL-19747): remove struct TIndentifier { TString Indentifier; @@ -49,14 +49,21 @@ namespace NSQLComplete { TFunctionName, THintName>; + struct TNameConstraints { + TMaybe<TPragmaName::TConstraints> Pragma; + TMaybe<TTypeName::TConstraints> Type; + TMaybe<TFunctionName::TConstraints> Function; + TMaybe<THintName::TConstraints> Hint; + + TGenericName Qualified(TGenericName unqualified) const; + TGenericName Unqualified(TGenericName qualified) const; + TVector<TGenericName> Qualified(TVector<TGenericName> unqualified) const; + TVector<TGenericName> Unqualified(TVector<TGenericName> qualified) const; + }; + struct TNameRequest { TVector<TString> Keywords; - struct { - TMaybe<TPragmaName::TConstraints> Pragma; - TMaybe<TTypeName::TConstraints> Type; - TMaybe<TFunctionName::TConstraints> Function; - TMaybe<THintName::TConstraints> Hint; - } Constraints; + TNameConstraints Constraints; TString Prefix = ""; size_t Limit = 128; diff --git a/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp b/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp index 901b841d1b5..3632a87b7b9 100644 --- a/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp +++ b/yql/essentials/sql/v1/complete/name/service/ranking/ranking.cpp @@ -21,12 +21,16 @@ namespace NSQLComplete { { } - void CropToSortedPrefix(TVector<TGenericName>& names, size_t limit) const override { + void CropToSortedPrefix( + TVector<TGenericName>& names, + const TNameConstraints& 
constraints, + size_t limit) const override { limit = std::min(limit, names.size()); TVector<TRow> rows; rows.reserve(names.size()); for (TGenericName& name : names) { + name = constraints.Qualified(std::move(name)); size_t weight = Weight(name); rows.emplace_back(std::move(name), weight); } @@ -48,7 +52,7 @@ namespace NSQLComplete { rows.crop(limit); for (size_t i = 0; i < limit; ++i) { - names[i] = std::move(rows[i].Name); + names[i] = constraints.Unqualified(std::move(rows[i].Name)); } } diff --git a/yql/essentials/sql/v1/complete/name/service/ranking/ranking.h b/yql/essentials/sql/v1/complete/name/service/ranking/ranking.h index 269f46d2028..ac6329bbaf9 100644 --- a/yql/essentials/sql/v1/complete/name/service/ranking/ranking.h +++ b/yql/essentials/sql/v1/complete/name/service/ranking/ranking.h @@ -11,7 +11,10 @@ namespace NSQLComplete { public: using TPtr = TIntrusivePtr<IRanking>; - virtual void CropToSortedPrefix(TVector<TGenericName>& names, size_t limit) const = 0; + virtual void CropToSortedPrefix( + TVector<TGenericName>& names, + const TNameConstraints& constraints, + size_t limit) const = 0; virtual ~IRanking() = default; }; diff --git a/yql/essentials/sql/v1/complete/name/service/static/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/static/name_service.cpp index bb8d0840a93..976646dd7f4 100644 --- a/yql/essentials/sql/v1/complete/name/service/static/name_service.cpp +++ b/yql/essentials/sql/v1/complete/name/service/static/name_service.cpp @@ -3,6 +3,7 @@ #include "name_index.h" #include <yql/essentials/sql/v1/complete/name/service/ranking/ranking.h> +#include <yql/essentials/sql/v1/complete/name/service/union/name_service.h> #include <yql/essentials/sql/v1/complete/text/case.h> namespace NSQLComplete { @@ -40,113 +41,188 @@ namespace NSQLComplete { } } - TString Prefixed(const TStringBuf requestPrefix, const TStringBuf delimeter, const TNamespaced& namespaced) { - TString prefix; - if (!namespaced.Namespace.empty()) { - prefix += 
namespaced.Namespace; - prefix += delimeter; - } - prefix += requestPrefix; - return prefix; + template <class T> + void NameIndexScan( + const TNameIndex& index, + const TString& prefix, + const TNameConstraints& constraints, + TVector<TGenericName>& out) { + T name; + name.Indentifier = prefix; + name = std::get<T>(constraints.Qualified(std::move(name))); + + AppendAs<T>(out, FilteredByPrefix(name.Indentifier, index)); + out = constraints.Unqualified(std::move(out)); } - void FixPrefix(TString& name, const TStringBuf delimeter, const TNamespaced& namespaced) { - if (namespaced.Namespace.empty()) { - return; + class IRankingNameService: public INameService { + private: + auto Ranking(TNameRequest request) const { + return [request = std::move(request), this](auto f) { + TNameResponse response = f.ExtractValue(); + Ranking_->CropToSortedPrefix( + response.RankedNames, + request.Constraints, + request.Limit); + return response; + }; } - name.remove(0, namespaced.Namespace.size() + delimeter.size()); - } - void FixPrefix(TGenericName& name, const TNameRequest& request) { - std::visit([&](auto& name) -> size_t { - using T = std::decay_t<decltype(name)>; - if constexpr (std::is_same_v<T, TPragmaName>) { - FixPrefix(name.Indentifier, ".", *request.Constraints.Pragma); - } - if constexpr (std::is_same_v<T, TFunctionName>) { - FixPrefix(name.Indentifier, "::", *request.Constraints.Function); - } - return 0; - }, name); - } + public: + explicit IRankingNameService(IRanking::TPtr ranking) + : Ranking_(std::move(ranking)) + { + } + + NThreading::TFuture<TNameResponse> Lookup(TNameRequest request) const override { + return LookupAllUnranked(request).Apply(Ranking(request)); + } + + virtual NThreading::TFuture<TNameResponse> LookupAllUnranked(TNameRequest request) const = 0; + + private: + IRanking::TPtr Ranking_; + }; - class TStaticNameService: public INameService { + class TKeywordNameService: public IRankingNameService { public: - explicit TStaticNameService(TNameSet 
names, IRanking::TPtr ranking) - : Pragmas_(BuildNameIndex(std::move(names.Pragmas), NormalizeName)) - , Types_(BuildNameIndex(std::move(names.Types), NormalizeName)) - , Functions_(BuildNameIndex(std::move(names.Functions), NormalizeName)) - , Hints_([hints = std::move(names.Hints)] { - THashMap<EStatementKind, TNameIndex> index; - for (auto& [k, hints] : hints) { - index.emplace(k, BuildNameIndex(std::move(hints), NormalizeName)); - } - return index; - }()) - , Ranking_(std::move(ranking)) + explicit TKeywordNameService(IRanking::TPtr ranking) + : IRankingNameService(std::move(ranking)) { } - TFuture<TNameResponse> Lookup(TNameRequest request) const override { + NThreading::TFuture<TNameResponse> LookupAllUnranked(TNameRequest request) const override { TNameResponse response; - Sort(request.Keywords, NoCaseCompare); AppendAs<TKeyword>( response.RankedNames, FilteredByPrefix(request.Prefix, request.Keywords)); + return NThreading::MakeFuture<TNameResponse>(std::move(response)); + } + }; + class TPragmaNameService: public IRankingNameService { + public: + explicit TPragmaNameService(IRanking::TPtr ranking, TVector<TString> pragmas) + : IRankingNameService(std::move(ranking)) + , Pragmas_(BuildNameIndex(std::move(pragmas), NormalizeName)) + { + } + + NThreading::TFuture<TNameResponse> LookupAllUnranked(TNameRequest request) const override { + TNameResponse response; if (request.Constraints.Pragma) { - auto prefix = Prefixed(request.Prefix, ".", *request.Constraints.Pragma); - auto names = FilteredByPrefix(prefix, Pragmas_); - AppendAs<TPragmaName>(response.RankedNames, names); + NameIndexScan<TPragmaName>( + Pragmas_, + request.Prefix, + request.Constraints, + response.RankedNames); } + return NThreading::MakeFuture<TNameResponse>(std::move(response)); + } + + private: + TNameIndex Pragmas_; + }; + + class TTypeNameService: public IRankingNameService { + public: + explicit TTypeNameService(IRanking::TPtr ranking, TVector<TString> types) + : 
IRankingNameService(std::move(ranking)) + , Types_(BuildNameIndex(std::move(types), NormalizeName)) + { + } + NThreading::TFuture<TNameResponse> LookupAllUnranked(TNameRequest request) const override { + TNameResponse response; if (request.Constraints.Type) { - AppendAs<TTypeName>( - response.RankedNames, - FilteredByPrefix(request.Prefix, Types_)); + NameIndexScan<TTypeName>( + Types_, + request.Prefix, + request.Constraints, + response.RankedNames); } + return NThreading::MakeFuture<TNameResponse>(std::move(response)); + } + + private: + TNameIndex Types_; + }; + + class TFunctionNameService: public IRankingNameService { + public: + explicit TFunctionNameService(IRanking::TPtr ranking, TVector<TString> functions) + : IRankingNameService(std::move(ranking)) + , Functions_(BuildNameIndex(std::move(functions), NormalizeName)) + { + } + NThreading::TFuture<TNameResponse> LookupAllUnranked(TNameRequest request) const override { + TNameResponse response; if (request.Constraints.Function) { - auto prefix = Prefixed(request.Prefix, "::", *request.Constraints.Function); - auto names = FilteredByPrefix(prefix, Functions_); - AppendAs<TFunctionName>(response.RankedNames, names); + NameIndexScan<TFunctionName>( + Functions_, + request.Prefix, + request.Constraints, + response.RankedNames); } + return NThreading::MakeFuture<TNameResponse>(std::move(response)); + } + private: + TNameIndex Functions_; + }; + + class THintNameService: public IRankingNameService { + public: + explicit THintNameService( + IRanking::TPtr ranking, + THashMap<EStatementKind, TVector<TString>> hints) + : IRankingNameService(std::move(ranking)) + , Hints_([hints = std::move(hints)] { + THashMap<EStatementKind, TNameIndex> index; + for (auto& [k, hints] : hints) { + index.emplace(k, BuildNameIndex(std::move(hints), NormalizeName)); + } + return index; + }()) + { + } + + NThreading::TFuture<TNameResponse> LookupAllUnranked(TNameRequest request) const override { + TNameResponse response; if 
(request.Constraints.Hint) { const auto stmt = request.Constraints.Hint->Statement; if (const auto* hints = Hints_.FindPtr(stmt)) { - AppendAs<THintName>( - response.RankedNames, - FilteredByPrefix(request.Prefix, *hints)); + NameIndexScan<THintName>( + *hints, + request.Prefix, + request.Constraints, + response.RankedNames); } } - - Ranking_->CropToSortedPrefix(response.RankedNames, request.Limit); - - for (auto& name : response.RankedNames) { - FixPrefix(name, request); - } - - return NThreading::MakeFuture(std::move(response)); + return NThreading::MakeFuture<TNameResponse>(std::move(response)); } private: - TNameIndex Pragmas_; - TNameIndex Types_; - TNameIndex Functions_; THashMap<EStatementKind, TNameIndex> Hints_; - IRanking::TPtr Ranking_; }; INameService::TPtr MakeStaticNameService(TNameSet names, TFrequencyData frequency) { - return INameService::TPtr(new TStaticNameService( + return MakeStaticNameService( Pruned(std::move(names), frequency), - MakeDefaultRanking(std::move(frequency)))); + MakeDefaultRanking(std::move(frequency))); } INameService::TPtr MakeStaticNameService(TNameSet names, IRanking::TPtr ranking) { - return MakeIntrusive<TStaticNameService>(std::move(names), std::move(ranking)); + TVector<INameService::TPtr> children = { + new TKeywordNameService(ranking), + new TPragmaNameService(ranking, std::move(names.Pragmas)), + new TTypeNameService(ranking, std::move(names.Types)), + new TFunctionNameService(ranking, std::move(names.Functions)), + new THintNameService(ranking, std::move(names.Hints)), + }; + return MakeUnionNameService(std::move(children), ranking); } } // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/static/ya.make b/yql/essentials/sql/v1/complete/name/service/static/ya.make index 0cb8be287c0..efb6851417b 100644 --- a/yql/essentials/sql/v1/complete/name/service/static/ya.make +++ b/yql/essentials/sql/v1/complete/name/service/static/ya.make @@ -11,6 +11,7 @@ PEERDIR( yql/essentials/core/sql_types 
yql/essentials/sql/v1/complete/name/service yql/essentials/sql/v1/complete/name/service/ranking + yql/essentials/sql/v1/complete/name/service/union yql/essentials/sql/v1/complete/text ) diff --git a/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp b/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp new file mode 100644 index 00000000000..c2373822f6f --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/union/name_service.cpp @@ -0,0 +1,60 @@ +#include "name_service.h" + +#include <library/cpp/threading/future/wait/wait.h> + +namespace NSQLComplete { + + namespace { + + class TNameService: public INameService { + public: + TNameService( + TVector<INameService::TPtr> children, + IRanking::TPtr ranking) + : Children_(std::move(children)) + , Ranking_(std::move(ranking)) + { + } + + NThreading::TFuture<TNameResponse> Lookup(TNameRequest request) const override { + TVector<NThreading::TFuture<TNameResponse>> fs; + for (const auto& c : Children_) { + fs.emplace_back(c->Lookup(request)); + } + return NThreading::WaitAll(fs) + .Apply([fs, this, request = std::move(request)](auto) { + return Union(fs, request.Constraints, request.Limit); + }); + } + + private: + TNameResponse Union( + TVector<NThreading::TFuture<TNameResponse>> fs, + const TNameConstraints& constraints, + size_t limit) const { + TNameResponse united; + for (auto f : fs) { + TNameResponse response = f.ExtractValue(); + std::ranges::move( + response.RankedNames, + std::back_inserter(united.RankedNames)); + } + Ranking_->CropToSortedPrefix(united.RankedNames, constraints, limit); + return united; + } + + TVector<INameService::TPtr> Children_; + IRanking::TPtr Ranking_; + }; + + } // namespace + + INameService::TPtr MakeUnionNameService( + TVector<INameService::TPtr> children, + IRanking::TPtr ranking) { + return INameService::TPtr(new TNameService( + std::move(children), + std::move(ranking))); + } + +} // namespace NSQLComplete diff --git 
a/yql/essentials/sql/v1/complete/name/service/union/name_service.h b/yql/essentials/sql/v1/complete/name/service/union/name_service.h new file mode 100644 index 00000000000..a9cd292b39b --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/union/name_service.h @@ -0,0 +1,12 @@ +#pragma once + +#include <yql/essentials/sql/v1/complete/name/service/ranking/ranking.h> +#include <yql/essentials/sql/v1/complete/name/service/name_service.h> + +namespace NSQLComplete { + + INameService::TPtr MakeUnionNameService( + TVector<INameService::TPtr> children, + IRanking::TPtr ranking); + +} // namespace NSQLComplete diff --git a/yql/essentials/sql/v1/complete/name/service/union/ya.make b/yql/essentials/sql/v1/complete/name/service/union/ya.make new file mode 100644 index 00000000000..6716ee6ab20 --- /dev/null +++ b/yql/essentials/sql/v1/complete/name/service/union/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + name_service.cpp +) + +PEERDIR( + yql/essentials/sql/v1/complete/name/service + yql/essentials/sql/v1/complete/name/service/ranking +) + +END() diff --git a/yql/essentials/sql/v1/complete/name/service/ya.make b/yql/essentials/sql/v1/complete/name/service/ya.make index 473ee05f7d8..42ba63038a1 100644 --- a/yql/essentials/sql/v1/complete/name/service/ya.make +++ b/yql/essentials/sql/v1/complete/name/service/ya.make @@ -1,5 +1,9 @@ LIBRARY() +SRCS( + name_service.cpp +) + PEERDIR( yql/essentials/sql/v1/complete/core ) @@ -9,4 +13,5 @@ END() RECURSE( ranking static + union ) diff --git a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp index 54bcc35233f..c893479ffed 100644 --- a/yql/essentials/sql/v1/complete/sql_complete_ut.cpp +++ b/yql/essentials/sql/v1/complete/sql_complete_ut.cpp @@ -303,6 +303,13 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) { { TVector<TCandidate> expected = { {PragmaName, "yson.CastToString"}}; + auto completion = engine->CompleteAsync({"PRAGMA ys"}).GetValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL(completion.Candidates, expected); + UNIT_ASSERT_VALUES_EQUAL(completion.CompletedToken.Content, "ys"); + } + { + TVector<TCandidate> expected = { + {PragmaName, "yson.CastToString"}}; auto completion = engine->CompleteAsync({"PRAGMA yson"}).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(completion.Candidates, expected); UNIT_ASSERT_VALUES_EQUAL(completion.CompletedToken.Content, "yson"); |