diff options
author | vvvv <[email protected]> | 2024-11-07 12:29:36 +0300 |
---|---|---|
committer | vvvv <[email protected]> | 2024-11-07 13:49:47 +0300 |
commit | d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch) | |
tree | b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/sql/v1 | |
parent | 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff) |
Moved other yql/essentials libs YQL-19206
init
commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/sql/v1')
64 files changed, 58239 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/aggregation.cpp b/yql/essentials/sql/v1/aggregation.cpp new file mode 100644 index 00000000000..875ae7d97d6 --- /dev/null +++ b/yql/essentials/sql/v1/aggregation.cpp @@ -0,0 +1,1469 @@ +#include "node.h" +#include "source.h" +#include "context.h" + +#include <yql/essentials/ast/yql_type_string.h> + +#include <library/cpp/charset/ci_string.h> +#include <util/string/builder.h> +#include <util/string/cast.h> + +#include <array> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +namespace { + bool BlockWindowAggregationWithoutFrameSpec(TPosition pos, TStringBuf name, ISource* src, TContext& ctx) { + if (src) { + auto winNamePtr = src->GetWindowName(); + if (winNamePtr) { + auto winSpecPtr = src->FindWindowSpecification(ctx, *winNamePtr); + if (!winSpecPtr) { + ctx.Error(pos) << "Failed to use aggregation function " << name << " without window specification or in wrong place"; + return true; + } + } + } + return false; + } + + bool ShouldEmitAggApply(const TContext& ctx) { + const bool blockEngineEnabled = ctx.BlockEngineEnable || ctx.BlockEngineForce; + return ctx.EmitAggApply.GetOrElse(blockEngineEnabled); + } +} + +static const THashSet<TString> AggApplyFuncs = { + "count_traits_factory", + "sum_traits_factory", + "avg_traits_factory", + "min_traits_factory", + "max_traits_factory", + "some_traits_factory", +}; + +class TAggregationFactory : public IAggregation { +public: + TAggregationFactory(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, + bool multi = false, bool validateArgs = true) + : IAggregation(pos, name, func, aggMode), Factory(!func.empty() ? + BuildBind(Pos, aggMode == EAggregateMode::OverWindow || aggMode == EAggregateMode::OverWindowDistinct ? 
"window_module" : "aggregate_module", func) : nullptr), + Multi(multi), ValidateArgs(validateArgs), DynamicFactory(!Factory) + { + if (aggMode != EAggregateMode::OverWindow && !func.empty() && AggApplyFuncs.contains(func)) { + AggApplyName = func.substr(0, func.size() - 15); + } + + if (!Factory) { + FakeSource = BuildFakeSource(pos); + } + } + +protected: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) override { + if (!ShouldEmitAggApply(ctx)) { + AggApplyName = ""; + } + + if (ValidateArgs || isFactory) { + ui32 expectedArgs = ValidateArgs && !Factory ? 2 : (isFactory ? 0 : 1); + if (!Factory && ValidateArgs) { + YQL_ENSURE(!isFactory); + } + + if (expectedArgs != exprs.size()) { + ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name + << " requires exactly " << expectedArgs << " argument(s), given: " << exprs.size(); + return false; + } + } + + if (!ValidateArgs) { + Exprs = exprs; + } + + if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) { + return false; + } + + if (ValidateArgs) { + if (!Factory) { + Factory = exprs[1]; + } + } + + if (!isFactory) { + if (ValidateArgs) { + Expr = exprs.front(); + } + + Name = src->MakeLocalName(Name); + } + + if (Expr && Expr->IsAsterisk() && AggApplyName == "count") { + AggApplyName = "count_all"; + } + + if (!Init(ctx, src)) { + return false; + } + + if (!isFactory) { + node.Add("Member", "row", Q(Name)); + if (IsOverWindow()) { + src->AddTmpWindowColumn(Name); + } + } + + return true; + } + + TNodePtr AggregationTraitsFactory() const override { + return Factory; + } + + TNodePtr GetExtractor(bool many, TContext& ctx) const override { + Y_UNUSED(ctx); + return BuildLambda(Pos, Y("row"), Y("PersistableRepr", many ? 
Y("Unwrap", Expr) : Expr)); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const override { + auto extractor = GetExtractor(many, ctx); + if (!extractor) { + return nullptr; + } + + if (!Multi) { + if (!DynamicFactory && allowAggApply && !AggApplyName.empty()) { + return Y("AggApply", Q(AggApplyName), Y("ListItemType", type), extractor); + } + + return Y("Apply", Factory, (DynamicFactory ? Y("ListItemType", type) : type), + extractor); + } + + return Y("MultiAggregate", + Y("ListItemType", type), + extractor, + Factory); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArgs) { + for (auto x : Exprs) { + if (!x->Init(ctx, src)) { + return false; + } + if (x->IsAggregated() && !x->IsAggregationKey() && !IsOverWindow()) { + ctx.Error(Pos) << "Aggregation of aggregated values is forbidden"; + return false; + } + } + + return true; + } + + if (!Expr) { + return true; + } + + if (!Expr->Init(ctx, src)) { + return false; + } + if (Expr->IsAggregated() && !Expr->IsAggregationKey() && !IsOverWindow()) { + ctx.Error(Pos) << "Aggregation of aggregated values is forbidden"; + return false; + } + if (AggMode == EAggregateMode::Distinct || AggMode == EAggregateMode::OverWindowDistinct) { + const auto column = Expr->GetColumnName(); + if (!column) { + // TODO: improve TBasicAggrFunc::CollectPreaggregateExprs() + ctx.Error(Pos) << "Aggregation of aggregated values is forbidden"; + return false; + } + DistinctKey = *column; + YQL_ENSURE(src); + if (!IsGeneratedKeyColumn && src->GetJoin()) { + const auto sourcePtr = Expr->GetSourceName(); + if (!sourcePtr || !*sourcePtr) { + if (!src->IsGroupByColumn(DistinctKey)) { + ctx.Error(Expr->GetPos()) << ErrorDistinctWithoutCorrelation(DistinctKey); + return false; + } + } else { + DistinctKey = DotJoin(*sourcePtr, DistinctKey); + } + } + if (src->IsGroupByColumn(DistinctKey)) { + ctx.Error(Expr->GetPos()) << ErrorDistinctByGroupKey(DistinctKey); + return false; + } + 
Expr = AstNode("row"); + } + + if (FakeSource) { + if (!Factory->Init(ctx, FakeSource.Get())) { + return false; + } + + if (AggMode == EAggregateMode::OverWindow) { + Factory = BuildLambda(Pos, Y("type", "extractor"), Y("block", Q(Y( + Y("let", "x", Y("Apply", Factory, "type", "extractor")), + Y("return", Y("ToWindowTraits", "x")) + )))); + } + } + + return true; + } + + TNodePtr Factory; + TNodePtr Expr; + bool Multi; + bool ValidateArgs; + TString AggApplyName; + TVector<TNodePtr> Exprs; + +private: + TSourcePtr FakeSource; + bool DynamicFactory; +}; + +class TAggregationFactoryImpl final : public TAggregationFactory { +public: + TAggregationFactoryImpl(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi) + : TAggregationFactory(pos, name, func, aggMode, multi) + {} + +private: + TNodePtr DoClone() const final { + return new TAggregationFactoryImpl(Pos, Name, Func, AggMode, Multi); + } +}; + +TAggregationPtr BuildFactoryAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi) { + return new TAggregationFactoryImpl(pos, name, func, aggMode, multi); +} + +class TKeyPayloadAggregationFactory final : public TAggregationFactory { +public: + TKeyPayloadAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + {} + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 0 : 2; + if (exprs.size() < adjustArgsCount || exprs.size() > 1 + adjustArgsCount) { + ctx.Error(Pos) << "Aggregation function " << (isFactory ? 
"factory " : "") << Name << " requires " + << adjustArgsCount << " or " << (1 + adjustArgsCount) << " arguments, given: " << exprs.size(); + return false; + } + if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) { + return false; + } + + if (!isFactory) { + Payload = exprs.front(); + Key = exprs[1]; + } + + if (1 + adjustArgsCount == exprs.size()) { + Limit = exprs.back(); + Func += "2"; + } else { + Func += "1"; + } + + if (Factory) { + Factory = BuildBind(Pos, AggMode == EAggregateMode::OverWindow ? "window_module" : "aggregate_module", Func); + } + + if (!isFactory) { + Name = src->MakeLocalName(Name); + } + + if (!Init(ctx, src)) { + return false; + } + + if (!isFactory) { + node.Add("Member", "row", Q(Name)); + if (IsOverWindow()) { + src->AddTmpWindowColumn(Name); + } + } + + return true; + } + + TNodePtr DoClone() const final { + return new TKeyPayloadAggregationFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetExtractor(bool many, TContext& ctx) const final { + Y_UNUSED(ctx); + return BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + auto apply = Y("Apply", Factory, type, + BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Key) : Key), + BuildLambda(Pos, Y("row"), many ? 
Y("Unwrap", Payload) : Payload)); + AddFactoryArguments(apply); + return apply; + } + + void AddFactoryArguments(TNodePtr& apply) const final { + if (Limit) { + apply = L(apply, Limit); + } + } + + std::vector<ui32> GetFactoryColumnIndices() const final { + return {1u, 0u}; + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (Limit) { + if (!Limit->Init(ctx, FakeSource.Get())) { + return false; + } + } + + if (!Key) { + return true; + } + + if (!Key->Init(ctx, src)) { + return false; + } + if (!Payload->Init(ctx, src)) { + return false; + } + + if (Key->IsAggregated()) { + ctx.Error(Pos) << "Aggregation of aggregated values is forbidden"; + return false; + } + return true; + } + + TSourcePtr FakeSource; + TNodePtr Key, Payload, Limit; +}; + +TAggregationPtr BuildKeyPayloadFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new TKeyPayloadAggregationFactory(pos, name, factory, aggMode); +} + +class TPayloadPredicateAggregationFactory final : public TAggregationFactory { +public: + TPayloadPredicateAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + {} + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 0 : 2; + if (exprs.size() != adjustArgsCount) { + ctx.Error(Pos) << "Aggregation function " << (isFactory ? 
"factory " : "") << Name << " requires " << + adjustArgsCount << " arguments, given: " << exprs.size(); + return false; + } + + if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) { + return false; + } + + if (!isFactory) { + Payload = exprs.front(); + Predicate = exprs.back(); + Name = src->MakeLocalName(Name); + } + + if (!Init(ctx, src)) { + return false; + } + + if (!isFactory) { + node.Add("Member", "row", Q(Name)); + if (IsOverWindow()) { + src->AddTmpWindowColumn(Name); + } + } + + return true; + } + + TNodePtr DoClone() const final { + return new TPayloadPredicateAggregationFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetExtractor(bool many, TContext& ctx) const final { + Y_UNUSED(ctx); + return BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + return Y("Apply", Factory, type, + BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload), + BuildLambda(Pos, Y("row"), many ? 
Y("Unwrap", Predicate) : Predicate)); + } + + std::vector<ui32> GetFactoryColumnIndices() const final { + return {0u, 1u}; + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (!Predicate) { + return true; + } + + if (!Predicate->Init(ctx, src)) { + return false; + } + if (!Payload->Init(ctx, src)) { + return false; + } + + if (Payload->IsAggregated()) { + ctx.Error(Pos) << "Aggregation of aggregated values is forbidden"; + return false; + } + + return true; + } + + TNodePtr Payload, Predicate; +}; + +TAggregationPtr BuildPayloadPredicateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new TPayloadPredicateAggregationFactory(pos, name, factory, aggMode); +} + +class TTwoArgsAggregationFactory final : public TAggregationFactory { +public: + TTwoArgsAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + {} + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 0 : 2; + if (exprs.size() != adjustArgsCount) { + ctx.Error(Pos) << "Aggregation function " << (isFactory ? 
"factory " : "") << Name << " requires " << + adjustArgsCount << " arguments, given: " << exprs.size(); + return false; + } + + if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) { + return false; + } + + if (!isFactory) { + One = exprs.front(); + Two = exprs.back(); + Name = src->MakeLocalName(Name); + } + + if (!Init(ctx, src)) { + return false; + } + + if (!isFactory) { + node.Add("Member", "row", Q(Name)); + if (IsOverWindow()) { + src->AddTmpWindowColumn(Name); + } + } + + return true; + } + + TNodePtr DoClone() const final { + return new TTwoArgsAggregationFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetExtractor(bool many, TContext& ctx) const final { + Y_UNUSED(ctx); + return BuildLambda(Pos, Y("row"), many ? Y("Unwrap", One) : One); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + auto tuple = Q(Y(One, Two)); + return Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? 
Y("Unwrap", tuple) : tuple)); + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (!One) { + return true; + } + + if (!One->Init(ctx, src)) { + return false; + } + if (!Two->Init(ctx, src)) { + return false; + } + + if ((One->IsAggregated() || Two->IsAggregated()) && !IsOverWindow()) { + ctx.Error(Pos) << "Aggregation of aggregated values is forbidden"; + return false; + } + return true; + } + + TNodePtr One, Two; +}; + +TAggregationPtr BuildTwoArgsFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new TTwoArgsAggregationFactory(pos, name, factory, aggMode); +} + +class THistogramAggregationFactory final : public TAggregationFactory { +public: + THistogramAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + , Weight(Y("Double", Q("1.0"))) + , Intervals(Y("Uint32", Q("100"))) + {} + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + if (isFactory) { + if (exprs.size() > 1) { + ctx.Error(Pos) << "Aggregation function factory " << Name << " requires 0 or 1 argument(s), given: " << exprs.size(); + return false; + } + } else { + if (exprs.empty() || exprs.size() > 3) { + ctx.Error(Pos) << "Aggregation function " << Name << " requires one, two or three arguments, given: " << exprs.size(); + return false; + } + } + + if (!isFactory) { + /// \todo: solve it with named arguments + const auto integer = exprs.back()->IsIntegerLiteral(); + switch (exprs.size()) { + case 2U: + if (!integer) { + Weight = exprs.back(); + } + break; + case 3U: + if (!integer) { + ctx.Error(Pos) << "Aggregation function " << Name << " for case with 3 arguments should have third argument of integer type"; + return false; + } + Weight = exprs[1]; + break; + } + if (exprs.size() >= 2 && integer) { + Intervals 
= Y("Cast", exprs.back(), Q("Uint32")); + } + } else { + if (exprs.size() >= 1) { + const auto integer = exprs.back()->IsIntegerLiteral(); + if (!integer) { + ctx.Error(Pos) << "Aggregation function factory " << Name << " should have second interger argument"; + return false; + } + + Intervals = Y("Cast", exprs.back(), Q("Uint32")); + } + } + + return TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front())); + } + + TNodePtr DoClone() const final { + return new THistogramAggregationFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + auto apply = Y("Apply", Factory, type, + BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr), + BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Weight) : Weight)); + AddFactoryArguments(apply); + return apply; + } + + void AddFactoryArguments(TNodePtr& apply) const final { + apply = L(apply, Intervals); + } + + std::vector<ui32> GetFactoryColumnIndices() const final { + return {0u, 1u}; + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (!Weight->Init(ctx, src)) { + return false; + } + if (!Intervals->Init(ctx, FakeSource.Get())) { + return false; + } + + return TAggregationFactory::DoInit(ctx, src); + } + + TSourcePtr FakeSource; + TNodePtr Weight, Intervals; +}; + +TAggregationPtr BuildHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new THistogramAggregationFactory(pos, name, factory, aggMode); +} + +class TLinearHistogramAggregationFactory final : public TAggregationFactory { +public: + TLinearHistogramAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + , BinSize(Y("Double", Q("10.0"))) + , 
Minimum(Y("Double", Q(ToString(-1.0 * Max<double>())))) + , Maximum(Y("Double", Q(ToString(Max<double>())))) + {} + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + if (isFactory) { + if (exprs.size() > 3) { + ctx.Error(Pos) << "Aggregation function " << Name << " requires zero to three arguments, given: " << exprs.size(); + return false; + } + } else { + if (exprs.empty() || exprs.size() > 4) { + ctx.Error(Pos) << "Aggregation function " << Name << " requires one to four arguments, given: " << exprs.size(); + return false; + } + } + + if (exprs.size() > 1 - isFactory) { + BinSize = exprs[1 - isFactory]; + } + + if (exprs.size() > 2 - isFactory) { + Minimum = exprs[2 - isFactory]; + } + + if (exprs.size() > 3 - isFactory) { + Maximum = exprs[3 - isFactory]; + } + + return TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front())); + } + + TNodePtr DoClone() const final { + return new TLinearHistogramAggregationFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + return Y("Apply", Factory, type, + BuildLambda(Pos, Y("row"), many ? 
Y("Unwrap", Expr) : Expr), + BinSize, Minimum, Maximum); + } + + void AddFactoryArguments(TNodePtr& apply) const final { + apply = L(apply, BinSize, Minimum, Maximum); + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (!BinSize->Init(ctx, FakeSource.Get())) { + return false; + } + if (!Minimum->Init(ctx, FakeSource.Get())) { + return false; + } + if (!Maximum->Init(ctx, FakeSource.Get())) { + return false; + } + + return TAggregationFactory::DoInit(ctx, src); + } + + TSourcePtr FakeSource; + TNodePtr BinSize, Minimum, Maximum; +}; + +TAggregationPtr BuildLinearHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new TLinearHistogramAggregationFactory(pos, name, factory, aggMode); +} + +class TPercentileFactory final : public TAggregationFactory { +public: + TPercentileFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + {} + +private: + const TString* GetGenericKey() const final { + return Column; + } + + void Join(IAggregation* aggr) final { + const auto percentile = dynamic_cast<TPercentileFactory*>(aggr); + YQL_ENSURE(percentile); + YQL_ENSURE(Column && percentile->Column && *Column == *percentile->Column); + YQL_ENSURE(AggMode == percentile->AggMode); + Percentiles.insert(percentile->Percentiles.cbegin(), percentile->Percentiles.cend()); + percentile->Percentiles.clear(); + } + + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 0 : 1; + if (exprs.size() < 0 + adjustArgsCount || exprs.size() > 1 + adjustArgsCount) { + ctx.Error(Pos) << "Aggregation function " << (isFactory ? 
"factory " : "") << Name << " requires " + << (0 + adjustArgsCount) << " or " << (1 + adjustArgsCount) << " arguments, given: " << exprs.size(); + return false; + } + + if (!isFactory) { + Column = exprs.front()->GetColumnName(); + } + + if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front()))) + return false; + + TNodePtr x; + if (1 + adjustArgsCount == exprs.size()) { + x = exprs.back(); + if (!x->Init(ctx, FakeSource.Get())) { + return false; + } + } else { + x = Y("Double", Q("0.5")); + } + + if (isFactory) { + FactoryPercentile = x; + } else { + Percentiles.emplace(Name, x); + } + + return true; + } + + TNodePtr DoClone() const final { + return new TPercentileFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + TNodePtr percentiles(Percentiles.cbegin()->second); + + if (Percentiles.size() > 1U) { + percentiles = Y(); + for (const auto& percentile : Percentiles) { + percentiles = L(percentiles, percentile.second); + } + percentiles = Q(percentiles); + } + + return Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr), percentiles); + } + + void AddFactoryArguments(TNodePtr& apply) const final { + apply = L(apply, FactoryPercentile); + } + + std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const final { + if (Percentiles.empty()) + return { TNodePtr(), true }; + + TNodePtr names(Q(Percentiles.cbegin()->first)); + + if (Percentiles.size() > 1U) { + names = Y(); + for (const auto& percentile : Percentiles) + names = L(names, Q(percentile.first)); + names = Q(names); + } + + const bool distinct = AggMode == EAggregateMode::Distinct; + const auto listType = distinct ? 
Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type; + auto apply = GetApply(listType, many, allowAggApply, ctx); + if (!apply) { + return { TNodePtr(), false }; + } + + auto wrapped = WrapIfOverState(apply, overState, many, ctx); + if (!wrapped) { + return { TNodePtr(), false }; + } + + return { distinct ? + Q(Y(names, wrapped, BuildQuotedAtom(Pos, DistinctKey))) : + Q(Y(names, wrapped)), true }; + } + + bool DoInit(TContext& ctx, ISource* src) final { + for (const auto& p : Percentiles) { + if (!p.second->Init(ctx, src)) { + return false; + } + } + + return TAggregationFactory::DoInit(ctx, src); + } + + TSourcePtr FakeSource; + std::multimap<TString, TNodePtr> Percentiles; + TNodePtr FactoryPercentile; + const TString* Column = nullptr; +}; + +TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new TPercentileFactory(pos, name, factory, aggMode); +} + +class TTopFreqFactory final : public TAggregationFactory { +public: + TTopFreqFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + {} + +private: + + //first - n, second - buffer + using TPair = std::pair<TNodePtr, TNodePtr>; + + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 0 : 1; + const double DefaultBufferC = 1.5; + const ui32 MinBuffer = 100; + + if (exprs.size() < adjustArgsCount || exprs.size() > 2 + adjustArgsCount) { + ctx.Error(Pos) << "Aggregation function " << (isFactory? "factory " : "") << Name << + " requires " << adjustArgsCount << " to " << (2 + adjustArgsCount) << " arguments, given: " << exprs.size(); + return false; + } + + if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? 
TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front()))) + return false; + + TNodePtr n = Y("Null"); + TNodePtr buffer = Y("Null"); + + if (1 + adjustArgsCount <= exprs.size()) { + n = exprs[adjustArgsCount]; + if (!n->Init(ctx, FakeSource.Get())) { + return false; + } + n = Y("SafeCast", n, Q("Uint32")); + } + + n = Y("Coalesce", n, Y("Uint32", Q("1"))); + if (2 + adjustArgsCount == exprs.size()) { + buffer = exprs[1 + adjustArgsCount]; + if (!buffer->Init(ctx, FakeSource.Get())) { + return false; + } + + buffer = Y("SafeCast", buffer, Q("Uint32")); + } + + buffer = Y("Coalesce", buffer, Y("SafeCast", Y("*", n, Y("Double", Q(ToString(DefaultBufferC)))), Q("Uint32"))); + buffer = Y("Coalesce", buffer, Y("Uint32", Q(ToString(MinBuffer)))); + buffer = Y("Max", buffer, Y("Uint32", Q(ToString(MinBuffer)))); + + auto x = TPair{ n, buffer }; + if (isFactory) { + TopFreqFactoryParams = x; + } else { + TopFreqs.emplace(Name, x); + } + + return true; + } + + TNodePtr DoClone() const final { + return new TTopFreqFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + TPair topFreqs(TopFreqs.cbegin()->second); + + if (TopFreqs.size() > 1U) { + topFreqs = { Y(), Y() }; + for (const auto& topFreq : TopFreqs) { + topFreqs = { L(topFreqs.first, topFreq.second.first), L(topFreqs.second, topFreq.second.second) }; + } + topFreqs = { Q(topFreqs.first), Q(topFreqs.second) }; + } + + auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? 
Y("Unwrap", Expr) : Expr), topFreqs.first, topFreqs.second); + return apply; + } + + void AddFactoryArguments(TNodePtr& apply) const final { + apply = L(apply, TopFreqFactoryParams.first, TopFreqFactoryParams.second); + } + + std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const final { + if (TopFreqs.empty()) + return { TNodePtr(), true }; + + TNodePtr names(Q(TopFreqs.cbegin()->first)); + + if (TopFreqs.size() > 1U) { + names = Y(); + for (const auto& topFreq : TopFreqs) + names = L(names, Q(topFreq.first)); + names = Q(names); + } + + const bool distinct = AggMode == EAggregateMode::Distinct; + const auto listType = distinct ? Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type; + auto apply = GetApply(listType, many, allowAggApply, ctx); + if (!apply) { + return { nullptr, false }; + } + + auto wrapped = WrapIfOverState(apply, overState, many, ctx); + if (!wrapped) { + return { nullptr, false }; + } + + return { distinct ? 
+ Q(Y(names, wrapped, BuildQuotedAtom(Pos, DistinctKey))) : + Q(Y(names, wrapped)), true }; + } + + bool DoInit(TContext& ctx, ISource* src) final { + for (const auto& topFreq : TopFreqs) { + if (!topFreq.second.first->Init(ctx, src)) { + return false; + } + + if (!topFreq.second.second->Init(ctx, src)) { + return false; + } + } + + return TAggregationFactory::DoInit(ctx, src); + } + + std::multimap<TString, TPair> TopFreqs; + TPair TopFreqFactoryParams; + TSourcePtr FakeSource; +}; + +TAggregationPtr BuildTopFreqFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) { + return new TTopFreqFactory(pos, name, factory, aggMode); +} + +template <bool HasKey> +class TTopAggregationFactory final : public TAggregationFactory { +public: + TTopAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + {} + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 1 : (HasKey ? 3 : 2); + if (exprs.size() != adjustArgsCount) { + ctx.Error(Pos) << "Aggregation function " << (isFactory ? 
"factory " : "") << Name << " requires " + << adjustArgsCount << " arguments, given: " << exprs.size(); + return false; + } + + if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) { + return false; + } + + if (!isFactory) { + Payload = exprs[0]; + if (HasKey) { + Key = exprs[1]; + } + } + + Count = exprs.back(); + + if (!isFactory) { + Name = src->MakeLocalName(Name); + } + + if (!Init(ctx, src)) { + return false; + } + + if (!isFactory) { + node.Add("Member", "row", Q(Name)); + if (IsOverWindow()) { + src->AddTmpWindowColumn(Name); + } + } + + return true; + } + + TNodePtr DoClone() const final { + return new TTopAggregationFactory(Pos, Name, Func, AggMode); + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + TNodePtr apply; + if (HasKey) { + apply = Y("Apply", Factory, type, + BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Key) : Key), + BuildLambda(Pos, Y("row"), many ? Y("Payload", Payload) : Payload)); + } else { + apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? 
Y("Unwrap", Payload) : Payload));
+        }
+        // Append the Count argument to the call built above.
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    // Appends the Count node as an extra argument of the given apply node.
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Count);
+    }
+
+    // Returns {1, 0} for the keyed variant (HasKey) and {0} otherwise.
+    std::vector<ui32> GetFactoryColumnIndices() const final {
+        if (HasKey) {
+            return {1u, 0u};
+        } else {
+            return {0u};
+        }
+    }
+
+    // Initializes Count against FakeSource, then Key (keyed variant only) and
+    // Payload against the real source. Returns early with success when no
+    // Payload has been set.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!Count->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+
+        if (!Payload) {
+            return true;
+        }
+
+        if (HasKey) {
+            if (!Key->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        if (!Payload->Init(ctx, src)) {
+            return false;
+        }
+
+        // Keyed variant checks the key, unkeyed variant checks the payload.
+        if ((HasKey && Key->IsAggregated()) || (!HasKey && Payload->IsAggregated())) {
+            ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+            return false;
+        }
+        return true;
+    }
+
+    TSourcePtr FakeSource;
+    TNodePtr Key, Payload, Count;
+};
+
+// Creates a TTopAggregationFactory; HasKey selects the keyed variant.
+template <bool HasKey>
+TAggregationPtr BuildTopFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    return new TTopAggregationFactory<HasKey>(pos, name, factory, aggMode);
+}
+
+// Explicit instantiations of both variants.
+template TAggregationPtr BuildTopFactoryAggregation<false>(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+template TAggregationPtr BuildTopFactoryAggregation<true >(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+
+// Aggregation factory that takes an optional precision argument, which is
+// passed to the underlying factory as a Uint32 literal.
+class TCountDistinctEstimateAggregationFactory final : public TAggregationFactory {
+public:
+    TCountDistinctEstimateAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+    {}
+
+private:
+    // Validates the argument count, parses the optional precision (default 14,
+    // allowed range 4..18 inclusive) and initializes the aggregation. In
+    // non-factory mode exprs[0] is the aggregated expression and the result is
+    // exposed as a member of "row" under a freshly generated local name.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        // In non-factory mode the first argument is the aggregated expression itself.
+        ui32 adjustArgsCount = isFactory ? 0 : 1;
+        if (exprs.size() < adjustArgsCount || exprs.size() > 1 + adjustArgsCount) {
+            ctx.Error(Pos) << Name << " aggregation function " << (isFactory ? "factory " : "") << " requires " <<
+                adjustArgsCount << " or " << (1 + adjustArgsCount) << " argument(s), given: " << exprs.size();
+            return false;
+        }
+
+        Precision = 14;
+        if (1 + adjustArgsCount <= exprs.size()) {
+            auto posSecondArg = exprs[adjustArgsCount]->GetPos();
+            if (!Parseui32(exprs[adjustArgsCount], Precision)) {
+                ctx.Error(posSecondArg) << Name << ": invalid argument, numeric literal is expected";
+                return false;
+            }
+        }
+        if (Precision > 18 || Precision < 4) {
+            ctx.Error(Pos) << Name << ": precision is expected to be between 4 and 18 (inclusive), got " << Precision;
+            return false;
+        }
+
+        if (!isFactory) {
+            Expr = exprs[0];
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    // Creates a fresh, uninitialized factory with the same constructor arguments.
+    TNodePtr DoClone() const final {
+        return new TCountDistinctEstimateAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ?
Y("Unwrap", Expr) : Expr));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    // Appends the parsed precision as a Uint32 literal argument.
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Y("Uint32", Q(ToString(Precision))));
+    }
+
+private:
+    ui32 Precision = 0;
+};
+
+TAggregationPtr BuildCountDistinctEstimateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    return new TCountDistinctEstimateAggregationFactory(pos, name, factory, aggMode);
+}
+
+// Aggregation factory that takes an optional limit argument. When no limit
+// is given, a Uint64 literal "0" is passed to the underlying factory instead.
+class TListAggregationFactory final : public TAggregationFactory {
+public:
+    TListAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+private:
+    // Validates the argument count, initializes the optional limit against
+    // FakeSource and initializes the aggregation. In non-factory mode exprs[0]
+    // is the aggregated expression and the result is exposed as a member of
+    // "row" under a freshly generated local name.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        // In non-factory mode the first argument is the aggregated expression itself.
+        ui32 adjustArgsCount = isFactory ? 0 : 1;
+        ui32 minArgs = (0 + adjustArgsCount);
+        ui32 maxArgs = (1 + adjustArgsCount);
+        if (exprs.size() < minArgs || exprs.size() > maxArgs) {
+            ctx.Error(Pos) << "List aggregation " << (isFactory ? "factory " : "") << "function require " << minArgs
+                << " or " << maxArgs << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        // Reset the limit so that a previous value never leaks into this run.
+        Limit = nullptr;
+        if (adjustArgsCount + 1U <= exprs.size()) {
+            Limit = exprs[adjustArgsCount];
+            if (!Limit->Init(ctx, FakeSource.Get())) {
+                return false;
+            }
+        }
+
+        if (!isFactory) {
+            Expr = exprs[0];
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    // Creates a fresh, uninitialized factory with the same constructor arguments.
+    TNodePtr DoClone() const final {
+        return new TListAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    // Builds the factory call: (Apply Factory type extractor) followed by the
+    // limit argument. With `many` the extracted expression is wrapped in Unwrap.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    // Appends the limit, or a Uint64 "0" literal when no limit was supplied.
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        if (!Limit) {
+            apply = L(apply, Y("Uint64", Q("0")));
+        } else {
+            apply = L(apply, Limit);
+        }
+    }
+
+private:
+    TSourcePtr FakeSource;
+    TNodePtr Limit;
+};
+
+TAggregationPtr BuildListFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    return new TListAggregationFactory(pos, name, factory, aggMode);
+}
+
+class TUserDefinedAggregationFactory final : public TAggregationFactory {
+public:
+    TUserDefinedAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+    {}
+
+private:
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        ui32 adjustArgsCount = isFactory ?
0 : 1;
+        // Three callables are mandatory, up to four more arguments are optional.
+        if (exprs.size() < (3 + adjustArgsCount) || exprs.size() > (7 + adjustArgsCount)) {
+            ctx.Error(Pos) << "User defined aggregation function " << (isFactory ? "factory " : "") << " requires " <<
+                (3 + adjustArgsCount) << " to " << (7 + adjustArgsCount) << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        // Lambdas[0] and Lambdas[1] wrap the first two user callables in NamedApply;
+        // Lambdas[2] applies the third one unless its type is NullType, in which
+        // case it yields Void.
+        Lambdas[0] = BuildLambda(Pos, Y("value", "parent"), Y("NamedApply", exprs[adjustArgsCount], Q(Y("value")), Y("AsStruct"), Y("DependsOn", "parent")));
+        Lambdas[1] = BuildLambda(Pos, Y("value", "state", "parent"), Y("NamedApply", exprs[adjustArgsCount + 1], Q(Y("state", "value")), Y("AsStruct"), Y("DependsOn", "parent")));
+        Lambdas[2] = BuildLambda(Pos, Y("one", "two"), Y("IfType", exprs[adjustArgsCount + 2], Y("NullType"),
+            BuildLambda(Pos, Y(), Y("Void")),
+            BuildLambda(Pos, Y(), Y("Apply", exprs[adjustArgsCount + 2], "one", "two"))));
+
+        // Remaining lambdas fall back to returning "state" unchanged when the
+        // corresponding argument was not supplied.
+        for (size_t i = 3U; i < Lambdas.size(); ++i) {
+            const auto j = adjustArgsCount + i;
+            Lambdas[i] = BuildLambda(Pos, Y("state"), j >= exprs.size() ? AstNode("state") : Y("Apply", exprs[j], "state"));
+        }
+
+        // The default value is only taken from the arguments when all of them are present.
+        DefVal = (exprs.size() == (7 + adjustArgsCount)) ? exprs[adjustArgsCount + 6] : Y("Null");
+        // The base class only receives the aggregated expression (none in factory mode).
+        return TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front()));
+    }
+
+    // Creates a fresh, uninitialized factory with the same constructor arguments.
+    TNodePtr DoClone() const final {
+        return new TUserDefinedAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    // Builds (Apply Factory type extractor) followed by the six lambdas and the
+    // default value. With `many` the extracted expression is wrapped in Unwrap.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    // Appends all six lambdas and the default value as extra arguments.
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Lambdas[0], Lambdas[1], Lambdas[2], Lambdas[3], Lambdas[4], Lambdas[5], DefVal);
+    }
+
+    // Initializes every lambda and the default value before delegating to the base class.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        for (const auto& lambda : Lambdas) {
+            if (!lambda->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        if (!DefVal->Init(ctx, src)) {
+            return false;
+        }
+
+        return TAggregationFactory::DoInit(ctx, src);
+    }
+
+    std::array<TNodePtr, 6> Lambdas;
+    TNodePtr DefVal;
+};
+
+TAggregationPtr BuildUserDefinedFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    return new TUserDefinedAggregationFactory(pos, name, factory, aggMode);
+}
+
+// Aggregation whose asterisk argument is replaced by a Void node.
+class TCountAggregation final : public TAggregationFactory {
+public:
+    TCountAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, func, aggMode)
+    {}
+
+private:
+    // Creates a fresh, uninitialized aggregation with the same constructor arguments.
+    TNodePtr DoClone() const final {
+        return new TCountAggregation(Pos, Name, Func, AggMode);
+    }
+
+    // Nothing to do without an expression. Otherwise replaces an asterisk with
+    // Void, initializes the expression and sets its count hint from its constness.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!Expr) {
+            return true;
+        }
+
+        if (Expr->IsAsterisk()) {
+            Expr = Y("Void");
+        }
+        if (!Expr->Init(ctx, src)) {
+            return false;
+        }
+        Expr->SetCountHint(Expr->IsConstant());
+        return TAggregationFactory::DoInit(ctx, src);
+    }
+};
+
+TAggregationPtr BuildCountAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode) {
+    return new TCountAggregation(pos, name, func, aggMode);
+}
+
+class TPGFactoryAggregation final : public TAggregationFactory {
+public:
+    TPGFactoryAggregation(TPosition pos, const TString& name, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, "", aggMode, false, false)
+        , PgFunc(Name)
+    {}
+
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) override {
+        auto ret =
TAggregationFactory::InitAggr(ctx, isFactory, src, node, exprs); + if (ret) { + if (isFactory) { + Factory = BuildLambda(Pos, Y("type", "extractor"), Y(AggMode == EAggregateMode::OverWindow ? "PgWindowTraitsTuple" : "PgAggregationTraitsTuple", + Q(PgFunc), Y("ListItemType", "type"), "extractor")); + } else { + Lambda = BuildLambda(Pos, Y("row"), exprs); + } + } + + return ret; + } + + TNodePtr GetExtractor(bool many, TContext& ctx) const override { + Y_UNUSED(many); + ctx.Error() << "Partial aggregation by PostgreSQL function isn't supported"; + return nullptr; + } + + TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final { + Y_UNUSED(many); + Y_UNUSED(ctx); + Y_UNUSED(allowAggApply); + if (ShouldEmitAggApply(ctx) && allowAggApply && AggMode != EAggregateMode::OverWindow) { + return Y("AggApply", + Q("pg_" + to_lower(PgFunc)), Y("ListItemType", type), Lambda); + } + + return Y(AggMode == EAggregateMode::OverWindow ? "PgWindowTraits" : "PgAggregationTraits", + Q(PgFunc), Y("ListItemType", type), Lambda); + } + +private: + TNodePtr DoClone() const final { + return new TPGFactoryAggregation(Pos, Name, AggMode); + } + + TString PgFunc; + TNodePtr Lambda; +}; + +TAggregationPtr BuildPGFactoryAggregation(TPosition pos, const TString& name, EAggregateMode aggMode) { + return new TPGFactoryAggregation(pos, name, aggMode); +} + +class TNthValueFactoryAggregation final : public TAggregationFactory { +public: +public: + TNthValueFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) + : TAggregationFactory(pos, name, factory, aggMode) + , FakeSource(BuildFakeSource(pos)) + { + } + +private: + bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final { + ui32 adjustArgsCount = isFactory ? 
0 : 1;
+        // Exactly one index argument is required (plus the expression in non-factory mode).
+        ui32 expectedArgs = (1 + adjustArgsCount);
+        if (exprs.size() != expectedArgs) {
+            ctx.Error(Pos) << "NthValue aggregation " << (isFactory ? "factory " : "") << "function require "
+                << expectedArgs << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        // The index is initialized against FakeSource rather than the real source.
+        Index = exprs[adjustArgsCount];
+        if (!Index->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+
+        if (!isFactory) {
+            Expr = exprs[0];
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    // Creates a fresh, uninitialized factory with the same constructor arguments.
+    TNodePtr DoClone() const final {
+        return new TNthValueFactoryAggregation(Pos, Name, Func, AggMode);
+    }
+
+    // Builds (Apply Factory type extractor) followed by the index argument.
+    // With `many` the extracted expression is wrapped in Unwrap.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    // Appends the index node as an extra argument.
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Index);
+    }
+
+private:
+    TSourcePtr FakeSource;
+    TNodePtr Index;
+};
+
+TAggregationPtr BuildNthFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    return new TNthValueFactoryAggregation(pos, name, factory, aggMode);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp
new file mode 100644
index 00000000000..e066cd846d7
--- /dev/null
+++ b/yql/essentials/sql/v1/builtin.cpp
@@ -0,0 +1,3772 @@
+#include "node.h"
+#include "context.h"
+
+#include "list_builtin.h"
+#include "match_recognize.h"
+
+#include <yql/essentials/ast/yql_type_string.h>
+#include <yql/essentials/public/udf/udf_data_type.h>
+#include <yql/essentials/core/sql_types/simple_types.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/public/issue/yql_issue_id.h>
+#include <yql/essentials/parser/pg_catalog/catalog.h>
+
+#include <library/cpp/charset/ci_string.h>
+#include <library/cpp/yson/node/node_io.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/util.h>
+#include <util/string/join.h>
+#include <util/system/env.h>
+
+#include <unordered_map>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+// Call names used as non-type template arguments by the subquery call nodes
+// (template <const char* Name>) defined later in this file.
+extern const char SubqueryExtendFor[] = "SubqueryExtendFor";
+extern const char SubqueryUnionAllFor[] = "SubqueryUnionAllFor";
+extern const char SubqueryMergeFor[] = "SubqueryMergeFor";
+extern const char SubqueryUnionMergeFor[] = "SubqueryUnionMergeFor";
+extern const char SubqueryOrderBy[] = "SubqueryOrderBy";
+extern const char SubqueryAssumeOrderBy[] = "SubqueryAssumeOrderBy";
+
+// Builds a type config atom for UDFs of the "clickhouse" namespace: every
+// non-null literal argument is stored under its positional index as a
+// {type, value} map and the result is serialized to YSON. Returns nullptr for
+// any other namespace.
+TNodePtr MakeTypeConfig(const TPosition& pos, const TString& ns, const TVector<TNodePtr>& udfArgs) {
+    if (ns == "clickhouse") {
+        auto settings = NYT::TNode::CreateMap();
+        auto args = NYT::TNode::CreateMap();
+        for (ui32 i = 0; i < udfArgs.size(); ++i) {
+            if (!udfArgs[i]->IsNull() && udfArgs[i]->IsLiteral()) {
+                args[ToString(i)] = NYT::TNode()
+                    ("type", udfArgs[i]->GetLiteralType())
+                    ("value", udfArgs[i]->GetLiteralValue());
+            }
+        }
+
+        settings["args"] = args;
+        return (TDeferredAtom(pos, NYT::NodeToYsonString(settings))).Build();
+    }
+
+    return nullptr;
+}
+
+// When the scoped PragmaCheckedOps setting is on, rewrites "sum" and "sumif"
+// to their "checked_" counterparts in place; otherwise leaves the name as is.
+void AdjustCheckedAggFuncName(TString& aggNormalizedName, TContext& ctx) {
+    if (!ctx.Scoped->PragmaCheckedOps) {
+        return;
+    }
+
+    if (aggNormalizedName == "sum") {
+        aggNormalizedName = "checked_sum";
+    } else if (aggNormalizedName == "sumif") {
+        aggNormalizedName = "checked_sumif";
+    }
+}
+
+// Node for the GROUPING function: validates that every argument is a
+// grouping column and translates into a member access on "row".
+class TGroupingNode final: public TAstListNode {
+public:
+    TGroupingNode(TPosition pos, const TVector<TNodePtr>& args)
+        : TAstListNode(pos)
+        , Args(args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!src) {
+            ctx.Error(Pos) << "Grouping function should have source";
+            return false;
+        }
+        TVector<TString> columns;
+        columns.reserve(Args.size());
+        const bool isJoin = src->GetJoin();
+        ISource* composite = src->GetCompositeSource();
+        for (const auto& node: Args) {
+            auto namePtr = node->GetColumnName();
+            if (!namePtr || !*namePtr) {
+                ctx.Error(Pos) << "GROUPING function should use columns as arguments";
+                return false;
+            }
+            TString column = *namePtr;
+            // With a JOIN source the column is qualified by its source name when one is present.
+            if (isJoin) {
+                auto sourceNamePtr = node->GetSourceName();
+                if (sourceNamePtr && !sourceNamePtr->empty()) {
+                    column = DotJoin(*sourceNamePtr, column);
+                }
+            }
+
+            // Accept a group-by column of the source, a GroupBy alias, or a group-by column of the composite source.
+            if (!src->IsGroupByColumn(column) && !src->IsAlias(EExprSeat::GroupBy, *namePtr) && (!composite || !composite->IsGroupByColumn(column))) {
+                ctx.Error(node->GetPos()) << "Column '" << column << "' is not a grouping column";
+                return false;
+            }
+            columns.emplace_back(column);
+        }
+        TString groupingColumn;
+        if (!src->AddGrouping(ctx, columns, groupingColumn)) {
+            return false;
+        }
+        
Nodes.push_back(BuildAtom(Pos, "Member"));
+        Nodes.push_back(BuildAtom(Pos, "row"));
+        Nodes.push_back(BuildQuotedAtom(Pos, groupingColumn));
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TGroupingNode(Pos, CloneContainer(Args));
+    }
+
+private:
+    const TVector<TNodePtr> Args;
+};
+
+// Call node for an aggregation function applied to a data source.
+class TBasicAggrFunc final: public TAstListNode {
+public:
+    TBasicAggrFunc(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
+        : TAstListNode(pos)
+        , Name(name)
+        , Aggr(aggr)
+        , Args(args)
+    {}
+
+    TCiString GetName() const {
+        return Name;
+    }
+
+    // Requires a source; initializes the aggregation before the list node itself.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!src) {
+            ctx.Error(Pos) << "Unable to use aggregation function '" << Name << "' without data source";
+            return false;
+        }
+        if (!DoInitAggregation(ctx, src)) {
+            return false;
+        }
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // For Distinct and OverWindowDistinct modes only: when the first argument is
+    // not a plain column, labels it with a generated local name, appends it to
+    // `exprs` and replaces the argument with a reference to that column.
+    void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override {
+        if (Args.empty() || (Aggr->GetAggregationMode() != EAggregateMode::Distinct && Aggr->GetAggregationMode() != EAggregateMode::OverWindowDistinct)) {
+            return;
+        }
+
+        auto& expr = Args.front();
+
+        // need to initialize expr before checking whether it is a column
+        auto clone = expr->Clone();
+        if (!clone->Init(ctx, &src)) {
+            return;
+        }
+
+        const auto column = clone->GetColumnName();
+        if (column) {
+            return;
+        }
+
+        auto tmpColumn = src.MakeLocalName("_yql_preagg_" + Name);
+        YQL_ENSURE(!expr->GetLabel());
+        expr->SetLabel(tmpColumn);
+
+        PreaggregateExpr = expr;
+        exprs.push_back(PreaggregateExpr);
+        // `expr` is a reference into Args, so this rewrites the stored argument.
+        expr = BuildColumn(expr->GetPos(), tmpColumn);
+
+        Aggr->MarkKeyColumnAsGenerated();
+    }
+
+    // Clones the aggregation and the arguments into a new node.
+    TNodePtr DoClone() const final {
+        TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
+        return new TBasicAggrFunc(Pos, Name, aggrClone, CloneContainer(Args));
+    }
+
+    TAggregationPtr GetAggregation() const override {
+        return Aggr;
+    }
+
+private:
+    
bool DoInitAggregation(TContext& ctx, ISource* src) { + if (PreaggregateExpr) { + YQL_ENSURE(PreaggregateExpr->HasState(ENodeState::Initialized)); + if (PreaggregateExpr->IsAggregated() && !PreaggregateExpr->IsAggregationKey() && !Aggr->IsOverWindow()) { + ctx.Error(Aggr->GetPos()) << "Aggregation of aggregated values is forbidden"; + return false; + } + } + + if (!Aggr->InitAggr(ctx, false, src, *this, Args)) { + return false; + } + return src->AddAggregation(ctx, Aggr); + } + + void DoUpdateState() const final { + State.Set(ENodeState::Const, !Args.empty() && AllOf(Args, [](const auto& arg){ return arg->IsConstant(); })); + State.Set(ENodeState::Aggregated); + } + + TNodePtr PreaggregateExpr; +protected: + const TString Name; + TAggregationPtr Aggr; + TVector<TNodePtr> Args; +}; + +class TBasicAggrFactory final : public TAstListNode { +public: + TBasicAggrFactory(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args) + : TAstListNode(pos) + , Name(name) + , Aggr(aggr) + , Args(args) + {} + + TCiString GetName() const { + return Name; + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (!DoInitAggregation(ctx)) { + return false; + } + + auto factory = Aggr->AggregationTraitsFactory(); + auto apply = Y("Apply", factory, Y("ListType", "type")); + + auto columnIndices = Aggr->GetFactoryColumnIndices(); + if (columnIndices.size() == 1) { + apply = L(apply, "extractor"); + } else { + // make several extractors from main that returns a tuple + for (ui32 arg = 0; arg < columnIndices.size(); ++arg) { + auto partial = BuildLambda(Pos, Y("row"), Y("Nth", Y("Apply", "extractor", "row"), Q(ToString(columnIndices[arg])))); + apply = L(apply, partial); + } + } + + Aggr->AddFactoryArguments(apply); + Lambda = BuildLambda(Pos, Y("type", "extractor"), apply); + return TAstListNode::DoInit(ctx, src); + } + + TAstNode* Translate(TContext& ctx) const override { + return Lambda->Translate(ctx); + } + + TNodePtr DoClone() const final { + 
TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release()); + return new TBasicAggrFactory(Pos, Name, aggrClone, CloneContainer(Args)); + } + + TAggregationPtr GetAggregation() const override { + return Aggr; + } + +private: + bool DoInitAggregation(TContext& ctx) { + return Aggr->InitAggr(ctx, true, nullptr, *this, Args); + } + +protected: + const TString Name; + TAggregationPtr Aggr; + TVector<TNodePtr> Args; + TNodePtr Lambda; +}; + +typedef THolder<TBasicAggrFunc> TAggrFuncPtr; + +class TLiteralStringAtom: public INode { +public: + TLiteralStringAtom(TPosition pos, TNodePtr node, const TString& info, const TString& prefix = {}) + : INode(pos) + , Node(node) + , Info(info) + , Prefix(prefix) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + if (!Node) { + ctx.Error(Pos) << Info; + return false; + } + + if (!Node->Init(ctx, src)) { + return false; + } + + Atom = MakeAtomFromExpression(Pos, ctx, Node, Prefix).Build(); + return true; + } + + bool IsLiteral() const override { + return Atom ? Atom->IsLiteral() : false; + } + + TString GetLiteralType() const override { + return Atom ? Atom->GetLiteralType() : ""; + } + + TString GetLiteralValue() const override { + return Atom ? Atom->GetLiteralValue() : ""; + } + + TAstNode* Translate(TContext& ctx) const override { + return Atom->Translate(ctx); + } + + TPtr DoClone() const final { + return new TLiteralStringAtom(GetPos(), SafeClone(Node), Info, Prefix); + } + + void DoUpdateState() const override { + YQL_ENSURE(Atom); + State.Set(ENodeState::Const, Atom->IsConstant()); + State.Set(ENodeState::Aggregated, Atom->IsAggregated()); + State.Set(ENodeState::OverWindow, Atom->IsOverWindow()); + } +private: + TNodePtr Node; + TNodePtr Atom; + TString Info; + TString Prefix; +}; + +class TYqlAsAtom: public TLiteralStringAtom { +public: + TYqlAsAtom(TPosition pos, const TVector<TNodePtr>& args) + : TLiteralStringAtom(pos, args.size() == 1 ? 
args[0] : nullptr, "Literal string is required as argument")
+    {
+    }
+};
+
+// Call node for a data-type constructor taking a string literal. The literal
+// is validated for the data slot named by the operation and replaced by a
+// quoted atom.
+class TYqlData: public TCallNode {
+public:
+    TYqlData(TPosition pos, const TString& type, const TVector<TNodePtr>& args)
+        : TCallNode(pos, type, 1, 1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto slot = NUdf::FindDataSlot(GetOpName());
+        if (!slot) {
+            ctx.Error(Pos) << "Unexpected type " << GetOpName();
+            return false;
+        }
+
+        // Decimal takes three arguments: value, precision and scale.
+        if (*slot == NUdf::EDataSlot::Decimal) {
+            MinArgs = MaxArgs = 3;
+        }
+
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        auto stringNode = Args[0];
+        auto atom = stringNode->GetLiteral("String");
+        if (!atom) {
+            ctx.Error(Pos) << "Expected literal string as argument in " << GetOpName() << " function";
+            return false;
+        }
+
+        TString value;
+        if (*slot == NUdf::EDataSlot::Decimal) {
+            const auto precision = Args[1]->GetLiteral("Int32");
+            const auto scale = Args[2]->GetLiteral("Int32");
+
+            if (!NKikimr::NMiniKQL::IsValidDecimal(*atom)) {
+                ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+                return false;
+            }
+
+            // `stub` only receives the parsed number; precision and scale must fit into ui8.
+            ui8 stub;
+            if (!(precision && TryFromString<ui8>(*precision, stub))) {
+                ctx.Error(Pos) << "Invalid precision " << (precision ? precision->Quote() : "") << " for type " << GetOpName();
+                return false;
+            }
+
+            if (!(scale && TryFromString<ui8>(*scale, stub))) {
+                ctx.Error(Pos) << "Invalid scale " << (scale ? scale->Quote() : "") << " for type " << GetOpName();
+                return false;
+            }
+
+            Args[0] = BuildQuotedAtom(GetPos(), *atom);
+            Args[1] = BuildQuotedAtom(GetPos(), *precision);
+            Args[2] = BuildQuotedAtom(GetPos(), *scale);
+            return TCallNode::DoInit(ctx, src);
+        } else if (NUdf::GetDataTypeInfo(*slot).Features & (NUdf::DateType | NUdf::TzDateType | NUdf::TimeIntervalType)) {
+            // Date, time and interval literals are parsed and re-emitted as their numeric value.
+            const auto out = NKikimr::NMiniKQL::ValueFromString(*slot, *atom);
+            if (!out) {
+                ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+                return false;
+            }
+
+            switch (*slot) {
+            case NUdf::EDataSlot::Date:
+            case NUdf::EDataSlot::TzDate:
+                value = ToString(out.Get<ui16>());
+                break;
+            case NUdf::EDataSlot::Date32:
+            case NUdf::EDataSlot::TzDate32:
+                value = ToString(out.Get<i32>());
+                break;
+            case NUdf::EDataSlot::Datetime:
+            case NUdf::EDataSlot::TzDatetime:
+                value = ToString(out.Get<ui32>());
+                break;
+            case NUdf::EDataSlot::Timestamp:
+            case NUdf::EDataSlot::TzTimestamp:
+                value = ToString(out.Get<ui64>());
+                break;
+            case NUdf::EDataSlot::Datetime64:
+            case NUdf::EDataSlot::Timestamp64:
+            case NUdf::EDataSlot::TzDatetime64:
+            case NUdf::EDataSlot::TzTimestamp64:
+                value = ToString(out.Get<i64>());
+                break;
+            case NUdf::EDataSlot::Interval:
+            case NUdf::EDataSlot::Interval64:
+                value = ToString(out.Get<i64>());
+                // An interval literal ending in 'T' is rejected even though it parsed.
+                if ('T' == atom->back()) {
+                    ctx.Error(Pos) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent.";
+                    return false;
+                }
+                break;
+            default:
+                Y_ABORT("Unexpected data slot");
+            }
+
+            // Timezone-aware types carry the IANA timezone name after a comma.
+            if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) {
+                value += ",";
+                value += NKikimr::NMiniKQL::GetTimezoneIANAName(out.GetTimezoneId());
+            }
+        } else if (NUdf::EDataSlot::Uuid == *slot) {
+            // A Uuid literal is stored as the 16 bytes produced by ParseUuid.
+            char out[0x10];
+            if (!NKikimr::NMiniKQL::ParseUuid(*atom, out)) {
+                ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+                return false;
+            }
+
+            value.assign(out, sizeof(out));
+        } else {
+            // Every other slot keeps the literal text once it has been validated.
+            if (!NKikimr::NMiniKQL::IsValidStringValue(*slot, *atom)) {
+                ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+                return false;
+            }
+
+            value = *atom;
+        }
+
+        Args[0] = BuildQuotedAtom(GetPos(), value);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node for the same operation over cloned arguments.
+    TPtr DoClone() const final {
+        return new TYqlData(GetPos(), OpName, CloneContainer(Args));
+    }
+};
+
+// Call node for TableName() taking zero to two arguments.
+class TTableName : public TCallNode {
+public:
+    TTableName(TPosition pos, const TVector<TNodePtr>& args, const TString& service)
+        : TCallNode(pos, "TableName", 0, 2, args)
+        , Service(service)
+        , EmptyArgs(args.empty())
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        // Without arguments the table path of the current row is used, which requires a source.
+        if (Args.empty()) {
+            if (!src) {
+                ctx.Error(Pos) << "Unable to use TableName() without source";
+                return false;
+            }
+
+            // TODO: TablePath() and TableRecordIndex() have more strict limitations
+            if (src->GetJoin()) {
+                ctx.Warning(Pos,
+                    TIssuesIds::YQL_EMPTY_TABLENAME_RESULT) << "TableName() may produce empty result when used in ambiguous context (with JOIN)";
+            }
+
+            if (src->HasAggregations()) {
+                ctx.Warning(Pos,
+                    TIssuesIds::YQL_EMPTY_TABLENAME_RESULT) << "TableName() will produce empty result when used with aggregation.\n"
+                    "Please consult documentation for possible workaround";
+            }
+
+            Args.push_back(Y("TablePath", Y("DependsOn", "row")));
+        }
+
+        if
(Args.size() == 2) {
+            // An explicit second argument must be a string literal.
+            auto literal = Args[1]->GetLiteral("String");
+            if (!literal) {
+                ctx.Error(Args[1]->GetPos()) << "Expected literal string as second argument in TableName function";
+                return false;
+            }
+
+            Args[1] = BuildQuotedAtom(Args[1]->GetPos(), *literal);
+        } else {
+            // Otherwise the service given at construction is appended as the second argument.
+            if (Service.empty()) {
+                ctx.Error(GetPos()) << GetOpName() << " requires either service name as second argument or current cluster name";
+                return false;
+            }
+
+            Args.push_back(BuildQuotedAtom(GetPos(), Service));
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments with the same service.
+    TPtr DoClone() const final {
+        return new TTableName(GetPos(), CloneContainer(Args), Service);
+    }
+
+    // A call constructed without arguments is never constant; otherwise the
+    // base class decides.
+    void DoUpdateState() const override {
+        if (EmptyArgs) {
+            State.Set(ENodeState::Const, false);
+        } else {
+            TCallNode::DoUpdateState();
+        }
+    }
+
+private:
+    TString Service;
+    const bool EmptyArgs;
+};
+
+// Node for ParseType(): parses its single string literal argument as a type
+// at translation time.
+class TYqlParseType final : public INode {
+public:
+    TYqlParseType(TPosition pos, const TVector<TNodePtr>& args)
+        : INode(pos)
+        , Args(args)
+    {}
+
+    // Reports an error and returns nullptr on a wrong argument count, a
+    // non-literal argument or a parse failure.
+    TAstNode* Translate(TContext& ctx) const override {
+        if (Args.size() != 1) {
+            ctx.Error(Pos) << "Expected 1 argument in ParseType function";
+            return nullptr;
+        }
+
+        auto literal = Args[0]->GetLiteral("String");
+        if (!literal) {
+            ctx.Error(Args[0]->GetPos()) << "Expected literal string as argument in ParseType function";
+            return nullptr;
+        }
+
+        auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
+        if (!parsed) {
+            ctx.Error(Args[0]->GetPos()) << "Failed to parse type";
+            return nullptr;
+        }
+
+        return parsed;
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlParseType(Pos, CloneContainer(Args));
+    }
+
+    // Always marked as constant.
+    void DoUpdateState() const final {
+        State.Set(ENodeState::Const);
+    }
+private:
+    TVector<TNodePtr> Args;
+};
+
+class TYqlAddTimezone: public TCallNode {
+public:
+    TYqlAddTimezone(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "AddTimezone", 2, 2, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override
{
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        // The second argument is wrapped in a TimezoneId call.
+        Args[1] = Y("TimezoneId", Args[1]);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlAddTimezone(Pos, CloneContainer(Args));
+    }
+};
+
+// Call node for PgType taking either a type oid (integer literal) or a type
+// name (string literal); both are replaced by the quoted type name.
+class TYqlPgType: public TCallNode {
+public:
+    TYqlPgType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgType", 1, 1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        ui32 oid;
+        if (Args[0]->IsIntegerLiteral() && TryFromString<ui32>(Args[0]->GetLiteralValue(), oid)) {
+            // Numeric form: look the oid up and substitute the type name.
+            if (!NPg::HasType(oid)) {
+                ctx.Error(Args[0]->GetPos()) << "Unknown pg type oid: " << oid;
+                return false;
+            } else {
+                Args[0] = BuildQuotedAtom(Args[0]->GetPos(), NPg::LookupType(oid).Name);
+            }
+        } else if (Args[0]->IsLiteral() && Args[0]->GetLiteralType() == "String") {
+            // String form: the name must be a known pg type.
+            if (!NPg::HasType(Args[0]->GetLiteralValue())) {
+                ctx.Error(Args[0]->GetPos()) << "Unknown pg type: " << Args[0]->GetLiteralValue();
+                return false;
+            } else {
+                Args[0] = BuildQuotedAtom(Args[0]->GetPos(), Args[0]->GetLiteralValue());
+            }
+        } else {
+            ctx.Error(Args[0]->GetPos()) << "Expecting string literal with pg type name or integer literal with pg type oid";
+            return false;
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlPgType(Pos, CloneContainer(Args));
+    }
+};
+
+// Call node for PgConst: the first argument becomes an atom and any arguments
+// after the second are folded into a single PgTypeMod call.
+class TYqlPgConst : public TCallNode {
+public:
+    TYqlPgConst(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgConst", 2, -1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (!Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        // A literal is quoted directly; anything else goes through MakeAtomFromExpression.
+        if (Args[0]->IsLiteral()) {
+            Args[0] = BuildQuotedAtom(Args[0]->GetPos(), Args[0]->GetLiteralValue());
+        } else {
+            auto value = MakeAtomFromExpression(Pos, ctx, Args[0]).Build();
+            Args[0] = value;
+        }
+
+        if (Args.size() > 2) {
+            // PgTypeMod receives the second argument followed by the quoted literal modifiers.
+            TVector<TNodePtr> typeModArgs;
+            typeModArgs.push_back(Args[1]);
+            for (ui32 i = 2; i < Args.size(); ++i) {
+                if (!Args[i]->IsLiteral()) {
+                    ctx.Error(Args[i]->GetPos()) << "Expecting literal";
+                    return false;
+                }
+
+                typeModArgs.push_back(BuildQuotedAtom(Args[i]->GetPos(), Args[i]->GetLiteralValue()));
+            }
+
+            // Keep the first two arguments and append the PgTypeMod call as the third.
+            Args.erase(Args.begin() + 2, Args.end());
+            Args.push_back(new TCallNodeImpl(Pos, "PgTypeMod", typeModArgs));
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlPgConst(Pos, CloneContainer(Args));
+    }
+};
+
+// Call node for PgCast: any arguments after the second are folded into a
+// single PgTypeMod call.
+class TYqlPgCast : public TCallNode {
+public:
+    TYqlPgCast(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgCast", 2, -1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (Args.size() > 2) {
+            // PgTypeMod receives the second argument followed by the quoted literal modifiers.
+            TVector<TNodePtr> typeModArgs;
+            typeModArgs.push_back(Args[1]);
+            for (ui32 i = 2; i < Args.size(); ++i) {
+                if (!Args[i]->IsLiteral()) {
+                    ctx.Error(Args[i]->GetPos()) << "Expecting literal";
+                    return false;
+                }
+
+                typeModArgs.push_back(BuildQuotedAtom(Args[i]->GetPos(), Args[i]->GetLiteralValue()));
+            }
+
+            // Keep the first two arguments and append the PgTypeMod call as the third.
+            Args.erase(Args.begin() + 2, Args.end());
+            Args.push_back(new TCallNodeImpl(Pos, "PgTypeMod", typeModArgs));
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlPgCast(Pos, CloneContainer(Args));
+    }
+};
+
+// Call node for PgOp: the first argument must be a string literal and is
+// replaced by a quoted atom.
+class TYqlPgOp : public TCallNode {
+public:
+    TYqlPgOp(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgOp", 2, 3, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (!Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        if (!Args[0]->IsLiteral() || Args[0]->GetLiteralType() != "String") {
+            ctx.Error(Args[0]->GetPos()) << "Expecting string literal as first argument";
+            return false;
+        }
+
+        Args[0] = BuildQuotedAtom(Args[0]->GetPos(), Args[0]->GetLiteralValue());
+        return
TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlPgOp(Pos, CloneContainer(Args));
+    }
+};
+
+// Call node for PgCall: the first argument must be a string literal. A
+// settings list is inserted as the second argument; it holds a "range" entry
+// when RangeFunction is true and is empty otherwise.
+template <bool RangeFunction>
+class TYqlPgCall : public TCallNode {
+public:
+    TYqlPgCall(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgCall", 1, -1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (!Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        if (!Args[0]->IsLiteral() || Args[0]->GetLiteralType() != "String") {
+            ctx.Error(Args[0]->GetPos()) << "Expecting string literal as first argument";
+            return false;
+        }
+
+        Args[0] = BuildQuotedAtom(Args[0]->GetPos(), Args[0]->GetLiteralValue());
+        Args.insert(Args.begin() + 1, RangeFunction ? Q(Y(Q(Y(Q("range"))))) : Q(Y()));
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlPgCall<RangeFunction>(Pos, CloneContainer(Args));
+    }
+};
+
+// Two-argument call node named by the template argument; the first argument
+// is wrapped in EvaluateExpr.
+template <const char* Name>
+class TYqlSubqueryFor : public TCallNode {
+public:
+    TYqlSubqueryFor(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, Name, 2, 2, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        Args[0] = Y("EvaluateExpr", Args[0]);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlSubqueryFor<Name>(Pos, CloneContainer(Args));
+    }
+};
+
+// Two-argument call node named by the template argument; the second argument
+// is wrapped in EvaluateExpr.
+template <const char* Name>
+class TYqlSubqueryOrderBy : public TCallNode {
+public:
+    TYqlSubqueryOrderBy(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, Name, 2, 2, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        Args[1] = Y("EvaluateExpr", Args[1]);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlSubqueryOrderBy<Name>(Pos, CloneContainer(Args));
+    }
+};
+
+
+template <bool Strict>
+class TYqlTypeAssert : public TCallNode {
+public:
+    // Strict selects the "EnsureType" call, otherwise "EnsureConvertibleTo".
+    TYqlTypeAssert(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, Strict ? "EnsureType" : "EnsureConvertibleTo", 2, 3, args)
+    {
+    }
+
+    // Initializes the second argument; an optional third argument is
+    // initialized and converted into an atom.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (!Args[1]->Init(ctx, src)) {
+            return false;
+        }
+        if (Args.size() == 3) {
+            if (!Args[2]->Init(ctx, src)) {
+                return false;
+            }
+
+            auto message = MakeAtomFromExpression(Pos, ctx, Args[2]).Build();
+            Args[2] = message;
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlTypeAssert<Strict>(Pos, CloneContainer(Args));
+    }
+};
+
+// Call node for FromBytes: the second argument is initialized, wrapped in
+// FormatType and converted into an atom.
+class TFromBytes final : public TCallNode {
+public:
+    TFromBytes(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "FromBytes", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (!Args[1]->Init(ctx, src)) {
+            return false;
+        }
+        Args[1] = MakeAtomFromExpression(Pos, ctx, Y("FormatType", Args[1])).Build();
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TFromBytes(Pos, CloneContainer(Args));
+    }
+};
+
+// Base for two-argument call nodes whose second argument is initialized and
+// converted into an atom.
+class TYqlTaggedBase : public TCallNode {
+public:
+    TYqlTaggedBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (!Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+};
+
+// AsTagged call built on TYqlTaggedBase.
+class TYqlAsTagged final : public TYqlTaggedBase {
+public:
+    TYqlAsTagged(TPosition pos, const TVector<TNodePtr>& args)
+        : TYqlTaggedBase(pos, "AsTagged", args)
+    {}
+
+    // Creates a new node over cloned arguments.
+    TNodePtr DoClone() const final {
+        return new TYqlAsTagged(Pos, CloneContainer(Args));
+    }
+};
+
+class TYqlUntag final
: public TYqlTaggedBase { +public: + TYqlUntag(TPosition pos, const TVector<TNodePtr>& args) + : TYqlTaggedBase(pos, "Untag", args) + {} + + TNodePtr DoClone() const final { + return new TYqlUntag(Pos, CloneContainer(Args)); + } +}; + +class TYqlVariant final : public TCallNode { +public: + TYqlVariant(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "Variant", 3, 3, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[1]->Init(ctx, src)) { + return false; + } + + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlVariant(Pos, CloneContainer(Args)); + } +}; + +class TYqlEnum final : public TCallNode { +public: + TYqlEnum(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "Enum", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[0]->Init(ctx, src)) { + return false; + } + + Args[0] = MakeAtomFromExpression(Pos, ctx, Args[0]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlEnum(Pos, CloneContainer(Args)); + } +}; + +class TYqlAsVariant final : public TCallNode { +public: + TYqlAsVariant(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "AsVariant", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[1]->Init(ctx, src)) { + return false; + } + + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlAsVariant(Pos, CloneContainer(Args)); + } +}; + +class TYqlAsEnum final : public TCallNode { +public: + TYqlAsEnum(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "AsEnum", 1, 1, args) + {} + + 
bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[0]->Init(ctx, src)) { + return false; + } + + Args[0] = MakeAtomFromExpression(Pos, ctx, Args[0]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlAsEnum(Pos, CloneContainer(Args)); + } +}; + +TNodePtr BuildFileNameArgument(TPosition pos, const TNodePtr& argument, const TString& prefix) { + return new TLiteralStringAtom(pos, argument, "FilePath requires string literal as parameter", prefix); +} + +template <typename TDerived, bool IsFile> +class TYqlAtomBase: public TCallNode { +public: + TYqlAtomBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 1, 1, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!Args.empty()) { + Args[0] = BuildFileNameArgument(Pos, Args[0], IsFile ? ctx.Settings.FileAliasPrefix : TString()); + } + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TDerived(Pos, OpName, CloneContainer(Args)); + } + + bool IsLiteral() const override { + return !Args.empty() ? Args[0]->IsLiteral() : false; + } + + TString GetLiteralType() const override { + return !Args.empty() ? Args[0]->GetLiteralType() : ""; + } + + TString GetLiteralValue() const override { + return !Args.empty() ? 
Args[0]->GetLiteralValue() : ""; + } +}; + +class TYqlAtom final : public TYqlAtomBase<TYqlAtom, false> +{ + using TBase = TYqlAtomBase<TYqlAtom, false>; + using TBase::TBase; +}; + +class TFileYqlAtom final : public TYqlAtomBase<TFileYqlAtom, true> +{ + using TBase = TYqlAtomBase<TFileYqlAtom, true>; + using TBase::TBase; +}; + +class TTryMember final: public TCallNode { +public: + TTryMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 3, 3, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.size() != 3) { + ctx.Error(Pos) << OpName << " requires exactly three arguments"; + return false; + } + for (const auto& arg : Args) { + if (!arg->Init(ctx, src)) { + return false; + } + } + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TTryMember(Pos, OpName, CloneContainer(Args)); + } +}; + +template<bool Pretty> +class TFormatTypeDiff final: public TCallNode { +public: + TFormatTypeDiff(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 3, 3, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.size() != 2) { + ctx.Error(Pos) << OpName << " requires exactly 2 arguments"; + return false; + } + for (const auto& arg : Args) { + if (!arg->Init(ctx, src)) { + return false; + } + } + Args.push_back(Q(Pretty ? 
"true" : "false")); + OpName = "FormatTypeDiff"; + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TFormatTypeDiff<Pretty>(GetPos(), OpName, CloneContainer(Args)); + } +}; + +class TAddMember final: public TCallNode { +public: + TAddMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 3, 3, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.size() != 3) { + ctx.Error(Pos) << OpName << " requires exactly three arguments"; + return false; + } + for (const auto& arg : Args) { + if (!arg->Init(ctx, src)) { + return false; + } + } + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TAddMember(Pos, OpName, CloneContainer(Args)); + } +}; + +class TRemoveMember final: public TCallNode { +public: + TRemoveMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.size() != 2) { + ctx.Error(Pos) << OpName << " requires exactly two arguments"; + return false; + } + for (const auto& arg : Args) { + if (!arg->Init(ctx, src)) { + return false; + } + } + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TRemoveMember(Pos, OpName, CloneContainer(Args)); + } +}; + +class TCombineMembers final: public TCallNode { +public: + TCombineMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 1, -1, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.empty()) { + ctx.Error(Pos) << "CombineMembers requires at least one argument"; + return false; + } + for (size_t i = 0; i < Args.size(); ++i) { + Args[i] = Q(Y(Q(""), Args[i])); // flatten without prefix + } + return 
TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TCombineMembers(Pos, OpName, CloneContainer(Args)); + } +}; + +class TFlattenMembers final: public TCallNode { +public: + TFlattenMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 1, -1, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.empty()) { + ctx.Error(Pos) << OpName << " requires at least one argument"; + return false; + } + for (size_t i = 0; i < Args.size(); ++i) { + if (!Args[i]->Init(ctx, src)) { + return false; + } + if (Args[i]->GetTupleSize() == 2) { + // flatten with prefix + Args[i] = Q(Y( + MakeAtomFromExpression(Pos, ctx, Args[i]->GetTupleElement(0)).Build(), + Args[i]->GetTupleElement(1) + )); + } else { + ctx.Error(Pos) << OpName << " requires arguments to be tuples of size 2: prefix and struct"; + return false; + } + } + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TFlattenMembers(Pos, OpName, CloneContainer(Args)); + } +}; + +TString NormalizeTypeString(const TString& str) { + auto ret = to_title(str); + if (ret.StartsWith("Tz")) { + ret = "Tz" + to_title(ret.substr(2)); + } + if (ret.StartsWith("Json")) { + ret = "Json" + to_title(ret.substr(4)); + } + if (ret.StartsWith("Dy")) { + ret = "Dy" + to_title(ret.substr(2)); + } + + return ret; +} + +static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json", "JsonDocument", + "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal", "DyNumber", + "Date32", "Datetime64", "Timestamp64", "Interval64", "TzDate32", "TzDatetime64", "TzTimestamp64"}; +TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) { + auto errMsgFunc = [&node, argNum]() { + static 
std::array<TString, 2> numToName = {{"first", "second"}}; + TStringBuilder sb; + sb << "At " << numToName.at(argNum) << " argument of " << node.GetOpName() << " expected type string, available one of: " + << JoinRange(", ", AvailableDataTypes.begin(), AvailableDataTypes.end()) << ";"; + return TString(sb); + }; + auto typeStringNode = node.GetArgs().at(argNum); + auto typeStringPtr = typeStringNode->GetLiteral("String"); + TNodePtr dataTypeNode; + if (typeStringPtr) { + TString typeString = NormalizeTypeString(*typeStringPtr); + if (!AvailableDataTypes.contains(typeString)) { + ctx.Error(typeStringNode->GetPos()) << "Bad type string: '" << typeString << "'. " << errMsgFunc(); + return {}; + } + if (outTypeStrPtr) { + *outTypeStrPtr = typeString; + } + dataTypeNode = typeStringNode->Q(typeString); + } else { + ctx.Error(typeStringNode->GetPos()) << errMsgFunc(); + return {}; + } + return dataTypeNode; +} + +class TYqlParseFileOp final: public TCallNode { +public: + TYqlParseFileOp(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "ParseFile", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0); + if (!dataTypeStringNode) { + return false; + } + auto aliasNode = BuildFileNameArgument(Args[1]->GetPos(), Args[1], ctx.Settings.FileAliasPrefix); + OpName = "Apply"; + Args[0] = Y("Udf", Q("File.ByLines"), Y("Void"), + Y("TupleType", + Y("TupleType", Y("DataType", dataTypeStringNode)), + Y("StructType"), + Y("TupleType"))); + + Args[1] = Y("FilePath", aliasNode); + return TCallNode::DoInit(ctx, src); + } + + TString GetOpName() const override { + return "ParseFile"; + } + + TNodePtr DoClone() const final { + return new TYqlParseFileOp(Pos, CloneContainer(Args)); + } +}; + +class TYqlDataType final : public TCallNode { +public: + TYqlDataType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "DataType", 1, 
3, args) + { + FakeSource = BuildFakeSource(pos); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + for (ui32 i = 0; i < Args.size(); ++i) { + if (!Args[i]->Init(ctx, FakeSource.Get())) { + return false; + } + + Args[i] = MakeAtomFromExpression(Pos, ctx, Args[i]).Build(); + } + + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlDataType(Pos, CloneContainer(Args)); + } + +private: + TSourcePtr FakeSource; +}; + +class TYqlResourceType final : public TCallNode { +public: + TYqlResourceType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "ResourceType", 1, 1, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[0]->Init(ctx, src)) { + return false; + } + + Args[0] = MakeAtomFromExpression(Pos, ctx, Args[0]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlResourceType(Pos, CloneContainer(Args)); + } +}; + +class TYqlTaggedType final : public TCallNode { +public: + TYqlTaggedType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "TaggedType", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[1]->Init(ctx, src)) { + return false; + } + + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlTaggedType(Pos, CloneContainer(Args)); + } +}; + +class TYqlCallableType final : public TCallNode { +public: + TYqlCallableType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "CallableType", 2, -1, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[0]->GetTupleNode()) { + ui32 numOptArgs; + if (!Parseui32(Args[0], 
numOptArgs)) { + ctx.Error(Args[0]->GetPos()) << "Expected either tuple or number of optional arguments"; + return false; + } + + Args[0] = Q(Y(BuildQuotedAtom(Args[0]->GetPos(), ToString(numOptArgs)))); + } + + if (!Args[1]->GetTupleNode()) { + Args[1] = Q(Y(Args[1])); + } + + for (ui32 index = 2; index < Args.size(); ++index) { + if (!Args[index]->GetTupleNode()) { + Args[index] = Q(Y(Args[index])); + } + } + + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlCallableType(Pos, CloneContainer(Args)); + } +}; + +class TYqlTupleElementType final : public TCallNode { +public: + TYqlTupleElementType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "TupleElementType", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[1]->Init(ctx, src)) { + return false; + } + + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlTupleElementType(Pos, CloneContainer(Args)); + } +}; + +class TYqlStructMemberType final : public TCallNode { +public: + TYqlStructMemberType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "StructMemberType", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[1]->Init(ctx, src)) { + return false; + } + + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlStructMemberType(Pos, CloneContainer(Args)); + } +}; + +class TYqlCallableArgumentType final : public TCallNode { +public: + TYqlCallableArgumentType(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "CallableArgumentType", 2, 2, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + 
return false; + } + + ui32 index; + if (!Parseui32(Args[1], index)) { + ctx.Error(Args[1]->GetPos()) << "Expected index of the callable argument"; + return false; + } + + Args[1] = BuildQuotedAtom(Args[1]->GetPos(), ToString(index)); + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlCallableArgumentType(Pos, CloneContainer(Args)); + } +}; + +class TStructTypeNode : public TAstListNode { +public: + TStructTypeNode(TPosition pos, const TVector<TNodePtr>& exprs) + : TAstListNode(pos) + , Exprs(exprs) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + Nodes.push_back(BuildAtom(Pos, "StructType", TNodeFlags::Default)); + for (const auto& expr : Exprs) { + const auto& label = expr->GetLabel(); + if (!label) { + ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members"; + return false; + } + Nodes.push_back(Q(Y(Q(label), expr))); + } + return TAstListNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TStructTypeNode(Pos, CloneContainer(Exprs)); + } + +private: + const TVector<TNodePtr> Exprs; +}; + +template <bool IsStrict> +class TYqlIf final: public TCallNode { +public: + TYqlIf(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, IsStrict ? 
"IfStrict" : "If", 2, 3, args) + {} + +private: + TCallNode::TPtr DoClone() const override { + return new TYqlIf(GetPos(), CloneContainer(Args)); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + Args[0] = Y("Coalesce", Args[0], Y("Bool", Q("false"))); + if (Args.size() == 2) { + Args.push_back(Y("Null")); + } + return TCallNode::DoInit(ctx, src); + } +}; + +class TYqlSubstring final: public TCallNode { +public: + TYqlSubstring(TPosition pos, const TString& name, const TVector<TNodePtr>& args) + : TCallNode(pos, name, 2, 3, args) + {} + +private: + TCallNode::TPtr DoClone() const override { + return new TYqlSubstring(GetPos(), OpName, CloneContainer(Args)); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.size() == 2) { + Args.push_back(Y("Null")); + } + return TCallNode::DoInit(ctx, src); + } +}; + +class TYqlIn final: public TCallNode { +public: + TYqlIn(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "IN", 3, 3, args) + {} + +private: + TNodePtr DoClone() const final { + return new TYqlIn(Pos, CloneContainer(Args)); + } + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + auto key = Args[0]; + auto inNode = Args[1]; + auto hints = Args[2]; + + const auto pos = inNode->GetPos(); + + if (!key->Init(ctx, src)) { + return false; + } + + if (!inNode->Init(ctx, inNode->GetSource() ? 
nullptr : src)) { + return false; + } + + if (inNode->GetLiteral("String")) { + ctx.Error(pos) << "Unable to use IN predicate with string argument, it won't search substring - " + "expecting tuple, list, dict or single column table source"; + return false; + } + + if (inNode->GetTupleSize() == 1) { + auto singleElement = inNode->GetTupleElement(0); + // TODO: 'IN ((select ...))' is parsed exactly like 'IN (select ...)' instead of a single element tuple + if (singleElement->GetSource() || singleElement->IsSelect()) { + TStringBuf parenKind = singleElement->GetSource() ? "" : "external "; + ctx.Warning(pos, + TIssuesIds::YQL_CONST_SUBREQUEST_IN_LIST) << "Using subrequest in scalar context after IN, " + << "perhaps you should remove " + << parenKind << "parenthesis here"; + } + } + + TVector<TNodePtr> hintElements; + for (size_t i = 0; i < hints->GetTupleSize(); ++i) { + hintElements.push_back(hints->GetTupleElement(i)); + } + + if (inNode->GetSource() || inNode->IsSelect()) { + hintElements.push_back(BuildHint(pos, "tableSource")); + } + + if (!ctx.AnsiInForEmptyOrNullableItemsCollections.Defined()) { + hintElements.push_back(BuildHint(pos, "warnNoAnsi")); + } else if (*ctx.AnsiInForEmptyOrNullableItemsCollections) { + hintElements.push_back(BuildHint(pos, "ansi")); + } + + OpName = "SqlIn"; + MinArgs = MaxArgs = 3; + Args = { + inNode->GetSource() ? 
inNode->GetSource() : inNode, + key, + BuildTuple(pos, hintElements) + }; + + return TCallNode::DoInit(ctx, src); + } + + static TNodePtr BuildHint(TPosition pos, const TString& name) { + return BuildTuple(pos, { BuildQuotedAtom(pos, name, NYql::TNodeFlags::Default) }); + } + + TString GetOpName() const override { + return "IN predicate"; + } +}; + +class TYqlUdfBase : public TCallNode { +public: + TYqlUdfBase(TPosition pos, const TString& name) + : TCallNode(pos, "Udf", 1, 1, UdfArgs(pos, name)) + {} + + TYqlUdfBase(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2) + : TCallNode(pos, "Udf", argsCount, argsCount, UdfArgs(pos, name, &args)) + {} + +protected: + TYqlUdfBase(TPosition pos, const TString& opName, ui32 minArgs, ui32 maxArgs, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, minArgs, maxArgs, args) + {} + +private: + static TVector<TNodePtr> UdfArgs(TPosition pos, const TString& name, const TVector<TNodePtr>* args = nullptr) { + TVector<TNodePtr> res = { BuildQuotedAtom(pos, name) }; + if (args) { + res.insert(res.end(), args->begin(), args->end()); + } + return res; + } + + void DoUpdateState() const override { + TCallNode::DoUpdateState(); + State.Set(ENodeState::Aggregated, false/*!RunConfig || RunConfig->IsAggregated()*/); + State.Set(ENodeState::Const, true /* FIXME: To avoid CheckAggregationLevel issue for non-const TypeOf. 
*/); + } + +private: + TNodePtr RunConfig; +}; + +class TYqlUdf final : public TYqlUdfBase { +public: + TYqlUdf(TPosition pos, const TString& name) + : TYqlUdfBase(pos, name) + {} + + TYqlUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2) + : TYqlUdfBase(pos, name, args, argsCount) + {} + +private: + TYqlUdf(const TYqlUdf& other) + : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args)) + {} + + TNodePtr DoClone() const final { + return new TYqlUdf(*this); + } +}; + +class TYqlTypeConfigUdf final : public TYqlUdfBase { +public: + TYqlTypeConfigUdf(TPosition pos, const TString& name) + : TYqlUdfBase(pos, name) + {} + + TYqlTypeConfigUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2) + : TYqlUdfBase(pos, name, args, argsCount) + {} + +private: + TYqlTypeConfigUdf(const TYqlTypeConfigUdf& other) + : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args)) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[3]->Init(ctx, src)) { + return false; + } + + Args[3] = MakeAtomFromExpression(Pos, ctx, Args[3]).Build(); + return TYqlUdfBase::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlTypeConfigUdf(*this); + } +}; + +class TWeakFieldOp final: public TCallNode { +public: + TWeakFieldOp(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "WeakField", 2, 3, args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!src) { + ctx.Error(Pos) << GetCallExplain() << " unable use without source"; + return false; + } + + src->AllColumns(); + + if (!ValidateArguments(ctx)) { + return false; + } + + bool hasError = false; + for (auto& arg: Args) { + if (!arg->Init(ctx, src)) { + hasError = true; + continue; + } + } + + if (hasError) { + return false; + } + + PrecacheState(); + + const auto memberPos = 
Args[0]->GetPos(); + TVector<TNodePtr> repackArgs = {BuildAtom(memberPos, "row", NYql::TNodeFlags::Default)}; + if (auto literal = Args[1]->GetLiteral("String")) { + TString targetType; + if (!GetDataTypeStringNode(ctx, *this, 1, &targetType)) { + return false; + } + + repackArgs.push_back(Args[1]->Q(targetType)); + } else { + repackArgs.push_back(Args[1]); + } + + TVector<TNodePtr> column; + auto namePtr = Args[0]->GetColumnName(); + if (!namePtr || !*namePtr) { + ctx.Error(Pos) << GetCallExplain() << " expects column name as first argument"; + return false; + } + auto memberName = *namePtr; + column.push_back(Args[0]->Q(*namePtr)); + + if (src->GetJoin() && !src->IsJoinKeysInitializing()) { + const auto sourcePtr = Args[0]->GetSourceName(); + if (!sourcePtr || !*sourcePtr) { + ctx.Error(Pos) << GetOpName() << " required to have correlation name in case of JOIN for column at first parameter"; + return false; + } + column.push_back(Args[0]->Q(*sourcePtr)); + memberName = DotJoin(*sourcePtr, memberName); + } + if (!GetLabel()) { + SetLabel(memberName); + } + repackArgs.push_back(BuildTuple(memberPos, column)); + if (Args.size() == 3) { + repackArgs.push_back(Args[2]); + } + ++MinArgs; + ++MaxArgs; + Args.swap(repackArgs); + + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TWeakFieldOp(Pos, CloneContainer(Args)); + } +}; + +template <bool Join> +class TTableRow final : public INode { +public: + TTableRow(TPosition pos, const TVector<TNodePtr>& args) + : TTableRow(pos, args.size()) + {} + + TTableRow(TPosition pos, ui32 argsCount) + : INode(pos) + , ArgsCount(argsCount) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!src || src->IsFake()) { + ctx.Error(Pos) << TStringBuilder() << (Join ? 
"Join" : "") << "TableRow requires data source"; + return false; + } + + if (ArgsCount > 0) { + ctx.Error(Pos) << "TableRow requires exactly 0 arguments"; + return false; + } + + src->AllColumns(); + const bool isJoin = src->GetJoin(); + if (!Join && ctx.SimpleColumns && isJoin) { + TNodePtr block = Y(); + const auto& sameKeyMap = src->GetJoin()->GetSameKeysMap(); + if (sameKeyMap) { + block = L(block, Y("let", "flatSameKeys", "row")); + for (const auto& sameKeysPair: sameKeyMap) { + const auto& column = sameKeysPair.first; + auto keys = Y("Coalesce"); + auto sameSourceIter = sameKeysPair.second.begin(); + for (auto end = sameKeysPair.second.end(); sameSourceIter != end; ++sameSourceIter) { + auto addKeyNode = Q(DotJoin(*sameSourceIter, column)); + keys = L(keys, Y("TryMember", "row", addKeyNode, Y("Null"))); + } + + block = L(block, Y("let", "flatSameKeys", Y("AddMember", "flatSameKeys", Q(column), keys))); + sameSourceIter = sameKeysPair.second.begin(); + for (auto end = sameKeysPair.second.end(); sameSourceIter != end; ++sameSourceIter) { + auto removeKeyNode = Q(DotJoin(*sameSourceIter, column)); + block = L(block, Y("let", "flatSameKeys", Y("ForceRemoveMember", "flatSameKeys", removeKeyNode))); + } + } + block = L(block, Y("let", "row", "flatSameKeys")); + } + + auto members = Y(); + for (auto& joinLabel: src->GetJoin()->GetJoinLabels()) { + members = L(members, BuildQuotedAtom(Pos, joinLabel + ".")); + } + block = L(block, Y("let", "res", Y("DivePrefixMembers", "row", Q(members)))); + + for (const auto& sameKeysPair: src->GetJoin()->GetSameKeysMap()) { + const auto& column = sameKeysPair.first; + auto addMemberKeyNode = Y("Member", "row", Q(column)); + block = L(block, Y("let", "res", Y("AddMember", "res", Q(column), addMemberKeyNode))); + } + + Node = Y("block", Q(L(block, Y("return", "res")))); + } else { + Node = ctx.EnableSystemColumns ? 
Y("RemoveSystemMembers", "row") : BuildAtom(Pos, "row", 0); + } + return true; + } + + TAstNode* Translate(TContext& ctx) const override { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, false); + } + + TNodePtr DoClone() const final { + return new TTableRow<Join>(Pos, ArgsCount); + } + +private: + const size_t ArgsCount; + TNodePtr Node; +}; + +TTableRows::TTableRows(TPosition pos, const TVector<TNodePtr>& args) + : TTableRows(pos, args.size()) +{} + +TTableRows::TTableRows(TPosition pos, ui32 argsCount) + : INode(pos) + , ArgsCount(argsCount) +{} + +bool TTableRows::DoInit(TContext& ctx, ISource* /*src*/) { + if (ArgsCount > 0) { + ctx.Error(Pos) << "TableRows requires exactly 0 arguments"; + return false; + } + Node = ctx.EnableSystemColumns ? Y("RemoveSystemMembers", "inputRowsList") : BuildAtom(Pos, "inputRowsList", 0); + return true; +} + +TAstNode* TTableRows::Translate(TContext& ctx) const { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); +} + +void TTableRows::DoUpdateState() const { + State.Set(ENodeState::Const, false); +} + +TNodePtr TTableRows::DoClone() const { + return MakeIntrusive<TTableRows>(Pos, ArgsCount); +} + +TSessionWindow::TSessionWindow(TPosition pos, const TVector<TNodePtr>& args) + : INode(pos) + , Args(args) + , FakeSource(BuildFakeSource(pos)) + , Valid(false) +{} + +void TSessionWindow::MarkValid() { + YQL_ENSURE(!HasState(ENodeState::Initialized)); + Valid = true; +} + +TNodePtr TSessionWindow::BuildTraits(const TString& label) const { + YQL_ENSURE(HasState(ENodeState::Initialized)); + + auto trueNode = Y("Bool", Q("true")); + + if (Args.size() == 2) { + auto timeExpr = Args[0]; + auto timeoutExpr = Args[1]; + + auto coalesceLess = [&](auto first, auto second) { + // first < second ?? 
true + return Y("Coalesce", Y("<", first, second), trueNode); + }; + + auto absDelta = Y("If", + coalesceLess("prev", "curr"), + Y("-", "curr", "prev"), + Y("-", "prev", "curr")); + + auto newSessionPred = Y("And", Y("AggrNotEquals", "curr", "prev"), coalesceLess(timeoutExpr, absDelta)); + auto timeoutLambda = BuildLambda(timeoutExpr->GetPos(), Y("prev", "curr"), newSessionPred); + auto sortSpec = Y("SortTraits", Y("TypeOf", label), trueNode, BuildLambda(Pos, Y("row"), Y("PersistableRepr", timeExpr))); + + return Y("SessionWindowTraits", + Y("TypeOf", label), + sortSpec, + BuildLambda(Pos, Y("row"), timeExpr), + timeoutLambda); + } + + auto orderExpr = Args[0]; + auto initLambda = Args[1]; + auto updateLambda = Args[2]; + auto calculateLambda = Args[3]; + + auto sortSpec = Y("SortTraits", Y("TypeOf", label), trueNode, BuildLambda(Pos, Y("row"), Y("PersistableRepr", orderExpr))); + + return Y("SessionWindowTraits", + Y("TypeOf", label), + sortSpec, + initLambda, + updateLambda, + calculateLambda); +} + +bool TSessionWindow::DoInit(TContext& ctx, ISource* src) { + if (!src || src->IsFake()) { + ctx.Error(Pos) << "SessionWindow requires data source"; + return false; + } + + if (!(Args.size() == 2 || Args.size() == 4)) { + ctx.Error(Pos) << "SessionWindow requires either two or four arguments"; + return false; + } + + if (!Valid) { + ctx.Error(Pos) << "SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression"; + return false; + } + + if (Args.size() == 2) { + auto timeExpr = Args[0]; + auto timeoutExpr = Args[1]; + return timeExpr->Init(ctx, src) && timeoutExpr->Init(ctx, FakeSource.Get()); + } + + auto orderExpr = Args[0]; + auto initLambda = Args[1]; + auto updateLambda = Args[2]; + auto calculateLambda = Args[3]; + src->AllColumns(); + + return orderExpr->Init(ctx, src) && initLambda->Init(ctx, FakeSource.Get()) && + updateLambda->Init(ctx, FakeSource.Get()) && calculateLambda->Init(ctx, FakeSource.Get()); +} + +TAstNode* 
TSessionWindow::Translate(TContext&) const { + YQL_ENSURE(false, "Translate is called for SessionWindow"); + return nullptr; +} + +void TSessionWindow::DoUpdateState() const { + State.Set(ENodeState::Const, false); +} + +TNodePtr TSessionWindow::DoClone() const { + return new TSessionWindow(Pos, CloneContainer(Args)); +} + +TString TSessionWindow::GetOpName() const { + return "SessionWindow"; +} + +template<bool IsStart> +class TSessionStart final : public INode { +public: + TSessionStart(TPosition pos, const TVector<TNodePtr>& args) + : INode(pos) + , ArgsCount(args.size()) + { + } +private: + TSessionStart(TPosition pos, size_t argsCount) + : INode(pos) + , ArgsCount(argsCount) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (!src || src->IsFake()) { + ctx.Error(Pos) << GetOpName() << " requires data source"; + return false; + } + + if (ArgsCount > 0) { + ctx.Error(Pos) << GetOpName() << " requires exactly 0 arguments"; + return false; + } + + auto windowName = src->GetWindowName(); + OverWindow = windowName != nullptr; + TNodePtr sessionWindow; + if (windowName) { + auto spec = src->FindWindowSpecification(ctx, *windowName); + if (!spec) { + return false; + } + sessionWindow = spec->Session; + if (!sessionWindow) { + ctx.Error(Pos) << GetOpName() << " can not be used with window " << *windowName << ": SessionWindow specification is missing in PARTITION BY"; + return false; + } + } else { + sessionWindow = src->GetSessionWindowSpec(); + if (!sessionWindow) { + TString extra; + if (src->IsOverWindowSource()) { + extra = ". 
Maybe you forgot to add OVER `window_name`?"; + } + if (src->HasAggregations()) { + ctx.Error(Pos) << GetOpName() << " can not be used here: SessionWindow specification is missing in GROUP BY" << extra; + } else { + ctx.Error(Pos) << GetOpName() << " can not be used without aggregation by SessionWindow" << extra; + } + return false; + } + + if (!IsStart) { + ctx.Error(Pos) << GetOpName() << " with GROUP BY is not supported yet"; + return false; + } + } + + if (sessionWindow->HasState(ENodeState::Failed)) { + return false; + } + + YQL_ENSURE(sessionWindow->HasState(ENodeState::Initialized)); + YQL_ENSURE(sessionWindow->GetLabel()); + Node = Y("Member", "row", BuildQuotedAtom(Pos, sessionWindow->GetLabel())); + if (OverWindow) { + Node = Y("Member", Node, BuildQuotedAtom(Pos, IsStart ? "start" : "state")); + } + return true; + } + + TAstNode* Translate(TContext& ctx) const override { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, false); + if (OverWindow) { + State.Set(ENodeState::OverWindow, true); + } else if (IsStart) { + State.Set(ENodeState::Aggregated, true); + } + } + + TNodePtr DoClone() const override { + return new TSessionStart<IsStart>(Pos, ArgsCount); + } + + TString GetOpName() const override { + return IsStart ? 
"SessionStart" : "SessionState"; + } + + const size_t ArgsCount; + bool OverWindow = false; + TNodePtr Node; +}; + +THoppingWindow::THoppingWindow(TPosition pos, const TVector<TNodePtr>& args) + : INode(pos) + , Args(args) + , FakeSource(BuildFakeSource(pos)) + , Valid(false) +{} + +void THoppingWindow::MarkValid() { + YQL_ENSURE(!HasState(ENodeState::Initialized)); + Valid = true; +} + +TNodePtr THoppingWindow::BuildTraits(const TString& label) const { + YQL_ENSURE(HasState(ENodeState::Initialized)); + + return Y( + "HoppingTraits", + Y("ListItemType", Y("TypeOf", label)), + BuildLambda(Pos, Y("row"), Y("Just", Y("SystemMetadata", Y("String", Q("write_time")), Y("DependsOn", "row")))), + Hop, + Interval, + Interval, + Q("true"), + Q("v2")); +} + +bool THoppingWindow::DoInit(TContext& ctx, ISource* src) { + if (!src || src->IsFake()) { + ctx.Error(Pos) << "HoppingWindow requires data source"; + return false; + } + + if (!(Args.size() == 2)) { + ctx.Error(Pos) << "HoppingWindow requires two arguments"; + return false; + } + + if (!Valid) { + ctx.Error(Pos) << "HoppingWindow can only be used as a top-level GROUP BY expression"; + return false; + } + + auto hopExpr = Args[0]; + auto intervalExpr = Args[1]; + if (!(hopExpr->Init(ctx, FakeSource.Get()) && intervalExpr->Init(ctx, FakeSource.Get()))) { + return false; + } + + Hop = ProcessIntervalParam(hopExpr); + Interval = ProcessIntervalParam(intervalExpr); + + return true; +} + +TAstNode* THoppingWindow::Translate(TContext&) const { + YQL_ENSURE(false, "Translate is called for HoppingWindow"); + return nullptr; +} + +void THoppingWindow::DoUpdateState() const { + State.Set(ENodeState::Const, false); +} + +TNodePtr THoppingWindow::DoClone() const { + return new THoppingWindow(Pos, CloneContainer(Args)); +} + +TString THoppingWindow::GetOpName() const { + return "HoppingWindow"; +} + +TNodePtr THoppingWindow::ProcessIntervalParam(const TNodePtr& node) const { + auto literal = node->GetLiteral("String"); + if (!literal) 
{ + return Y("EvaluateExpr", node); + } + + return new TYqlData(node->GetPos(), "Interval", {node}); +} + +TNodePtr BuildUdfUserTypeArg(TPosition pos, const TVector<TNodePtr>& args, TNodePtr customUserType) { + TVector<TNodePtr> argsTypeItems; + for (auto& arg : args) { + argsTypeItems.push_back(new TCallNodeImpl(pos, "TypeOf", TVector<TNodePtr>(1, arg))); + } + + TVector<TNodePtr> userTypeItems; + userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", argsTypeItems)); + userTypeItems.push_back(new TCallNodeImpl(pos, "StructType", {})); + if (customUserType) { + userTypeItems.push_back(customUserType); + } else { + userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {})); + } + + return new TCallNodeImpl(pos, "TupleType", userTypeItems); +} + +TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) { + TVector<TNodePtr> userTypeItems; + userTypeItems.reserve(3); + userTypeItems.push_back(positionalArgs->Y("TypeOf", positionalArgs)); + userTypeItems.push_back(positionalArgs->Y("TypeOf", namedArgs)); + if (customUserType) { + userTypeItems.push_back(customUserType); + } else { + userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {})); + } + + return new TCallNodeImpl(pos, "TupleType", userTypeItems); +} + +TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector<TNodePtr>& args, + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TNodePtr typeConfig) { + if (!ctx.Settings.EnableGenericUdfs) { + return {}; + } + TVector<TNodePtr> udfArgs; + udfArgs.push_back(new TAstListNodeImpl(pos)); + udfArgs[0]->Add(new TAstAtomNodeImpl(pos, "Void", 0)); + if (namedArgs) { + udfArgs.push_back(BuildUdfUserTypeArg(pos, positionalArgs, namedArgs, customUserType)); + } else { + udfArgs.push_back(BuildUdfUserTypeArg(pos, args, customUserType)); + } + + if (typeConfig) { + udfArgs.push_back(typeConfig); + } + + return udfArgs; +} + +TNodePtr BuildSqlCall(TContext& 
ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig) +{ + const TString fullName = module + "." + name; + TNodePtr callable; + if (to_lower(module) == "@yql") { + callable = BuildCallable(pos, module, name, {}); + } else if (!ctx.Settings.EnableGenericUdfs) { + auto varName = ctx.AddSimpleUdf(fullName); + callable = new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent); + } + + if (callable) { + TVector<TNodePtr> applyArgs = { callable }; + applyArgs.insert(applyArgs.end(), args.begin(), args.end()); + return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs); + } + + TVector<TNodePtr> sqlCallArgs; + sqlCallArgs.push_back(BuildQuotedAtom(pos, fullName)); + if (namedArgs) { + auto tupleNodePtr = positionalArgs->GetTupleNode(); + YQL_ENSURE(tupleNodePtr); + TNodePtr positionalArgsNode = new TCallNodeImpl(pos, "PositionalArgs", tupleNodePtr->Elements()); + sqlCallArgs.push_back(BuildTuple(pos, { positionalArgsNode, namedArgs })); + } else { + TNodePtr positionalArgsNode = new TCallNodeImpl(pos, "PositionalArgs", args); + sqlCallArgs.push_back(BuildTuple(pos, { positionalArgsNode })); + } + + // optional arguments + if (customUserType) { + sqlCallArgs.push_back(customUserType); + } else if (!typeConfig.Empty()) { + sqlCallArgs.push_back(new TCallNodeImpl(pos, "TupleType", {})); + } + + if (!typeConfig.Empty()) { + sqlCallArgs.push_back(typeConfig.Build()); + } else if (runConfig) { + sqlCallArgs.push_back(BuildQuotedAtom(pos, "")); + } + + if (runConfig) { + sqlCallArgs.push_back(runConfig); + } + + return new TCallNodeImpl(pos, "SqlCall", sqlCallArgs); +} + +class TCallableNode final: public INode { +public: + TCallableNode(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce) + : INode(pos) + , Module(module) + , 
Name(name) + , Args(args) + , ForReduce(forReduce) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + if (Module == "yql") { + Node = new TFuncNodeImpl(Pos, Name); + } else if (Module == "@yql") { + auto parsedName = StringContent(ctx, Pos, Name); + if (!parsedName) { + return false; + } + + const TString yql("(" + parsedName->Content + ")"); + TAstParseResult ast = ParseAst(yql, ctx.Pool.get()); + /// TODO: do not drop warnings + if (ast.IsOk()) { + const auto rootCount = ast.Root->GetChildrenCount(); + if (rootCount != 1) { + ctx.Error(Pos) << "Failed to parse YQL: expecting AST root node with single child, but got " << rootCount; + return false; + } + Node = AstNode(ast.Root->GetChild(0)); + } else { + ctx.Error(Pos) << "Failed to parse YQL: " << ast.Issues.ToString(); + return false; + } + + if (src) { + src->AllColumns(); + } + } else if (ctx.Settings.ModuleMapping.contains(Module)) { + Node = Y("bind", Module + "_module", Q(Name)); + if (src) { + src->AllColumns(); + } + } else { + TNodePtr customUserType = nullptr; + if (Module == "Tensorflow" && Name == "RunBatch") { + if (Args.size() > 2) { + auto passThroughAtom = Q("PassThrough"); + auto passThroughType = Y("StructMemberType", Y("ListItemType", Y("TypeOf", Args[1])), passThroughAtom); + customUserType = Y("AddMemberType", Args[2], passThroughAtom, passThroughType); + Args.erase(Args.begin() + 2); + } + } + + if ("Datetime" == Module || ("Yson" == Module && ctx.PragmaYsonFast)) + Module.append('2'); + + TNodePtr typeConfig = MakeTypeConfig(Pos, to_lower(Module), Args); + if (ForReduce) { + TVector<TNodePtr> udfArgs; + udfArgs.push_back(BuildQuotedAtom(Pos, TString(Module) + "." + Name)); + udfArgs.push_back(customUserType ? 
customUserType : new TCallNodeImpl(Pos, "TupleType", {})); + if (typeConfig) { + udfArgs.push_back(typeConfig); + } + Node = new TCallNodeImpl(Pos, "SqlReduceUdf", udfArgs); + } else { + auto udfArgs = BuildUdfArgs(ctx, Pos, Args, nullptr, nullptr, customUserType, typeConfig); + Node = BuildUdf(ctx, Pos, Module, Name, udfArgs); + } + } + return Node->Init(ctx, src); + } + + TAstNode* Translate(TContext& ctx) const override { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + const TString* FuncName() const override { + return &Name; + } + + const TString* ModuleName() const override { + return &Module; + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, Node->IsConstant()); + State.Set(ENodeState::Aggregated, Node->IsAggregated()); + } + + TNodePtr DoClone() const override { + return new TCallableNode(Pos, Module, Name, CloneContainer(Args), ForReduce); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_DEBUG_ABORT_UNLESS(Node); + Node->VisitTree(func, visited); + } +private: + TCiString Module; + TString Name; + TVector<TNodePtr> Args; + TNodePtr Node; + const bool ForReduce; +}; + +TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce) { + return new TCallableNode(pos, module, name, args, forReduce); +} + +TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) { + if (to_lower(module) == "@yql") { + return BuildCallable(pos, module, name, args); + } + + auto fullName = module + "." 
+ name; + if (!args.empty()) { + return new TYqlUdf(pos, fullName, args, args.size() + 1); + + } else { + auto varName = ctx.AddSimpleUdf(fullName); + return new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent); + } +} + +class TScriptUdf final: public INode { +public: + TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args) + : INode(pos) + , ModuleName(moduleName) + , FuncName(funcName) + , Args(args) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos; + if (!isPython) { + if (Args.size() != 2) { + ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters"; + return false; + } + } else { + if (Args.size() < 1 || Args.size() > 2) { + ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters"; + return false; + } + } + + auto nameAtom = BuildQuotedAtom(Pos, FuncName); + auto scriptNode = Args.back(); + if (!scriptNode->Init(ctx, src)) { + return false; + } + auto scriptStrPtr = Args.back()->GetLiteral("String"); + if (!ctx.CompactNamedExprs && scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) { + scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode)); + } + + INode::TPtr type; + if (Args.size() == 2) { + type = Args[0]; + } else { + // Python supports getting functions signatures right from docstrings + type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply", + Y("bind", "core_module", Q("PythonFuncSignature")), + Q(ModuleName), + scriptNode, + Y("String", nameAtom) + ))); + } + + if (!type->Init(ctx, src)) { + return false; + } + + Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode); + return true; + } + + TAstNode* Translate(TContext& ctx) const override { + Y_UNUSED(ctx); + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, true); + 
} + + TNodePtr DoClone() const final { + return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args)); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_DEBUG_ABORT_UNLESS(Node); + Node->VisitTree(func, visited); + } +private: + TString ModuleName; + TString FuncName; + TVector<TNodePtr> Args; + TNodePtr Node; +}; + +template <bool Sorted, bool Hashed> +class TYqlToDict final: public TCallNode { +public: + TYqlToDict(TPosition pos, const TString& mode, const TVector<TNodePtr>& args) + : TCallNode(pos, "ToDict", 4, 4, args) + , Mode(mode) + {} + +private: + TCallNode::TPtr DoClone() const override { + return new TYqlToDict<Sorted, Hashed>(GetPos(), Mode, CloneContainer(Args)); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (Args.size() != 1) { + ctx.Error(Pos) << "ToDict required exactly one argument"; + return false; + } + Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0")))); + Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1")))); + Args.push_back(Q(Y(Q(Sorted ? "Sorted" : Hashed ? "Hashed" : "Auto"), Q(Mode)))); + return TCallNode::DoInit(ctx, src); + } +private: + TString Mode; +}; + +template <bool IsStart> +class THoppingTime final: public TAstListNode { +public: + THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {}) + : TAstListNode(pos) + { + Y_UNUSED(args); + } + +private: + TNodePtr DoClone() const override { + return new THoppingTime(GetPos()); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(ctx); + + auto legacySpec = src->GetLegacyHoppingWindowSpec(); + auto spec = src->GetHoppingWindowSpec(); + if (!legacySpec && !spec) { + ctx.Error(Pos) << "No hopping window parameters in aggregation"; + return false; + } + + Nodes.clear(); + + const auto fieldName = legacySpec + ? "_yql_time" + : spec->GetLabel(); + + const auto interval = legacySpec + ? 
legacySpec->Interval + : dynamic_cast<THoppingWindow*>(spec.Get())->Interval; + + if (!IsStart) { + Add("Member", "row", Q(fieldName)); + return true; + } + + Add("Sub", + Y("Member", "row", Q(fieldName)), + interval); + return true; + } + + void DoUpdateState() const override { + State.Set(ENodeState::Aggregated, true); + } +}; + +class TInvalidBuiltin final: public INode { +public: + TInvalidBuiltin(TPosition pos, const TString& info) + : INode(pos) + , Info(info) + { + } + + bool DoInit(TContext& ctx, ISource*) override { + ctx.Error(Pos) << Info; + return false; + } + + TAstNode* Translate(TContext&) const override { + return nullptr; + } + + TPtr DoClone() const override { + return new TInvalidBuiltin(GetPos(), Info); + } +private: + TString Info; +}; + +enum EAggrFuncTypeCallback { + NORMAL, + KEY_PAYLOAD, + PAYLOAD_PREDICATE, + TWO_ARGS, + COUNT, + HISTOGRAM, + LINEAR_HISTOGRAM, + PERCENTILE, + TOPFREQ, + TOP, + TOP_BY, + COUNT_DISTINCT_ESTIMATE, + LIST, + UDAF, + PG, + NTH_VALUE +}; + +struct TCoreFuncInfo { + TString Name; + ui32 MinArgs; + ui32 MaxArgs; +}; + +using TAggrFuncFactoryCallback = std::function<INode::TPtr(TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory)>; +using TAggrFuncFactoryCallbackMap = std::unordered_map<TString, TAggrFuncFactoryCallback, THash<TString>>; +using TBuiltinFactoryCallback = std::function<TNodePtr(TPosition pos, const TVector<TNodePtr>& args)>; +using TBuiltinFactoryCallbackMap = std::unordered_map<TString, TBuiltinFactoryCallback, THash<TString>>; +using TCoreFuncMap = std::unordered_map<TString, TCoreFuncInfo, THash<TString>>; + +TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback( + const TString& functionName, + const TString& factoryName, + EAggrFuncTypeCallback type = NORMAL, + const TString& functionNameOverride = TString(), + const TVector<EAggregateMode>& validModes = {}) { + + const TString realFunctionName = functionNameOverride.empty() ? 
functionName : functionNameOverride; + return [functionName, realFunctionName, factoryName, type, validModes] (TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory) -> INode::TPtr { + if (!validModes.empty()) { + if (!IsIn(validModes, aggMode)) { + TString errorText; + if (TVector{EAggregateMode::OverWindow} == validModes) { + errorText = TStringBuilder() + << "Can't use window function " << functionName << " without window specification (OVER keyword is missing)"; + } else { + errorText = TStringBuilder() + << "Can't use " << functionName << " in " << ToString(aggMode) << " aggregation mode"; + } + return INode::TPtr(new TInvalidBuiltin(pos, errorText)); + } + } + TAggregationPtr factory = nullptr; + switch (type) { + case NORMAL: + factory = BuildFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case KEY_PAYLOAD: + factory = BuildKeyPayloadFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case PAYLOAD_PREDICATE: + factory = BuildPayloadPredicateFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case TWO_ARGS: + factory = BuildTwoArgsFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case COUNT: + factory = BuildCountAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case HISTOGRAM: + factory = BuildHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case LINEAR_HISTOGRAM: + factory = BuildLinearHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case PERCENTILE: + factory = BuildPercentileFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case TOPFREQ: + factory = BuildTopFreqFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case TOP: + factory = BuildTopFactoryAggregation<false>(pos, realFunctionName, factoryName, aggMode); + break; + case TOP_BY: + factory = BuildTopFactoryAggregation<true>(pos, 
realFunctionName, factoryName, aggMode); + break; + case COUNT_DISTINCT_ESTIMATE: + factory = BuildCountDistinctEstimateFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case LIST: + factory = BuildListFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case UDAF: + factory = BuildUserDefinedFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + case PG: + factory = BuildPGFactoryAggregation(pos, realFunctionName, aggMode); + break; + case NTH_VALUE: + factory = BuildNthFactoryAggregation(pos, realFunctionName, factoryName, aggMode); + break; + } + if (isFactory) { + auto realArgs = args; + realArgs.erase(realArgs.begin()); // skip function name + return new TBasicAggrFactory(pos, functionName, factory, realArgs); + } else { + return new TBasicAggrFunc(pos, functionName, factory, args); + } + }; +} + +TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback( + const TString& functionName, + const TString& factoryName, + const TVector<EAggregateMode>& validModes, + EAggrFuncTypeCallback type = NORMAL, + const TString& functionNameOverride = TString()) { + return BuildAggrFuncFactoryCallback(functionName, factoryName, type, functionNameOverride, validModes); +} + +template<typename TType> +TBuiltinFactoryCallback BuildSimpleBuiltinFactoryCallback() { + return [] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(pos, args); + }; +} + +template<typename TType> +TBuiltinFactoryCallback BuildNamedBuiltinFactoryCallback(const TString& name) { + return [name] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(pos, name, args); + }; +} + +template<typename TType> +TBuiltinFactoryCallback BuildArgcBuiltinFactoryCallback(i32 minArgs, i32 maxArgs) { + return [minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(pos, minArgs, maxArgs, args); + }; +} + +template<typename TType> +TBuiltinFactoryCallback 
BuildNamedArgcBuiltinFactoryCallback(const TString& name, i32 minArgs, i32 maxArgs) { + return [name, minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(pos, name, minArgs, maxArgs, args); + }; +} + +template<typename TType> +TBuiltinFactoryCallback BuildNamedDepsArgcBuiltinFactoryCallback(ui32 reqArgsCount, const TString& name, i32 minArgs, i32 maxArgs) { + return [reqArgsCount, name, minArgs, maxArgs](TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(reqArgsCount, pos, name, minArgs, maxArgs, args); + }; +} + +template<typename TType> +TBuiltinFactoryCallback BuildBoolBuiltinFactoryCallback(bool arg) { + return [arg] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(pos, args, arg); + }; +} + +template<typename TType> +TBuiltinFactoryCallback BuildFoldBuiltinFactoryCallback(const TString& name, const TString& defaultValue) { + return [name, defaultValue] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { + return new TType(pos, name, "Bool", defaultValue, 1, args); + }; +} + +TNodePtr MakePair(TPosition pos, const TVector<TNodePtr>& args) { + TNodePtr list = new TAstListNodeImpl(pos, { + args[0], + args.size() > 1 ? 
args[1] : new TAstListNodeImpl(pos,{ new TAstAtomNodeImpl(pos, "Null", TNodeFlags::Default) }) + }); + + return new TAstListNodeImpl(pos, { + new TAstAtomNodeImpl(pos, "quote", TNodeFlags::Default), + list + }); +} + +struct TBuiltinFuncData { + const TBuiltinFactoryCallbackMap BuiltinFuncs; + const TAggrFuncFactoryCallbackMap AggrFuncs; + const TCoreFuncMap CoreFuncs; + + TBuiltinFuncData(): + BuiltinFuncs(MakeBuiltinFuncs()), + AggrFuncs(MakeAggrFuncs()), + CoreFuncs(MakeCoreFuncs()) + { + } + + TBuiltinFactoryCallbackMap MakeBuiltinFuncs() { + TBuiltinFactoryCallbackMap builtinFuncs = { + // Branching + {"if", BuildSimpleBuiltinFactoryCallback<TYqlIf<false>>()}, + {"ifstrict", BuildSimpleBuiltinFactoryCallback<TYqlIf<true>>() }, + + // String builtins + {"len", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)}, + {"length", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)}, + {"charlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)}, + {"characterlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)}, + {"substring", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("Substring")}, + {"find", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("Find")}, + {"rfind", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("RFind")}, + {"byteat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ByteAt", 2, 2) }, + {"startswith", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StartsWith", 2, 2)}, + {"endswith", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EndsWith", 2, 2)}, + + // Numeric builtins + {"abs", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Abs", 1, 1) }, + {"tobytes", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToBytes", 1, 1) }, + {"frombytes", BuildSimpleBuiltinFactoryCallback<TFromBytes>() }, + + // Compare builtins + {"minof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)}, + {"maxof", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)}, + {"greatest", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)}, + {"least", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)}, + {"in", BuildSimpleBuiltinFactoryCallback<TYqlIn>()}, + + // List builtins + {"aslist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListMayWarn", 0, -1)}, + {"asliststrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListStrict", 0, -1) }, + {"listlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)}, + {"listhasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)}, + {"listextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListExtend", 0, -1)}, + {"listextendstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListExtendStrict", 0, -1)}, + {"listunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUnionAll", 0, -1) }, + {"listzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListZip", -1, -1)}, + {"listzipall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListZipAll", -1, -1)}, + {"listenumerate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListEnumerate", 1, 3)}, + {"listreverse", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListReverse", 1, 1)}, + {"listskip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSkip", 2, 2)}, + {"listtake", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTake", 2, 2)}, + {"listhead", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListHead", 1, 1)}, + {"listlast", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListLast", 1, 1)}, + {"listsort", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)}, + {"listsortasc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)}, + {"listsortdesc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(false)}, + {"listmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(false)}, + 
{"listflatmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(true)}, + {"listfilter", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListFilter")}, + {"listany", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAny", 1, 1)}, + {"listall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAll", 1, 1)}, + {"listhas", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListHas", 2, 2)}, + {"listmax", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListMax", 1, 1)}, + {"listmin", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListMin", 1, 1)}, + {"listsum", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSum", 1, 1)}, + {"listfold", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFold", 3, 3)}, + {"listfold1", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFold1", 3, 3)}, + {"listfoldmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFoldMap", 3, 3)}, + {"listfold1map", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFold1Map", 3, 3)}, + {"listavg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAvg", 1, 1)}, + {"listconcat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListConcat", 1, 2)}, + {"listextract", BuildSimpleBuiltinFactoryCallback<TListExtractBuiltin>()}, + {"listuniq", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUniq", 1, 1)}, + {"listuniqstable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUniqStable", 1, 1)}, + {"listcreate", BuildSimpleBuiltinFactoryCallback<TListCreateBuiltin>()}, + {"listfromrange", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFromRange", 2, 3) }, + {"listreplicate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Replicate", 2, 2) }, + {"listtakewhile", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListTakeWhile") }, + {"listskipwhile", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListSkipWhile") }, + {"listtakewhileinclusive", 
BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListTakeWhileInclusive") }, + {"listskipwhileinclusive", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListSkipWhileInclusive") }, + {"listcollect", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCollect", 1, 1) }, + {"listnotnull", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListNotNull", 1, 1)}, + {"listflatten", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFlatten", 1, 1)}, + {"listtop", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTop", 2, 3)}, + {"listtopasc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopAsc", 2, 3)}, + {"listtopdesc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopDesc", 2, 3)}, + {"listtopsort", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSort", 2, 3)}, + {"listtopsortasc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSortAsc", 2, 3)}, + {"listtopsortdesc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSortDesc", 2, 3)}, + + // Dict builtins + {"dictlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)}, + {"dicthasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)}, + {"dictcreate", BuildSimpleBuiltinFactoryCallback<TDictCreateBuiltin>()}, + {"setcreate", BuildSimpleBuiltinFactoryCallback<TSetCreateBuiltin>()}, + {"asdict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictMayWarn", 0, -1)}, + {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictStrict", 0, -1)}, + {"asset", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSetMayWarn", 0, -1)}, + {"assetstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSetStrict", 0, -1)}, + {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, false>>("One")}, + {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, false>>("Many")}, + {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true, 
false>>("One")}, + {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true, false>>("Many")}, + {"tohasheddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, true>>("One")}, + {"tohashedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, true>>("Many")}, + {"dictkeys", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeys", 1, 1) }, + {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloads", 1, 1) }, + {"dictitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictItems", 1, 1) }, + {"dictlookup", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Lookup", 2, 2) }, + {"dictcontains", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Contains", 2, 2) }, + + // Atom builtins + {"asatom", BuildSimpleBuiltinFactoryCallback<TYqlAsAtom>()}, + {"secureparam", BuildNamedBuiltinFactoryCallback<TYqlAtom>("SecureParam")}, + + {"void", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Void", 0, 0)}, + {"emptylist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyList", 0, 0)}, + {"emptydict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyDict", 0, 0)}, + {"callable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Callable", 2, 2)}, + {"way", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Way", 1, 1) }, + {"variant", BuildSimpleBuiltinFactoryCallback<TYqlVariant>() }, + {"enum", BuildSimpleBuiltinFactoryCallback<TYqlEnum>() }, + {"asvariant", BuildSimpleBuiltinFactoryCallback<TYqlAsVariant>() }, + {"asenum", BuildSimpleBuiltinFactoryCallback<TYqlAsEnum>() }, + {"astagged", BuildSimpleBuiltinFactoryCallback<TYqlAsTagged>() }, + {"untag", BuildSimpleBuiltinFactoryCallback<TYqlUntag>() }, + {"parsetype", BuildSimpleBuiltinFactoryCallback<TYqlParseType>() }, + {"ensuretype", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<true>>() }, + {"ensureconvertibleto", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<false>>() }, + {"ensure", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Ensure", 2, 3) }, + {"evaluateexpr", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateExpr", 1, 1) }, + {"evaluateatom", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateAtom", 1, 1) }, + {"evaluatetype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateType", 1, 1) }, + {"unwrap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unwrap", 1, 2) }, + {"just", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Just", 1, 1) }, + {"nothing", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nothing", 1, 1) }, + {"formattype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatType", 1, 1) }, + {"formattypediff", BuildNamedBuiltinFactoryCallback<TFormatTypeDiff<false>>("FormatTypeDiff") }, + {"formattypediffpretty", BuildNamedBuiltinFactoryCallback<TFormatTypeDiff<true>>("FormatTypeDiffPretty") }, + {"pgtype", BuildSimpleBuiltinFactoryCallback<TYqlPgType>() }, + {"pgconst", BuildSimpleBuiltinFactoryCallback<TYqlPgConst>() }, + {"pgop", BuildSimpleBuiltinFactoryCallback<TYqlPgOp>() }, + {"pgcall", BuildSimpleBuiltinFactoryCallback<TYqlPgCall<false>>() }, + {"pgrangecall", BuildSimpleBuiltinFactoryCallback<TYqlPgCall<true>>() }, + {"pgcast", BuildSimpleBuiltinFactoryCallback<TYqlPgCast>() }, + {"frompg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromPg", 1, 1) }, + {"topg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToPg", 1, 1) }, + {"pgor", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgOr", 2, 2) }, + {"pgand", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgAnd", 2, 2) }, + {"pgnot", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgNot", 1, 1) }, + {"pgarray", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgArray", 1, -1) }, + {"typeof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeOf", 1, 1) }, + {"instanceof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("InstanceOf", 1, 1) }, + {"datatype", 
BuildSimpleBuiltinFactoryCallback<TYqlDataType>() }, + {"optionaltype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalType", 1, 1) }, + {"listtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListType", 1, 1) }, + {"streamtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamType", 1, 1) }, + {"dicttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictType", 2, 2) }, + {"tupletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleType", 0, -1) }, + {"generictype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("GenericType", 0, 0) }, + {"unittype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnitType", 0, 0) }, + {"voidtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidType", 0, 0) }, + {"resourcetype", BuildSimpleBuiltinFactoryCallback<TYqlResourceType>() }, + {"taggedtype", BuildSimpleBuiltinFactoryCallback<TYqlTaggedType>() }, + {"varianttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantType", 1, 1) }, + {"callabletype", BuildSimpleBuiltinFactoryCallback<TYqlCallableType>() }, + {"optionalitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalItemType", 1, 1) }, + {"listitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListItemType", 1, 1) }, + {"streamitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamItemType", 1, 1) }, + {"dictkeytype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeyType", 1, 1) }, + {"dictpayloadtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloadType", 1, 1) }, + {"tupleelementtype", BuildSimpleBuiltinFactoryCallback<TYqlTupleElementType>() }, + {"structmembertype", BuildSimpleBuiltinFactoryCallback<TYqlStructMemberType>() }, + {"callableresulttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableResultType", 1, 1) }, + {"callableargumenttype", BuildSimpleBuiltinFactoryCallback<TYqlCallableArgumentType>() }, + {"variantunderlyingtype", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantUnderlyingType", 1, 1) }, + {"fromysonsimpletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromYsonSimpleType", 2, 2) }, + {"currentutcdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDate", 0, -1) }, + {"currentutcdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDatetime", 0, -1) }, + {"currentutctimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcTimestamp", 0, -1) }, + { "currenttzdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzDate", 1, -1) }, + { "currenttzdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzDatetime", 1, -1) }, + { "currenttztimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzTimestamp", 1, -1) }, + {"currentoperationid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationId", 0, 0) }, + {"currentoperationsharedid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationSharedId", 0, 0) }, + {"currentauthenticateduser", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentAuthenticatedUser", 0, 0) }, + {"addtimezone", BuildSimpleBuiltinFactoryCallback<TYqlAddTimezone>() }, + {"removetimezone", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("RemoveTimezone", 1, 1) }, + {"pickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Pickle", 1, 1) }, + {"stablepickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StablePickle", 1, 1) }, + {"unpickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unpickle", 2, 2) }, + + {"typehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeHandle", 1, 1) }, + {"parsetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ParseTypeHandle", 1, 1) }, + {"typekind", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeKind", 1, 1) }, + {"datatypecomponents", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeComponents", 1, 1) }, + {"datatypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeHandle", 1, 1) }, + {"optionaltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalTypeHandle", 1, 1) }, + {"listtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTypeHandle", 1, 1) }, + {"streamtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamTypeHandle", 1, 1) }, + {"tupletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeComponents", 1, 1) }, + {"tupletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeHandle", 1, 1) }, + {"structtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeComponents", 1, 1) }, + {"structtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeHandle", 1, 1) }, + {"dicttypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeComponents", 1, 1) }, + {"dicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeHandle", 2, 2) }, + {"resourcetypetag", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeTag", 1, 1) }, + {"resourcetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeHandle", 1, 1) }, + {"taggedtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeComponents", 1, 1) }, + {"taggedtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeHandle", 2, 2) }, + {"varianttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantTypeHandle", 1, 1) }, + {"voidtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidTypeHandle", 0, 0) }, + {"nulltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("NullTypeHandle", 0, 0) }, + {"emptylisttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyListTypeHandle", 0, 0) }, + {"emptydicttypehandle", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyDictTypeHandle", 0, 0) }, + {"callabletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeComponents", 1, 1) }, + {"callableargument", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableArgument", 1, 3) }, + {"callabletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeHandle", 2, 4) }, + {"pgtypename", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgTypeName", 1, 1) }, + {"pgtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgTypeHandle", 1, 1) }, + {"formatcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatCode", 1, 1) }, + {"worldcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("WorldCode", 0, 0) }, + {"atomcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AtomCode", 1, 1) }, + {"listcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCode", 0, -1) }, + {"funccode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FuncCode", 1, -1) }, + {"lambdacode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaCode", 1, 2) }, + {"evaluatecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateCode", 1, 1) }, + {"reprcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ReprCode", 1, 1) }, + {"quotecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("QuoteCode", 1, 1) }, + {"lambdaargumentscount", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaArgumentsCount", 1, 1) }, + {"lambdaoptionalargumentscount", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaOptionalArgumentsCount", 1, 1) }, + {"subqueryextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryExtend", 1, -1) }, + {"subqueryunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryUnionAll", 1, -1) }, + {"subquerymerge", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryMerge", 1, -1) }, + {"subqueryunionmerge", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryUnionMerge", 1, -1) }, + {"subqueryextendfor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryExtendFor>>() }, + {"subqueryunionallfor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryUnionAllFor>>() }, + {"subquerymergefor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryMergeFor>>() }, + {"subqueryunionmergefor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryUnionMergeFor>>() }, + {"subqueryorderby", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryOrderBy<SubqueryOrderBy>>() }, + {"subqueryassumeorderby", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryOrderBy<SubqueryAssumeOrderBy>>() }, + + // Tuple builtins + {"astuple", BuildSimpleBuiltinFactoryCallback<TTupleNode>()}, + + // Struct builtins + {"trymember", BuildNamedBuiltinFactoryCallback<TTryMember>("TryMember")}, + {"addmember", BuildNamedBuiltinFactoryCallback<TAddMember>("AddMember")}, + {"replacemember", BuildNamedBuiltinFactoryCallback<TAddMember>("ReplaceMember")}, + {"removemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("RemoveMember")}, + {"forceremovemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("ForceRemoveMember")}, + {"combinemembers", BuildNamedBuiltinFactoryCallback<TCombineMembers>("FlattenMembers")}, + {"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")}, + {"staticmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticMap", 2, 2) }, + {"staticzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticZip", 1, -1) }, + {"structunion", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructUnion", 2, 3)}, + {"structintersection", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructIntersection", 2, 3)}, + {"structdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructDifference", 2, 2)}, + {"structsymmetricdifference", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructSymmetricDifference", 2, 2)}, + {"staticfold", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold", 3, 3)}, + {"staticfold1", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold1", 3, 3)}, + + // File builtins + {"filepath", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("FilePath")}, + {"filecontent", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("FileContent")}, + {"folderpath", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("FolderPath") }, + {"files", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("Files")}, + {"parsefile", BuildSimpleBuiltinFactoryCallback<TYqlParseFileOp>()}, + + // Misc builtins + {"coalesce", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1)}, + {"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) }, + {"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) }, + {"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)}, + {"assumestrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AssumeStrict", 1, 1)}, + {"assumenonstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AssumeNonStrict", 1, 1)}, + {"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)}, + {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)}, + {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) }, + {"tablepath", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TablePath") }, + {"tablerecordindex", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") }, + {"tablerow", BuildSimpleBuiltinFactoryCallback<TTableRow<false>>() }, + {"jointablerow", BuildSimpleBuiltinFactoryCallback<TTableRow<true>>() }, + {"tablerows", BuildSimpleBuiltinFactoryCallback<TTableRows>() }, + {"weakfield", BuildSimpleBuiltinFactoryCallback<TWeakFieldOp>()}, + {"version", 
BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Version", 0, 0)}, + + {"systemmetadata", BuildNamedArgcBuiltinFactoryCallback<TCallDirectRow>("SystemMetadata", 1, -1)}, + + // Hint builtins + {"grouping", BuildSimpleBuiltinFactoryCallback<TGroupingNode>()}, + + // Window functions + {"rownumber", BuildNamedArgcBuiltinFactoryCallback<TWinRowNumber>("RowNumber", 0, 0)}, + {"rank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("Rank", 0, 1)}, + {"denserank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("DenseRank", 0, 1)}, + {"lead", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lead", 1, 2)}, + {"lag", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lag", 1, 2)}, + {"percentrank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("PercentRank", 0, 1)}, + {"cumedist", BuildNamedArgcBuiltinFactoryCallback<TWinCumeDist>("CumeDist", 0, 0)}, + {"ntile", BuildNamedArgcBuiltinFactoryCallback<TWinNTile>("NTile", 1, 1)}, + + // Session window + {"sessionwindow", BuildSimpleBuiltinFactoryCallback<TSessionWindow>()}, + {"sessionstart", BuildSimpleBuiltinFactoryCallback<TSessionStart<true>>()}, + {"sessionstate", BuildSimpleBuiltinFactoryCallback<TSessionStart<false>>()}, + + // New hopping + {"hoppingwindow", BuildSimpleBuiltinFactoryCallback<THoppingWindow>()}, + + // Hopping intervals time functions + {"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()}, + {"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()}, + + //MatchRecognize navigation functions + {"first", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("FIRST")}, + {"last", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("LAST")}, + }; + return builtinFuncs; + } + + // Builds the aggregate-function table: every key is a lower-case function name and every value is the factory callback returned by BuildAggrFuncFactoryCallback for a display name and a *_traits_factory name. + // Many keys are aliases that share one display name and factory (e.g. agglist / aggrlist / aggregatelist, or varpop / varp / populationvariance / variancepopulation). + // NOTE(review): the optional extra arguments (a kind such as KEY_PAYLOAD, TOP or HISTOGRAM, and sometimes a trailing string such as "HLL") are interpreted by BuildAggrFuncFactoryCallback, which is not visible here; presumably the kind selects argument handling and the string overrides the reported name - confirm. + // NOTE(review): presumably this map backs TBuiltinFuncData::AggrFuncs, which BuildBuiltinFunc searches by normalized name - confirm. + // Returns the populated map by value. + TAggrFuncFactoryCallbackMap MakeAggrFuncs() { + constexpr auto OverWindow = EAggregateMode::OverWindow; + + TAggrFuncFactoryCallbackMap aggrFuncs = { + {"min", BuildAggrFuncFactoryCallback("Min", "min_traits_factory")}, + {"max", BuildAggrFuncFactoryCallback("Max", "max_traits_factory")}, + + 
{"minby", BuildAggrFuncFactoryCallback("MinBy", "min_by_traits_factory", KEY_PAYLOAD)}, + {"maxby", BuildAggrFuncFactoryCallback("MaxBy", "max_by_traits_factory", KEY_PAYLOAD)}, + + {"sum", BuildAggrFuncFactoryCallback("Sum", "sum_traits_factory")}, + {"sumif", BuildAggrFuncFactoryCallback("SumIf", "sum_if_traits_factory", PAYLOAD_PREDICATE) }, + + {"checked_sum", BuildAggrFuncFactoryCallback("CheckedSum", "checked_sum_traits_factory")}, + {"checked_sumif", BuildAggrFuncFactoryCallback("CheckedSumIf", "checked_sum_if_traits_factory", PAYLOAD_PREDICATE) }, + + {"some", BuildAggrFuncFactoryCallback("Some", "some_traits_factory")}, + {"somevalue", BuildAggrFuncFactoryCallback("SomeValue", "some_traits_factory")}, + + {"count", BuildAggrFuncFactoryCallback("Count", "count_traits_factory", COUNT)}, + {"countif", BuildAggrFuncFactoryCallback("CountIf", "count_if_traits_factory")}, + + {"every", BuildAggrFuncFactoryCallback("Every", "and_traits_factory")}, + {"booland", BuildAggrFuncFactoryCallback("BoolAnd", "and_traits_factory")}, + {"boolor", BuildAggrFuncFactoryCallback("BoolOr", "or_traits_factory")}, + {"boolxor", BuildAggrFuncFactoryCallback("BoolXor", "xor_traits_factory")}, + + {"bitand", BuildAggrFuncFactoryCallback("BitAnd", "bit_and_traits_factory")}, + {"bitor", BuildAggrFuncFactoryCallback("BitOr", "bit_or_traits_factory")}, + {"bitxor", BuildAggrFuncFactoryCallback("BitXor", "bit_xor_traits_factory")}, + + {"avg", BuildAggrFuncFactoryCallback("Avg", "avg_traits_factory")}, + {"avgif", BuildAggrFuncFactoryCallback("AvgIf", "avg_if_traits_factory", PAYLOAD_PREDICATE) }, + + {"agglist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)}, + {"aggrlist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)}, + {"aggregatelist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)}, + {"agglistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)}, + 
{"aggrlistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)}, + {"aggregatelistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)}, + + {"median", BuildAggrFuncFactoryCallback("Median", "percentile_traits_factory", PERCENTILE)}, + {"percentile", BuildAggrFuncFactoryCallback("Percentile", "percentile_traits_factory", PERCENTILE)}, + + {"mode", BuildAggrFuncFactoryCallback("Mode", "topfreq_traits_factory", TOPFREQ) }, + {"topfreq", BuildAggrFuncFactoryCallback("TopFreq", "topfreq_traits_factory", TOPFREQ) }, + + {"top", BuildAggrFuncFactoryCallback("Top", "top_traits_factory", TOP)}, + {"bottom", BuildAggrFuncFactoryCallback("Bottom", "bottom_traits_factory", TOP)}, + {"topby", BuildAggrFuncFactoryCallback("TopBy", "top_by_traits_factory", TOP_BY)}, + {"bottomby", BuildAggrFuncFactoryCallback("BottomBy", "bottom_by_traits_factory", TOP_BY)}, + + {"histogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM, "Histogram")}, + {"histogramcdf", BuildAggrFuncFactoryCallback("AdaptiveWardHistogramCDF", "histogram_cdf_adaptive_ward_traits_factory", HISTOGRAM, "HistogramCDF")}, + {"adaptivewardhistogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM)}, + {"adaptivewardhistogramcdf", BuildAggrFuncFactoryCallback("AdaptiveWardHistogramCDF", "histogram_cdf_adaptive_ward_traits_factory", HISTOGRAM)}, + {"adaptiveweighthistogram", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogram", "histogram_adaptive_weight_traits_factory", HISTOGRAM)}, + {"adaptiveweighthistogramcdf", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogramCDF", "histogram_cdf_adaptive_weight_traits_factory", HISTOGRAM)}, + {"adaptivedistancehistogram", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogram", "histogram_adaptive_distance_traits_factory", HISTOGRAM)}, + {"adaptivedistancehistogramcdf", 
BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogramCDF", "histogram_cdf_adaptive_distance_traits_factory", HISTOGRAM)}, + {"blockwardhistogram", BuildAggrFuncFactoryCallback("BlockWardHistogram", "histogram_block_ward_traits_factory", HISTOGRAM)}, + {"blockwardhistogramcdf", BuildAggrFuncFactoryCallback("BlockWardHistogramCDF", "histogram_cdf_block_ward_traits_factory", HISTOGRAM)}, + {"blockweighthistogram", BuildAggrFuncFactoryCallback("BlockWeightHistogram", "histogram_block_weight_traits_factory", HISTOGRAM)}, + {"blockweighthistogramcdf", BuildAggrFuncFactoryCallback("BlockWeightHistogramCDF", "histogram_cdf_block_weight_traits_factory", HISTOGRAM)}, + {"linearhistogram", BuildAggrFuncFactoryCallback("LinearHistogram", "histogram_linear_traits_factory", LINEAR_HISTOGRAM)}, + {"linearhistogramcdf", BuildAggrFuncFactoryCallback("LinearHistogramCDF", "histogram_cdf_linear_traits_factory", LINEAR_HISTOGRAM)}, + {"logarithmichistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM)}, + {"logarithmichistogramcdf", BuildAggrFuncFactoryCallback("LogarithmicHistogramCDF", "histogram_cdf_logarithmic_traits_factory", LINEAR_HISTOGRAM)}, + {"loghistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogram")}, + {"loghistogramcdf", BuildAggrFuncFactoryCallback("LogarithmicHistogramCDF", "histogram_cdf_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogramCDF")}, + + {"hyperloglog", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE)}, + {"hll", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "HLL")}, + {"countdistinctestimate", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "CountDistinctEstimate")}, + + {"variance", BuildAggrFuncFactoryCallback("Variance", 
"variance_0_1_traits_factory")}, + {"stddev", BuildAggrFuncFactoryCallback("StdDev", "variance_1_1_traits_factory")}, + {"populationvariance", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, + {"variancepopulation", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, + {"populationstddev", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, + {"stddevpopulation", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, + {"varpop", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, + {"stddevpop", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, + {"varp", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, + {"stddevp", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, + {"variancesample", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")}, + {"stddevsample", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")}, + {"varsamp", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")}, + {"stddevsamp", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")}, + {"vars", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")}, + {"stddevs", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")}, + + {"correlation", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS)}, + {"corr", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS, "Corr")}, + {"covariance", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covariance")}, + {"covariancesample", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS)}, + {"covarsamp", 
BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarSamp")}, + {"covar", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covar")}, + {"covars", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarS")}, + {"covariancepopulation", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS)}, + {"covarpop", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarPop")}, + {"covarp", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarP")}, + + {"udaf", BuildAggrFuncFactoryCallback("UDAF", "udaf_traits_factory", UDAF)}, + + // Window functions + // NOTE(review): the {OverWindow} argument (EAggregateMode::OverWindow) presumably limits these entries to window aggregation mode - confirm against BuildAggrFuncFactoryCallback. + {"firstvalue", BuildAggrFuncFactoryCallback("FirstValue", "first_value_traits_factory", {OverWindow})}, + {"lastvalue", BuildAggrFuncFactoryCallback("LastValue", "last_value_traits_factory", {OverWindow})}, + {"nthvalue", BuildAggrFuncFactoryCallback("NthValue", "nth_value_traits_factory", {OverWindow}, NTH_VALUE)}, + {"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})}, + {"lastvalueignorenulls", BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})}, + {"nthvalueignorenulls", BuildAggrFuncFactoryCallback("NthValueIgnoreNulls", "nth_value_ignore_nulls_traits_factory", {OverWindow}, NTH_VALUE)}, + }; + return aggrFuncs; + } + + // Builds the table of builtins that are forwarded to the "core" namespace: each key is a lower-case function name and each value is an initializer of the form { name, min args, max args }. + // BuildBuiltinFunc looks names up in the core-function map and reads Name / MinArgs / MaxArgs from the match: it rejects a call whose argument count is outside [MinArgs, MaxArgs] and, when MinArgs != MaxArgs, appends the actual argument count to Name (so SetUnion becomes SetUnion2 or SetUnion3). + // NOTE(review): presumably this map backs TBuiltinFuncData::CoreFuncs - confirm. + // Returns the populated map by value. + TCoreFuncMap MakeCoreFuncs() { + TCoreFuncMap coreFuncs = { + {"listindexof", { "IndexOf", 2, 2}}, + {"testbit", { "TestBit", 2, 2}}, + {"setbit", { "SetBit", 2, 2}}, + {"clearbit", { "ClearBit", 2, 2}}, + {"flipbit", { "FlipBit", 2, 2 }}, + {"toset", { "ToSet", 1, 1 }}, + {"setisdisjoint", { "SetIsDisjoint", 2, 2}}, + {"setintersection", { "SetIntersection", 2, 3}}, + 
{"setincludes", { "SetIncludes", 2, 2}}, + {"setunion", { "SetUnion", 2, 3}}, + {"setdifference", { "SetDifference", 2, 2}}, + {"setsymmetricdifference", { "SetSymmetricDifference", 2, 3}}, + {"listaggregate", { "ListAggregate", 2, 2}}, + {"dictaggregate", { "DictAggregate", 2, 2}}, + {"aggregatetransforminput", { "AggregateTransformInput", 2, 2}}, + {"aggregatetransformoutput", { "AggregateTransformOutput", 2, 2}}, + {"aggregateflatten", { "AggregateFlatten", 1, 1}}, + {"choosemembers", { "ChooseMembers", 2, 2}}, + {"removemembers", { "RemoveMembers", 2, 2}}, + {"forceremovemembers", { "ForceRemoveMembers", 2, 2}}, + {"structmembers", { "StructMembers", 1, 1}}, + {"gathermembers", { "GatherMembers", 1, 1}}, + {"renamemembers", { "RenameMembers", 2, 2}}, + {"forcerenamemembers", { "ForceRenameMembers", 2, 2}}, + {"spreadmembers", { "SpreadMembers", 2, 2}}, + {"forcespreadmembers", { "ForceSpreadMembers", 2, 2}}, + {"listfromtuple", { "ListFromTuple", 1, 1}}, + {"listtotuple", { "ListToTuple", 2, 2}}, + }; + return coreFuncs; + } +}; + +TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVector<TNodePtr>& args, + const TString& originalNameSpace, EAggregateMode aggMode, bool* mustUseNamed, bool warnOnYqlNameSpace) { + + const TBuiltinFuncData* funcData = Singleton<TBuiltinFuncData>(); + const TBuiltinFactoryCallbackMap& builtinFuncs = funcData->BuiltinFuncs; + const TAggrFuncFactoryCallbackMap& aggrFuncs = funcData->AggrFuncs; + const TCoreFuncMap& coreFuncs = funcData->CoreFuncs; + + for (auto& arg: args) { + if (!arg) { + return nullptr; + } + } + + TString normalizedName(name); + TString nameSpace(originalNameSpace); + TString ns = to_lower(nameSpace); + if (ns.empty()) { + TMaybe<TIssue> error = NormalizeName(pos, normalizedName); + if (!error.Empty()) { + return new TInvalidBuiltin(pos, error->GetMessage()); + } + + auto coreFunc = coreFuncs.find(normalizedName); + if (coreFunc != coreFuncs.end()) { + ns = "core"; + name = 
coreFunc->second.Name; + if (args.size() < coreFunc->second.MinArgs || args.size() > coreFunc->second.MaxArgs) { + return new TInvalidBuiltin(pos, TStringBuilder() << name << " expected from " + << coreFunc->second.MinArgs << " to " << coreFunc->second.MaxArgs << " arguments, but got: " << args.size()); + } + + if (coreFunc->second.MinArgs != coreFunc->second.MaxArgs) { + name += ToString(args.size()); + } + } + } + + TString moduleResource; + if (ctx.Settings.ModuleMapping.contains(ns)) { + moduleResource = ctx.Settings.ModuleMapping.at(ns); + } + + if (ns == "js") { + ns = "javascript"; + nameSpace = "JavaScript"; + } + + if (ns == "datetime2") { + ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_DATETIME2) << "DateTime2:: is a temporary alias for DateTime:: which will be removed in the future, use DateTime:: instead"; + } + + if (ns == "datetime") { + ns = "datetime2"; + nameSpace = "DateTime2"; + } + + auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(ns); + switch (scriptType) { + case NKikimr::NMiniKQL::EScriptType::Python: + case NKikimr::NMiniKQL::EScriptType::Python3: + case NKikimr::NMiniKQL::EScriptType::ArcPython3: + scriptType = NKikimr::NMiniKQL::EScriptType::Python3; + break; + case NKikimr::NMiniKQL::EScriptType::Python2: + scriptType = NKikimr::NMiniKQL::EScriptType::ArcPython2; + break; + case NKikimr::NMiniKQL::EScriptType::SystemPython2: + scriptType = NKikimr::NMiniKQL::EScriptType::Python2; + break; + default: + break; + } + + if (ns == "yql" || ns == "@yql") { + if (warnOnYqlNameSpace && GetEnv("YQL_DETERMINISTIC_MODE").empty()) { + ctx.Warning(pos, TIssuesIds::YQL_S_EXPRESSIONS_CALL) + << "It is not recommended to directly access s-expressions functions via YQL::" << Endl + << "This mechanism is mostly intended for temporary workarounds or internal testing purposes"; + } + + if (ns == "yql") { + return new TCallNodeImpl(pos, name, -1, -1, args); + } + } else if (moduleResource) { + auto exportName = ns == "core" ? 
name : "$" + name; + TVector<TNodePtr> applyArgs = { + new TCallNodeImpl(pos, "bind", { + BuildAtom(pos, ns + "_module", 0), BuildQuotedAtom(pos, exportName) + }) + }; + applyArgs.insert(applyArgs.end(), args.begin(), args.end()); + return new TCallNodeImpl(pos, "Apply", applyArgs); + } else if (ns == "hyperscan" || ns == "pcre" || ns == "pire" || ns.StartsWith("re2")) { + TString moduleName(nameSpace); + moduleName.to_title(); + if ((args.size() == 1 || args.size() == 2) && (name.StartsWith("Multi") || (ns.StartsWith("re2") && name == "Capture"))) { + TVector<TNodePtr> multiArgs{ + ns.StartsWith("re2") && name == "Capture" ? MakePair(pos, args) : args[0], + new TCallNodeImpl(pos, "Void", 0, 0, {}), + args[0] + }; + auto fullName = moduleName + "." + name; + return new TYqlTypeConfigUdf(pos, fullName, multiArgs, multiArgs.size() + 1); + } else if (!(ns.StartsWith("re2") && name == "Options")) { + auto newArgs = args; + if (ns.StartsWith("re2")) { + // convert run config is tuple of string and optional options + if (args.size() == 1 || args.size() == 2) { + newArgs[0] = MakePair(pos, args); + if (args.size() == 2) { + newArgs.pop_back(); + } + } else { + return new TInvalidBuiltin(pos, TStringBuilder() << ns << "." 
<< name << " expected one or two arguments."); + } + } + + return BuildUdf(ctx, pos, moduleName, name, newArgs); + } + } else if (ns == "datetime2" && (name == "Parse")) { + return BuildUdf(ctx, pos, nameSpace, name, args); + } else if (ns == "pg") { + const bool isAggregateFunc = NYql::NPg::HasAggregation(name, NYql::NPg::EAggKind::Normal); + if (isAggregateFunc) { + if (aggMode == EAggregateMode::Distinct) { + return new TInvalidBuiltin(pos, "Distinct is not supported yet for PG aggregation "); + } + + return BuildAggrFuncFactoryCallback(name, "", EAggrFuncTypeCallback::PG)(pos, args, aggMode, false); + } else { + TVector<TNodePtr> pgCallArgs; + pgCallArgs.push_back(BuildLiteralRawString(pos, name)); + pgCallArgs.insert(pgCallArgs.end(), args.begin(), args.end()); + return new TYqlPgCall<false>(pos, pgCallArgs); + } + } else if (name == "MakeLibraPreprocessor") { + if (args.size() != 1) { + return new TInvalidBuiltin(pos, TStringBuilder() << name << " requires exactly one argument"); + } + + auto settings = NYT::TNode::CreateMap(); + + auto makeUdfArgs = [&args, &pos, &settings]() { + return TVector<TNodePtr> { + args[0], + new TCallNodeImpl(pos, "Void", {}), + BuildQuotedAtom(pos, NYT::NodeToYsonString(settings)) + }; + }; + + auto structNode = args[0]->GetStructNode(); + if (!structNode) { + if (auto callNode = args[0]->GetCallNode()) { + if (callNode->GetOpName() == "AsStruct") { + return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs()); + } + } + + return new TInvalidBuiltin(pos, TStringBuilder() << name << " requires struct as argument"); + } + + for (const auto& item : structNode->GetExprs()) { + const auto& label = item->GetLabel(); + if (label == "Entities") { + auto callNode = item->GetCallNode(); + if (!callNode || callNode->GetOpName() != "AsListMayWarn") { + return new TInvalidBuiltin(pos, TStringBuilder() << name << " entities must be list of strings"); + } + + auto entities = NYT::TNode::CreateList(); + for (const auto& entity : 
callNode->GetArgs()) { + if (!entity->IsLiteral() || entity->GetLiteralType() != "String") { + return new TInvalidBuiltin(pos, TStringBuilder() << name << " entity must be string literal"); + } + entities.Add(entity->GetLiteralValue()); + } + + settings(label, std::move(entities)); + } else if (label == "EntitiesStrategy") { + if (!item->IsLiteral() || item->GetLiteralType() != "String") { + return new TInvalidBuiltin( + pos, TStringBuilder() << name << " entities strategy must be string literal" + ); + } + + if (!EqualToOneOf(item->GetLiteralValue(), "whitelist", "blacklist")) { + return new TInvalidBuiltin( + pos, + TStringBuilder() << name << " got invalid entities strategy: expected 'whitelist' or 'blacklist'" + ); + } + + settings(label, item->GetLiteralValue()); + } else if (label == "Mode") { + if (!item->IsLiteral() || item->GetLiteralType() != "String") { + return new TInvalidBuiltin( + pos, TStringBuilder() << name << " mode must be string literal" + ); + } + + settings(label, item->GetLiteralValue()); + } else if (EqualToOneOf(label, "BlockstatDict", "ParseWithFat")) { + continue; + } else { + return new TInvalidBuiltin( + pos, + TStringBuilder() + << name << " got unsupported setting: " << label + << "; supported: Entities, EntitiesStrategy, BlockstatDict, ParseWithFat" ); + } + } + + return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs()); + } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) { + auto scriptName = NKikimr::NMiniKQL::IsCustomPython(scriptType) ? 
nameSpace : TString(NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType)); + return new TScriptUdf(pos, scriptName, name, args); + } else if (ns.empty()) { + if (auto simpleType = LookupSimpleType(normalizedName, ctx.FlexibleTypes, /* isPgType = */ false)) { + const auto type = *simpleType; + if (NUdf::FindDataSlot(type)) { + YQL_ENSURE(type != "Decimal"); + return new TYqlData(pos, type, args); + } + + if (type.StartsWith("pg") || type.StartsWith("_pg")) { + TVector<TNodePtr> pgConstArgs; + if (!args.empty()) { + pgConstArgs.push_back(args.front()); + pgConstArgs.push_back(new TCallNodeImpl(pos, "PgType", { BuildQuotedAtom(pos, + TString(type.StartsWith("pg") ? "" : "_") + type.substr(type.StartsWith("pg") ? 2 : 3), TNodeFlags::Default) })); + pgConstArgs.insert(pgConstArgs.end(), args.begin() + 1, args.end()); + } + return new TYqlPgConst(pos, pgConstArgs); + } else if (type == "Void" || type == "EmptyList" || type == "EmptyDict") { + return new TCallNodeImpl(pos, type, 0, 0, args); + } else { + return new TInvalidBuiltin(pos, TStringBuilder() << "Can not create objects of type " << type); + } + } + + if (normalizedName == "decimal") { + if (args.size() == 2) { + TVector<TNodePtr> dataTypeArgs = { BuildQuotedAtom(pos, "Decimal", TNodeFlags::Default) }; + for (auto& arg : args) { + if (auto literal = arg->GetLiteral("Int32")) { + dataTypeArgs.push_back(BuildQuotedAtom(pos, *literal, TNodeFlags::Default)); + } else { + dataTypeArgs.push_back(MakeAtomFromExpression(ctx.Pos(), ctx, arg).Build()); + } + } + return new TCallNodeImpl(pos, "DataType", dataTypeArgs); + } + return new TYqlData(pos, "Decimal", args); + } + + if (normalizedName == "tablename") { + return new TTableName(pos, args, ctx.Scoped->CurrService); + } + + if (normalizedName == "aggregationfactory") { + if (args.size() < 1 || !args[0]->GetLiteral("String")) { + return new TInvalidBuiltin(pos, "AGGREGATION_FACTORY requries a function name"); + } + + auto aggNormalizedName = *args[0]->GetLiteral("String"); + 
auto error = NormalizeName(pos, aggNormalizedName); + if (!error.Empty()) { + return new TInvalidBuiltin(pos, error->GetMessage()); + } + + if (aggNormalizedName == "aggregateby") { + return new TInvalidBuiltin(pos, "AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY"); + } + + if (aggNormalizedName == "multiaggregateby") { + return new TInvalidBuiltin(pos, "MULTI_AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY"); + } + + if (aggMode == EAggregateMode::Distinct || aggMode == EAggregateMode::OverWindowDistinct) { + return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions"); + } + + if (to_lower(*args[0]->GetLiteral("String")).StartsWith("pg::")) { + auto name = args[0]->GetLiteral("String")->substr(4); + const bool isAggregateFunc = NYql::NPg::HasAggregation(name, NYql::NPg::EAggKind::Normal); + if (!isAggregateFunc) { + return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String")); + } + + return BuildAggrFuncFactoryCallback(name, "", EAggrFuncTypeCallback::PG)(pos, args, aggMode, true); + } + + AdjustCheckedAggFuncName(aggNormalizedName, ctx); + + auto aggrCallback = aggrFuncs.find(aggNormalizedName); + if (aggrCallback == aggrFuncs.end()) { + return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String")); + } + + return (*aggrCallback).second(pos, args, aggMode, true).Release(); + } + + if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") { + const bool multi = (normalizedName == "multiaggregateby"); + if (args.size() != 2) { + return new TInvalidBuiltin(pos, TStringBuilder() << (multi ? "MULTI_AGGREGATE_BY" : "AGGREGATE_BY") << " requries two arguments"); + } + + auto name = multi ? 
"MultiAggregateBy" : "AggregateBy"; + auto aggr = BuildFactoryAggregation(pos, name, "", aggMode, multi); + return new TBasicAggrFunc(pos, name, aggr, args); + } + + AdjustCheckedAggFuncName(normalizedName, ctx); + + auto aggrCallback = aggrFuncs.find(normalizedName); + if (aggrCallback != aggrFuncs.end()) { + return (*aggrCallback).second(pos, args, aggMode, false).Release(); + } + if (aggMode == EAggregateMode::Distinct || aggMode == EAggregateMode::OverWindowDistinct) { + return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions"); + } + + auto builtinCallback = builtinFuncs.find(normalizedName); + if (builtinCallback != builtinFuncs.end()) { + return (*builtinCallback).second(pos, args); + } else if (normalizedName == "udf") { + if (mustUseNamed && *mustUseNamed) { + *mustUseNamed = false; + } + return new TUdfNode(pos, args); + } else if (normalizedName == "asstruct" || normalizedName == "structtype") { + if (args.empty()) { + return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args); + } + + if (mustUseNamed && *mustUseNamed) { + *mustUseNamed = false; + YQL_ENSURE(args.size() == 2); + Y_DEBUG_ABORT_UNLESS(args[0]->GetTupleNode()); + auto posArgs = args[0]->GetTupleNode(); + if (posArgs->IsEmpty()) { + if (normalizedName == "asstruct") { + return args[1]; + } else { + Y_DEBUG_ABORT_UNLESS(args[1]->GetStructNode()); + auto namedArgs = args[1]->GetStructNode(); + return new TStructTypeNode(pos, namedArgs->GetExprs()); + } + } + } + return new TInvalidBuiltin(pos, TStringBuilder() << + (normalizedName == "asstruct" ? 
"AsStruct" : "StructType") << + " requires all argument to be named"); + } else if (normalizedName == "expandstruct") { + if (mustUseNamed) { + if (!*mustUseNamed) { + return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument"); + } + *mustUseNamed = false; + } + YQL_ENSURE(args.size() == 2); + Y_DEBUG_ABORT_UNLESS(args[0]->GetTupleNode()); + Y_DEBUG_ABORT_UNLESS(args[1]->GetStructNode()); + auto posArgs = args[0]->GetTupleNode(); + if (posArgs->GetTupleSize() != 1) { + return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named"); + } + + TVector<TNodePtr> flattenMembersArgs = { + BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}), + BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}), + }; + return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs); + } else if (normalizedName == "sqlexternalfunction") { + return new TCallNodeImpl(pos, "SqlExternalFunction", args); + } else { + return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name); + } + } + + TNodePtr positionalArgs; + TNodePtr namedArgs; + if (mustUseNamed && *mustUseNamed) { + YQL_ENSURE(args.size() == 2); + positionalArgs = args[0]; + namedArgs = args[1]; + *mustUseNamed = false; + } + + TVector<TNodePtr> usedArgs = args; + + TNodePtr customUserType = nullptr; + if (ns == "json") { + ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF) << "Json UDF is deprecated. 
Please use JSON API instead"; + + ns = "yson"; + nameSpace = "Yson"; + if (name == "Serialize") { + name = "SerializeJson"; + } + else if (name == "Parse") { + name = "ParseJson"; + } + } + + if (ctx.PragmaYsonFast && ns == "yson") { + ns.append('2'); + nameSpace.append('2'); + } + + if (ns.StartsWith("yson")) { + if (name == "ConvertTo" && usedArgs.size() > 1) { + customUserType = usedArgs[1]; + usedArgs.erase(usedArgs.begin() + 1); + } + + if (name == "Serialize") { + if (usedArgs) { + usedArgs.resize(1U); + } + } else if (ctx.PragmaYsonFast && name == "SerializeJsonEncodeUtf8") { + name = "SerializeJson"; + if (usedArgs.size() < 2U) { + usedArgs.emplace_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast)); + } + positionalArgs = BuildTuple(pos, usedArgs); + auto encodeUtf8 = BuildLiteralBool(pos, true); + encodeUtf8->SetLabel("EncodeUtf8"); + namedArgs = BuildStructure(pos, {encodeUtf8}); + usedArgs = {positionalArgs, namedArgs}; + } else if (name.StartsWith("From")) { + if (usedArgs) { + usedArgs.resize(1U); + } + name = "From"; + } else if (name == "GetLength" || name.StartsWith("ConvertTo") || name.StartsWith("Parse") || name.StartsWith("SerializeJson")) { + if (usedArgs.size() < 2U) { + usedArgs.emplace_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast)); + } + } else if (name == "Contains" || name.StartsWith("Lookup") || name.StartsWith("YPath")) { + if (usedArgs.size() < 3U) { + usedArgs.push_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast)); + } + } + } + + if (ns == "datetime2" && name == "Update") { + if (namedArgs) { + TStructNode* castedNamedArgs = namedArgs->GetStructNode(); + Y_DEBUG_ABORT_UNLESS(castedNamedArgs); + auto exprs = castedNamedArgs->GetExprs(); + for (auto& arg : exprs) { + if (arg->GetLabel() == "Timezone") { + arg = new TCallNodeImpl(pos, "TimezoneId", 1, 1, { arg }); + 
arg->SetLabel("TimezoneId"); + } + } + + namedArgs = BuildStructure(pos, exprs); + usedArgs.pop_back(); + usedArgs.push_back(namedArgs); + }; + } + + TNodePtr typeConfig = MakeTypeConfig(pos, ns, usedArgs); + return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, TDeferredAtom(typeConfig, ctx), nullptr); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp new file mode 100644 index 00000000000..4637a3be9e9 --- /dev/null +++ b/yql/essentials/sql/v1/context.cpp @@ -0,0 +1,656 @@ +#include "context.h" + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/utils/yql_panic.h> +#include <yql/essentials/utils/yql_paths.h> + +#include <util/folder/pathsplit.h> +#include <util/string/join.h> +#include <util/stream/null.h> + +#ifdef GetMessage +#undef GetMessage +#endif + +using namespace NYql; + +namespace NSQLTranslationV1 { + +namespace { + +TNodePtr AddTablePathPrefix(TContext& ctx, TStringBuf prefixPath, const TDeferredAtom& path) { + if (prefixPath.empty()) { + return path.Build(); + } + + if (path.GetLiteral()) { + return BuildQuotedAtom(path.Build()->GetPos(), BuildTablePath(prefixPath, *path.GetLiteral())); + } + + auto pathNode = path.Build(); + pathNode = new TCallNodeImpl(pathNode->GetPos(), "String", { pathNode }); + auto prefixNode = BuildLiteralRawString(pathNode->GetPos(), TString(prefixPath)); + + TNodePtr buildPathNode = new TCallNodeImpl(pathNode->GetPos(), "BuildTablePath", { prefixNode, pathNode }); + + TDeferredAtom result; + MakeTableFromExpression(ctx.Pos(), ctx, buildPathNode, result); + return result.Build(); +} + +typedef bool TContext::*TPragmaField; + +THashMap<TStringBuf, TPragmaField> CTX_PRAGMA_FIELDS = { + {"AnsiOptionalAs", &TContext::AnsiOptionalAs}, + {"WarnOnAnsiAliasShadowing", &TContext::WarnOnAnsiAliasShadowing}, + {"PullUpFlatMapOverJoin", 
&TContext::PragmaPullUpFlatMapOverJoin}, + {"FilterPushdownOverJoinOptionalSide", &TContext::FilterPushdownOverJoinOptionalSide}, + {"RotateJoinTree", &TContext::RotateJoinTree}, + {"DqEngineEnable", &TContext::DqEngineEnable}, + {"DqEngineForce", &TContext::DqEngineForce}, + {"RegexUseRe2", &TContext::PragmaRegexUseRe2}, + {"OrderedColumns", &TContext::OrderedColumns}, + {"BogousStarInGroupByOverJoin", &TContext::BogousStarInGroupByOverJoin}, + {"CoalesceJoinKeysOnQualifiedAll", &TContext::CoalesceJoinKeysOnQualifiedAll}, + {"UnorderedSubqueries", &TContext::UnorderedSubqueries}, + {"FlexibleTypes", &TContext::FlexibleTypes}, + {"AnsiCurrentRow", &TContext::AnsiCurrentRow}, + {"EmitStartsWith", &TContext::EmitStartsWith}, + {"AnsiLike", &TContext::AnsiLike}, + {"UseBlocks", &TContext::UseBlocks}, + {"BlockEngineEnable", &TContext::BlockEngineEnable}, + {"BlockEngineForce", &TContext::BlockEngineForce}, + {"UnorderedResult", &TContext::UnorderedResult}, + {"CompactNamedExprs", &TContext::CompactNamedExprs}, + {"ValidateUnusedExprs", &TContext::ValidateUnusedExprs}, + {"AnsiImplicitCrossJoin", &TContext::AnsiImplicitCrossJoin}, + {"DistinctOverWindow", &TContext::DistinctOverWindow}, +}; + +typedef TMaybe<bool> TContext::*TPragmaMaybeField; + +THashMap<TStringBuf, TPragmaMaybeField> CTX_PRAGMA_MAYBE_FIELDS = { + {"AnsiRankForNullableKeys", &TContext::AnsiRankForNullableKeys}, + {"AnsiInForEmptyOrNullableItemsCollections", &TContext::AnsiInForEmptyOrNullableItemsCollections}, + {"EmitAggApply", &TContext::EmitAggApply}, + {"CompactGroupBy", &TContext::CompactGroupBy}, +}; + +} // namespace + +TContext::TContext(const NSQLTranslation::TTranslationSettings& settings, + const NSQLTranslation::TSQLHints& hints, + TIssues& issues) + : ClusterMapping(settings.ClusterMapping) + , PathPrefix(settings.PathPrefix) + , ClusterPathPrefixes(settings.ClusterPathPrefixes) + , SQLHints(hints) + , Settings(settings) + , Pool(new TMemoryPool(4096)) + , Issues(issues) + , 
IncrementMonCounterFunction(settings.IncrementCounter) + , HasPendingErrors(false) + , DqEngineEnable(Settings.DqDefaultAuto->Allow()) + , AnsiQuotedIdentifiers(settings.AnsiLexer) + , BlockEngineEnable(Settings.BlockDefaultAuto->Allow()) +{ + for (auto lib : settings.Libraries) { + Libraries.emplace(lib, TLibraryStuff()); + } + + Scoped = MakeIntrusive<TScopedState>(); + AllScopes.push_back(Scoped); + Scoped->UnicodeLiterals = settings.UnicodeLiterals; + if (settings.DefaultCluster) { + Scoped->CurrCluster = TDeferredAtom({}, settings.DefaultCluster); + auto provider = GetClusterProvider(settings.DefaultCluster); + YQL_ENSURE(provider); + Scoped->CurrService = *provider; + } + + Position.File = settings.File; + + for (auto& flag: settings.Flags) { + bool value = true; + TStringBuf key = flag; + auto ptr = CTX_PRAGMA_FIELDS.FindPtr(key); + auto ptrMaybe = CTX_PRAGMA_MAYBE_FIELDS.FindPtr(key); + if (!ptr && !ptrMaybe && key.SkipPrefix("Disable")) { + value = false; + ptr = CTX_PRAGMA_FIELDS.FindPtr(key); + ptrMaybe = CTX_PRAGMA_MAYBE_FIELDS.FindPtr(key); + } + if (ptr) { + this->*(*ptr) = value; + } else if (ptrMaybe) { + this->*(*ptrMaybe) = value; + } + } + DiscoveryMode = (NSQLTranslation::ESqlMode::DISCOVERY == Settings.Mode); +} + +TContext::~TContext() +{ + for (auto& x: AllScopes) { + x->Clear(); + } +} + +const NYql::TPosition& TContext::Pos() const { + return Position; +} + +TString TContext::MakeName(const TString& name) { + auto iter = GenIndexes.find(name); + if (iter == GenIndexes.end()) { + iter = GenIndexes.emplace(name, 0).first; + } + TStringBuilder str; + str << name << iter->second; + ++iter->second; + return str; +} + +void TContext::PushCurrentBlocks(TBlocks* blocks) { + YQL_ENSURE(blocks); + CurrentBlocks.push_back(blocks); +} + +void TContext::PopCurrentBlocks() { + YQL_ENSURE(!CurrentBlocks.empty()); + CurrentBlocks.pop_back(); +} + +TBlocks& TContext::GetCurrentBlocks() const { + YQL_ENSURE(!CurrentBlocks.empty()); + return 
*CurrentBlocks.back(); +} + +IOutputStream& TContext::Error(NYql::TIssueCode code) { + return Error(Pos(), code); +} + +IOutputStream& TContext::Error(NYql::TPosition pos, NYql::TIssueCode code) { + HasPendingErrors = true; + return MakeIssue(TSeverityIds::S_ERROR, code, pos); +} + +IOutputStream& TContext::Warning(NYql::TPosition pos, NYql::TIssueCode code) { + return MakeIssue(TSeverityIds::S_WARNING, code, pos); +} + +IOutputStream& TContext::Info(NYql::TPosition pos) { + return MakeIssue(TSeverityIds::S_INFO, TIssuesIds::INFO, pos); +} + +void TContext::SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action) { + TString codePattern = ToString(code); + TString actionString = ToString(action); + + TWarningRule rule; + TString parseError; + auto parseResult = TWarningRule::ParseFrom(codePattern, actionString, rule, parseError); + YQL_ENSURE(parseResult == TWarningRule::EParseResult::PARSE_OK); + WarningPolicy.AddRule(rule); +} + +TVector<NSQLTranslation::TSQLHint> TContext::PullHintForToken(NYql::TPosition tokenPos) { + TVector<NSQLTranslation::TSQLHint> result; + auto it = SQLHints.find(tokenPos); + if (it == SQLHints.end()) { + return result; + } + result = std::move(it->second); + SQLHints.erase(it); + return result; +} + +void TContext::WarnUnusedHints() { + if (!SQLHints.empty()) { + // warn about first unused hint + auto firstUnused = SQLHints.begin(); + YQL_ENSURE(!firstUnused->second.empty()); + const NSQLTranslation::TSQLHint& hint = firstUnused->second.front(); + Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Hint " << hint.Name << " will not be used"; + } +} + +IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TPosition pos) { + if (severity == TSeverityIds::S_WARNING) { + auto action = WarningPolicy.GetAction(code); + if (action == EWarningAction::ERROR) { + severity = TSeverityIds::S_ERROR; + HasPendingErrors = true; + } else if (action == EWarningAction::DISABLE) { + return Cnull; + } + } + + // we have 
the last cell for issue, let's fill it with our internal error + if (severity >= TSeverityIds::S_WARNING) { + const bool aboveHalf = Issues.Size() > Settings.MaxErrors / 2; + if (aboveHalf) { + return Cnull; + } + } else { + if (Settings.MaxErrors == Issues.Size() + 1) { + Issues.AddIssue(TIssue(NYql::TPosition(), TString(TStringBuf("Too many issues")))); + Issues.back().SetCode(UNEXPECTED_ERROR, TSeverityIds::S_ERROR); + } + + if (Settings.MaxErrors <= Issues.Size()) { + ythrow NProtoAST::TTooManyErrors() << "Too many issues"; + } + } + + Issues.AddIssue(TIssue(pos, TString())); + auto& curIssue = Issues.back(); + curIssue.Severity = severity; + curIssue.IssueCode = code; + IssueMsgHolder.Reset(new TStringOutput(*Issues.back().MutableMessage())); + return *IssueMsgHolder; +} + +bool TContext::IsDynamicCluster(const TDeferredAtom& cluster) const { + const TString* clusterPtr = cluster.GetLiteral(); + if (!clusterPtr) { + return false; + } + TString unused; + if (ClusterMapping.GetClusterProvider(*clusterPtr, unused)) { + return false; + } + if (Settings.AssumeYdbOnClusterWithSlash && clusterPtr->StartsWith('/')) { + return false; + } + return !Settings.DynamicClusterProvider.empty(); +} + +bool TContext::SetPathPrefix(const TString& value, TMaybe<TString> arg) { + if (arg.Defined()) { + if (*arg == YtProviderName + || *arg == KikimrProviderName + || *arg == RtmrProviderName + ) + { + ProviderPathPrefixes[*arg] = value; + return true; + } + + TString normalizedClusterName; + if (!GetClusterProvider(*arg, normalizedClusterName)) { + Error() << "Unknown cluster or provider: " << *arg; + IncrementMonCounter("sql_errors", "BadPragmaValue"); + return false; + } + + ClusterPathPrefixes[normalizedClusterName] = value; + } else { + PathPrefix = value; + } + + return true; +} + +TNodePtr TContext::GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path) { + TStringBuf prefixPath = GetPrefixPath(service, cluster); + if (prefixPath) { + 
return AddTablePathPrefix(*this, prefixPath, path); + } + return path.Build(); +} + +TStringBuf TContext::GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const { + if (IsDynamicCluster(cluster)) { + return {}; + } + auto* clusterPrefix = cluster.GetLiteral() + ? ClusterPathPrefixes.FindPtr(*cluster.GetLiteral()) + : nullptr; + if (clusterPrefix && !clusterPrefix->empty()) { + return *clusterPrefix; + } else { + auto* providerPrefix = ProviderPathPrefixes.FindPtr(service); + if (providerPrefix && !providerPrefix->empty()) { + return *providerPrefix; + } else if (!PathPrefix.empty()) { + return PathPrefix; + } + return {}; + } +} + +TNodePtr TContext::UniversalAlias(const TString& baseName, TNodePtr&& node) { + auto alias = MakeName(baseName); + UniversalAliases.emplace(alias, node); + return BuildAtom(node->GetPos(), alias, TNodeFlags::Default); +} + +bool TContext::IsAlreadyDeclared(const TString& varName) const { + return Variables.find(varName) != Variables.end() && !WeakVariables.contains(varName); +} + +void TContext::DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak) { + if (isWeak) { + auto inserted = Variables.emplace(varName, std::make_pair(pos, typeNode)); + YQL_ENSURE(inserted.second); + WeakVariables.insert(varName); + } else { + WeakVariables.erase(WeakVariables.find(varName)); + Variables[varName] = std::make_pair(pos, typeNode); + } +} + +bool TContext::AddExport(TPosition pos, const TString& name) { + if (IsAnonymousName(name)) { + Error(pos) << "Can not export anonymous name " << name; + return false; + } + if (Exports.contains(name)) { + Error(pos) << "Duplicate export symbol: " << name; + return false; + } + if (!Scoped->LookupNode(name)) { + Error(pos) << "Unable to export unknown symbol: " << name; + return false; + } + Exports.emplace(name); + return true; +} + +TString TContext::AddImport(const TVector<TString>& modulePath) { + YQL_ENSURE(!modulePath.empty()); + TString path 
= JoinRange("/", modulePath.cbegin(), modulePath.cend()); + if (!path.StartsWith('/')) { + path = Settings.FileAliasPrefix + path; + } + + auto iter = ImportModuleAliases.find(path); + if (iter == ImportModuleAliases.end()) { + const TString alias = MakeName(TStringBuilder() << modulePath.back() << "_module"); + iter = ImportModuleAliases.emplace(path, alias).first; + } + return iter->second; +} + +TString TContext::AddSimpleUdf(const TString& udf) { + auto& name = SimpleUdfs[udf]; + if (name.empty()) { + name = TStringBuilder() << "Udf" << SimpleUdfs.size(); + } + + return name; +} + +void TContext::SetPackageVersion(const TString& packageName, ui32 version) { + PackageVersions[packageName] = version; +} + +void TScopedState::UseCluster(const TString& service, const TDeferredAtom& cluster) { + YQL_ENSURE(!cluster.Empty()); + if (cluster.GetLiteral()) { + if (!Local.UsedPlainClusters.insert(*cluster.GetLiteral()).second) { + return; + } + } else { + if (!Local.UsedExprClusters.insert(cluster.Build().Get()).second) { + return; + } + } + Local.UsedClusters.push_back({service, cluster}); +} + +void TScopedState::AddExprCluster(TNodePtr expr, TContext& ctx) { + auto node = expr.Get(); + if (Local.ExprClustersMap.count(node)) { + return; + } + auto name = ctx.MakeName("cluster"); + auto wrappedNode = expr->Y("EvaluateAtom", expr); + Local.ExprClustersMap.insert({node, {name, wrappedNode}}); + Local.ExprClusters.push_back(expr); +} + +const TVector<std::pair<TString, TDeferredAtom>>& TScopedState::GetUsedClusters() { + return Local.UsedClusters; +} + +TNodePtr TScopedState::WrapCluster(const TDeferredAtom& cluster, TContext& ctx) { + auto node = cluster.Build(); + if (!cluster.GetLiteral()) { + if (ctx.CompactNamedExprs) { + return node->Y("EvaluateAtom", node); + } + AddExprCluster(node, ctx); + auto exprIt = Local.ExprClustersMap.find(node.Get()); + YQL_ENSURE(exprIt != Local.ExprClustersMap.end()); + return node->AstNode(exprIt->second.first); + } + + return node; +} 
+ +void TScopedState::Clear() { + *this = TScopedState(); +} + +TNodePtr TScopedState::LookupNode(const TString& name) { + auto mapIt = NamedNodes.find(name); + if (mapIt == NamedNodes.end()) { + return nullptr; + } + Y_DEBUG_ABORT_UNLESS(!mapIt->second.empty()); + mapIt->second.front()->IsUsed = true; + return mapIt->second.front()->Node->Clone(); +} + +bool TContext::HasNonYtProvider(const ISource& source) const { + TTableList tableList; + source.GetInputTables(tableList); + + TSet<TString> clusters; + for (auto& it: tableList) { + if (it.Service != YtProviderName) { + return true; + } + } + + for (auto& cl: Scoped->Local.UsedClusters) { + if (cl.first != YtProviderName) { + return true; + } + } + + return false; +} + +bool TContext::UseUnordered(const ISource& source) const { + return !HasNonYtProvider(source); +} + +bool TContext::UseUnordered(const TTableRef& table) const { + return YtProviderName == table.Service; +} + + +TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex) { + static const TSet<TStringBuf> denyForAllArgs = { + "datatype", + "optionaltype", + "listtype", + "streamtype", + "dicttype", + "tupletype", + "resourcetype", + "taggedtype", + "varianttype", + "callabletype", + "optionalitemtype", + "listitemtype", + "streamitemtype", + "dictkeytype", + "dictpayloadtype", + "tupleelementtype", + "structmembertype", + "callableresulttype", + "callableargumenttype", + "variantunderlyingtype", + }; + static const TMap<std::pair<TStringBuf, size_t>, EColumnRefState> positionalArgsCustomStatus = { + { {"frombytes", 1}, EColumnRefState::Deny }, + { {"enum", 0}, EColumnRefState::Deny }, + { {"asenum", 0}, EColumnRefState::Deny }, + { {"variant", 1}, EColumnRefState::Deny }, + { {"variant", 2}, EColumnRefState::Deny }, + { {"asvariant", 1}, EColumnRefState::Deny }, + { {"astagged", 1}, EColumnRefState::Deny }, + { {"ensuretype", 1}, EColumnRefState::Deny }, + { {"ensuretype", 2}, 
EColumnRefState::Deny }, + { {"ensureconvertibleto", 1}, EColumnRefState::Deny }, + { {"ensureconvertibleto", 2}, EColumnRefState::Deny }, + + { {"nothing", 0}, EColumnRefState::Deny }, + { {"formattype", 0}, EColumnRefState::Deny }, + { {"instanceof", 0}, EColumnRefState::Deny }, + { {"pgtype", 0}, EColumnRefState::AsPgType }, + { {"pgconst", 0}, EColumnRefState::Deny }, + { {"pgconst", 1}, EColumnRefState::AsPgType }, + { {"pgcast", 1}, EColumnRefState::AsPgType }, + + { {"unpickle", 0}, EColumnRefState::Deny }, + { {"typehandle", 0}, EColumnRefState::Deny }, + + { {"listcreate", 0}, EColumnRefState::Deny }, + { {"setcreate", 0}, EColumnRefState::Deny }, + { {"dictcreate", 0}, EColumnRefState::Deny }, + { {"dictcreate", 1}, EColumnRefState::Deny }, + { {"weakfield", 1}, EColumnRefState::Deny }, + + { {"Yson::ConvertTo", 1}, EColumnRefState::Deny }, + }; + + TString normalized; + if (module.empty()) { + normalized = to_lower(func); + } else if (to_upper(module) == "YQL") { + normalized = "YQL::" + func; + } else { + normalized = module + "::" + func; + } + + if (normalized == "typeof" && argIndex == 0) { + // TODO: more such cases? 
+ return ctx.GetTopLevelColumnReferenceState(); + } + + if (denyForAllArgs.contains(normalized)) { + return EColumnRefState::Deny; + } + + auto it = positionalArgsCustomStatus.find(std::make_pair(normalized, argIndex)); + if (it != positionalArgsCustomStatus.end()) { + return it->second; + } + return {}; +} + +TTranslation::TTranslation(TContext& ctx) + : Ctx(ctx) +{ +} + +TContext& TTranslation::Context() { + return Ctx; +} + +IOutputStream& TTranslation::Error() { + return Ctx.Error(); +} + +TNodePtr TTranslation::GetNamedNode(const TString& name) { + if (name == "$_") { + Ctx.Error() << "Unable to reference anonymous name " << name; + return nullptr; + } + auto res = Ctx.Scoped->LookupNode(name); + if (!res) { + Ctx.Error() << "Unknown name: " << name; + } + return SafeClone(res); +} + +TString TTranslation::PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder) { + TString resultName = name; + if (IsAnonymousName(name)) { + resultName = "$_yql_anonymous_name_" + ToString(Ctx.AnonymousNameIndex++); + YQL_ENSURE(Ctx.Scoped->NamedNodes.find(resultName) == Ctx.Scoped->NamedNodes.end()); + } + auto node = builder(resultName); + Y_DEBUG_ABORT_UNLESS(node); + auto mapIt = Ctx.Scoped->NamedNodes.find(resultName); + if (mapIt == Ctx.Scoped->NamedNodes.end()) { + auto result = Ctx.Scoped->NamedNodes.insert(std::make_pair(resultName, TDeque<TNodeWithUsageInfoPtr>())); + Y_DEBUG_ABORT_UNLESS(result.second); + mapIt = result.first; + } + + mapIt->second.push_front(MakeIntrusive<TNodeWithUsageInfo>(node, namePos, Ctx.ScopeLevel)); + return resultName; +} + +TString TTranslation::PushNamedNode(NYql::TPosition namePos, const TString &name, NSQLTranslationV1::TNodePtr node) { + return PushNamedNode(namePos, name, [node](const TString&) { return node; }); +} + +TString TTranslation::PushNamedAtom(TPosition namePos, const TString& name) { + auto buildAtom = [namePos](const TString& resultName) { + return BuildAtom(namePos, resultName); + }; + 
return PushNamedNode(namePos, name, buildAtom); +} + +void TTranslation::PopNamedNode(const TString& name) { + auto mapIt = Ctx.Scoped->NamedNodes.find(name); + Y_DEBUG_ABORT_UNLESS(mapIt != Ctx.Scoped->NamedNodes.end()); + Y_DEBUG_ABORT_UNLESS(mapIt->second.size() > 0); + auto& top = mapIt->second.front(); + if (!top->IsUsed && !Ctx.HasPendingErrors && !name.StartsWith("$_")) { + Ctx.Warning(top->NamePos, TIssuesIds::YQL_UNUSED_SYMBOL) << "Symbol " << name << " is not used"; + } + mapIt->second.pop_front(); + if (mapIt->second.empty()) { + Ctx.Scoped->NamedNodes.erase(mapIt); + } +} + +void TTranslation::WarnUnusedNodes() const { + if (Ctx.HasPendingErrors) { + // result is not reliable in this case + return; + } + for (const auto& [name, items]: Ctx.Scoped->NamedNodes) { + if (name.StartsWith("$_")) { + continue; + } + for (const auto& item : items) { + if (!item->IsUsed && item->Level == Ctx.ScopeLevel) { + Ctx.Warning(item->NamePos, TIssuesIds::YQL_UNUSED_SYMBOL) << "Symbol " << name << " is not used"; + } + } + } +} + +TString GetDescription(const google::protobuf::Message& node, const google::protobuf::FieldDescriptor* d) { + const auto& field = node.GetReflection()->GetMessage(node, d); + return field.GetReflection()->GetString(field, d->message_type()->FindFieldByName("Descr")); +} + +TString TTranslation::AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const { + return GetDescription(node, descr->FindFieldByNumber(altCase)); +} + +void TTranslation::AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr) { + Error() << ruleName << ": alternative is not implemented yet: " << AltDescription(node, altCase, descr); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h new file mode 100644 index 00000000000..4aa766e34a0 --- /dev/null +++ 
b/yql/essentials/sql/v1/context.h @@ -0,0 +1,421 @@ +#pragma once + +#include "source.h" +#include "sql.h" + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/core/issue/protos/issue_id.pb.h> +#include <yql/essentials/public/issue/yql_warning.h> +#include <yql/essentials/sql/settings/translation_settings.h> +#include <yql/essentials/sql/cluster_mapping.h> + +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> + +#include <util/generic/hash.h> +#include <util/generic/map.h> +#include <util/generic/maybe.h> +#include <util/generic/set.h> +#include <util/generic/deque.h> +#include <util/generic/vector.h> + +#define ANTLR3_TOKEN(NAME) SQLv1LexerTokens::TOKEN_##NAME << 16 +#define ANTLR4_TOKEN(NAME) (SQLv1Antlr4Lexer::TOKEN_##NAME << 16) + 1 +#define IS_TOKEN(ID, NAME) (UnifiedToken(ID) == ANTLR3_TOKEN(NAME) || UnifiedToken(ID) == ANTLR4_TOKEN(NAME)) + +namespace NSQLTranslationV1 { + inline bool IsAnonymousName(const TString& name) { + return name == "$_"; + } + + inline bool IsStreamingService(const TString& service) { + return service == NYql::RtmrProviderName || service == NYql::PqProviderName; + } + + + struct TNodeWithUsageInfo : public TThrRefBase { + explicit TNodeWithUsageInfo(const TNodePtr& node, TPosition namePos, int level) + : Node(node) + , NamePos(namePos) + , Level(level) + {} + + TNodePtr Node; + TPosition NamePos; + int Level = 0; + bool IsUsed = false; + }; + + using TNodeWithUsageInfoPtr = TIntrusivePtr<TNodeWithUsageInfo>; + using TNamedNodesMap = THashMap<TString, TDeque<TNodeWithUsageInfoPtr>>; + using TBlocks = TVector<TNodePtr>; + + struct TScopedState : public TThrRefBase { + TString CurrService; + TDeferredAtom CurrCluster; + bool PragmaClassicDivision = true; + bool PragmaCheckedOps = false; + bool StrictJoinKeyTypes = false; + bool UnicodeLiterals = false; + bool WarnUntypedStringLiterals = false; + TNamedNodesMap NamedNodes; + + struct TLocal { + 
TVector<std::pair<TString, TDeferredAtom>> UsedClusters; + THashSet<TString> UsedPlainClusters; + THashSet<INode*> UsedExprClusters; + THashMap<INode*, std::pair<TString, TNodePtr>> ExprClustersMap; + TVector<TNodePtr> ExprClusters; + }; + + TLocal Local; + + void UseCluster(const TString& service, const TDeferredAtom& cluster); + const TVector<std::pair<TString, TDeferredAtom>>& GetUsedClusters(); + TNodePtr WrapCluster(const TDeferredAtom& cluster, TContext& ctx); + void AddExprCluster(TNodePtr expr, TContext& ctx); + void Clear(); + TNodePtr LookupNode(const TString& name); + }; + + using TScopedStatePtr = TIntrusivePtr<TScopedState>; + + class TColumnRefScope; + enum class EColumnRefState { + Deny, + Allow, + AsStringLiteral, + AsPgType, + MatchRecognize, + }; + + class TContext { + public: + TContext(const NSQLTranslation::TTranslationSettings& settings, + const NSQLTranslation::TSQLHints& hints, + NYql::TIssues& issues); + + virtual ~TContext(); + + const NYql::TPosition& Pos() const; + + void PushCurrentBlocks(TBlocks* blocks); + void PopCurrentBlocks(); + TBlocks& GetCurrentBlocks() const; + + TString MakeName(const TString& name); + + IOutputStream& Error(NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR); + IOutputStream& Error(NYql::TPosition pos, NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR); + IOutputStream& Warning(NYql::TPosition pos, NYql::TIssueCode code); + IOutputStream& Info(NYql::TPosition pos); + + void SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action); + + const TString& Token(const NSQLv1Generated::TToken& token) { + Position.Row = token.GetLine(); + Position.Column = token.GetColumn() + 1; + return token.GetValue(); + } + + TPosition TokenPosition(const NSQLv1Generated::TToken& token) { + TPosition pos = Position; + pos.Row = token.GetLine(); + pos.Column = token.GetColumn() + 1; + return pos; + } + + inline void IncrementMonCounter(const TString& name, const TString& value) { + if 
(IncrementMonCounterFunction) { + IncrementMonCounterFunction(name, value); + } + } + + bool HasCluster(const TString& cluster) const { + return GetClusterProvider(cluster).Defined(); + } + + TMaybe<TString> GetClusterProvider(const TString& cluster) const { + TString unusedNormalizedClusterName; + return GetClusterProvider(cluster, unusedNormalizedClusterName); + } + + TMaybe<TString> GetClusterProvider(const TString& cluster, TString& normalizedClusterName) const { + auto provider = ClusterMapping.GetClusterProvider(cluster, normalizedClusterName); + if (!provider) { + if (Settings.AssumeYdbOnClusterWithSlash && cluster.StartsWith('/')) { + normalizedClusterName = cluster; + return TString(NYql::KikimrProviderName); + } + if (Settings.DynamicClusterProvider) { + normalizedClusterName = cluster.StartsWith('/') ? cluster : Settings.PathPrefix + "/" + cluster; + return Settings.DynamicClusterProvider; + } + return Nothing(); + } + + return provider; + } + + bool IsDynamicCluster(const TDeferredAtom& cluster) const; + bool HasNonYtProvider(const ISource& source) const; + bool UseUnordered(const ISource& source) const; + bool UseUnordered(const TTableRef& table) const; + + bool SetPathPrefix(const TString& value, TMaybe<TString> arg = TMaybe<TString>()); + + TNodePtr GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path); + TStringBuf GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const; + + TNodePtr UniversalAlias(const TString& baseName, TNodePtr&& node); + + void BodyPart() { + IntoHeading = false; + } + + bool IsParseHeading() const { + return IntoHeading; + } + + bool IsAlreadyDeclared(const TString& varName) const; + void DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak = false); + + bool AddExport(TPosition symbolPos, const TString& symbolName); + TString AddImport(const TVector<TString>& modulePath); + TString AddSimpleUdf(const TString& udf); + 
void SetPackageVersion(const TString& packageName, ui32 version); + + bool IsStreamingService(const TStringBuf service) const; + + bool CheckColumnReference(TPosition pos, const TString& name) { + const bool allowed = GetColumnReferenceState() != EColumnRefState::Deny; + if (!allowed) { + Error(pos) << "Column reference \"" << name << "\" is not allowed " << NoColumnErrorContext; + IncrementMonCounter("sql_errors", "ColumnReferenceInScopeIsNotAllowed"); + } + return allowed; + } + + EColumnRefState GetColumnReferenceState() const { + return ColumnReferenceState; + } + + EColumnRefState GetTopLevelColumnReferenceState() const { + return TopLevelColumnReferenceState; + } + + TStringBuf GetMatchRecognizeDefineVar() const { + YQL_ENSURE(EColumnRefState::MatchRecognize == ColumnReferenceState, + "DefineVar can only be accessed within processing of MATCH_RECOGNIZE lambdas"); + return MatchRecognizeDefineVar; + } + + TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos); + void WarnUnusedHints(); + + private: + IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos); + + private: + NYql::TPosition Position; + THolder<TStringOutput> IssueMsgHolder; + NSQLTranslation::TClusterMapping ClusterMapping; + TString PathPrefix; + THashMap<TString, TString> ProviderPathPrefixes; + THashMap<TString, TString> ClusterPathPrefixes; + bool IntoHeading = true; + NSQLTranslation::TSQLHints SQLHints; + + friend class TColumnRefScope; + + EColumnRefState ColumnReferenceState = EColumnRefState::Deny; + EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny; + TString MatchRecognizeDefineVar; + TString NoColumnErrorContext = "in current scope"; + TVector<TBlocks*> CurrentBlocks; + + public: + THashMap<TString, std::pair<TPosition, TNodePtr>> Variables; + THashSet<TString> WeakVariables; + NSQLTranslation::TTranslationSettings Settings; + std::unique_ptr<TMemoryPool> Pool; + NYql::TIssues& Issues; + TMap<TString, 
TNodePtr> UniversalAliases; + THashSet<TString> Exports; + THashMap<TString, TString> ImportModuleAliases; + THashMap<TString, TString> RequiredModules; + TMap<TString, TString> SimpleUdfs; + NSQLTranslation::TIncrementMonCounterFunction IncrementMonCounterFunction; + TScopedStatePtr Scoped; + int ScopeLevel = 0; + size_t AnonymousNameIndex = 0; + TDeque<TScopedStatePtr> AllScopes; + bool HasPendingErrors; + THashMap<TString, ui32> GenIndexes; + using TWinSpecsRef = std::reference_wrapper<TWinSpecs>; + TDeque<TWinSpecsRef> WinSpecsScopes; + bool PragmaRefSelect = false; + bool PragmaSampleSelect = false; + bool PragmaAllowDotInAlias = false; + bool PragmaInferSchema = false; + bool PragmaAutoCommit = false; + bool PragmaUseTablePrefixForEach = false; + bool SimpleColumns = true; + bool CoalesceJoinKeysOnQualifiedAll = false; + bool PragmaDirectRead = false; + bool PragmaYsonFast = true; + bool PragmaYsonAutoConvert = false; + bool PragmaYsonStrict = true; + bool PragmaRegexUseRe2 = true; + bool PragmaPullUpFlatMapOverJoin = true; + bool FilterPushdownOverJoinOptionalSide = false; + bool RotateJoinTree = true; + bool WarnUnnamedColumns = false; + bool DiscoveryMode = false; + bool EnableSystemColumns = true; + bool DqEngineEnable = false; + bool DqEngineForce = false; + TString CostBasedOptimizer; + TMaybe<bool> JsonQueryReturnsJsonDocument; + TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections; + TMaybe<bool> AnsiRankForNullableKeys = true; + const bool AnsiQuotedIdentifiers; + bool AnsiOptionalAs = true; + bool OrderedColumns = false; + bool PositionalUnionAll = false; + bool BogousStarInGroupByOverJoin = false; + bool UnorderedSubqueries = true; + bool PragmaDataWatermarks = true; + bool WarnOnAnsiAliasShadowing = true; + ui32 ResultRowsLimit = 0; + ui64 ResultSizeLimit = 0; + ui32 PragmaGroupByLimit = 1 << 6; + ui32 PragmaGroupByCubeLimit = 5; + // if FlexibleTypes=true, emit TypeOrMember callable and resolve Type/Column uncertainty on type annotation stage, 
otherwise always emit Type + bool FlexibleTypes = false; + // see YQL-10265 + bool AnsiCurrentRow = false; + TMaybe<bool> YsonCastToString; + using TLiteralWithPosition = std::pair<TString, TPosition>; + using TLibraryStuff = std::tuple<TPosition, std::optional<TLiteralWithPosition>, std::optional<TLiteralWithPosition>>; + std::unordered_map<TString, TLibraryStuff> Libraries; // alias -> optional file with token + using TPackageStuff = std::tuple< + TPosition, TLiteralWithPosition, + std::optional<TLiteralWithPosition> + >; + + std::unordered_map<TString, TPackageStuff> Packages; // alias -> url with optional token + + using TOverrideLibraryStuff = std::tuple<TPosition>; + std::unordered_map<TString, TOverrideLibraryStuff> OverrideLibraries; // alias -> position + + THashMap<TString, ui32> PackageVersions; + NYql::TWarningPolicy WarningPolicy; + TString PqReadByRtmrCluster; + bool EmitStartsWith = true; + TMaybe<bool> EmitAggApply; + bool UseBlocks = false; + bool AnsiLike = false; + bool FeatureR010 = false; //Row pattern recognition: FROM clause + TMaybe<bool> CompactGroupBy; + bool BlockEngineEnable = false; + bool BlockEngineForce = false; + bool UnorderedResult = false; + ui64 ParallelModeCount = 0; + bool CompactNamedExprs = false; + bool ValidateUnusedExprs = false; + bool AnsiImplicitCrossJoin = false; // select * from A,B + bool DistinctOverWindow = false; + }; + + class TColumnRefScope { + public: + TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "") + : PrevTop(ctx.TopLevelColumnReferenceState) + , Prev(ctx.ColumnReferenceState) + , PrevErr(ctx.NoColumnErrorContext) + , PrevDefineVar(ctx.MatchRecognizeDefineVar) + , Ctx(ctx) + { + if (isTopLevelExpr) { + Ctx.ColumnReferenceState = Ctx.TopLevelColumnReferenceState = state; + } else { + Ctx.ColumnReferenceState = state; + } + YQL_ENSURE(defineVar.empty() || EColumnRefState::MatchRecognize == state, "Internal logic error"); + 
ctx.MatchRecognizeDefineVar = defineVar; + } + + void SetNoColumnErrContext(const TString& msg) { + Ctx.NoColumnErrorContext = msg; + } + + ~TColumnRefScope() { + Ctx.TopLevelColumnReferenceState = PrevTop; + Ctx.ColumnReferenceState = Prev; + std::swap(Ctx.NoColumnErrorContext, PrevErr); + std::swap(Ctx.MatchRecognizeDefineVar, PrevDefineVar); + } + private: + const EColumnRefState PrevTop; + const EColumnRefState Prev; + TString PrevErr; + TString PrevDefineVar; + TContext& Ctx; + }; + + TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex); + + class TTranslation { + protected: + typedef TSet<ui32> TSetType; + + protected: + TTranslation(TContext& ctx); + + public: + TContext& Context(); + IOutputStream& Error(); + + const TString& Token(const NSQLv1Generated::TToken& token) { + return Ctx.Token(token); + } + + ui32 UnifiedToken(ui32 id) const { + return Ctx.Settings.Antlr4Parser + (id << 16); + } + + TString Identifier(const NSQLv1Generated::TToken& token) { + return IdContent(Ctx, Token(token)); + } + + TString Identifier(const TString& str) const { + return IdContent(Ctx, str); + } + + TNodePtr GetNamedNode(const TString& name); + + using TNodeBuilderByName = std::function<TNodePtr(const TString& effectiveName)>; + TString PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder); + TString PushNamedNode(TPosition namePos, const TString& name, TNodePtr node); + TString PushNamedAtom(TPosition namePos, const TString& name); + void PopNamedNode(const TString& name); + void WarnUnusedNodes() const; + + template <typename TNode> + void AltNotImplemented(const TString& ruleName, const TNode& node) { + AltNotImplemented(ruleName, node.Alt_case(), node, TNode::descriptor()); + } + + template <typename TNode> + TString AltDescription(const TNode& node) const { + return AltDescription(node, node.Alt_case(), TNode::descriptor()); + } + + protected: + void 
AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr); + TString AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const; + + protected: + TContext& Ctx; + }; +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp new file mode 100644 index 00000000000..463c52ede46 --- /dev/null +++ b/yql/essentials/sql/v1/format/sql_format.cpp @@ -0,0 +1,3105 @@ +#include "sql_format.h" + +#include <yql/essentials/parser/lexer_common/lexer.h> +#include <yql/essentials/core/sql_types/simple_types.h> + +#include <yql/essentials/sql/v1/lexer/lexer.h> +#include <yql/essentials/sql/v1/proto_parser/proto_parser.h> + +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> + +#include <library/cpp/protobuf/util/simple_reflection.h> +#include <library/cpp/resource/resource.h> + +#include <util/string/builder.h> +#include <util/string/split.h> +#include <util/string/strip.h> +#include <util/string/subst.h> +#include <util/generic/hash_set.h> + + +namespace NSQLFormat { + +namespace { + +using namespace NSQLv1Generated; + +using NSQLTranslation::TParsedToken; +using NSQLTranslation::TParsedTokenList; +using TTokenIterator = TParsedTokenList::const_iterator; + +TTokenIterator SkipWS(TTokenIterator curr, TTokenIterator end) { + while (curr != end && curr->Name == "WS") { + ++curr; + } + return curr; +} + +TTokenIterator SkipWSOrComment(TTokenIterator curr, TTokenIterator end) { + while (curr != end && (curr->Name == "WS" || curr->Name == "COMMENT")) { + ++curr; + } + return curr; +} + +bool Validate(const TParsedTokenList& query, const TParsedTokenList& formattedQuery) { + auto in = query.begin(); + auto out = formattedQuery.begin(); + auto inEnd = query.end(); + auto outEnd = formattedQuery.end(); + + while (in != inEnd && out != outEnd) { 
+ in = SkipWS(in, inEnd); + out = SkipWS(out, outEnd); + if (in != inEnd && out != outEnd) { + if (in->Name != out->Name) { + return false; + } + if (AsciiEqualsIgnoreCase(in->Name, in->Content)) { + if (!AsciiEqualsIgnoreCase(in->Content, out->Content)) { + return false; + } + } else { + if (in->Content != out->Content) { + return false; + } + } + ++in; + ++out; + } + } + in = SkipWS(in, inEnd); + out = SkipWS(out, outEnd); + return in == inEnd && out == outEnd; +} + +enum EParenType { + Open, + Close, + None +}; + +using TAdvanceCallback = std::function<EParenType(TTokenIterator& curr, TTokenIterator end)>; + +TTokenIterator SkipToNextBalanced(TTokenIterator begin, TTokenIterator end, const TAdvanceCallback& advance) { + i64 level = 0; + TTokenIterator curr = begin; + while (curr != end) { + switch (advance(curr, end)) { + case EParenType::Open: { + ++level; + break; + } + case EParenType::Close: { + --level; + if (level < 0) { + return end; + } else if (level == 0) { + return curr; + } + break; + } + case EParenType::None: + break; + } + } + return curr; +} + +TTokenIterator GetNextStatementBegin(TTokenIterator begin, TTokenIterator end) { + TAdvanceCallback advanceLambdaBody = [](TTokenIterator& curr, TTokenIterator end) -> EParenType { + Y_UNUSED(end); + if (curr->Name == "LBRACE_CURLY") { + ++curr; + return EParenType::Open; + } else if (curr->Name == "RBRACE_CURLY") { + ++curr; + return EParenType::Close; + } else { + ++curr; + return EParenType::None; + } + }; + + TAdvanceCallback advanceAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType { + auto tmp = curr; + if (curr->Name == "DEFINE") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && (curr->Name == "ACTION" || curr->Name == "SUBQUERY")) { + ++curr; + return EParenType::Open; + } + } else if (curr->Name == "END") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && curr->Name == "DEFINE") { + ++curr; + return EParenType::Close; + } + } + + curr = tmp; 
+ ++curr; + return EParenType::None; + }; + + TAdvanceCallback advanceInlineAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType { + auto tmp = curr; + if (curr->Name == "DO") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && curr->Name == "BEGIN") { + ++curr; + return EParenType::Open; + } + } else if (curr->Name == "END") { + ++curr; + curr = SkipWSOrComment(curr, end); + if (curr != end && curr->Name == "DO") { + ++curr; + return EParenType::Close; + } + } + + curr = tmp; + ++curr; + return EParenType::None; + }; + + TTokenIterator curr = begin; + while (curr != end) { + bool matched = false; + for (auto cb : {advanceLambdaBody, advanceAction, advanceInlineAction}) { + TTokenIterator tmp = curr; + if (cb(tmp, end) == EParenType::Open) { + curr = SkipToNextBalanced(curr, end, cb); + matched = true; + if (curr == end) { + return curr; + } + } + } + if (matched) { + continue; + } + if (curr->Name == "SEMICOLON") { + ++curr; + break; + } + ++curr; + } + + return curr; +} + +void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenIterator>& output) { + output.clear(); + if (begin == end) { + return; + } + output.push_back(begin); + auto curr = begin; + while (curr != end) { + curr = GetNextStatementBegin(curr, end); + output.push_back(curr); + } +} + +enum class EScope { + Default, + TypeName, + Identifier, + DoubleQuestion +}; + +class TPrettyVisitor; +using TPrettyFunctor = std::function<void(TPrettyVisitor&, const NProtoBuf::Message& msg)>; +class TObfuscatingVisitor; +using TObfuscatingFunctor = std::function<void(TObfuscatingVisitor&, const NProtoBuf::Message& msg)>; + +struct TStaticData { + TStaticData(); + static const TStaticData& GetInstance() { + return *Singleton<TStaticData>(); + } + + THashSet<TString> Keywords; + THashMap<const NProtoBuf::Descriptor*, EScope> ScopeDispatch; + THashMap<const NProtoBuf::Descriptor*, TPrettyFunctor> PrettyVisitDispatch; + THashMap<const NProtoBuf::Descriptor*, 
TObfuscatingFunctor> ObfuscatingVisitDispatch; +}; + +template <typename T, void (T::*Func)(const NProtoBuf::Message&)> +void VisitAllFieldsImpl(T* obj, const NProtoBuf::Descriptor* descr, const NProtoBuf::Message& msg) { + for (int i = 0; i < descr->field_count(); ++i) { + const NProtoBuf::FieldDescriptor* fd = descr->field(i); + NProtoBuf::TConstField field(msg, fd); + if (field.IsMessage()) { + for (size_t j = 0; j < field.Size(); ++j) { + (obj->*Func)(*field.template Get<NProtoBuf::Message>(j)); + } + } + } +} + +class TObfuscatingVisitor { +friend struct TStaticData; +public: + TObfuscatingVisitor() + : StaticData(TStaticData::GetInstance()) + {} + + TString Process(const NProtoBuf::Message& msg) { + Scopes.push_back(EScope::Default); + Visit(msg); + return SB; + } + +private: + void VisitToken(const TToken& token) { + auto str = token.GetValue(); + if (str == "<EOF>") { + return; + } + + if (!First) { + SB << ' '; + } else { + First = false; + } + + if (str == "$" && FuncCall) { + FuncCall = false; + } + + if (Scopes.back() == EScope::Identifier && !FuncCall) { + if (str != "$" && !NYql::LookupSimpleTypeBySqlAlias(str, true)) { + SB << "id"; + } else { + SB << str; + } + } else if (NextToken) { + SB << *NextToken; + NextToken = Nothing(); + } else { + SB << str; + } + } + + void VisitPragmaValue(const TRule_pragma_value& msg) { + switch (msg.Alt_case()) { + case TRule_pragma_value::kAltPragmaValue1: { + NextToken = "0"; + break; + } + case TRule_pragma_value::kAltPragmaValue3: { + NextToken = "'str'"; + break; + } + case TRule_pragma_value::kAltPragmaValue4: { + NextToken = "false"; + break; + } + default:; + } + VisitAllFields(TRule_pragma_value::GetDescriptor(), msg); + } + + void VisitLiteralValue(const TRule_literal_value& msg) { + switch (msg.Alt_case()) { + case TRule_literal_value::kAltLiteralValue1: { + NextToken = "0"; + break; + } + case TRule_literal_value::kAltLiteralValue2: { + NextToken = "0.0"; + break; + } + case 
TRule_literal_value::kAltLiteralValue3: { + NextToken = "'str'"; + break; + } + case TRule_literal_value::kAltLiteralValue9: { + NextToken = "false"; + break; + } + default:; + } + + VisitAllFields(TRule_literal_value::GetDescriptor(), msg); + } + + void VisitAtomExpr(const TRule_atom_expr& msg) { + switch (msg.Alt_case()) { + case TRule_atom_expr::kAltAtomExpr7: { + FuncCall = true; + break; + } + default:; + } + + VisitAllFields(TRule_atom_expr::GetDescriptor(), msg); + FuncCall = false; + } + + void VisitInAtomExpr(const TRule_in_atom_expr& msg) { + switch (msg.Alt_case()) { + case TRule_in_atom_expr::kAltInAtomExpr6: { + FuncCall = true; + break; + } + default:; + } + + VisitAllFields(TRule_in_atom_expr::GetDescriptor(), msg); + FuncCall = false; + } + + void VisitUnaryCasualSubexpr(const TRule_unary_casual_subexpr& msg) { + bool invoke = false; + for (auto& b : msg.GetRule_unary_subexpr_suffix2().GetBlock1()) { + switch (b.GetBlock1().Alt_case()) { + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: { + invoke = true; + break; + } + default:; + } + + break; + } + + if (invoke) { + FuncCall = true; + } + + Visit(msg.GetBlock1()); + if (invoke) { + FuncCall = false; + } + + Visit(msg.GetRule_unary_subexpr_suffix2()); + } + + void VisitInUnaryCasualSubexpr(const TRule_in_unary_casual_subexpr& msg) { + bool invoke = false; + for (auto& b : msg.GetRule_unary_subexpr_suffix2().GetBlock1()) { + switch (b.GetBlock1().Alt_case()) { + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: { + invoke = true; + break; + } + default:; + } + + break; + } + + if (invoke) { + FuncCall = true; + } + + Visit(msg.GetBlock1()); + if (invoke) { + FuncCall = false; + } + + Visit(msg.GetRule_unary_subexpr_suffix2()); + } + + void Visit(const NProtoBuf::Message& msg) { + const NProtoBuf::Descriptor* descr = msg.GetDescriptor(); + auto scopePtr = StaticData.ScopeDispatch.FindPtr(descr); + if (scopePtr) { + Scopes.push_back(*scopePtr); + } + + auto funcPtr = 
StaticData.ObfuscatingVisitDispatch.FindPtr(descr); + if (funcPtr) { + (*funcPtr)(*this, msg); + } else { + VisitAllFields(descr, msg); + } + + if (scopePtr) { + Scopes.pop_back(); + } + } + + void VisitAllFields(const NProtoBuf::Descriptor* descr, const NProtoBuf::Message& msg) { + VisitAllFieldsImpl<TObfuscatingVisitor, &TObfuscatingVisitor::Visit>(this, descr, msg); + } + + const TStaticData& StaticData; + TStringBuilder SB; + bool First = true; + TMaybe<TString> NextToken; + TVector<EScope> Scopes; + bool FuncCall = false; +}; + +class TPrettyVisitor { +friend struct TStaticData; +public: + TPrettyVisitor(const TParsedTokenList& parsedTokens, const TParsedTokenList& comments) + : StaticData(TStaticData::GetInstance()) + , ParsedTokens(parsedTokens) + , Comments(comments) + { + } + + TString Process(const NProtoBuf::Message& msg, bool& addLine) { + Scopes.push_back(EScope::Default); + MarkedTokens.reserve(ParsedTokens.size()); + MarkTokens(msg); + Y_ENSURE(MarkTokenStack.empty()); + Y_ENSURE(TokenIndex == ParsedTokens.size()); + TokenIndex = 0; + Visit(msg); + Y_ENSURE(TokenIndex == ParsedTokens.size()); + Y_ENSURE(MarkTokenStack.empty()); + for (; LastComment < Comments.size(); ++LastComment) { + const auto text = Comments[LastComment].Content; + AddComment(text); + } + addLine = AddLine.GetOrElse(true); + + return SB; + } + +private: + struct TTokenInfo { + bool OpeningBracket = false; + bool ClosingBracket = false; + bool BracketForcedExpansion = false; + ui32 ClosingBracketIndex = 0; + }; + + using TMarkTokenStack = TVector<ui32>; + + void Out(TStringBuf s) { + for (ui32 i = 0; i < s.size(); ++i) { + Out(s[i], i == 0); + } + } + + void Out(char c, bool useIndent = true) { + if (c == '\n' || c == '\r') { + SB << c; + if (!(c == '\n' && !SB.empty() && SB.back() == '\r')) { + // do not increase OutLine if \n is preceded by \r + // this way we handle \r, \n, or \r\n as single new line + ++OutLine; + } + OutColumn = 0; + } else { + if (!OutColumn && useIndent) { 
+ ui32 indent = (CurrentIndent >= 0) ? CurrentIndent : 0; + for (ui32 i = 0; i < indent; ++i) { + SB << ' '; + } + } + + SB << c; + ++OutColumn; + } + } + + void NewLine() { + if (OutColumn) { + Out('\n'); + } + } + + void AddComment(TStringBuf text) { + if (text.StartsWith("--") && !SB.empty() && SB.back() == '-') { + Out(' '); + } + + Out(text); + } + + void MarkTokens(const NProtoBuf::Message& msg) { + const NProtoBuf::Descriptor* descr = msg.GetDescriptor(); + auto scopePtr = StaticData.ScopeDispatch.FindPtr(descr); + if (scopePtr) { + if (*scopePtr == EScope::TypeName) { + ++InsideType; + } + + Scopes.push_back(*scopePtr); + } + + bool suppressExpr = false; + if (descr == TToken::GetDescriptor()) { + const auto& token = dynamic_cast<const TToken&>(msg); + MarkToken(token); + } else if (descr == TRule_sql_stmt_core::GetDescriptor()) { + if (AddLine.Empty()) { + AddLine = !IsSimpleStatement(dynamic_cast<const TRule_sql_stmt_core&>(msg)).GetOrElse(false); + } + } else if (descr == TRule_lambda_body::GetDescriptor()) { + Y_ENSURE(TokenIndex >= 1); + auto prevIndex = TokenIndex - 1; + Y_ENSURE(prevIndex < ParsedTokens.size()); + Y_ENSURE(ParsedTokens[prevIndex].Content == "{"); + MarkedTokens[prevIndex].OpeningBracket = false; + ForceExpandedColumn = ParsedTokens[prevIndex].LinePos; + ForceExpandedLine = ParsedTokens[prevIndex].Line; + } else if (descr == TRule_in_atom_expr::GetDescriptor()) { + const auto& value = dynamic_cast<const TRule_in_atom_expr&>(msg); + if (value.Alt_case() == TRule_in_atom_expr::kAltInAtomExpr7) { + suppressExpr = true; + } + } else if (descr == TRule_select_kind_parenthesis::GetDescriptor()) { + const auto& value = dynamic_cast<const TRule_select_kind_parenthesis&>(msg); + if (value.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis2) { + suppressExpr = true; + } + } else if (descr == TRule_window_specification::GetDescriptor()) { + const auto& value = dynamic_cast<const TRule_window_specification&>(msg); + const 
auto& details = value.GetRule_window_specification_details2(); + const bool needsNewline = details.HasBlock1() || details.HasBlock2() || + details.HasBlock3() || details.HasBlock4(); + if (needsNewline) { + auto& paren = value.GetToken1(); + ForceExpandedColumn = paren.GetColumn(); + ForceExpandedLine = paren.GetLine(); + } + suppressExpr = true; + } else if (descr == TRule_exists_expr::GetDescriptor()) { + const auto& value = dynamic_cast<const TRule_exists_expr&>(msg); + auto& paren = value.GetToken2(); + ForceExpandedColumn = paren.GetColumn(); + ForceExpandedLine = paren.GetLine(); + suppressExpr = true; + } else if (descr == TRule_case_expr::GetDescriptor()) { + const auto& value = dynamic_cast<const TRule_case_expr&>(msg); + auto& token = value.GetToken1(); + ForceExpandedColumn = token.GetColumn(); + ForceExpandedLine = token.GetLine(); + } + + const bool expr = (descr == TRule_expr::GetDescriptor() || descr == TRule_in_expr::GetDescriptor()); + if (expr) { + ++InsideExpr; + } + + ui64 prevInsideExpr = InsideExpr; + if (suppressExpr) { + InsideExpr = 0; + } + + VisitAllFieldsImpl<TPrettyVisitor, &TPrettyVisitor::MarkTokens>(this, descr, msg); + if (suppressExpr) { + InsideExpr = prevInsideExpr; + } + + if (scopePtr) { + if (*scopePtr == EScope::TypeName) { + --InsideType; + } + + Scopes.pop_back(); + } + + if (expr) { + --InsideExpr; + } + } + + void MarkToken(const TToken& token) { + auto str = token.GetValue(); + if (str == "<EOF>") { + return; + } + + MarkedTokens.emplace_back(); + if (str == "(" || str == "[" || str == "{" || str == "<|" || (InsideType && str == "<")) { + MarkTokenStack.push_back(TokenIndex); + auto& info = MarkedTokens[TokenIndex]; + info.OpeningBracket = (InsideExpr > 0); + } else if (str == ")") { + PopBracket("("); + } else if (str == "]") { + PopBracket("["); + } else if (str == "}") { + PopBracket("{"); + } else if (str == "|>") { + PopBracket("<|"); + } else if (InsideType && str == ">") { + PopBracket("<"); + } + + TokenIndex++; 
+ } + + void PopBracket(const TString& expected) { + Y_ENSURE(!MarkTokenStack.empty()); + Y_ENSURE(MarkTokenStack.back() < ParsedTokens.size()); + auto& openToken = ParsedTokens[MarkTokenStack.back()]; + Y_ENSURE(openToken.Content == expected); + auto& openInfo = MarkedTokens[MarkTokenStack.back()]; + auto& closeInfo = MarkedTokens[TokenIndex]; + const bool forcedExpansion = openToken.Line == ForceExpandedLine && openToken.LinePos <= ForceExpandedColumn; + + if (openInfo.OpeningBracket) { + openInfo.ClosingBracketIndex = TokenIndex; + openInfo.BracketForcedExpansion = forcedExpansion; + closeInfo.BracketForcedExpansion = forcedExpansion; + closeInfo.ClosingBracket = true; + } + + MarkTokenStack.pop_back(); + } + + void Visit(const NProtoBuf::Message& msg) { + const NProtoBuf::Descriptor* descr = msg.GetDescriptor(); + //Cerr << descr->name() << "\n"; + auto scopePtr = StaticData.ScopeDispatch.FindPtr(descr); + if (descr == TRule_invoke_expr::GetDescriptor()) { + AfterInvokeExpr = true; + } + + if (descr == TRule_unary_op::GetDescriptor()) { + AfterUnaryOp = true; + } + + if (scopePtr) { + if (*scopePtr == EScope::TypeName) { + ++InsideType; + } + + Scopes.push_back(*scopePtr); + } + + auto funcPtr = StaticData.PrettyVisitDispatch.FindPtr(descr); + if (funcPtr) { + (*funcPtr)(*this, msg); + } else { + VisitAllFields(descr, msg); + } + + if (scopePtr) { + if (*scopePtr == EScope::TypeName) { + --InsideType; + } + + Scopes.pop_back(); + } + } + + TMaybe<bool> IsSimpleStatement(const TRule_sql_stmt_core& msg) { + switch (msg.Alt_case()) { + case TRule_sql_stmt_core::kAltSqlStmtCore1: // pragma + case TRule_sql_stmt_core::kAltSqlStmtCore5: // drop table + case TRule_sql_stmt_core::kAltSqlStmtCore6: // use + case TRule_sql_stmt_core::kAltSqlStmtCore8: // commit + case TRule_sql_stmt_core::kAltSqlStmtCore11: // rollback + case TRule_sql_stmt_core::kAltSqlStmtCore12: // declare + case TRule_sql_stmt_core::kAltSqlStmtCore13: // import + case 
TRule_sql_stmt_core::kAltSqlStmtCore14: // export + case TRule_sql_stmt_core::kAltSqlStmtCore32: // drop external data source + case TRule_sql_stmt_core::kAltSqlStmtCore34: // drop replication + return true; + case TRule_sql_stmt_core::kAltSqlStmtCore3: { // named nodes + const auto& stmt = msg.GetAlt_sql_stmt_core3().GetRule_named_nodes_stmt1(); + if (stmt.GetBlock3().HasAlt1()) { + return true; + } + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore17: { // do + const auto& stmt = msg.GetAlt_sql_stmt_core17().GetRule_do_stmt1(); + if (stmt.GetBlock2().HasAlt1()) { + return true; + } + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore19: // if + case TRule_sql_stmt_core::kAltSqlStmtCore20: // for + return false; + default: + break; + } + + return {}; + } + + template <typename T> + void VisitRepeated(const ::google::protobuf::RepeatedPtrField<T>& field) { + for (const auto& m : field) { + Visit(m); + } + } + + void VisitDefineActionOrSubqueryBody(const TRule_define_action_or_subquery_body& msg) { + VisitRepeated(msg.GetBlock1()); + if (msg.HasBlock2()) { + const auto& b = msg.GetBlock2(); + Visit(b.GetRule_sql_stmt_core1()); + for (auto block : b.GetBlock2()) { + VisitRepeated(block.GetBlock1()); + if (!IsSimpleStatement(block.GetRule_sql_stmt_core2()).GetOrElse(false)) { + Out('\n'); + } + Visit(block.GetRule_sql_stmt_core2()); + } + + VisitRepeated(b.GetBlock3()); + } + } + + void VisitPragma(const TRule_pragma_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + auto prefix = msg.GetRule_opt_id_prefix_or_type2(); + if (prefix.HasBlock1()) { + Visit(prefix.GetBlock1().GetRule_an_id_or_type1()); + VisitKeyword(prefix.GetBlock1().GetToken2()); + AfterDot = true; + } + + Visit(msg.GetRule_an_id3()); + if (msg.GetBlock4().HasAlt2()) { + AfterInvokeExpr = true; + const auto& alt2 = msg.GetBlock4().GetAlt2(); + VisitKeyword(alt2.GetToken1()); + Visit(alt2.GetRule_pragma_value2()); + VisitRepeated(alt2.GetBlock3()); + 
VisitKeyword(alt2.GetToken4()); + } else { + Visit(msg.GetBlock4()); + } + } + + void PosFromPartial(const TRule_select_kind_partial& partial) { + const auto& kind = partial.GetRule_select_kind1(); + if (kind.HasBlock1()) { // DISCARD + PosFromToken(kind.GetBlock1().GetToken1()); + } else { + switch (kind.GetBlock2().Alt_case()) { + case TRule_select_kind_TBlock2::kAlt1: + PosFromToken(kind.GetBlock2().GetAlt1().GetRule_process_core1().GetToken1()); + break; + case TRule_select_kind_TBlock2::kAlt2: + PosFromToken(kind.GetBlock2().GetAlt2().GetRule_reduce_core1().GetToken1()); + break; + case TRule_select_kind_TBlock2::kAlt3: { + const auto& selCore = kind.GetBlock2().GetAlt3().GetRule_select_core1(); + if (selCore.HasBlock1()) { + PosFromToken(selCore.GetBlock1().GetToken1()); + } else { + PosFromToken(selCore.GetToken2()); + } + + break; + } + + default: + ythrow yexception() << "Alt is not supported"; + } + } + } + + void VisitSelect(const TRule_select_stmt& msg) { + const auto& paren = msg.GetRule_select_kind_parenthesis1(); + if (paren.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis1) { + const auto& partial = paren.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1(); + PosFromPartial(partial); + } else { + PosFromToken(paren.GetAlt_select_kind_parenthesis2().GetToken1()); + } + + NewLine(); + Visit(msg.GetRule_select_kind_parenthesis1()); + for (const auto& block : msg.GetBlock2()) { + NewLine(); + Visit(block.GetRule_select_op1()); + NewLine(); + Visit(block.GetRule_select_kind_parenthesis2()); + } + } + + void VisitSelectUnparenthesized(const TRule_select_unparenthesized_stmt& msg) { + const auto& partial = msg.GetRule_select_kind_partial1(); + PosFromPartial(partial); + NewLine(); + Visit(msg.GetRule_select_kind_partial1()); + for (const auto& block : msg.GetBlock2()) { + NewLine(); + Visit(block.GetRule_select_op1()); + NewLine(); + Visit(block.GetRule_select_kind_parenthesis2()); + } + } + + void VisitNamedNodes(const 
TRule_named_nodes_stmt& msg) { + PosFromToken(msg.GetRule_bind_parameter_list1().GetRule_bind_parameter1().GetToken1()); + NewLine(); + Visit(msg.GetRule_bind_parameter_list1()); + Visit(msg.GetToken2()); + switch (msg.GetBlock3().Alt_case()) { + case TRule_named_nodes_stmt::TBlock3::kAlt1: { + const auto& alt = msg.GetBlock3().GetAlt1(); + Visit(alt); + break; + } + + case TRule_named_nodes_stmt::TBlock3::kAlt2: { + const auto& alt = msg.GetBlock3().GetAlt2(); + const auto& subselect = alt.GetRule_subselect_stmt1(); + switch (subselect.GetBlock1().Alt_case()) { + case TRule_subselect_stmt::TBlock1::kAlt1: { + const auto& alt = subselect.GetBlock1().GetAlt1(); + Visit(alt.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(alt.GetRule_select_stmt2()); + PopCurrentIndent(); + NewLine(); + Visit(alt.GetToken3()); + break; + } + + case TRule_subselect_stmt::TBlock1::kAlt2: { + const auto& alt = subselect.GetBlock1().GetAlt2(); + NewLine(); + PushCurrentIndent(); + Visit(alt); + PopCurrentIndent(); + break; + } + + default: + ythrow yexception() << "Alt is not supported"; + } + + break; + } + + default: + ythrow yexception() << "Alt is not supported"; + } + } + + void VisitCreateTable(const TRule_create_table_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + Visit(msg.GetToken1()); + Visit(msg.GetBlock2()); + Visit(msg.GetBlock3()); + Visit(msg.GetBlock4()); + Visit(msg.GetRule_simple_table_ref5()); + Visit(msg.GetToken6()); + PushCurrentIndent(); + NewLine(); + Visit(msg.GetRule_create_table_entry7()); + for (const auto& b : msg.GetBlock8()) { + Visit(b.GetToken1()); + NewLine(); + Visit(b.GetRule_create_table_entry2()); + } + if (msg.HasBlock9()) { + Visit(msg.GetBlock9()); + } + + PopCurrentIndent(); + NewLine(); + Visit(msg.GetToken10()); + if (msg.HasBlock11()) { + NewLine(); + Visit(msg.GetBlock11()); + } + if (msg.HasBlock12()) { + NewLine(); + Visit(msg.GetBlock12()); + } + if (msg.HasBlock13()) { + NewLine(); + Visit(msg.GetBlock13()); + } + 
if (msg.HasBlock14()) { + NewLine(); + Visit(msg.GetBlock14()); + } + if (msg.HasBlock15()) { + NewLine(); + Visit(msg.GetBlock15()); + } + } + + void VisitDropTable(const TRule_drop_table_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_table_stmt::GetDescriptor(), msg); + } + + void VisitAnalyze(const TRule_analyze_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_analyze_stmt::GetDescriptor(), msg); + } + + void VisitBackup(const TRule_backup_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_backup_stmt::GetDescriptor(), msg); + } + + void VisitRestore(const TRule_restore_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_restore_stmt::GetDescriptor(), msg); + } + + void VisitUse(const TRule_use_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_use_stmt::GetDescriptor(), msg); + } + + void VisitIntoTable(const TRule_into_table_stmt& msg) { + switch (msg.GetBlock1().Alt_case()) { + case TRule_into_table_stmt_TBlock1::AltCase::kAlt1: + PosFromToken(msg.GetBlock1().GetAlt1().GetToken1()); + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt2: + PosFromToken(msg.GetBlock1().GetAlt2().GetToken1()); + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt3: + PosFromToken(msg.GetBlock1().GetAlt3().GetToken1()); + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt4: + PosFromToken(msg.GetBlock1().GetAlt4().GetToken1()); + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt5: + PosFromToken(msg.GetBlock1().GetAlt5().GetToken1()); + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt6: + PosFromToken(msg.GetBlock1().GetAlt6().GetToken1()); + break; + default: + ythrow yexception() << "Alt is not supported"; + } + + NewLine(); + VisitAllFields(TRule_into_table_stmt::GetDescriptor(), msg); + } + + void VisitCommit(const TRule_commit_stmt& msg) { + PosFromToken(msg.GetToken1()); 
+ NewLine(); + VisitAllFields(TRule_commit_stmt::GetDescriptor(), msg); + } + + void VisitUpdate(const TRule_update_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + Visit(msg.GetToken1()); + Visit(msg.GetRule_simple_table_ref2()); + switch (msg.GetBlock3().Alt_case()) { + case TRule_update_stmt_TBlock3::kAlt1: { + const auto& alt = msg.GetBlock3().GetAlt1(); + NewLine(); + Visit(alt.GetToken1()); + const auto& choice = alt.GetRule_set_clause_choice2(); + NewLine(); + + switch (choice.Alt_case()) { + case TRule_set_clause_choice::kAltSetClauseChoice1: { + const auto& clauses = choice.GetAlt_set_clause_choice1().GetRule_set_clause_list1(); + PushCurrentIndent(); + Visit(clauses.GetRule_set_clause1()); + for (auto& block : clauses.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_set_clause2()); + } + + PopCurrentIndent(); + break; + } + case TRule_set_clause_choice::kAltSetClauseChoice2: { + const auto& multiColumn = choice.GetAlt_set_clause_choice2().GetRule_multiple_column_assignment1(); + const auto& targets = multiColumn.GetRule_set_target_list1(); + Visit(targets.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(targets.GetRule_set_target2()); + for (auto& block : targets.GetBlock3()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_set_target2()); + } + + NewLine(); + PopCurrentIndent(); + Visit(targets.GetToken4()); + Visit(multiColumn.GetToken2()); + Visit(multiColumn.GetToken3()); + NewLine(); + const auto& simpleValues = multiColumn.GetRule_simple_values_source4(); + switch (simpleValues.Alt_case()) { + case TRule_simple_values_source::kAltSimpleValuesSource1: { + const auto& exprs = simpleValues.GetAlt_simple_values_source1().GetRule_expr_list1(); + PushCurrentIndent(); + Visit(exprs.GetRule_expr1()); + for (const auto& block : exprs.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_expr2()); + } + + PopCurrentIndent(); + break; + } + case 
TRule_simple_values_source::kAltSimpleValuesSource2: { + PushCurrentIndent(); + Visit(simpleValues.GetAlt_simple_values_source2()); + PopCurrentIndent(); + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + + NewLine(); + Visit(multiColumn.GetToken5()); + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + + PopCurrentIndent(); + if (alt.HasBlock3()) { + NewLine(); + Visit(alt.GetBlock3()); + } + + PopCurrentIndent(); + break; + } + case TRule_update_stmt_TBlock3::kAlt2: { + const auto& alt = msg.GetBlock3().GetAlt2(); + NewLine(); + Visit(alt.GetToken1()); + Visit(alt.GetRule_into_values_source2()); + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + } + + void VisitDelete(const TRule_delete_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + Visit(msg.GetToken1()); + Visit(msg.GetToken2()); + Visit(msg.GetRule_simple_table_ref3()); + if (msg.HasBlock4()) { + switch (msg.GetBlock4().Alt_case()) { + case TRule_delete_stmt_TBlock4::kAlt1: { + const auto& alt = msg.GetBlock4().GetAlt1(); + NewLine(); + Visit(alt); + break; + } + case TRule_delete_stmt_TBlock4::kAlt2: { + const auto& alt = msg.GetBlock4().GetAlt2(); + NewLine(); + Visit(alt); + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + } + } + + void VisitRollback(const TRule_rollback_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_rollback_stmt::GetDescriptor(), msg); + } + + void VisitDeclare(const TRule_declare_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_declare_stmt::GetDescriptor(), msg); + } + + void VisitImport(const TRule_import_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_import_stmt::GetDescriptor(), msg); + } + + void VisitExport(const TRule_export_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_export_stmt::GetDescriptor(), msg); + } + + void 
VisitAlterTable(const TRule_alter_table_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + VisitKeyword(msg.GetToken2()); + Visit(msg.GetRule_simple_table_ref3()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_alter_table_action4()); + for (auto& b : msg.GetBlock5()) { + Visit(b.GetToken1()); + NewLine(); + Visit(b.GetRule_alter_table_action2()); + } + + PopCurrentIndent(); + } + + void VisitAlterTableStore(const TRule_alter_table_store_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_alter_table_store_stmt::GetDescriptor(), msg); + } + + void VisitAlterExternalTable(const TRule_alter_external_table_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + VisitKeyword(msg.GetToken2()); + VisitKeyword(msg.GetToken3()); + Visit(msg.GetRule_simple_table_ref4()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_alter_external_table_action5()); + for (auto& b : msg.GetBlock6()) { + Visit(b.GetToken1()); + NewLine(); + Visit(b.GetRule_alter_external_table_action2()); + } + + PopCurrentIndent(); + } + + void VisitDo(const TRule_do_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + switch (msg.GetBlock2().Alt_case()) { + case TRule_do_stmt_TBlock2::kAlt1: { // CALL + const auto& alt = msg.GetBlock2().GetAlt1().GetRule_call_action1(); + Visit(alt.GetBlock1()); + AfterInvokeExpr = true; + Visit(alt.GetToken2()); + if (alt.HasBlock3()) { + Visit(alt.GetBlock3()); + } + + Visit(alt.GetToken4()); + break; + } + case TRule_do_stmt_TBlock2::kAlt2: { // INLINE + const auto& alt = msg.GetBlock2().GetAlt2().GetRule_inline_action1(); + VisitKeyword(alt.GetToken1()); + PushCurrentIndent(); + NewLine(); + Visit(alt.GetRule_define_action_or_subquery_body2()); + PopCurrentIndent(); + NewLine(); + VisitKeyword(alt.GetToken3()); + VisitKeyword(alt.GetToken4()); + break; + } + default: + ythrow yexception() << "Alt is 
not supported"; + } + } + + void VisitAction(const TRule_define_action_or_subquery_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + VisitKeyword(msg.GetToken2()); + Visit(msg.GetRule_bind_parameter3()); + AfterInvokeExpr = true; + Visit(msg.GetToken4()); + if (msg.HasBlock5()) { + Visit(msg.GetBlock5()); + } + + Visit(msg.GetToken6()); + VisitKeyword(msg.GetToken7()); // AS + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_define_action_or_subquery_body8()); + PopCurrentIndent(); + NewLine(); + VisitKeyword(msg.GetToken9()); + VisitKeyword(msg.GetToken10()); + } + + void VisitIf(const TRule_if_stmt& msg) { + if (msg.HasBlock1()) { + PosFromToken(msg.GetBlock1().GetToken1()); + } else { + PosFromToken(msg.GetToken2()); + } + + NewLine(); + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + } + + Visit(msg.GetToken2()); + Visit(msg.GetRule_expr3()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_do_stmt4()); + PopCurrentIndent(); + if (msg.HasBlock5()) { + NewLine(); + Visit(msg.GetBlock5().GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetBlock5().GetRule_do_stmt2()); + PopCurrentIndent(); + } + } + + void VisitFor(const TRule_for_stmt& msg) { + if (msg.HasBlock1()) { + PosFromToken(msg.GetBlock1().GetToken1()); + } else if (msg.HasBlock2()) { + PosFromToken(msg.GetBlock2().GetToken1()); + } else { + PosFromToken(msg.GetToken3()); + } + + NewLine(); + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + } + + if (msg.HasBlock2()) { + Visit(msg.GetBlock2()); + } + + Visit(msg.GetToken3()); + Visit(msg.GetRule_bind_parameter4()); + Visit(msg.GetToken5()); + Visit(msg.GetRule_expr6()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_do_stmt7()); + PopCurrentIndent(); + if (msg.HasBlock8()) { + NewLine(); + Visit(msg.GetBlock8().GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetBlock8().GetRule_do_stmt2()); + PopCurrentIndent(); + } + } + + void VisitValues(const 
TRule_values_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + const auto& rowList = msg.GetRule_values_source_row_list2(); + PushCurrentIndent(); + NewLine(); + Visit(rowList.GetRule_values_source_row1()); + for (const auto& b : rowList.GetBlock2()) { + Visit(b.GetToken1()); + NewLine(); + Visit(b.GetRule_values_source_row2()); + } + + PopCurrentIndent(); + } + + void VisitGrantPermissions(const TRule_grant_permissions_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_grant_permissions_stmt::GetDescriptor(), msg); + } + + void VisitRevokePermissions(const TRule_revoke_permissions_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_revoke_permissions_stmt::GetDescriptor(), msg); + } + + void VisitCreateUser(const TRule_create_user_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_user_stmt::GetDescriptor(), msg); + } + + void VisitAlterUser(const TRule_alter_user_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_alter_user_stmt::GetDescriptor(), msg); + } + + void VisitCreateGroup(const TRule_create_group_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_group_stmt::GetDescriptor(), msg); + } + + void VisitAlterGroup(const TRule_alter_group_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_alter_group_stmt::GetDescriptor(), msg); + } + + void VisitDropRole(const TRule_drop_role_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_role_stmt::GetDescriptor(), msg); + } + + void VisitUpsertObject(const TRule_upsert_object_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_upsert_object_stmt::GetDescriptor(), msg); + } + + void VisitCreateObject(const TRule_create_object_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + 
VisitAllFields(TRule_create_object_stmt::GetDescriptor(), msg); + } + + void VisitAlterObject(const TRule_alter_object_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_alter_object_stmt::GetDescriptor(), msg); + } + + void VisitDropObject(const TRule_drop_object_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_object_stmt::GetDescriptor(), msg); + } + + void VisitCreateTopic(const TRule_create_topic_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + VisitKeyword(msg.GetToken2()); + Visit(msg.GetBlock3()); + Visit(msg.GetRule_topic_ref4()); + if (msg.HasBlock5()) { + PushCurrentIndent(); + auto& b = msg.GetBlock5().GetRule_create_topic_entries1(); + Visit(b.GetToken1()); + NewLine(); + Visit(b.GetRule_create_topic_entry2()); + for (auto& subEntry : b.GetBlock3()) { + Visit(subEntry.GetToken1()); + NewLine(); + Visit(subEntry.GetRule_create_topic_entry2()); + } + NewLine(); + PopCurrentIndent(); + Visit(b.GetToken4()); + } + if (msg.HasBlock6()) { + auto& b = msg.GetBlock6().GetRule_with_topic_settings1(); + VisitKeyword(b.GetToken1()); + VisitKeyword(b.GetToken2()); + PushCurrentIndent(); + NewLine(); + Visit(b.GetRule_topic_settings3()); + PopCurrentIndent(); + NewLine(); + VisitKeyword(b.GetToken4()); + } + } + + void VisitAlterTopic(const TRule_alter_topic_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitKeyword(msg.GetToken1()); + VisitKeyword(msg.GetToken2()); + Visit(msg.GetBlock3()); + Visit(msg.GetRule_topic_ref4()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_alter_topic_action5()); + for (auto& b : msg.GetBlock6()) { + Visit(b.GetToken1()); + NewLine(); + Visit(b.GetRule_alter_topic_action2()); + } + + PopCurrentIndent(); + } + + void VisitDropTopic(const TRule_drop_topic_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_topic_stmt::GetDescriptor(), msg); + } + + void 
VisitCreateExternalDataSource(const TRule_create_external_data_source_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_external_data_source_stmt::GetDescriptor(), msg); + } + + void VisitAlterExternalDataSource(const TRule_alter_external_data_source_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitToken(msg.GetToken1()); + VisitToken(msg.GetToken2()); + VisitToken(msg.GetToken3()); + VisitToken(msg.GetToken4()); + Visit(msg.GetRule_object_ref5()); + + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_alter_external_data_source_action6()); + for (const auto& action : msg.GetBlock7()) { + Visit(action.GetToken1()); // comma + NewLine(); + Visit(action.GetRule_alter_external_data_source_action2()); + } + + PopCurrentIndent(); + } + + void VisitDropExternalDataSource(const TRule_drop_external_data_source_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_external_data_source_stmt::GetDescriptor(), msg); + } + + void VisitCreateView(const TRule_create_view_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_view_stmt::GetDescriptor(), msg); + } + + void VisitDropView(const TRule_drop_view_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_view_stmt::GetDescriptor(), msg); + } + + void VisitCreateAsyncReplication(const TRule_create_replication_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_replication_stmt::GetDescriptor(), msg); + } + + void VisitAlterAsyncReplication(const TRule_alter_replication_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_alter_replication_stmt::GetDescriptor(), msg); + } + + void VisitDropAsyncReplication(const TRule_drop_replication_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_replication_stmt::GetDescriptor(), msg); + } + + void VisitCreateResourcePool(const 
TRule_create_resource_pool_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_resource_pool_stmt::GetDescriptor(), msg); + } + + void VisitAlterResourcePool(const TRule_alter_resource_pool_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitToken(msg.GetToken1()); + VisitToken(msg.GetToken2()); + VisitToken(msg.GetToken3()); + Visit(msg.GetRule_object_ref4()); + + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_alter_resource_pool_action5()); + for (const auto& action : msg.GetBlock6()) { + Visit(action.GetToken1()); // comma + NewLine(); + Visit(action.GetRule_alter_resource_pool_action2()); + } + + PopCurrentIndent(); + } + + void VisitDropResourcePool(const TRule_drop_resource_pool_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_resource_pool_stmt::GetDescriptor(), msg); + } + + void VisitCreateBackupCollection(const TRule_create_backup_collection_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_backup_collection_stmt::GetDescriptor(), msg); + } + + void VisitAlterBackupCollection(const TRule_alter_backup_collection_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitToken(msg.GetToken1()); + Visit(msg.GetRule_backup_collection2()); + + NewLine(); + PushCurrentIndent(); + switch (msg.GetBlock3().Alt_case()) { + case TRule_alter_backup_collection_stmt_TBlock3::kAlt1: { + Visit(msg.GetBlock3().GetAlt1().GetRule_alter_backup_collection_actions1().GetRule_alter_backup_collection_action1()); + for (const auto& action : msg.GetBlock3().GetAlt1().GetRule_alter_backup_collection_actions1().GetBlock2()) { + Visit(action.GetToken1()); // comma + NewLine(); + Visit(action.GetRule_alter_backup_collection_action2()); + } + break; + } + case TRule_alter_backup_collection_stmt_TBlock3::kAlt2: { + Visit(msg.GetBlock3().GetAlt2().GetRule_alter_backup_collection_entries1().GetRule_alter_backup_collection_entry1()); + for (const 
auto& entry : msg.GetBlock3().GetAlt2().GetRule_alter_backup_collection_entries1().GetBlock2()) { + Visit(entry.GetToken1()); // comma + NewLine(); + Visit(entry.GetRule_alter_backup_collection_entry2()); + } + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + + PopCurrentIndent(); + } + + void VisitDropBackupCollection(const TRule_drop_backup_collection_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_backup_collection_stmt::GetDescriptor(), msg); + } + + void VisitCreateResourcePoolClassifier(const TRule_create_resource_pool_classifier_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_create_resource_pool_classifier_stmt::GetDescriptor(), msg); + } + + void VisitAlterResourcePoolClassifier(const TRule_alter_resource_pool_classifier_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitToken(msg.GetToken1()); + VisitToken(msg.GetToken2()); + VisitToken(msg.GetToken3()); + VisitToken(msg.GetToken4()); + Visit(msg.GetRule_object_ref5()); + + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_alter_resource_pool_classifier_action6()); + for (const auto& action : msg.GetBlock7()) { + Visit(action.GetToken1()); // comma + NewLine(); + Visit(action.GetRule_alter_resource_pool_classifier_action2()); + } + + PopCurrentIndent(); + } + + void VisitDropResourcePoolClassifier(const TRule_drop_resource_pool_classifier_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_drop_resource_pool_classifier_stmt::GetDescriptor(), msg); + } + + void VisitAllFields(const NProtoBuf::Descriptor* descr, const NProtoBuf::Message& msg) { + VisitAllFieldsImpl<TPrettyVisitor, &TPrettyVisitor::Visit>(this, descr, msg); + } + + void WriteComments() { + while (LastComment < Comments.size()) { + const auto& c = Comments[LastComment]; + if (c.Line > LastLine || c.Line == LastLine && c.LinePos > LastColumn) { + break; + } + + AddComment(c.Content); + 
++LastComment; + } + } + + void PosFromToken(const TToken& token) { + LastLine = token.GetLine(); + LastColumn = token.GetColumn(); + WriteComments(); + } + + void PosFromParsedToken(const TParsedToken& token) { + LastLine = token.Line; + LastColumn = token.LinePos; + WriteComments(); + } + + void VisitToken(const TToken& token) { + VisitTokenImpl(token, false); + } + + void VisitKeyword(const TToken& token) { + VisitTokenImpl(token, true); + } + + void VisitTokenImpl(const TToken& token, bool forceKeyword) { + PosFromToken(token); + auto str = token.GetValue(); + + if (str == "<EOF>") { + return; + } + + //Cerr << str << "\n"; + auto currentScope = Scopes.back(); + if (!SkipSpaceAfterUnaryOp && !InMultiTokenOp) { + if (AfterLess && str == ">") { + Out(' '); + } else if (AfterDigits && str == ".") { + Out(' '); + } else if (OutColumn && (currentScope == EScope::DoubleQuestion || str != "?") + && str != ":" && str != "." && str != "," && str != ";" && str != ")" && str != "]" + && str != "}" && str != "|>" && str != "::" && !AfterNamespace && !AfterBracket + && !AfterInvokeExpr && !AfterDollarOrAt && !AfterDot && (!AfterQuestion || str != "?") + && (!InsideType || (str != "<" && str != ">" && str != "<>")) + && (!InsideType || !AfterLess) + && (!AfterKeyExpr || str != "[") + ) { + Out(' '); + } + } + + SkipSpaceAfterUnaryOp = false; + if (AfterUnaryOp) { + if (str == "+" || str == "-" || str == "~") { + SkipSpaceAfterUnaryOp = true; + } + + AfterUnaryOp = false; + } + + AfterInvokeExpr = false; + AfterNamespace = (str == "::"); + AfterBracket = (str == "(" || str == "[" || str == "{" || str == "<|"); + AfterDot = (str == "."); + AfterDigits = !str.empty() && AllOf(str, [](char c) { return c >= '0' && c <= '9'; }); + AfterQuestion = (str == "?"); + AfterLess = (str == "<"); + AfterKeyExpr = false; + + if (forceKeyword) { + str = to_upper(str); + } else if (currentScope == EScope::Default) { + if (auto p = StaticData.Keywords.find(to_upper(str)); p != 
StaticData.Keywords.end()) { + str = *p; + } + } + + AfterDollarOrAt = (str == "$" || str == "@"); + + const auto& markedInfo = MarkedTokens[TokenIndex]; + if (markedInfo.ClosingBracket) { + Y_ENSURE(!MarkTokenStack.empty()); + auto beginTokenIndex = MarkTokenStack.back(); + if (markedInfo.BracketForcedExpansion || ParsedTokens[beginTokenIndex].Line != ParsedTokens[TokenIndex].Line) { + // multiline + PopCurrentIndent(); + NewLine(); + } + + MarkTokenStack.pop_back(); + } + + Out(str); + if (str == ";") { + Out('\n'); + } + + if (markedInfo.OpeningBracket) { + MarkTokenStack.push_back(TokenIndex); + if (markedInfo.BracketForcedExpansion || ParsedTokens[TokenIndex].Line != ParsedTokens[markedInfo.ClosingBracketIndex].Line) { + // multiline + PushCurrentIndent(); + NewLine(); + } + } + + if (str == "," && !MarkTokenStack.empty()) { + const bool addNewline = + (TokenIndex + 1 < ParsedTokens.size() && ParsedTokens[TokenIndex].Line != ParsedTokens[TokenIndex + 1].Line) + || (TokenIndex > 0 && ParsedTokens[TokenIndex - 1].Line != ParsedTokens[TokenIndex].Line); + // add line for trailing comma + if (addNewline) { + NewLine(); + } + } + + TokenIndex++; + } + + void VisitIntoValuesSource(const TRule_into_values_source& msg) { + switch (msg.Alt_case()) { + case TRule_into_values_source::kAltIntoValuesSource1: { + const auto& alt = msg.GetAlt_into_values_source1(); + if (alt.HasBlock1()) { + const auto& columns = alt.GetBlock1().GetRule_pure_column_list1(); + Visit(columns.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(columns.GetRule_an_id2()); + for (const auto& block : columns.GetBlock3()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_an_id2()); + } + + PopCurrentIndent(); + NewLine(); + Visit(columns.GetToken4()); + NewLine(); + } + + Visit(alt.GetRule_values_source2()); + break; + } + case TRule_into_values_source::kAltIntoValuesSource2: { + VisitAllFields(TRule_into_values_source::GetDescriptor(), msg); + break; + } + default: + ythrow 
yexception() << "Alt is not supported"; + } + } + + void VisitSelectKind(const TRule_select_kind& msg) { + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + } + + Visit(msg.GetBlock2()); + if (msg.HasBlock3()) { + NewLine(); + Visit(msg.GetBlock3()); + } + } + + void VisitProcessCore(const TRule_process_core& msg) { + Visit(msg.GetToken1()); + if (msg.HasBlock2()) { + Visit(msg.GetBlock2()); + } + + Visit(msg.GetRule_named_single_source3()); + VisitRepeated(msg.GetBlock4()); + if (msg.HasBlock5()) { + NewLine(); + const auto& block5 = msg.GetBlock5(); + Visit(block5.GetToken1()); + Visit(block5.GetRule_using_call_expr2()); + if (block5.HasBlock3()) { + Visit(block5.GetBlock3()); + } + + if (block5.HasBlock4()) { + NewLine(); + Visit(block5.GetBlock4()); + } + + if (block5.HasBlock5()) { + NewLine(); + Visit(block5.GetBlock5()); + } + + if (block5.HasBlock6()) { + NewLine(); + Visit(block5.GetBlock6()); + } + + if (block5.HasBlock7()) { + NewLine(); + Visit(block5.GetBlock7()); + } + } + } + + void VisitReduceCore(const TRule_reduce_core& msg) { + Visit(msg.GetToken1()); + Visit(msg.GetRule_named_single_source2()); + VisitRepeated(msg.GetBlock3()); + + if (msg.HasBlock4()) { + NewLine(); + Visit(msg.GetBlock4()); + } + + NewLine(); + Visit(msg.GetToken5()); + const auto& columns = msg.GetRule_column_list6(); + NewLine(); + PushCurrentIndent(); + Visit(columns.GetRule_column_name1()); + for (const auto& block : columns.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_column_name2()); + } + + if (columns.HasBlock3()) { + Visit(columns.GetBlock3()); + } + + PopCurrentIndent(); + NewLine(); + Visit(msg.GetToken7()); + if (msg.HasBlock8()) { + Visit(msg.GetBlock8()); + } + + Visit(msg.GetRule_using_call_expr9()); + if (msg.HasBlock10()) { + Visit(msg.GetBlock10()); + } + + if (msg.HasBlock11()) { + NewLine(); + Visit(msg.GetBlock11()); + } + + if (msg.HasBlock12()) { + NewLine(); + Visit(msg.GetBlock12()); + } + + if (msg.HasBlock13()) { + 
NewLine(); + Visit(msg.GetBlock13()); + } + } + + void VisitSortSpecificationList(const TRule_sort_specification_list& msg) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_sort_specification1()); + for (const auto& block : msg.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_sort_specification2()); + } + + PopCurrentIndent(); + } + + void VisitSelectCore(const TRule_select_core& msg) { + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + NewLine(); + } + + Visit(msg.GetToken2()); + if (msg.HasBlock3()) { + Visit(msg.GetBlock3()); + } + + Visit(msg.GetRule_opt_set_quantifier4()); + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_result_column5()); + for (const auto& block : msg.GetBlock6()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_result_column2()); + } + + if (msg.HasBlock7()) { + Visit(msg.GetBlock7()); + } + + if (msg.HasBlock8()) { + NewLine(); + Visit(msg.GetBlock8()); + } + + PopCurrentIndent(); + if (msg.HasBlock9()) { + NewLine(); + Visit(msg.GetBlock9()); + } + + if (msg.HasBlock10()) { + NewLine(); + Visit(msg.GetBlock10()); + } + + if (msg.HasBlock11()) { + NewLine(); + Visit(msg.GetBlock11()); + } + + if (msg.HasBlock12()) { + NewLine(); + Visit(msg.GetBlock12()); + } + + if (msg.HasBlock13()) { + NewLine(); + Visit(msg.GetBlock13()); + } + + if (msg.HasBlock14()) { + NewLine(); + Visit(msg.GetBlock14()); + } + } + + void VisitJoinSource(const TRule_join_source& msg) { + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + } + + Visit(msg.GetRule_flatten_source2()); + for (const auto& block : msg.GetBlock3()) { + NewLine(); + Visit(block.GetRule_join_op1()); + if (block.HasBlock2()) { + Visit(block.GetBlock2()); + } + + Visit(block.GetRule_flatten_source3()); + if (block.HasBlock4()) { + NewLine(); + Visit(block.GetBlock4()); + } + } + } + + void VisitSingleSource(const TRule_single_source& msg) { + switch (msg.Alt_case()) { + case TRule_single_source::kAltSingleSource1: { + const auto& 
alt = msg.GetAlt_single_source1(); + Visit(alt); + break; + } + case TRule_single_source::kAltSingleSource2: { + const auto& alt = msg.GetAlt_single_source2(); + Visit(alt.GetToken1()); + PushCurrentIndent(); + Visit(alt.GetRule_select_stmt2()); + PopCurrentIndent(); + NewLine(); + Visit(alt.GetToken3()); + break; + } + case TRule_single_source::kAltSingleSource3: { + const auto& alt = msg.GetAlt_single_source3(); + Visit(alt.GetToken1()); + PushCurrentIndent(); + Visit(alt.GetRule_values_stmt2()); + PopCurrentIndent(); + NewLine(); + Visit(alt.GetToken3()); + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + } + + void VisitFlattenSource(const TRule_flatten_source& msg) { + Visit(msg.GetRule_named_single_source1()); + if (msg.HasBlock2()) { + PushCurrentIndent(); + NewLine(); + Visit(msg.GetBlock2()); + PopCurrentIndent(); + } + } + + void VisitNamedSingleSource(const TRule_named_single_source& msg) { + Visit(msg.GetRule_single_source1()); + if (msg.HasBlock2()) { + const auto& matchRecognize = msg.GetBlock2(); + //TODO handle MATCH_RECOGNIZE block + //https://st.yandex-team.ru/YQL-16186 + Visit(matchRecognize); + } + if (msg.HasBlock3()) { + NewLine(); + PushCurrentIndent(); + const auto& block3 = msg.GetBlock3(); + Visit(block3.GetBlock1()); + if (block3.HasBlock2()) { + const auto& columns = block3.GetBlock2().GetRule_pure_column_list1(); + Visit(columns.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(columns.GetRule_an_id2()); + for (const auto& block : columns.GetBlock3()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_an_id2()); + } + + NewLine(); + PopCurrentIndent(); + Visit(columns.GetToken4()); + } + + PopCurrentIndent(); + } + + if (msg.HasBlock4()) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetBlock4()); + PopCurrentIndent(); + } + } + + void VisitSimpleTableRef(const TRule_simple_table_ref& msg) { + Visit(msg.GetRule_simple_table_ref_core1()); + if (msg.HasBlock2()) { + NewLine(); + 
PushCurrentIndent(); + Visit(msg.GetBlock2()); + PopCurrentIndent(); + } + } + + void VisitIntoSimpleTableRef(const TRule_into_simple_table_ref& msg) { + Visit(msg.GetRule_simple_table_ref1()); + if (msg.HasBlock2()) { + const auto& block2 = msg.GetBlock2(); + NewLine(); + PushCurrentIndent(); + Visit(block2.GetToken1()); + Visit(block2.GetToken2()); + const auto& columns = block2.GetRule_pure_column_list3(); + Visit(columns.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(columns.GetRule_an_id2()); + for (const auto& block : columns.GetBlock3()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_an_id2()); + } + + PopCurrentIndent(); + NewLine(); + Visit(columns.GetToken4()); + PopCurrentIndent(); + } + } + + void VisitSelectKindPartial(const TRule_select_kind_partial& msg) { + Visit(msg.GetRule_select_kind1()); + if (msg.HasBlock2()) { + NewLine(); + Visit(msg.GetBlock2()); + } + } + + void VisitFlattenByArg(const TRule_flatten_by_arg& msg) { + switch (msg.Alt_case()) { + case TRule_flatten_by_arg::kAltFlattenByArg1: { + const auto& alt = msg.GetAlt_flatten_by_arg1(); + NewLine(); + PushCurrentIndent(); + Visit(alt); + PopCurrentIndent(); + break; + } + case TRule_flatten_by_arg::kAltFlattenByArg2: { + const auto& alt = msg.GetAlt_flatten_by_arg2(); + Visit(alt.GetToken1()); + NewLine(); + PushCurrentIndent(); + const auto& exprs = alt.GetRule_named_expr_list2(); + Visit(exprs.GetRule_named_expr1()); + for (const auto& block : exprs.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_named_expr2()); + } + + if (alt.HasBlock3()) { + Visit(alt.GetBlock3()); + } + + NewLine(); + PopCurrentIndent(); + Visit(alt.GetToken4()); + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + } + + void VisitWithoutColumnList(const TRule_without_column_list& msg) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_without_column_name1()); + for (const auto& block : msg.GetBlock2()) { + 
Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_without_column_name2()); + } + + if (msg.HasBlock3()) { + Visit(msg.GetBlock3()); + } + + PopCurrentIndent(); + } + + void VisitTableRef(const TRule_table_ref& msg) { + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + } + + if (msg.HasBlock2()) { + Visit(msg.GetBlock2()); + } + + const auto& block3 = msg.GetBlock3(); + switch (block3.Alt_case()) { + case TRule_table_ref::TBlock3::kAlt1: { + const auto& alt = block3.GetAlt1(); + const auto& key = alt.GetRule_table_key1(); + Visit(key.GetRule_id_table_or_type1()); + if (key.HasBlock2()) { + NewLine(); + PushCurrentIndent(); + Visit(key.GetBlock2()); + PopCurrentIndent(); + } + + break; + } + case TRule_table_ref::TBlock3::kAlt2: { + const auto& alt = block3.GetAlt2(); + Visit(alt.GetRule_an_id_expr1()); + AfterInvokeExpr = true; + Visit(alt.GetToken2()); + if (alt.HasBlock3()) { + Visit(alt.GetBlock3()); + } + + Visit(alt.GetToken4()); + break; + } + case TRule_table_ref::TBlock3::kAlt3: { + const auto& alt = block3.GetAlt3(); + Visit(alt.GetRule_bind_parameter1()); + if (alt.HasBlock2()) { + AfterInvokeExpr = true; + Visit(alt.GetBlock2()); + } + + if (alt.HasBlock3()) { + NewLine(); + PushCurrentIndent(); + Visit(alt.GetBlock3()); + PopCurrentIndent(); + } + + break; + } + default: + ythrow yexception() << "Alt is not supported"; + } + + if (msg.HasBlock4()) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetBlock4()); + PopCurrentIndent(); + } + } + + void VisitGroupingElementList(const TRule_grouping_element_list& msg) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_grouping_element1()); + for (const auto& block : msg.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_grouping_element2()); + } + + PopCurrentIndent(); + } + + void VisitGroupByClause(const TRule_group_by_clause& msg) { + Visit(msg.GetToken1()); + if (msg.HasBlock2()) { + Visit(msg.GetBlock2()); + } + + Visit(msg.GetToken3()); + 
Visit(msg.GetRule_opt_set_quantifier4()); + Visit(msg.GetRule_grouping_element_list5()); + if (msg.HasBlock6()) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetBlock6()); + PopCurrentIndent(); + } + } + + void VisitWindowDefinitionList(const TRule_window_definition_list& msg) { + NewLine(); + PushCurrentIndent(); + + Visit(msg.GetRule_window_definition1()); + for (const auto& block : msg.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_window_definition2()); + } + + PopCurrentIndent(); + } + + void VisitWindowSpecification(const TRule_window_specification& msg) { + Visit(msg.GetToken1()); + const auto& details = msg.GetRule_window_specification_details2(); + const bool needsNewline = details.HasBlock1() || details.HasBlock2() || + details.HasBlock3() || details.HasBlock4(); + if (needsNewline) { + NewLine(); + PushCurrentIndent(); + } + + if (details.HasBlock1()) { + NewLine(); + Visit(details.GetBlock1()); + } + + if (details.HasBlock2()) { + NewLine(); + Visit(details.GetBlock2()); + } + + if (details.HasBlock3()) { + NewLine(); + Visit(details.GetBlock3()); + } + + if (details.HasBlock4()) { + NewLine(); + Visit(details.GetBlock4()); + } + + if (needsNewline) { + NewLine(); + PopCurrentIndent(); + } + + Visit(msg.GetToken3()); + } + + void VisitWindowParitionClause(const TRule_window_partition_clause& msg) { + Visit(msg.GetToken1()); + if (msg.HasBlock2()) { + Visit(msg.GetBlock2()); + } + + Visit(msg.GetToken3()); + const auto& exprs = msg.GetRule_named_expr_list4(); + PushCurrentIndent(); + NewLine(); + Visit(exprs.GetRule_named_expr1()); + for (const auto& block : exprs.GetBlock2()) { + Visit(block.GetToken1()); + NewLine(); + Visit(block.GetRule_named_expr2()); + } + + PopCurrentIndent(); + } + + void VisitLambdaBody(const TRule_lambda_body& msg) { + PushCurrentIndent(); + NewLine(); + VisitRepeated(msg.GetBlock1()); + for (const auto& block : msg.GetBlock2()) { + Visit(block); + NewLine(); + } + + Visit(msg.GetToken3()); + 
Visit(msg.GetRule_expr4()); + VisitRepeated(msg.GetBlock5()); + + PopCurrentIndent(); + NewLine(); + } + + void VisitInAtomExpr(const TRule_in_atom_expr& msg) { + if (msg.Alt_case() == TRule_in_atom_expr::kAltInAtomExpr7) { + const auto& alt = msg.GetAlt_in_atom_expr7(); + Visit(alt.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(alt.GetRule_select_stmt2()); + NewLine(); + PopCurrentIndent(); + Visit(alt.GetToken3()); + } else { + VisitAllFields(TRule_in_atom_expr::GetDescriptor(), msg); + } + } + + void VisitSelectKindParenthesis(const TRule_select_kind_parenthesis& msg) { + if (msg.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis2) { + const auto& alt = msg.GetAlt_select_kind_parenthesis2(); + Visit(alt.GetToken1()); + NewLine(); + PushCurrentIndent(); + Visit(alt.GetRule_select_kind_partial2()); + PopCurrentIndent(); + NewLine(); + Visit(alt.GetToken3()); + } else { + VisitAllFields(TRule_select_kind_parenthesis::GetDescriptor(), msg); + } + } + + void VisitCastExpr(const TRule_cast_expr& msg) { + Visit(msg.GetToken1()); + AfterInvokeExpr = true; + Visit(msg.GetToken2()); + Visit(msg.GetRule_expr3()); + Visit(msg.GetToken4()); + Visit(msg.GetRule_type_name_or_bind5()); + Visit(msg.GetToken6()); + } + + void VisitBitCastExpr(const TRule_bitcast_expr& msg) { + Visit(msg.GetToken1()); + AfterInvokeExpr = true; + Visit(msg.GetToken2()); + Visit(msg.GetRule_expr3()); + Visit(msg.GetToken4()); + Visit(msg.GetRule_type_name_simple5()); + Visit(msg.GetToken6()); + } + + void VisitExtOrderByClause(const TRule_ext_order_by_clause& msg) { + if (msg.HasBlock1()) { + Visit(msg.GetBlock1()); + } + + Visit(msg.GetRule_order_by_clause2()); + } + + void VisitKeyExpr(const TRule_key_expr& msg) { + AfterKeyExpr = true; + VisitAllFields(TRule_key_expr::GetDescriptor(), msg); + } + + void VisitExistsExpr(const TRule_exists_expr& msg) { + VisitKeyword(msg.GetToken1()); + VisitToken(msg.GetToken2()); + + NewLine(); + PushCurrentIndent(); + + 
Visit(msg.GetBlock3()); + + PopCurrentIndent(); + NewLine(); + + VisitToken(msg.GetToken4()); + } + + void VisitCaseExpr(const TRule_case_expr& msg) { + VisitKeyword(msg.GetToken1()); + if (msg.HasBlock2()) { + Visit(msg.GetBlock2()); + } + NewLine(); + PushCurrentIndent(); + + for (const auto& block : msg.GetBlock3()) { + Visit(block); + NewLine(); + } + + if (msg.HasBlock4()) { + const auto& block = msg.GetBlock4(); + VisitKeyword(block.GetToken1()); + Visit(block.GetRule_expr2()); + } + + PopCurrentIndent(); + NewLine(); + Visit(msg.GetToken5()); + } + + void VisitWhenExpr(const TRule_when_expr& msg) { + VisitKeyword(msg.GetToken1()); + Visit(msg.GetRule_expr2()); + + NewLine(); + PushCurrentIndent(); + VisitKeyword(msg.GetToken3()); + Visit(msg.GetRule_expr4()); + PopCurrentIndent(); + } + + void VisitWithTableSettingsExpr(const TRule_with_table_settings& msg) { + VisitKeyword(msg.GetToken1()); + Visit(msg.GetToken2()); + + const bool needIndent = msg.Block4Size() > 0; // more then one setting + if (needIndent) { + NewLine(); + PushCurrentIndent(); + Visit(msg.GetRule_table_settings_entry3()); // first setting + + for (const auto& entry : msg.GetBlock4()) { + Visit(entry.GetToken1()); // comma + NewLine(); + Visit(entry.GetRule_table_settings_entry2()); // other settings + } + PopCurrentIndent(); + NewLine(); + } else { + Visit(msg.GetRule_table_settings_entry3()); + } + + Visit(msg.GetToken5()); + } + + void VisitExpr(const TRule_expr& msg) { + if (msg.HasAlt_expr2()) { + Visit(msg.GetAlt_expr2()); + return; + } + const auto& orExpr = msg.GetAlt_expr1(); + auto getExpr = [](const TRule_expr::TAlt1::TBlock2& b) -> const TRule_or_subexpr& { return b.GetRule_or_subexpr2(); }; + auto getOp = [](const TRule_expr::TAlt1::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(orExpr.GetRule_or_subexpr1(), getOp, getExpr, orExpr.GetBlock2().begin(), orExpr.GetBlock2().end()); + } + + void VisitOrSubexpr(const TRule_or_subexpr& msg) { + auto getExpr = 
[](const TRule_or_subexpr::TBlock2& b) -> const TRule_and_subexpr& { return b.GetRule_and_subexpr2(); }; + auto getOp = [](const TRule_or_subexpr::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(msg.GetRule_and_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + } + + void VisitAndSubexpr(const TRule_and_subexpr& msg) { + auto getExpr = [](const TRule_and_subexpr::TBlock2& b) -> const TRule_xor_subexpr& { return b.GetRule_xor_subexpr2(); }; + auto getOp = [](const TRule_and_subexpr::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(msg.GetRule_xor_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + } + + void VisitEqSubexpr(const TRule_eq_subexpr& msg) { + auto getExpr = [](const TRule_eq_subexpr::TBlock2& b) -> const TRule_neq_subexpr& { return b.GetRule_neq_subexpr2(); }; + auto getOp = [](const TRule_eq_subexpr::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(msg.GetRule_neq_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + } + + void VisitNeqSubexpr(const TRule_neq_subexpr& msg) { + VisitNeqSubexprImpl(msg, false); + } + + void VisitNeqSubexprImpl(const TRule_neq_subexpr& msg, bool pushedIndent) { + auto getExpr = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); }; + auto getOp = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_neq_subexpr::TBlock2::TBlock1& { return b.GetBlock1(); }; + VisitBinaryOp(msg.GetRule_bit_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + + if (msg.HasBlock3()) { + const auto& b = msg.GetBlock3(); + switch (b.Alt_case()) { + case TRule_neq_subexpr_TBlock3::kAlt1: { + const auto& alt = b.GetAlt1(); + const bool hasFirstNewline = LastLine != ParsedTokens[TokenIndex].Line; + // 2 is `??` size in tokens + const bool hasSecondNewline = ParsedTokens[TokenIndex].Line != ParsedTokens[TokenIndex + 2].Line; 
+ const ui32 currentOutLine = OutLine; + + PosFromParsedToken(ParsedTokens[TokenIndex]); + if (currentOutLine != OutLine || (hasFirstNewline && hasSecondNewline)) { + NewLine(); + if (!pushedIndent) { + PushCurrentIndent(); + pushedIndent = true; + } + } + + Visit(alt.GetRule_double_question1()); + PosFromParsedToken(ParsedTokens[TokenIndex]); + if (hasFirstNewline || hasSecondNewline) { + NewLine(); + if (!pushedIndent) { + PushCurrentIndent(); + pushedIndent = true; + } + } + + VisitNeqSubexprImpl(alt.GetRule_neq_subexpr2(), pushedIndent); + if (pushedIndent) { + PopCurrentIndent(); + } + + break; + } + case TRule_neq_subexpr_TBlock3::kAlt2: + Visit(b.GetAlt2()); + break; + default: + ythrow yexception() << "Alt is not supported"; + } + } + } + + void VisitBitSubexpr(const TRule_bit_subexpr& msg) { + auto getExpr = [](const TRule_bit_subexpr::TBlock2& b) -> const TRule_add_subexpr& { return b.GetRule_add_subexpr2(); }; + auto getOp = [](const TRule_bit_subexpr::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(msg.GetRule_add_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + } + + void VisitAddSubexpr(const TRule_add_subexpr& msg) { + auto getExpr = [](const TRule_add_subexpr::TBlock2& b) -> const TRule_mul_subexpr& { return b.GetRule_mul_subexpr2(); }; + auto getOp = [](const TRule_add_subexpr::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(msg.GetRule_mul_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + } + + void VisitMulSubexpr(const TRule_mul_subexpr& msg) { + auto getExpr = [](const TRule_mul_subexpr::TBlock2& b) -> const TRule_con_subexpr& { return b.GetRule_con_subexpr2(); }; + auto getOp = [](const TRule_mul_subexpr::TBlock2& b) -> const TToken& { return b.GetToken1(); }; + VisitBinaryOp(msg.GetRule_con_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end()); + } + + ui32 BinaryOpTokenSize(const TToken&) { + return 1; + } + + ui32 
BinaryOpTokenSize(const TRule_neq_subexpr::TBlock2::TBlock1& block) { + switch (block.Alt_case()) { + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt1: + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt3: + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt5: + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt6: + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt7: + return 1; + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt2: + return 2; + case TRule_neq_subexpr::TBlock2::TBlock1::kAlt4: + return 3; + default: + ythrow yexception() << "Alt is not supported"; + } + } + + void VisitShiftRight(const TRule_shift_right& msg) { + VisitToken(msg.GetToken1()); + InMultiTokenOp = true; + VisitToken(msg.GetToken2()); + InMultiTokenOp = false; + } + + void VisitRotRight(const TRule_rot_right& msg) { + VisitToken(msg.GetToken1()); + InMultiTokenOp = true; + VisitToken(msg.GetToken2()); + VisitToken(msg.GetToken3()); + InMultiTokenOp = false; + } + + template <typename TExpr, typename TGetOp, typename TGetExpr, typename TIter> + void VisitBinaryOp(const TExpr& expr, TGetOp getOp, TGetExpr getExpr, TIter begin, TIter end) { + Visit(expr); + bool pushedIndent = false; + + for (; begin != end; ++begin) { + const auto op = getOp(*begin); + const auto opSize = BinaryOpTokenSize(op); + const bool hasFirstNewline = LastLine != ParsedTokens[TokenIndex].Line; + const bool hasSecondNewline = ParsedTokens[TokenIndex].Line != ParsedTokens[TokenIndex + opSize].Line; + const ui32 currentOutLine = OutLine; + + PosFromParsedToken(ParsedTokens[TokenIndex]); + if (currentOutLine != OutLine || (hasFirstNewline && hasSecondNewline)) { + NewLine(); + if (!pushedIndent) { + PushCurrentIndent(); + pushedIndent = true; + } + } + Visit(op); + + PosFromParsedToken(ParsedTokens[TokenIndex]); + if (hasFirstNewline || hasSecondNewline) { + NewLine(); + if (!pushedIndent) { + PushCurrentIndent(); + pushedIndent = true; + } + } + + Visit(getExpr(*begin)); + } + + if (pushedIndent) { + PopCurrentIndent(); + } + } + + 
void PushCurrentIndent() { + CurrentIndent += OneIndent; + } + + void PopCurrentIndent() { + CurrentIndent -= OneIndent; + } + +private: + const TStaticData& StaticData; + const TParsedTokenList& ParsedTokens; + const TParsedTokenList& Comments; + TStringBuilder SB; + ui32 OutColumn = 0; + ui32 OutLine = 1; + ui32 LastLine = 0; + ui32 LastColumn = 0; + ui32 LastComment = 0; + i32 CurrentIndent = 0; + TVector<EScope> Scopes; + TMaybe<bool> AddLine; + ui64 InsideType = 0; + bool AfterNamespace = false; + bool AfterBracket = false; + bool AfterInvokeExpr = false; + bool AfterUnaryOp = false; + bool SkipSpaceAfterUnaryOp = false; + bool AfterDollarOrAt = false; + bool AfterDot = false; + bool AfterDigits = false; + bool AfterQuestion = false; + bool AfterLess = false; + bool AfterKeyExpr = false; + bool InMultiTokenOp = false; + ui32 ForceExpandedLine = 0; + ui32 ForceExpandedColumn = 0; + + ui32 TokenIndex = 0; + TMarkTokenStack MarkTokenStack; + TVector<TTokenInfo> MarkedTokens; + ui64 InsideExpr = 0; +}; + +template <typename T> +TPrettyFunctor MakePrettyFunctor(void (TPrettyVisitor::*memberPtr)(const T& msg)) { + return [memberPtr](TPrettyVisitor& visitor, const NProtoBuf::Message& rawMsg) { + (visitor.*memberPtr)(dynamic_cast<const T&>(rawMsg)); + }; +} + +template <typename T> +TObfuscatingFunctor MakeObfuscatingFunctor(void (TObfuscatingVisitor::*memberPtr)(const T& msg)) { + return [memberPtr](TObfuscatingVisitor& visitor, const NProtoBuf::Message& rawMsg) { + (visitor.*memberPtr)(dynamic_cast<const T&>(rawMsg)); + }; +} + +TStaticData::TStaticData() + : Keywords(GetKeywords()) + , ScopeDispatch({ + {TRule_type_name::GetDescriptor(), EScope::TypeName}, + {TRule_type_name_composite::GetDescriptor(), EScope::TypeName}, + {TRule_double_question::GetDescriptor(), EScope::DoubleQuestion}, + {TRule_id::GetDescriptor(), EScope::Identifier}, + {TRule_id_or_type::GetDescriptor(), EScope::Identifier}, + {TRule_id_schema::GetDescriptor(), EScope::Identifier}, + 
{TRule_id_expr::GetDescriptor(), EScope::Identifier}, + {TRule_id_expr_in::GetDescriptor(), EScope::Identifier}, + {TRule_id_window::GetDescriptor(), EScope::Identifier}, + {TRule_id_table::GetDescriptor(), EScope::Identifier}, + {TRule_id_without::GetDescriptor(), EScope::Identifier}, + {TRule_id_hint::GetDescriptor(), EScope::Identifier}, + {TRule_identifier::GetDescriptor(), EScope::Identifier}, + {TRule_id_table_or_type::GetDescriptor(), EScope::Identifier}, + {TRule_bind_parameter::GetDescriptor(), EScope::Identifier}, + {TRule_an_id_as_compat::GetDescriptor(), EScope::Identifier}, + }) + , PrettyVisitDispatch({ + {TToken::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitToken)}, + {TRule_into_values_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIntoValuesSource)}, + {TRule_select_kind::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectKind)}, + {TRule_process_core::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitProcessCore)}, + {TRule_reduce_core::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitReduceCore)}, + {TRule_sort_specification_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSortSpecificationList)}, + {TRule_select_core::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectCore)}, + {TRule_join_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitJoinSource)}, + {TRule_single_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSingleSource)}, + {TRule_flatten_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitFlattenSource)}, + {TRule_named_single_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitNamedSingleSource)}, + {TRule_simple_table_ref::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSimpleTableRef)}, + {TRule_into_simple_table_ref::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIntoSimpleTableRef)}, + {TRule_select_kind_partial::GetDescriptor(), 
MakePrettyFunctor(&TPrettyVisitor::VisitSelectKindPartial)}, + {TRule_flatten_by_arg::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitFlattenByArg)}, + {TRule_without_column_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWithoutColumnList)}, + {TRule_table_ref::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitTableRef)}, + {TRule_grouping_element_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitGroupingElementList)}, + {TRule_group_by_clause::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitGroupByClause)}, + {TRule_window_definition_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWindowDefinitionList)}, + {TRule_window_specification::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWindowSpecification)}, + {TRule_window_partition_clause::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWindowParitionClause)}, + {TRule_lambda_body::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitLambdaBody)}, + {TRule_in_atom_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitInAtomExpr)}, + {TRule_select_kind_parenthesis::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectKindParenthesis)}, + {TRule_cast_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCastExpr)}, + {TRule_bitcast_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBitCastExpr)}, + {TRule_ext_order_by_clause::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExtOrderByClause)}, + {TRule_key_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitKeyExpr)}, + {TRule_define_action_or_subquery_body::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDefineActionOrSubqueryBody)}, + {TRule_exists_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExistsExpr)}, + {TRule_case_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCaseExpr)}, + {TRule_when_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWhenExpr)}, + 
{TRule_with_table_settings::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWithTableSettingsExpr)}, + + {TRule_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExpr)}, + {TRule_or_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitOrSubexpr)}, + {TRule_and_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAndSubexpr)}, + {TRule_eq_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitEqSubexpr)}, + {TRule_neq_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitNeqSubexpr)}, + {TRule_bit_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBitSubexpr)}, + {TRule_add_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAddSubexpr)}, + {TRule_mul_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitMulSubexpr)}, + + {TRule_rot_right::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRotRight)}, + {TRule_shift_right::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitShiftRight)}, + + {TRule_pragma_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitPragma)}, + {TRule_select_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelect)}, + {TRule_select_unparenthesized_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectUnparenthesized)}, + {TRule_named_nodes_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitNamedNodes)}, + {TRule_create_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateTable)}, + {TRule_drop_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropTable)}, + {TRule_use_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitUse)}, + {TRule_into_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIntoTable)}, + {TRule_commit_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCommit)}, + {TRule_update_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitUpdate)}, + 
{TRule_delete_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDelete)}, + {TRule_rollback_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRollback)}, + {TRule_declare_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDeclare)}, + {TRule_import_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitImport)}, + {TRule_export_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExport)}, + {TRule_alter_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterTable)}, + {TRule_alter_external_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterExternalTable)}, + {TRule_do_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDo)}, + {TRule_define_action_or_subquery_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAction)}, + {TRule_if_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIf)}, + {TRule_for_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitFor)}, + {TRule_values_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitValues)}, + {TRule_create_user_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateUser)}, + {TRule_alter_user_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterUser)}, + {TRule_create_group_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateGroup)}, + {TRule_alter_group_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterGroup)}, + {TRule_drop_role_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropRole)}, + {TRule_upsert_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitUpsertObject)}, + {TRule_create_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateObject)}, + {TRule_alter_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterObject)}, + {TRule_drop_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropObject)}, + 
{TRule_create_external_data_source_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateExternalDataSource)}, + {TRule_alter_external_data_source_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterExternalDataSource)}, + {TRule_drop_external_data_source_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropExternalDataSource)}, + {TRule_create_replication_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateAsyncReplication)}, + {TRule_alter_replication_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterAsyncReplication)}, + {TRule_drop_replication_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropAsyncReplication)}, + {TRule_create_topic_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateTopic)}, + {TRule_alter_topic_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterTopic)}, + {TRule_drop_topic_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropTopic)}, + {TRule_grant_permissions_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitGrantPermissions)}, + {TRule_revoke_permissions_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRevokePermissions)}, + {TRule_alter_table_store_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterTableStore)}, + {TRule_create_view_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateView)}, + {TRule_drop_view_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropView)}, + {TRule_create_resource_pool_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateResourcePool)}, + {TRule_alter_resource_pool_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterResourcePool)}, + {TRule_drop_resource_pool_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropResourcePool)}, + {TRule_create_backup_collection_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateBackupCollection)}, 
+ {TRule_alter_backup_collection_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterBackupCollection)}, + {TRule_drop_backup_collection_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropBackupCollection)}, + {TRule_analyze_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAnalyze)}, + {TRule_create_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateResourcePoolClassifier)}, + {TRule_alter_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterResourcePoolClassifier)}, + {TRule_drop_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropResourcePoolClassifier)}, + {TRule_backup_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBackup)}, + {TRule_restore_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRestore)}, + }) + , ObfuscatingVisitDispatch({ + {TToken::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitToken)}, + {TRule_literal_value::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitLiteralValue)}, + {TRule_pragma_value::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitPragmaValue)}, + {TRule_atom_expr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitAtomExpr)}, + {TRule_in_atom_expr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitInAtomExpr)}, + {TRule_unary_casual_subexpr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitUnaryCasualSubexpr)}, + {TRule_in_unary_casual_subexpr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitInUnaryCasualSubexpr)}, + }) +{ + // ensure that all statements have a visitor + auto coreDescr = TRule_sql_stmt_core::GetDescriptor(); + for (int i = 0; i < coreDescr->field_count(); ++i) { + const NProtoBuf::FieldDescriptor* fd = coreDescr->field(i); + if (fd->cpp_type() != NProtoBuf::FieldDescriptor::CPPTYPE_MESSAGE) { + 
continue; + } + + auto altDescr = fd->message_type(); + for (int j = 0; j < altDescr->field_count(); ++j) { + auto fd2 = altDescr->field(j); + if (fd2->cpp_type() != NProtoBuf::FieldDescriptor::CPPTYPE_MESSAGE) { + continue; + } + + auto stmtMessage = fd2->message_type(); + Y_ENSURE(PrettyVisitDispatch.contains(stmtMessage), TStringBuilder() << "Missing visitor for " << stmtMessage->name()); + } + } +} + +class TSqlFormatter : public NSQLFormat::ISqlFormatter { +public: + TSqlFormatter(const NSQLTranslation::TTranslationSettings& settings) + : Settings(settings) + {} + + bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues, EFormatMode mode) override { + formattedQuery = (mode == EFormatMode::Obfuscate) ? "" : query; + auto parsedSettings = Settings; + if (!NSQLTranslation::ParseTranslationSettings(query, parsedSettings, issues)) { + return false; + } + + if (parsedSettings.PgParser) { + return mode != EFormatMode::Obfuscate; + } + + if (mode == EFormatMode::Obfuscate) { + auto message = NSQLTranslationV1::SqlAST(query, "Query", issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena); + if (!message) { + return false; + } + + TObfuscatingVisitor visitor; + return Format(visitor.Process(*message), formattedQuery, issues, EFormatMode::Pretty); + } + + auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); + TParsedTokenList allTokens; + auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { + if (token.Name != "EOF") { + allTokens.push_back(token); + } + }; + + if (!lexer->Tokenize(query, "Query", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { + return false; + } + + TVector<TTokenIterator> statements; + SplitByStatements(allTokens.begin(), allTokens.end(), statements); + TStringBuilder finalFormattedQuery; + for (size_t i = 1; i < statements.size(); ++i) { + TStringBuilder 
currentQueryBuilder; + for (auto it = statements[i - 1]; it != statements[i]; ++it) { + currentQueryBuilder << it->Content; + } + + TString currentQuery = currentQueryBuilder; + currentQuery = StripStringLeft(currentQuery); + bool isBlank = true; + for (auto c : currentQuery) { + if (c != ';') { + isBlank = false; + break; + } + }; + + if (isBlank) { + continue; + } + + TVector<NSQLTranslation::TParsedToken> comments; + TParsedTokenList parsedTokens, stmtTokens; + bool hasTrailingComments = false; + auto onNextRawToken = [&](NSQLTranslation::TParsedToken&& token) { + stmtTokens.push_back(token); + if (token.Name == "COMMENT") { + comments.emplace_back(std::move(token)); + hasTrailingComments = true; + } else if (token.Name != "WS" && token.Name != "EOF") { + parsedTokens.emplace_back(std::move(token)); + hasTrailingComments = false; + } + }; + + if (!lexer->Tokenize(currentQuery, "Query", onNextRawToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { + return false; + } + + NYql::TIssues parserIssues; + auto message = NSQLTranslationV1::SqlAST(currentQuery, "Query", parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena); + if (!message) { + finalFormattedQuery << currentQuery; + if (!currentQuery.EndsWith("\n")) { + finalFormattedQuery << "\n"; + } + + continue; + } + + TPrettyVisitor visitor(parsedTokens, comments); + bool addLine; + auto currentFormattedQuery = visitor.Process(*message, addLine); + TParsedTokenList stmtFormattedTokens; + auto onNextFormattedToken = [&](NSQLTranslation::TParsedToken&& token) { + stmtFormattedTokens.push_back(token); + }; + + if (!lexer->Tokenize(currentFormattedQuery, "Query", onNextFormattedToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { + return false; + } + + if (!Validate(stmtFormattedTokens, stmtTokens)) { + issues.AddIssue(NYql::TIssue({}, TStringBuilder() << "Validation failed: " << currentQuery.Quote() << " 
!= " << currentFormattedQuery.Quote())); + return false; + } + + if (addLine && !finalFormattedQuery.empty()) { + finalFormattedQuery << "\n"; + } + + finalFormattedQuery << currentFormattedQuery; + if (parsedTokens.back().Name != "SEMICOLON") { + if (hasTrailingComments + && !comments.back().Content.EndsWith("\n") + && comments.back().Content.StartsWith("--")) { + finalFormattedQuery << "\n"; + } + finalFormattedQuery << ";\n"; + } + } + + formattedQuery = finalFormattedQuery; + return true; + } + +private: + const NSQLTranslation::TTranslationSettings Settings; +}; + +} + +ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings) { + return ISqlFormatter::TPtr(new TSqlFormatter(settings)); +} + +TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings) { + auto parsedSettings = settings; + NYql::TIssues issues; + if (!NSQLTranslation::ParseTranslationSettings(query, parsedSettings, issues)) { + throw yexception() << issues.ToString(); + } + + auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser); + TVector<NSQLTranslation::TParsedToken> allTokens; + auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) { + if (token.Name != "EOF") { + allTokens.push_back(token); + } + }; + + if (!lexer->Tokenize(query, "Query", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) { + throw yexception() << issues.ToString(); + } + + TStringBuilder newQueryBuilder; + ui32 index = 0; + for (const auto& x : allTokens) { + newQueryBuilder << " /*" << index++ << "*/ "; + newQueryBuilder << x.Content; + } + + newQueryBuilder << " /*" << index++ << "*/ "; + return newQueryBuilder; +} + +bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error) { + try { + google::protobuf::Arena arena; + NSQLTranslation::TTranslationSettings settings; + settings.Arena = &arena; + + auto formatter = MakeSqlFormatter(settings); + NYql::TIssues 
issues; + const bool result = formatter->Format(query, formattedQuery, issues); + if (!result) { + error = issues.ToString(); + } + return result; + } catch (const std::exception& e) { + error = e.what(); + return false; + } +} + +THashSet<TString> GetKeywords() { + TString grammar; + // ANTLR4-MIGRATION: just change SQLv1 to SQLv1Antlr4 + Y_ENSURE(NResource::FindExact("SQLv1.g.in", &grammar)); + THashSet<TString> res; + TVector<TString> lines; + Split(grammar, "\n", lines); + for (auto s : lines) { + s = StripString(s); + if (s.StartsWith("//")) { + continue; + } + + auto pos1 = s.find(':'); + auto pos2 = s.find(';'); + if (pos1 == TString::npos || pos2 == TString::npos || pos2 < pos1 + 2) { + continue; + } + + auto before = s.substr(0, pos1); + auto after = s.substr(pos1 + 1, pos2 - pos1 - 1); + SubstGlobal(after, " ", ""); + SubstGlobal(after, "'", ""); + if (after == before) { + //Cerr << before << "\n"; + res.insert(before); + } + } + + return res; +} + +} // namespace NSQLFormat diff --git a/yql/essentials/sql/v1/format/sql_format.h b/yql/essentials/sql/v1/format/sql_format.h new file mode 100644 index 00000000000..6944a730710 --- /dev/null +++ b/yql/essentials/sql/v1/format/sql_format.h @@ -0,0 +1,35 @@ +#pragma once + +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/sql/settings/translation_settings.h> + +#include <util/generic/string.h> + +namespace NSQLFormat { + +constexpr ui32 OneIndent = 4; + +enum class EFormatMode { + Pretty, + Obfuscate +}; + +class ISqlFormatter { +public: + using TPtr = THolder<ISqlFormatter>; + + virtual bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues, + EFormatMode mode = EFormatMode::Pretty) = 0; + virtual ~ISqlFormatter() = default; +}; + +ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings = {}); + +// insert spaces and comments between each tokens +TString MutateQuery(const TString& query, const 
NSQLTranslation::TTranslationSettings& settings = {}); + +bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error); + +THashSet<TString> GetKeywords(); + +} diff --git a/yql/essentials/sql/v1/format/sql_format_ut.cpp b/yql/essentials/sql/v1/format/sql_format_ut.cpp new file mode 100644 index 00000000000..3cfd7031159 --- /dev/null +++ b/yql/essentials/sql/v1/format/sql_format_ut.cpp @@ -0,0 +1,51 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "sql_format.h" + +#include <google/protobuf/arena.h> +#include <util/string/subst.h> +#include <util/string/join.h> + +namespace { + +using TCases = TVector<std::pair<TString, TString>>; + +struct TSetup { + TSetup() { + NSQLTranslation::TTranslationSettings settings; + settings.Arena = &Arena; + Formatter = NSQLFormat::MakeSqlFormatter(settings); + } + + void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) { + for (const auto& c : cases) { + NYql::TIssues issues; + TString formatted; + auto res = Formatter->Format(c.first, formatted, issues, mode); + UNIT_ASSERT_C(res, issues.ToString()); + auto expected = c.second; + SubstGlobal(expected, "\t", TString(NSQLFormat::OneIndent, ' ')); + UNIT_ASSERT_NO_DIFF(formatted, expected); + + TString formatted2; + auto res2 = Formatter->Format(formatted, formatted2, issues); + UNIT_ASSERT_C(res2, issues.ToString()); + UNIT_ASSERT_NO_DIFF(formatted, formatted2); + + if (mode == NSQLFormat::EFormatMode::Pretty) { + auto mutatedQuery = NSQLFormat::MutateQuery(c.first); + auto res3 = Formatter->Format(mutatedQuery, formatted, issues); + UNIT_ASSERT_C(res3, issues.ToString()); + } + } + } + + google::protobuf::Arena Arena; + NSQLFormat::ISqlFormatter::TPtr Formatter; +}; + +} + +Y_UNIT_TEST_SUITE(CheckSqlFormatter) { + #include "sql_format_ut.h" +} diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h new file mode 100644 index 00000000000..951bf427989 --- 
/dev/null +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -0,0 +1,1650 @@ +Y_UNIT_TEST(Pragma) { + TCases cases = { + {"pragma user = user;","PRAGMA user = user;\n"}, + {"pragma user = default;","PRAGMA user = default;\n"}, + {"pragma user.user = user;","PRAGMA user.user = user;\n"}, + {"pragma user.user(user);","PRAGMA user.user(user);\n"}, + {"pragma user.user(user, user);","PRAGMA user.user(user, user);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(DotAfterDigits) { + TCases cases = { + {"select a.1 .b from plato.foo;","SELECT\n\ta.1 .b\nFROM plato.foo;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(GrantPermissions) { + TCases cases { + {"use plato;grant connect, modify tables, list on `/Root` to user;", "USE plato;\n\nGRANT CONNECT, MODIFY TABLES, LIST ON `/Root` TO user;\n"}, + {"use plato;grant select , select tables, select attributes on `/Root` to user;", "USE plato;\n\nGRANT SELECT, SELECT TABLES, SELECT ATTRIBUTES ON `/Root` TO user;\n"}, + {"use plato;grant insert, modify attributes on `/Root` to user;", "USE plato;\n\nGRANT INSERT, MODIFY ATTRIBUTES ON `/Root` TO user;\n"}, + {"use plato;grant use legacy, use on `/Root` to user1, user2;", "USE plato;\n\nGRANT USE LEGACY, USE ON `/Root` TO user1, user2;\n"}, + {"use plato;grant manage, full legacy, full, create on `/Root` to user;", "USE plato;\n\nGRANT MANAGE, FULL LEGACY, FULL, CREATE ON `/Root` TO user;\n"}, + {"use plato;grant drop, grant, select row, update row on `/Root` to user;", "USE plato;\n\nGRANT DROP, GRANT, SELECT ROW, UPDATE ROW ON `/Root` TO user;\n"}, + {"use plato;grant erase row, create directory on `/Root` to user;", "USE plato;\n\nGRANT ERASE ROW, CREATE DIRECTORY ON `/Root` TO user;\n"}, + {"use plato;grant create table, create queue, remove schema on `/Root` to user;", "USE plato;\n\nGRANT CREATE TABLE, CREATE QUEUE, REMOVE SCHEMA ON `/Root` TO user;\n"}, + {"use plato;grant describe schema, alter schema on `/Root` to user;", "USE 
plato;\n\nGRANT DESCRIBE SCHEMA, ALTER SCHEMA ON `/Root` TO user;\n"}, + {"use plato;grant select, on `/Root` to user, with grant option;", "USE plato;\n\nGRANT SELECT, ON `/Root` TO user, WITH GRANT OPTION;\n"}, + {"use plato;grant all privileges on `/Root` to user;", "USE plato;\n\nGRANT ALL PRIVILEGES ON `/Root` TO user;\n"}, + {"use plato;grant list on `/Root/db1`, `/Root/db2` to user;", "USE plato;\n\nGRANT LIST ON `/Root/db1`, `/Root/db2` TO user;\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(RevokePermissions) { + TCases cases { + {"use plato;revoke connect, modify tables, list on `/Root` from user;", "USE plato;\n\nREVOKE CONNECT, MODIFY TABLES, LIST ON `/Root` FROM user;\n"}, + {"use plato;revoke select , select tables, select attributes on `/Root` from user;", "USE plato;\n\nREVOKE SELECT, SELECT TABLES, SELECT ATTRIBUTES ON `/Root` FROM user;\n"}, + {"use plato;revoke insert, modify attributes on `/Root` from user;", "USE plato;\n\nREVOKE INSERT, MODIFY ATTRIBUTES ON `/Root` FROM user;\n"}, + {"use plato;revoke use legacy, use on `/Root` from user1, user2;", "USE plato;\n\nREVOKE USE LEGACY, USE ON `/Root` FROM user1, user2;\n"}, + {"use plato;revoke manage, full legacy, full, create on `/Root` from user;", "USE plato;\n\nREVOKE MANAGE, FULL LEGACY, FULL, CREATE ON `/Root` FROM user;\n"}, + {"use plato;revoke drop, grant, select row, update row on `/Root` from user;", "USE plato;\n\nREVOKE DROP, GRANT, SELECT ROW, UPDATE ROW ON `/Root` FROM user;\n"}, + {"use plato;revoke erase row, create directory on `/Root` from user;", "USE plato;\n\nREVOKE ERASE ROW, CREATE DIRECTORY ON `/Root` FROM user;\n"}, + {"use plato;revoke create table, create queue, remove schema on `/Root` from user;", "USE plato;\n\nREVOKE CREATE TABLE, CREATE QUEUE, REMOVE SCHEMA ON `/Root` FROM user;\n"}, + {"use plato;revoke describe schema, alter schema on `/Root` from user;", "USE plato;\n\nREVOKE DESCRIBE SCHEMA, ALTER SCHEMA ON `/Root` FROM user;\n"}, + {"use 
plato;revoke grant option for insert, on `/Root` from user;", "USE plato;\n\nREVOKE GRANT OPTION FOR INSERT, ON `/Root` FROM user;\n"}, + {"use plato;revoke all privileges on `/Root` from user;", "USE plato;\n\nREVOKE ALL PRIVILEGES ON `/Root` FROM user;\n"}, + {"use plato;revoke list on `/Root/db1`, `/Root/db2` from user;", "USE plato;\n\nREVOKE LIST ON `/Root/db1`, `/Root/db2` FROM user;\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(DropRole) { + TCases cases = { + {"use plato;drop user user,user,user;","USE plato;\n\nDROP USER user, user, user;\n"}, + {"use plato;drop group if exists user;","USE plato;\n\nDROP GROUP IF EXISTS user;\n"}, + {"use plato;drop group user,;","USE plato;\n\nDROP GROUP user,;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CreateUser) { + TCases cases = { + {"use plato;create user user;","USE plato;\n\nCREATE USER user;\n"}, + {"use plato;create user user encrypted password 'foo';","USE plato;\n\nCREATE USER user ENCRYPTED PASSWORD 'foo';\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CreateGroup) { + TCases cases = { + {"use plato;create group user;","USE plato;\n\nCREATE GROUP user;\n"}, + {"use plato;create group user with user user;","USE plato;\n\nCREATE GROUP user WITH USER user;\n"}, + {"use plato;create group user with user user, user,;","USE plato;\n\nCREATE GROUP user WITH USER user, user,;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(AlterUser) { + TCases cases = { + {"use plato;alter user user rename to user;","USE plato;\n\nALTER USER user RENAME TO user;\n"}, + {"use plato;alter user user encrypted password 'foo';","USE plato;\n\nALTER USER user ENCRYPTED PASSWORD 'foo';\n"}, + {"use plato;alter user user with encrypted password 'foo';","USE plato;\n\nALTER USER user WITH ENCRYPTED PASSWORD 'foo';\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(AlterGroup) { + TCases cases = { + {"use plato;alter group user add user user;","USE 
plato;\n\nALTER GROUP user ADD USER user;\n"}, + {"use plato;alter group user drop user user;","USE plato;\n\nALTER GROUP user DROP USER user;\n"}, + {"use plato;alter group user add user user, user,;","USE plato;\n\nALTER GROUP user ADD USER user, user,;\n"}, + {"use plato;alter group user rename to user;","USE plato;\n\nALTER GROUP user RENAME TO user;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Use) { + TCases cases = { + {"use user;","USE user;\n"}, + {"use user:user;","USE user: user;\n"}, + {"use user:*;","USE user: *;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Commit) { + TCases cases = { + {"commit;","COMMIT;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Rollback) { + TCases cases = { + {"rollback;","ROLLBACK;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Export) { + TCases cases = { + {"export $foo;","EXPORT $foo;\n"}, + {"export $foo, $bar;","EXPORT $foo, $bar;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Import) { + TCases cases = { + {"import user symbols $foo;","IMPORT user SYMBOLS $foo;\n"}, + {"import user symbols $foo,$bar;","IMPORT user SYMBOLS $foo, $bar;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Values) { + TCases cases = { + {"values (1);","VALUES\n\t(1);\n"}, + {"values (1,2),(3,4);","VALUES\n\t(1, 2),\n\t(3, 4);\n"}, + {"values ('a\nb');","VALUES\n\t('a\nb');\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Declare) { + TCases cases = { + {"declare $foo as int32;","DECLARE $foo AS int32;\n"}, + {"declare $foo as bool ?","DECLARE $foo AS bool?;\n"}, + {"declare $foo as bool ? 
?","DECLARE $foo AS bool??;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(NamedNode) { + TCases cases = { + {"$x=1","$x = 1;\n"}, + {"$x,$y=(2,3)","$x, $y = (2, 3);\n"}, + {"$a = select 1 union all select 2","$a =\n\tSELECT\n\t\t1\n\tUNION ALL\n\tSELECT\n\t\t2;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(DropTable) { + TCases cases = { + {"drop table user","DROP TABLE user;\n"}, + {"drop table if exists user","DROP TABLE IF EXISTS user;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CreateTable) { + TCases cases = { + {"create table user(user int32)","CREATE TABLE user (\n\tuser int32\n);\n"}, + {"create table user(user int32,user bool ?)","CREATE TABLE user (\n\tuser int32,\n\tuser bool?\n);\n"}, + {"create table user(user int32) with (user=user)","CREATE TABLE user (\n\tuser int32\n)\nWITH (user = user);\n"}, + {"create table user(primary key (user))","CREATE TABLE user (\n\tPRIMARY KEY (user)\n);\n"}, + {"create table user(primary key (user,user))","CREATE TABLE user (\n\tPRIMARY KEY (user, user)\n);\n"}, + {"create table user(partition by (user))","CREATE TABLE user (\n\tPARTITION BY (user)\n);\n"}, + {"create table user(partition by (user,user))","CREATE TABLE user (\n\tPARTITION BY (user, user)\n);\n"}, + {"create table user(order by (user asc))","CREATE TABLE user (\n\tORDER BY (user ASC)\n);\n"}, + {"create table user(order by (user desc,user))","CREATE TABLE user (\n\tORDER BY (user DESC, user)\n);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as seconds)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS SECONDS);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as MilliSeconds)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MILLISECONDS);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as microSeconds)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl 
= interval('P1D') ON user AS MICROSECONDS);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as nAnOsEcOnDs)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS NANOSECONDS);\n"}, + {"create table user(index user global unique sync on (user,user) with (user=user,user=user))", + "CREATE TABLE user (\n\tINDEX user GLOBAL UNIQUE SYNC ON (user, user) WITH (user = user, user = user)\n);\n"}, + {"create table user(index user global async on (user) with (user=user,))", + "CREATE TABLE user (\n\tINDEX user GLOBAL ASYNC ON (user) WITH (user = user,)\n);\n"}, + {"create table user(index user local on (user) cover (user))", + "CREATE TABLE user (\n\tINDEX user LOCAL ON (user) COVER (user)\n);\n"}, + {"create table user(index user local on (user) cover (user,user))", + "CREATE TABLE user (\n\tINDEX user LOCAL ON (user) COVER (user, user)\n);\n"}, + {"create table user(index idx global using subtype on (col) cover (col) with (setting = foo, another_setting = bar));", + "CREATE TABLE user (\n\tINDEX idx GLOBAL USING subtype ON (col) COVER (col) WITH (setting = foo, another_setting = bar)\n);\n"}, + {"create table user(family user (user='foo'))", + "CREATE TABLE user (\n\tFAMILY user (user = 'foo')\n);\n"}, + {"create table user(family user (user='foo',user='bar'))", + "CREATE TABLE user (\n\tFAMILY user (user = 'foo', user = 'bar')\n);\n"}, + {"create table user(changefeed user with (user='foo'))", + "CREATE TABLE user (\n\tCHANGEFEED user WITH (user = 'foo')\n);\n"}, + {"create table user(changefeed user with (user='foo',user='bar'))", + "CREATE TABLE user (\n\tCHANGEFEED user WITH (user = 'foo', user = 'bar')\n);\n"}, + {"create table user(user) AS SELECT 1","CREATE TABLE user (\n\tuser\n)\nAS\nSELECT\n 1;\n"}, + {"create table user(user) AS VALUES (1), (2)","CREATE TABLE user (\n\tuser\n)\nAS\nVALUES\n (1),\n (2);\n"}, + {"create table user(foo int32, bar bool ?) 
inherits (s3:$cluster.xxx) partition by hash(a,b,hash) with (inherits=interval('PT1D') ON logical_time) tablestore tablestore", + "CREATE TABLE user (\n" + "\tfoo int32,\n" + "\tbar bool?\n" + ")\n" + "INHERITS (s3: $cluster.xxx)\n" + "PARTITION BY HASH (a, b, hash)\n" + "WITH (inherits = interval('PT1D') ON logical_time)\n" + "TABLESTORE tablestore;\n"}, + {"create table user(foo int32, bar bool ?) partition by hash(a,b,hash) with (tiering='some')", + "CREATE TABLE user (\n" + "\tfoo int32,\n" + "\tbar bool?\n" + ")\n" + "PARTITION BY HASH (a, b, hash)\n" + "WITH (tiering = 'some');\n"}, + {"create table if not exists user(user int32)", "CREATE TABLE IF NOT EXISTS user (\n\tuser int32\n);\n"}, + {"create temp table user(user int32)", "CREATE TEMP TABLE user (\n\tuser int32\n);\n"}, + {"create temporary table user(user int32)", "CREATE TEMPORARY TABLE user (\n\tuser int32\n);\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ObjectOperations) { + TCases cases = { + {"alter oBject usEr (TYpe abcde) Set (a = b)", + "ALTER OBJECT usEr (TYPE abcde) SET (a = b);\n"}, + {"creAte oBject usEr (tYpe abcde) With (a = b)", + "CREATE OBJECT usEr (TYPE abcde) WITH (a = b);\n"}, + {"creAte oBject if not exIstS usEr (tYpe abcde) With (a = b)", + "CREATE OBJECT IF NOT EXISTS usEr (TYPE abcde) WITH (a = b);\n"}, + {"creAte oBject usEr (tYpe abcde) With a = b", + "CREATE OBJECT usEr (TYPE abcde) WITH a = b;\n"}, + {"dRop oBject usEr (tYpe abcde) With (aeEE)", + "DROP OBJECT usEr (TYPE abcde) WITH (aeEE);\n"}, + {"dRop oBject If ExistS usEr (tYpe abcde) With (aeEE)", + "DROP OBJECT IF EXISTS usEr (TYPE abcde) WITH (aeEE);\n"}, + {"dRop oBject usEr (tYpe abcde) With aeEE", + "DROP OBJECT usEr (TYPE abcde) WITH aeEE;\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(TableStoreOperations) { + TCases cases = { + {"alter tableStore uSer aDd column usEr int32", + "ALTER TABLESTORE uSer ADD COLUMN usEr int32;\n"}, + {"alter tableStore uSer drOp column usEr", + 
"ALTER TABLESTORE uSer DROP COLUMN usEr;\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ExternalDataSourceOperations) { + TCases cases = { + {"creAte exTernAl daTa SouRce usEr With (a = \"b\")", + "CREATE EXTERNAL DATA SOURCE usEr WITH (a = \"b\");\n"}, + {"creAte exTernAl daTa SouRce if not exists usEr With (a = \"b\")", + "CREATE EXTERNAL DATA SOURCE IF NOT EXISTS usEr WITH (a = \"b\");\n"}, + {"creAte oR rePlaCe exTernAl daTa SouRce usEr With (a = \"b\")", + "CREATE OR REPLACE EXTERNAL DATA SOURCE usEr WITH (a = \"b\");\n"}, + {"create external data source eds with (a=\"a\",b=\"b\",c = true)", + "CREATE EXTERNAL DATA SOURCE eds WITH (\n\ta = \"a\",\n\tb = \"b\",\n\tc = TRUE\n);\n"}, + {"alter external data source eds set a true, reset (b, c), set (x=y, z=false)", + "ALTER EXTERNAL DATA SOURCE eds\n\tSET a TRUE,\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"}, + {"alter external data source eds reset (a), set (x=y)", + "ALTER EXTERNAL DATA SOURCE eds\n\tRESET (a),\n\tSET (x = y);\n"}, + {"dRop exTerNal Data SouRce usEr", + "DROP EXTERNAL DATA SOURCE usEr;\n"}, + {"dRop exTerNal Data SouRce if exists usEr", + "DROP EXTERNAL DATA SOURCE IF EXISTS usEr;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(AsyncReplication) { + TCases cases = { + {"create async replication user for table1 AS table2 with (user='foo')", + "CREATE ASYNC REPLICATION user FOR table1 AS table2 WITH (user = 'foo');\n"}, + {"alter async replication user set (user='foo')", + "ALTER ASYNC REPLICATION user SET (user = 'foo');\n"}, + {"drop async replication user", + "DROP ASYNC REPLICATION user;\n"}, + {"drop async replication user cascade", + "DROP ASYNC REPLICATION user CASCADE;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ExternalTableOperations) { + TCases cases = { + {"creAte exTernAl TabLe usEr (a int) With (a = \"b\")", + "CREATE EXTERNAL TABLE usEr (\n\ta int\n)\nWITH (a = \"b\");\n"}, + {"creAte oR rePlaCe exTernAl TabLe usEr (a int) 
With (a = \"b\")", + "CREATE OR REPLACE EXTERNAL TABLE usEr (\n\ta int\n)\nWITH (a = \"b\");\n"}, + {"creAte exTernAl TabLe iF NOt Exists usEr (a int) With (a = \"b\")", + "CREATE EXTERNAL TABLE IF NOT EXISTS usEr (\n\ta int\n)\nWITH (a = \"b\");\n"}, + {"create external table user (a int) with (a=\"b\",c=\"d\")", + "CREATE EXTERNAL TABLE user (\n\ta int\n)\nWITH (\n\ta = \"b\",\n\tc = \"d\"\n);\n"}, + {"alter external table user add column col1 int32, drop column col2, reset(prop), set (prop2 = 42, x=y), set a true", + "ALTER EXTERNAL TABLE user\n\tADD COLUMN col1 int32,\n\tDROP COLUMN col2,\n\tRESET (prop),\n\tSET (prop2 = 42, x = y),\n\tSET a TRUE;\n"}, + {"dRop exTerNal taBlE usEr", + "DROP EXTERNAL TABLE usEr;\n"}, + {"dRop exTerNal taBlE iF eXiStS usEr", + "DROP EXTERNAL TABLE IF EXISTS usEr;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(TypeSelection) { + TCases cases = { + {"Select tYpe.* frOm Table tYpe", + "SELECT\n\ttYpe.*\nFROM Table\n\ttYpe;\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(AlterTable) { + TCases cases = { + {"alter table user add user int32", + "ALTER TABLE user\n\tADD user int32;\n"}, + {"alter table user add user int32, add user bool ?", + "ALTER TABLE user\n\tADD user int32,\n\tADD user bool?;\n"}, + {"alter table user add column user int32", + "ALTER TABLE user\n\tADD COLUMN user int32;\n"}, + {"alter table user drop user", + "ALTER TABLE user\n\tDROP user;\n"}, + {"alter table user drop column user", + "ALTER TABLE user\n\tDROP COLUMN user;\n"}, + {"alter table user alter column user set family user", + "ALTER TABLE user\n\tALTER COLUMN user SET FAMILY user;\n"}, + {"alter table t alter column c drop not null", + "ALTER TABLE t\n\tALTER COLUMN c DROP NOT NULL;\n"}, + {"alter table user add family user(user='foo')", + "ALTER TABLE user\n\tADD FAMILY user (user = 'foo');\n"}, + {"alter table user alter family user set user 'foo'", + "ALTER TABLE user\n\tALTER FAMILY user SET user 'foo';\n"}, + 
{"alter table user set user user", + "ALTER TABLE user\n\tSET user user;\n"}, + {"alter table user set (user=user)", + "ALTER TABLE user\n\tSET (user = user);\n"}, + {"alter table user set (user=user,user=user)", + "ALTER TABLE user\n\tSET (user = user, user = user);\n"}, + {"alter table user reset(user)", + "ALTER TABLE user\n\tRESET (user);\n"}, + {"alter table user reset(user, user)", + "ALTER TABLE user\n\tRESET (user, user);\n"}, + {"alter table user add index user local on (user)", + "ALTER TABLE user\n\tADD INDEX user LOCAL ON (user);\n"}, + {"alter table user alter index idx set setting 'foo'", + "ALTER TABLE user\n\tALTER INDEX idx SET setting 'foo';\n"}, + {"alter table user alter index idx set (setting = 'foo', another_setting = 'bar')", + "ALTER TABLE user\n\tALTER INDEX idx SET (setting = 'foo', another_setting = 'bar');\n"}, + {"alter table user alter index idx reset (setting, another_setting)", + "ALTER TABLE user\n\tALTER INDEX idx RESET (setting, another_setting);\n"}, + {"alter table user add index idx global using subtype on (col) cover (col) with (setting = foo, another_setting = 'bar');", + "ALTER TABLE user\n\tADD INDEX idx GLOBAL USING subtype ON (col) COVER (col) WITH (setting = foo, another_setting = 'bar');\n"}, + {"alter table user drop index user", + "ALTER TABLE user\n\tDROP INDEX user;\n"}, + {"alter table user rename to user", + "ALTER TABLE user\n\tRENAME TO user;\n"}, + {"alter table user add changefeed user with (user = 'foo')", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (user = 'foo');\n"}, + {"alter table user alter changefeed user disable", + "ALTER TABLE user\n\tALTER CHANGEFEED user DISABLE;\n"}, + {"alter table user alter changefeed user set(user='foo')", + "ALTER TABLE user\n\tALTER CHANGEFEED user SET (user = 'foo');\n"}, + {"alter table user drop changefeed user", + "ALTER TABLE user\n\tDROP CHANGEFEED user;\n"}, + {"alter table user add changefeed user with (initial_scan = tRUe)", + "ALTER TABLE user\n\tADD 
CHANGEFEED user WITH (initial_scan = TRUE);\n"}, + {"alter table user add changefeed user with (initial_scan = FaLsE)", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (initial_scan = FALSE);\n"}, + {"alter table user add changefeed user with (retention_period = Interval(\"P1D\"))", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (retention_period = Interval(\"P1D\"));\n"}, + {"alter table user add changefeed user with (virtual_timestamps = TruE)", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (virtual_timestamps = TRUE);\n"}, + {"alter table user add changefeed user with (virtual_timestamps = fAlSe)", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (virtual_timestamps = FALSE);\n"}, + {"alter table user add changefeed user with (resolved_timestamps = Interval(\"PT1S\"))", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (resolved_timestamps = Interval(\"PT1S\"));\n"}, + {"alter table user add changefeed user with (topic_min_active_partitions = 1)", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (topic_min_active_partitions = 1);\n"}, + {"alter table user add changefeed user with (topic_auto_partitioning = 'ENABLED', topic_min_active_partitions = 1, topic_max_active_partitions = 7)", + "ALTER TABLE user\n\tADD CHANGEFEED user WITH (topic_auto_partitioning = 'ENABLED', topic_min_active_partitions = 1, topic_max_active_partitions = 7);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CreateTopic) { + TCases cases = { + {"create topic topic1", + "CREATE TOPIC topic1;\n"}, + {"create topic topic1 (consumer c1)", + "CREATE TOPIC topic1 (\n\tCONSUMER c1\n);\n"}, + {"create topic topic1 (consumer c1, consumer c2 with (important = True))", + "CREATE TOPIC topic1 (\n\tCONSUMER c1,\n\tCONSUMER c2 WITH (important = TRUE)\n);\n"}, + {"create topic topic1 (consumer c1) with (partition_count_limit = 5)", + "CREATE TOPIC topic1 (\n\tCONSUMER c1\n) WITH (\n\tpartition_count_limit = 5\n);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + 
+Y_UNIT_TEST(AlterTopic) { + TCases cases = { + {"alter topic topic1 alter consumer c1 set (important = false)", + "ALTER TOPIC topic1\n\tALTER CONSUMER c1 SET (important = FALSE);\n"}, + {"alter topic topic1 alter consumer c1 set (important = false), alter consumer c2 reset (read_from)", + "ALTER TOPIC topic1\n\tALTER CONSUMER c1 SET (important = FALSE),\n\tALTER CONSUMER c2 RESET (read_from);\n"}, + {"alter topic topic1 add consumer c1, drop consumer c2", + "ALTER TOPIC topic1\n\tADD CONSUMER c1,\n\tDROP CONSUMER c2;\n"}, + {"alter topic topic1 set (supported_codecs = 'RAW'), RESET (retention_period)", + "ALTER TOPIC topic1\n\tSET (supported_codecs = 'RAW'),\n\tRESET (retention_period);\n"}, + + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(DropTopic) { + TCases cases = { + {"drop topic topic1", + "DROP TOPIC topic1;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(TopicExistsStatement) { + TCases cases = { + {"drop topic if exists topic1", + "DROP TOPIC IF EXISTS topic1;\n"}, + {"create topic if not exists topic1 with (partition_count_limit = 5)", + "CREATE TOPIC IF NOT EXISTS topic1 WITH (\n\tpartition_count_limit = 5\n);\n"}, + {"alter topic if exists topic1 alter consumer c1 set (important = false)", + "ALTER TOPIC IF EXISTS topic1\n\tALTER CONSUMER c1 SET (important = FALSE);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Do) { + TCases cases = { + {"do $a(1,2,3)", + "DO $a(1, 2, 3);\n"}, + {"do begin values(1); end do;", + "DO BEGIN\n\tVALUES\n\t\t(1);\nEND DO;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(DefineActionOrSubquery) { + TCases cases = { + {"define action $a() as " + "define action $b() as " + "values(1); " + "end define; " + "define subquery $c() as " + "select 1; " + "end define; " + "do $b(); " + "process $c(); " + "end define", + "DEFINE ACTION $a() AS\n\tDEFINE ACTION $b() AS\n\t\t" + "VALUES\n\t\t\t(1);\n\tEND DEFINE;\n\n\t" + "DEFINE SUBQUERY $c() 
AS\n\t\tSELECT\n\t\t\t1;\n\t" + "END DEFINE;\n\tDO $b();\n\n\tPROCESS $c();\nEND DEFINE;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(If) { + TCases cases = { + {"evaluate if 1=1 do $a()", + "EVALUATE IF 1 = 1\n\tDO $a();\n"}, + {"evaluate if 1=1 do $a() else do $b()", + "EVALUATE IF 1 = 1\n\tDO $a()\nELSE\n\tDO $b();\n"}, + {"evaluate if 1=1 do begin select 1; end do", + "EVALUATE IF 1 = 1\n\tDO BEGIN\n\t\tSELECT\n\t\t\t1;\n\tEND DO;\n"}, + {"evaluate if 1=1 do begin select 1; end do else do begin select 2; end do", + "EVALUATE IF 1 = 1\n\tDO BEGIN\n\t\tSELECT\n\t\t\t1;\n\tEND DO\n" + "ELSE\n\tDO BEGIN\n\t\tSELECT\n\t\t\t2;\n\tEND DO;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(For) { + TCases cases = { + {"evaluate for $x in [] do $a($x)", + "EVALUATE FOR $x IN []\n\tDO $a($x);\n"}, + {"evaluate for $x in [] do $a($x) else do $b()", + "EVALUATE FOR $x IN []\n\tDO $a($x)\nELSE\n\tDO $b();\n"}, + {"evaluate for $x in [] do begin select $x; end do", + "EVALUATE FOR $x IN []\n\tDO BEGIN\n\t\tSELECT\n\t\t\t$x;\n\tEND DO;\n"}, + {"evaluate for $x in [] do begin select $x; end do else do begin select 2; end do", + "EVALUATE FOR $x IN []\n\tDO BEGIN\n\t\tSELECT\n\t\t\t$x;\n\tEND DO\nELSE\n\tDO BEGIN\n\t\tSELECT\n\t\t\t2;\n\tEND DO;\n"}, + {"evaluate parallel for $x in [] do $a($x)", + "EVALUATE PARALLEL FOR $x IN []\n\tDO $a($x);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Update) { + TCases cases = { + {"update user on default values", + "UPDATE user\nON DEFAULT VALUES;\n"}, + {"update user on values (1),(2)", + "UPDATE user\nON\nVALUES\n\t(1),\n\t(2);\n"}, + {"update user on select 1 as x, 2 as y", + "UPDATE user\nON\nSELECT\n\t1 AS x,\n\t2 AS y;\n"}, + {"update user on (x) values (1),(2),(3)", + "UPDATE user\nON (\n\tx\n)\nVALUES\n\t(1),\n\t(2),\n\t(3);\n"}, + {"update user on (x,y) values (1,2),(2,3),(3,4)", + "UPDATE user\nON (\n\tx,\n\ty\n)\nVALUES\n\t(1, 2),\n\t(2, 3),\n\t(3, 4);\n"}, + {"update user 
on (x) select 1", + "UPDATE user\nON (\n\tx\n)\nSELECT\n\t1;\n"}, + {"update user on (x,y) select 1,2", + "UPDATE user\nON (\n\tx,\n\ty\n)\nSELECT\n\t1,\n\t2;\n"}, + {"update user set x=1", + "UPDATE user\nSET\n\tx = 1;\n"}, + {"update user set (x)=(1)", + "UPDATE user\nSET\n(\n\tx\n) = (\n\t1\n);\n"}, + {"update user set (x,y)=(1,2)", + "UPDATE user\nSET\n(\n\tx,\n\ty\n) = (\n\t1,\n\t2\n);\n"}, + {"update user set (x,y)=(select 1,2)", + "UPDATE user\nSET\n(\n\tx,\n\ty\n) = (\n\tSELECT\n\t\t1,\n\t\t2\n);\n"}, + {"update user set x=1,y=2 where z=3", + "UPDATE user\nSET\n\tx = 1,\n\ty = 2\nWHERE z = 3;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Delete) { + TCases cases = { + {"delete from user", + "DELETE FROM user;\n"}, + {"delete from user where 1=1", + "DELETE FROM user\nWHERE 1 = 1;\n"}, + {"delete from user on select 1 as x, 2 as y", + "DELETE FROM user\nON\nSELECT\n\t1 AS x,\n\t2 AS y;\n"}, + {"delete from user on (x) values (1)", + "DELETE FROM user\nON (\n\tx\n)\nVALUES\n\t(1);\n"}, + {"delete from user on (x,y) values (1,2), (3,4)", + "DELETE FROM user\nON (\n\tx,\n\ty\n)\nVALUES\n\t(1, 2),\n\t(3, 4);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Into) { + TCases cases = { + {"insert into user select 1 as x", + "INSERT INTO user\nSELECT\n\t1 AS x;\n"}, + {"insert or abort into user select 1 as x", + "INSERT OR ABORT INTO user\nSELECT\n\t1 AS x;\n"}, + {"insert or revert into user select 1 as x", + "INSERT OR REVERT INTO user\nSELECT\n\t1 AS x;\n"}, + {"insert or ignore into user select 1 as x", + "INSERT OR IGNORE INTO user\nSELECT\n\t1 AS x;\n"}, + {"upsert into user select 1 as x", + "UPSERT INTO user\nSELECT\n\t1 AS x;\n"}, + {"replace into user select 1 as x", + "REPLACE INTO user\nSELECT\n\t1 AS x;\n"}, + {"insert into user(x) values (1)", + "INSERT INTO user (\n\tx\n)\nVALUES\n\t(1);\n"}, + {"insert into user(x,y) values (1,2)", + "INSERT INTO user (\n\tx,\n\ty\n)\nVALUES\n\t(1, 2);\n"}, + {"insert into 
plato.user select 1 as x", + "INSERT INTO plato.user\nSELECT\n\t1 AS x;\n"}, + {"insert into @user select 1 as x", + "INSERT INTO @user\nSELECT\n\t1 AS x;\n"}, + {"insert into $user select 1 as x", + "INSERT INTO $user\nSELECT\n\t1 AS x;\n"}, + {"insert into @$user select 1 as x", + "INSERT INTO @$user\nSELECT\n\t1 AS x;\n"}, + {"upsert into user erase by (x,y) values (1)", + "UPSERT INTO user\n\tERASE BY (\n\t\tx,\n\t\ty\n\t)\nVALUES\n\t(1);\n"}, + {"insert into user with truncate select 1 as x", + "INSERT INTO user\n\tWITH truncate\nSELECT\n\t1 AS x;\n"}, + {"insert into user with (truncate,inferscheme='1') select 1 as x", + "INSERT INTO user\n\tWITH (truncate, inferscheme = '1')\nSELECT\n\t1 AS x;\n"}, + {"insert into user with schema Struct<user:int32> select 1 as user", + "INSERT INTO user\n\tWITH SCHEMA Struct<user: int32>\nSELECT\n\t1 AS user;\n"}, + {"insert into user with schema (int32 as user) select 1 as user", + "INSERT INTO user\n\tWITH SCHEMA (int32 AS user)\nSELECT\n\t1 AS user;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Process) { + TCases cases = { + {"process user", + "PROCESS user;\n"}, + {"process user using $f() as user", + "PROCESS user\nUSING $f() AS user;\n"}, + {"process user,user using $f()", + "PROCESS user, user\nUSING $f();\n"}, + {"process user using $f() where 1=1 having 1=1 assume order by user", + "PROCESS user\nUSING $f()\nWHERE 1 = 1\nHAVING 1 = 1\nASSUME ORDER BY\n\tuser;\n"}, + {"process user using $f() union all process user using $f()", + "PROCESS user\nUSING $f()\nUNION ALL\nPROCESS user\nUSING $f();\n"}, + {"process user using $f() with foo=bar", + "PROCESS user\nUSING $f()\nWITH foo = bar;\n"}, + {"discard process user using $f()", + "DISCARD PROCESS user\nUSING $f();\n"}, + {"process user using $f() into result user", + "PROCESS user\nUSING $f()\nINTO RESULT user;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Reduce) { + TCases cases = { + {"reduce user on user using $f()", + 
"REDUCE user\nON\n\tuser\nUSING $f();\n"}, + {"reduce user on user, using $f()", + "REDUCE user\nON\n\tuser,\nUSING $f();\n"}, + {"discard reduce user on user using $f();", + "DISCARD REDUCE user\nON\n\tuser\nUSING $f();\n"}, + {"reduce user on user using $f() into result user", + "REDUCE user\nON\n\tuser\nUSING $f()\nINTO RESULT user;\n"}, + {"reduce user on user using all $f()", + "REDUCE user\nON\n\tuser\nUSING ALL $f();\n"}, + {"reduce user on user using $f() as user", + "REDUCE user\nON\n\tuser\nUSING $f() AS user;\n"}, + {"reduce user,user on user using $f()", + "REDUCE user, user\nON\n\tuser\nUSING $f();\n"}, + {"reduce user on user,user using $f()", + "REDUCE user\nON\n\tuser,\n\tuser\nUSING $f();\n"}, + {"reduce user on user using $f() where 1=1 having 1=1 assume order by user", + "REDUCE user\nON\n\tuser\nUSING $f()\nWHERE 1 = 1\nHAVING 1 = 1\nASSUME ORDER BY\n\tuser;\n"}, + {"reduce user presort user,user on user using $f();", + "REDUCE user\nPRESORT\n\tuser,\n\tuser\nON\n\tuser\nUSING $f();\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Select) { + TCases cases = { + {"select 1", + "SELECT\n\t1;\n"}, + {"select 1,", + "SELECT\n\t1,;\n"}, + {"select 1 as x", + "SELECT\n\t1 AS x;\n"}, + {"select *", + "SELECT\n\t*;\n"}, + {"select a.*", + "SELECT\n\ta.*;\n"}, + {"select * without a", + "SELECT\n\t*\n\tWITHOUT\n\t\ta;\n"}, + {"select * without a,b", + "SELECT\n\t*\n\tWITHOUT\n\t\ta,\n\t\tb;\n"}, + {"select * without a,", + "SELECT\n\t*\n\tWITHOUT\n\t\ta,;\n"}, + {"select 1 from user", + "SELECT\n\t1\nFROM user;\n"}, + {"select 1 from plato.user", + "SELECT\n\t1\nFROM plato.user;\n"}, + {"select 1 from $user", + "SELECT\n\t1\nFROM $user;\n"}, + {"select 1 from @user", + "SELECT\n\t1\nFROM @user;\n"}, + {"select 1 from @$user", + "SELECT\n\t1\nFROM @$user;\n"}, + {"select 1 from user view user", + "SELECT\n\t1\nFROM user\n\tVIEW user;\n"}, + {"select 1 from user as user", + "SELECT\n\t1\nFROM user\n\tAS user;\n"}, + {"select 1 from user as 
user(user)", + "SELECT\n\t1\nFROM user\n\tAS user (\n\t\tuser\n\t);\n"}, + {"select 1 from user as user(user, user)", + "SELECT\n\t1\nFROM user\n\tAS user (\n\t\tuser,\n\t\tuser\n\t);\n"}, + {"select 1 from user with user=user", + "SELECT\n\t1\nFROM user\n\tWITH user = user;\n"}, + {"select 1 from user with (user=user, user=user)", + "SELECT\n\t1\nFROM user\n\tWITH (user = user, user = user);\n"}, + {"select 1 from user sample 0.1", + "SELECT\n\t1\nFROM user\n\tSAMPLE 0.1;\n"}, + {"select 1 from user tablesample system(0.1)", + "SELECT\n\t1\nFROM user\n\tTABLESAMPLE SYSTEM (0.1);\n"}, + {"select 1 from user tablesample bernoulli(0.1) repeatable(10)", + "SELECT\n\t1\nFROM user\n\tTABLESAMPLE BERNOULLI (0.1) REPEATABLE (10);\n"}, + {"select 1 from user flatten columns", + "SELECT\n\t1\nFROM user\n\tFLATTEN COLUMNS;\n"}, + {"select 1 from user flatten list by user", + "SELECT\n\t1\nFROM user\n\tFLATTEN LIST BY\n\t\tuser;\n"}, + {"select 1 from user flatten list by (user,user)", + "SELECT\n\t1\nFROM user\n\tFLATTEN LIST BY (\n\t\tuser,\n\t\tuser\n\t);\n"}, + {"select 1 from $user(1,2)", + "SELECT\n\t1\nFROM $user(1, 2);\n"}, + {"select 1 from $user(1,2) view user", + "SELECT\n\t1\nFROM $user(1, 2)\n\tVIEW user;\n"}, + {"select 1 from range('a','b')", + "SELECT\n\t1\nFROM range('a', 'b');\n"}, + {"from user select 1", + "FROM user\nSELECT\n\t1;\n"}, + {"select * from user as a join user as b on a.x=b.y", + "SELECT\n\t*\nFROM user\n\tAS a\nJOIN user\n\tAS b\nON a.x = b.y;\n"}, + {"select * from user as a join user as b using(x)", + "SELECT\n\t*\nFROM user\n\tAS a\nJOIN user\n\tAS b\nUSING (x);\n"}, + {"select * from any user as a full join user as b on a.x=b.y", + "SELECT\n\t*\nFROM ANY user\n\tAS a\nFULL JOIN user\n\tAS b\nON a.x = b.y;\n"}, + {"select * from user as a left join any user as b on a.x=b.y", + "SELECT\n\t*\nFROM user\n\tAS a\nLEFT JOIN ANY user\n\tAS b\nON a.x = b.y;\n"}, + {"select * from any user as a right join any user as b on a.x=b.y", + 
"SELECT\n\t*\nFROM ANY user\n\tAS a\nRIGHT JOIN ANY user\n\tAS b\nON a.x = b.y;\n"}, + {"select * from user as a cross join user as b", + "SELECT\n\t*\nFROM user\n\tAS a\nCROSS JOIN user\n\tAS b;\n"}, + {"select 1 from user where key = 1", + "SELECT\n\t1\nFROM user\nWHERE key = 1;\n"}, + {"select 1 from user having count(*) = 1", + "SELECT\n\t1\nFROM user\nHAVING count(*) = 1;\n"}, + {"select 1 from user group by key", + "SELECT\n\t1\nFROM user\nGROUP BY\n\tkey;\n"}, + {"select 1 from user group compact by key, value as v", + "SELECT\n\t1\nFROM user\nGROUP COMPACT BY\n\tkey,\n\tvalue AS v;\n"}, + {"select 1 from user group by key with combine", + "SELECT\n\t1\nFROM user\nGROUP BY\n\tkey\n\tWITH combine;\n"}, + {"select 1 from user order by key asc", + "SELECT\n\t1\nFROM user\nORDER BY\n\tkey ASC;\n"}, + {"select 1 from user order by key, value desc", + "SELECT\n\t1\nFROM user\nORDER BY\n\tkey,\n\tvalue DESC;\n"}, + {"select 1 from user assume order by key", + "SELECT\n\t1\nFROM user\nASSUME ORDER BY\n\tkey;\n"}, + {"select 1 from user window w1 as (), w2 as ()", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (),\n\tw2 AS ();\n"}, + {"select 1 from user window w1 as (user)", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tuser\n\t);\n"}, + {"select 1 from user window w1 as (partition by user)", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tPARTITION BY\n\t\t\tuser\n\t);\n"}, + {"select 1 from user window w1 as (partition by user, user)", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tPARTITION BY\n\t\t\tuser,\n\t\t\tuser\n\t);\n"}, + {"select 1 from user window w1 as (order by user asc)", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tORDER BY\n\t\t\tuser ASC\n\t);\n"}, + {"select 1 from user window w1 as (order by user, user desc)", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tORDER BY\n\t\t\tuser,\n\t\t\tuser DESC\n\t);\n"}, + {"select 1 from user window w1 as (rows between 1 preceding and 1 following)", + "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS 
(\n\t\tROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING\n\t);\n"}, + {"select 1 limit 10", + "SELECT\n\t1\nLIMIT 10;\n"}, + {"select 1 limit 10 offset 5", + "SELECT\n\t1\nLIMIT 10 OFFSET 5;\n"}, + { "select 1 union all select 2", + "SELECT\n\t1\nUNION ALL\nSELECT\n\t2;\n" }, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CompositeTypesAndQuestions) { + TCases cases = { + {"declare $_x AS list<int32>??;declare $_y AS int32 ? ? ;select 1<>2, 1??2," + "formattype(list<int32>), formattype(resource<user>),formattype(tuple<>), formattype(tuple< >), formattype(int32 ? ? )", + "DECLARE $_x AS list<int32>??;\nDECLARE $_y AS int32??;\n\nSELECT\n\t1 <> 2,\n\t1 ?? 2,\n\tformattype(list<int32>)," + "\n\tformattype(resource<user>),\n\tformattype(tuple<>),\n\tformattype(tuple< >),\n\tformattype(int32??" ");\n" + }, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Lambda) { + TCases cases = { + {"$f=($a,$b)->{$x=$a+$b;return $a*$x};$g=($a,$b?)->($a+$b??0);select $f(10,4),$g(1,2);", + "$f = ($a, $b) -> {\n\t$x = $a + $b;\n\tRETURN $a * $x\n};\n" + "$g = ($a, $b?) -> ($a + $b ?? 
0);\n\n" + "SELECT\n\t$f(10, 4),\n\t$g(1, 2);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(NestedSelect) { + TCases cases = { + {"$x=select 1", + "$x =\n\tSELECT\n\t\t1;\n"}, + {"$x=(select 1)", + "$x = (\n\tSELECT\n\t\t1\n);\n"}, + {"select 1 in (select 1)", + "SELECT\n\t1 IN (\n\t\tSELECT\n\t\t\t1\n\t);\n"}, + {"select 1 in ((select 1))", + "SELECT\n\t1 IN (\n\t\t(\n\t\t\tSELECT\n\t\t\t\t1\n\t\t)\n\t);\n"}, + {"select 1 in (\nselect 1)", + "SELECT\n\t1 IN (\n\t\tSELECT\n\t\t\t1\n\t);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Cast) { + TCases cases = { + {"select cast(1 as string)","SELECT\n\tCAST(1 AS string);\n"}, + {"select bitcast(1 as int32)","SELECT\n\tBITCAST(1 AS int32);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(StructLiteral) { + TCases cases = { + {"select <||>","SELECT\n\t<||>;\n"}, + {"select <|a:1|>","SELECT\n\t<|a: 1|>;\n"}, + {"select <|a:1,b:2|>","SELECT\n\t<|a: 1, b: 2|>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(TableHints) { + TCases cases = { + {"select * from plato.T with schema(foo int32, bar list<string>) where key is not null", + "SELECT\n\t*\nFROM plato.T\n\tWITH SCHEMA (foo int32, bar list<string>)\nWHERE key IS NOT NULL;\n"}, + {"select * from plato.T with schema struct<foo:integer, Bar:list<string?>> where key<0", + "SELECT\n\t*\nFROM plato.T\n\tWITH SCHEMA struct<foo: integer, Bar: list<string?>>\nWHERE key < 0;\n"}, + {"select * from plato.T with (foo=bar, x=$y, a=(a, b, c), u='aaa', schema (foo int32, bar list<string>))", + "SELECT\n\t*\nFROM plato.T\n\tWITH (foo = bar, x = $y, a = (a, b, c), u = 'aaa', SCHEMA (foo int32, bar list<string>));\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(BoolAsVariableName) { + TCases cases = { + {"$ False = True; select $ False;", + "$False = TRUE;\n\nSELECT\n\t$False;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(WithSchemaEquals) { + TCases cases = { + {"select * from 
plato.T with (format= csv_with_names, schema=(year int32 Null, month String, day String not null, a Utf8, b Uint16));", + "SELECT\n\t*\nFROM plato.T\n\tWITH (format = csv_with_names, SCHEMA = (year int32 NULL, month String, day String NOT NULL, a Utf8, b Uint16));\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(SquareBrackets) { + TCases cases = { + {"select a[0]", + "SELECT\n\ta[0];\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineList) { + TCases cases = { + {"select [\n]", + "SELECT\n\t[\n\t];\n"}, + {"select [1\n]", + "SELECT\n\t[\n\t\t1\n\t];\n"}, + {"select [\n1]", + "SELECT\n\t[\n\t\t1\n\t];\n"}, + {"select [1,\n]", + "SELECT\n\t[\n\t\t1,\n\t];\n"}, + {"select [1\n,]", + "SELECT\n\t[\n\t\t1,\n\t];\n"}, + {"select [\n1,]", + "SELECT\n\t[\n\t\t1,\n\t];\n"}, + {"select [1,2,\n3,4]", + "SELECT\n\t[\n\t\t1, 2,\n\t\t3, 4\n\t];\n"}, + {"select [1,2,\n3,4,]", + "SELECT\n\t[\n\t\t1, 2,\n\t\t3, 4,\n\t];\n"}, + {"select [1,2\n,3,\n4\n,5]", + "SELECT\n\t[\n\t\t1, 2,\n\t\t3,\n\t\t4,\n\t\t5\n\t];\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineTuple) { + TCases cases = { + {"select (\n)", + "SELECT\n\t(\n\t);\n"}, + {"select (1,\n)", + "SELECT\n\t(\n\t\t1,\n\t);\n"}, + {"select (1\n,)", + "SELECT\n\t(\n\t\t1,\n\t);\n"}, + {"select (\n1,)", + "SELECT\n\t(\n\t\t1,\n\t);\n"}, + {"select (1,2,\n3,4)", + "SELECT\n\t(\n\t\t1, 2,\n\t\t3, 4\n\t);\n"}, + {"select (1,2,\n3,4,)", + "SELECT\n\t(\n\t\t1, 2,\n\t\t3, 4,\n\t);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineSet) { + TCases cases = { + {"select {\n}", + "SELECT\n\t{\n\t};\n"}, + {"select {1\n}", + "SELECT\n\t{\n\t\t1\n\t};\n"}, + {"select {\n1}", + "SELECT\n\t{\n\t\t1\n\t};\n"}, + {"select {1,\n}", + "SELECT\n\t{\n\t\t1,\n\t};\n"}, + {"select {1\n,}", + "SELECT\n\t{\n\t\t1,\n\t};\n"}, + {"select {\n1,}", + "SELECT\n\t{\n\t\t1,\n\t};\n"}, + {"select {1,2,\n3,4}", + "SELECT\n\t{\n\t\t1, 2,\n\t\t3, 4\n\t};\n"}, + {"select 
{1,2,\n3,4,}", + "SELECT\n\t{\n\t\t1, 2,\n\t\t3, 4,\n\t};\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineDict) { + TCases cases = { + {"select {0:1\n}", + "SELECT\n\t{\n\t\t0: 1\n\t};\n"}, + {"select {\n0:1}", + "SELECT\n\t{\n\t\t0: 1\n\t};\n"}, + {"select {0:1,\n}", + "SELECT\n\t{\n\t\t0: 1,\n\t};\n"}, + {"select {0:1\n,}", + "SELECT\n\t{\n\t\t0: 1,\n\t};\n"}, + {"select {\n0:1,}", + "SELECT\n\t{\n\t\t0: 1,\n\t};\n"}, + {"select {10:1,20:2,\n30:3,40:4}", + "SELECT\n\t{\n\t\t10: 1, 20: 2,\n\t\t30: 3, 40: 4\n\t};\n"}, + {"select {10:1,20:2,\n30:3,40:4,}", + "SELECT\n\t{\n\t\t10: 1, 20: 2,\n\t\t30: 3, 40: 4,\n\t};\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineFuncCall) { + TCases cases = { + {"select f(\n)", + "SELECT\n\tf(\n\t);\n"}, + {"select f(1\n)", + "SELECT\n\tf(\n\t\t1\n\t);\n"}, + {"select f(\n1)", + "SELECT\n\tf(\n\t\t1\n\t);\n"}, + {"select f(1,\n)", + "SELECT\n\tf(\n\t\t1,\n\t);\n"}, + {"select f(1\n,)", + "SELECT\n\tf(\n\t\t1,\n\t);\n"}, + {"select f(\n1,)", + "SELECT\n\tf(\n\t\t1,\n\t);\n"}, + {"select f(1,2,\n3,4)", + "SELECT\n\tf(\n\t\t1, 2,\n\t\t3, 4\n\t);\n"}, + {"select f(1,2,\n3,4,)", + "SELECT\n\tf(\n\t\t1, 2,\n\t\t3, 4,\n\t);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineStruct) { + TCases cases = { + {"select <|\n|>", + "SELECT\n\t<|\n\t|>;\n"}, + {"select <|a:1\n|>", + "SELECT\n\t<|\n\t\ta: 1\n\t|>;\n"}, + {"select <|\na:1|>", + "SELECT\n\t<|\n\t\ta: 1\n\t|>;\n"}, + {"select <|a:1,\n|>", + "SELECT\n\t<|\n\t\ta: 1,\n\t|>;\n"}, + {"select <|a:1\n,|>", + "SELECT\n\t<|\n\t\ta: 1,\n\t|>;\n"}, + {"select <|\na:1,|>", + "SELECT\n\t<|\n\t\ta: 1,\n\t|>;\n"}, + {"select <|a:1,b:2,\nc:3,d:4|>", + "SELECT\n\t<|\n\t\ta: 1, b: 2,\n\t\tc: 3, d: 4\n\t|>;\n"}, + {"select <|a:1,b:2,\nc:3,d:4,|>", + "SELECT\n\t<|\n\t\ta: 1, b: 2,\n\t\tc: 3, d: 4,\n\t|>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineListType) { + TCases cases = { + {"select 
list<int32\n>", + "SELECT\n\tlist<\n\t\tint32\n\t>;\n"}, + {"select list<\nint32>", + "SELECT\n\tlist<\n\t\tint32\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineOptionalType) { + TCases cases = { + {"select optional<int32\n>", + "SELECT\n\toptional<\n\t\tint32\n\t>;\n"}, + {"select optional<\nint32>", + "SELECT\n\toptional<\n\t\tint32\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineStreamType) { + TCases cases = { + {"select stream<int32\n>", + "SELECT\n\tstream<\n\t\tint32\n\t>;\n"}, + {"select stream<\nint32>", + "SELECT\n\tstream<\n\t\tint32\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineFlowType) { + TCases cases = { + {"select flow<int32\n>", + "SELECT\n\tflow<\n\t\tint32\n\t>;\n"}, + {"select flow<\nint32>", + "SELECT\n\tflow<\n\t\tint32\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineSetType) { + TCases cases = { + {"select set<int32\n>", + "SELECT\n\tset<\n\t\tint32\n\t>;\n"}, + {"select set<\nint32>", + "SELECT\n\tset<\n\t\tint32\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineTupleType) { + TCases cases = { + {"select tuple<\n>", + "SELECT\n\ttuple<\n\t\t \n\t>;\n"}, + {"select tuple<int32\n>", + "SELECT\n\ttuple<\n\t\tint32\n\t>;\n"}, + {"select tuple<\nint32>", + "SELECT\n\ttuple<\n\t\tint32\n\t>;\n"}, + {"select tuple<int32,\n>", + "SELECT\n\ttuple<\n\t\tint32,\n\t>;\n"}, + {"select tuple<int32\n,>", + "SELECT\n\ttuple<\n\t\tint32,\n\t>;\n"}, + {"select tuple<\nint32,>", + "SELECT\n\ttuple<\n\t\tint32,\n\t>;\n"}, + {"select tuple<\nint32,string,\ndouble,bool>", + "SELECT\n\ttuple<\n\t\tint32, string,\n\t\tdouble, bool\n\t>;\n"}, + {"select tuple<\nint32,string,\ndouble,bool,>", + "SELECT\n\ttuple<\n\t\tint32, string,\n\t\tdouble, bool,\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineStructType) { + TCases cases = { + {"select struct<\n>", + 
"SELECT\n\tstruct<\n\t\t \n\t>;\n"}, + {"select struct<a:int32\n>", + "SELECT\n\tstruct<\n\t\ta: int32\n\t>;\n"}, + {"select struct<\na:int32>", + "SELECT\n\tstruct<\n\t\ta: int32\n\t>;\n"}, + {"select struct<a:int32,\n>", + "SELECT\n\tstruct<\n\t\ta: int32,\n\t>;\n"}, + {"select struct<a:int32\n,>", + "SELECT\n\tstruct<\n\t\ta: int32,\n\t>;\n"}, + {"select struct<\na:int32,>", + "SELECT\n\tstruct<\n\t\ta: int32,\n\t>;\n"}, + {"select struct<\na:int32,b:string,\nc:double,d:bool>", + "SELECT\n\tstruct<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool\n\t>;\n"}, + {"select struct<\na:int32,b:string,\nc:double,d:bool,>", + "SELECT\n\tstruct<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool,\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineVariantOverTupleType) { + TCases cases = { + {"select variant<int32\n>", + "SELECT\n\tvariant<\n\t\tint32\n\t>;\n"}, + {"select variant<\nint32>", + "SELECT\n\tvariant<\n\t\tint32\n\t>;\n"}, + {"select variant<int32,\n>", + "SELECT\n\tvariant<\n\t\tint32,\n\t>;\n"}, + {"select variant<int32\n,>", + "SELECT\n\tvariant<\n\t\tint32,\n\t>;\n"}, + {"select variant<\nint32,>", + "SELECT\n\tvariant<\n\t\tint32,\n\t>;\n"}, + {"select variant<\nint32,string,\ndouble,bool>", + "SELECT\n\tvariant<\n\t\tint32, string,\n\t\tdouble, bool\n\t>;\n"}, + {"select variant<\nint32,string,\ndouble,bool,>", + "SELECT\n\tvariant<\n\t\tint32, string,\n\t\tdouble, bool,\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineVariantOverStructType) { + TCases cases = { + {"select variant<a:int32\n>", + "SELECT\n\tvariant<\n\t\ta: int32\n\t>;\n"}, + {"select variant<\na:int32>", + "SELECT\n\tvariant<\n\t\ta: int32\n\t>;\n"}, + {"select variant<a:int32,\n>", + "SELECT\n\tvariant<\n\t\ta: int32,\n\t>;\n"}, + {"select variant<a:int32\n,>", + "SELECT\n\tvariant<\n\t\ta: int32,\n\t>;\n"}, + {"select variant<\na:int32,>", + "SELECT\n\tvariant<\n\t\ta: int32,\n\t>;\n"}, + {"select 
variant<\na:int32,b:string,\nc:double,d:bool>", + "SELECT\n\tvariant<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool\n\t>;\n"}, + {"select variant<\na:int32,b:string,\nc:double,d:bool,>", + "SELECT\n\tvariant<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool,\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineEnum) { + TCases cases = { + {"select enum<a\n>", + "SELECT\n\tenum<\n\t\ta\n\t>;\n"}, + {"select enum<\na>", + "SELECT\n\tenum<\n\t\ta\n\t>;\n"}, + {"select enum<a,\n>", + "SELECT\n\tenum<\n\t\ta,\n\t>;\n"}, + {"select enum<a\n,>", + "SELECT\n\tenum<\n\t\ta,\n\t>;\n"}, + {"select enum<\na,>", + "SELECT\n\tenum<\n\t\ta,\n\t>;\n"}, + {"select enum<\na,b,\nc,d>", + "SELECT\n\tenum<\n\t\ta, b,\n\t\tc, d\n\t>;\n"}, + {"select enum<\na,b,\nc,d,>", + "SELECT\n\tenum<\n\t\ta, b,\n\t\tc, d,\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineResourceType) { + TCases cases = { + {"select resource<foo\n>", + "SELECT\n\tresource<\n\t\tfoo\n\t>;\n"}, + {"select resource<\nfoo>", + "SELECT\n\tresource<\n\t\tfoo\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineTaggedType) { + TCases cases = { + {"select tagged<int32,foo\n>", + "SELECT\n\ttagged<\n\t\tint32, foo\n\t>;\n"}, + {"select tagged<int32,\nfoo>", + "SELECT\n\ttagged<\n\t\tint32,\n\t\tfoo\n\t>;\n"}, + {"select tagged<int32\n,foo>", + "SELECT\n\ttagged<\n\t\tint32,\n\t\tfoo\n\t>;\n"}, + {"select tagged<\nint32,foo>", + "SELECT\n\ttagged<\n\t\tint32, foo\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineDictType) { + TCases cases = { + {"select dict<int32,string\n>", + "SELECT\n\tdict<\n\t\tint32, string\n\t>;\n"}, + {"select dict<int32,\nstring>", + "SELECT\n\tdict<\n\t\tint32,\n\t\tstring\n\t>;\n"}, + {"select dict<int32\n,string>", + "SELECT\n\tdict<\n\t\tint32,\n\t\tstring\n\t>;\n"}, + {"select dict<\nint32,string>", + "SELECT\n\tdict<\n\t\tint32, string\n\t>;\n"}, + }; + + TSetup setup; + 
setup.Run(cases); +} + +Y_UNIT_TEST(MultiLineCallableType) { + TCases cases = { + {"select callable<()->int32\n>", + "SELECT\n\tcallable<\n\t\t() -> int32\n\t>;\n"}, + {"select callable<\n()->int32>", + "SELECT\n\tcallable<\n\t\t() -> int32\n\t>;\n"}, + {"select callable<\n(int32)->int32>", + "SELECT\n\tcallable<\n\t\t(int32) -> int32\n\t>;\n"}, + {"select callable<\n(int32,\ndouble)->int32>", + "SELECT\n\tcallable<\n\t\t(\n\t\t\tint32,\n\t\t\tdouble\n\t\t) -> int32\n\t>;\n"}, + {"select callable<\n(int32\n,double)->int32>", + "SELECT\n\tcallable<\n\t\t(\n\t\t\tint32,\n\t\t\tdouble\n\t\t) -> int32\n\t>;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(UnaryOp) { + TCases cases = { + {"select -x,+x,~x,-1,-1.0,+1,+1.0,~1u", + "SELECT\n\t-x,\n\t+x,\n\t~x,\n\t-1,\n\t-1.0,\n\t+1,\n\t+1.0,\n\t~1u;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MatchRecognize) { + TCases cases = {{R"( +pragma FeatureR010="prototype"; +USE plato; +SELECT + * +FROM Input MATCH_RECOGNIZE( + PATTERN ( A ) + DEFINE A as A +); +)", +R"(PRAGMA FeatureR010 = "prototype"; +USE plato; + +SELECT + * +FROM Input MATCH_RECOGNIZE (PATTERN (A) DEFINE A AS A); +)" + }}; + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CreateTableTrailingComma) { + TCases cases = { + {"CREATE TABLE tableName (Key Uint32, PRIMARY KEY (Key),);", + "CREATE TABLE tableName (\n\tKey Uint32,\n\tPRIMARY KEY (Key),\n);\n"}, + {"CREATE TABLE tableName (Key Uint32,);", + "CREATE TABLE tableName (\n\tKey Uint32,\n);\n"}, + }; + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Union) { + TCases cases = { + {"select 1 union all select 2 union select 3 union all select 4 union select 5", + "SELECT\n\t1\nUNION ALL\nSELECT\n\t2\nUNION\nSELECT\n\t3\nUNION ALL\nSELECT\n\t4\nUNION\nSELECT\n\t5;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CommentAfterLastSelect) { + TCases cases = { + {"SELECT 1--comment\n", + "SELECT\n\t1--comment\n;\n"}, + {"SELECT 1\n\n--comment\n", + 
"SELECT\n\t1--comment\n;\n"}, + {"SELECT 1\n\n--comment", + "SELECT\n\t1--comment\n;\n"}, + {"SELECT * FROM Input\n\n\n\n/* comment */\n\n\n", + "SELECT\n\t*\nFROM Input/* comment */;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(WindowFunctionInsideExpr) { + TCases cases = { + {"SELECT CAST(ROW_NUMBER() OVER () AS String) AS x,\nFROM Input;", + "SELECT\n\tCAST(ROW_NUMBER() OVER () AS String) AS x,\nFROM Input;\n"}, + {"SELECT CAST(ROW_NUMBER() OVER (PARTITION BY key) AS String) AS x,\nFROM Input;", + "SELECT\n\tCAST(\n\t\tROW_NUMBER() OVER (\n\t\t\tPARTITION BY\n\t\t\t\tkey\n\t\t) AS String\n\t) AS x,\nFROM Input;\n"}, + {"SELECT CAST(ROW_NUMBER() OVER (users) AS String) AS x,\nFROM Input;", + "SELECT\n\tCAST(\n\t\tROW_NUMBER() OVER (\n\t\t\tusers\n\t\t) AS String\n\t) AS x,\nFROM Input;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ExistsExpr) { + TCases cases = { + {"SELECT EXISTS (SELECT 1);", + "SELECT\n\tEXISTS (\n\t\tSELECT\n\t\t\t1\n\t);\n"}, + {"SELECT CAST(EXISTS(SELECT 1) AS Int) AS x,\nFROM Input;", + "SELECT\n\tCAST(\n\t\tEXISTS (\n\t\t\tSELECT\n\t\t\t\t1\n\t\t) AS Int\n\t) AS x,\nFROM Input;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(LambdaInsideExpr) { + TCases cases = { + {"SELECT ListMap(AsList(1,2),($x)->{return $x+1});", + "SELECT\n\tListMap(\n\t\tAsList(1, 2), ($x) -> {\n\t\t\tRETURN $x + 1\n\t\t}\n\t);\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(CaseExpr) { + TCases cases = { + {"SELECT CASE WHEN 1 == 2 THEN 3 WHEN 4 == 5 THEN 6 WHEN 7 == 8 THEN 9 ELSE 10 END;", + "SELECT\n\tCASE\n\t\tWHEN 1 == 2\n\t\t\tTHEN 3\n\t\tWHEN 4 == 5\n\t\t\tTHEN 6\n\t\tWHEN 7 == 8\n\t\t\tTHEN 9\n\t\tELSE 10\n\tEND;\n"}, + {"SELECT CAST(CASE WHEN 1 == 2 THEN 3 WHEN 4 == 5 THEN 6 ELSE 10 END AS String);", + "SELECT\n\tCAST(\n\t\tCASE\n\t\t\tWHEN 1 == 2\n\t\t\t\tTHEN 3\n\t\t\tWHEN 4 == 5\n\t\t\t\tTHEN 6\n\t\t\tELSE 10\n\t\tEND AS String\n\t);\n"}, + {"SELECT CASE x WHEN 1 THEN 2 WHEN 3 
THEN 4 WHEN 5 THEN 6 ELSE 10 END;", + "SELECT\n\tCASE x\n\t\tWHEN 1\n\t\t\tTHEN 2\n\t\tWHEN 3\n\t\t\tTHEN 4\n\t\tWHEN 5\n\t\t\tTHEN 6\n\t\tELSE 10\n\tEND;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(MultiTokenOperations) { + TCases cases = { + {"$x = 1 >>| 2;", + "$x = 1 >>| 2;\n"}, + {"$x = 1 >> 2;", + "$x = 1 >> 2;\n"}, + {"$x = 1 ?? 2;", + "$x = 1 ?? 2;\n"}, + {"$x = 1 > /*comment*/ > /*comment*/ | 2;", + "$x = 1 >/*comment*/>/*comment*/| 2;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(OperatorNewlines) { + TCases cases = { + {"$x = TRUE\nOR\nFALSE;", + "$x = TRUE\n\tOR\n\tFALSE;\n"}, + {"$x = TRUE OR\nFALSE;", + "$x = TRUE OR\n\tFALSE;\n"}, + {"$x = TRUE\nOR FALSE;", + "$x = TRUE OR\n\tFALSE;\n"}, + {"$x = 1\n+2\n*3;", + "$x = 1 +\n\t2 *\n\t\t3;\n"}, + {"$x = 1\n+\n2\n*3\n*5\n+\n4;", + "$x = 1\n\t+\n\t2 *\n\t\t3 *\n\t\t5\n\t+\n\t4;\n"}, + {"$x = 1\n+2+3+4\n+5+6+7+\n\n8+9+10;", + "$x = 1 +\n\t2 + 3 + 4 +\n\t5 + 6 + 7 +\n\t8 + 9 + 10;\n"}, + {"$x = TRUE\nAND\nTRUE OR\nFALSE\nAND TRUE\nOR FALSE\nAND TRUE\nOR FALSE;", + "$x = TRUE\n\tAND\n\tTRUE OR\n\tFALSE AND\n\t\tTRUE OR\n\tFALSE AND\n\t\tTRUE OR\n\tFALSE;\n"}, + {"$x = 1 -- comment\n+ 2;", + "$x = 1-- comment\n\t+\n\t2;\n"}, + {"$x = 1 -- comment\n+ -- comment\n2;", + "$x = 1-- comment\n\t+-- comment\n\t2;\n"}, + {"$x = 1 + -- comment\n2;", + "$x = 1 +-- comment\n\t2;\n"}, + {"$x = 1\n>\n>\n|\n2;", + "$x = 1\n\t>>|\n\t2;\n"}, + {"$x = 1\n?? 
2 ??\n3\n??\n4 +\n5\n*\n6 +\n7 ??\n8;", + "$x = 1 ??\n\t2 ??\n\t3\n\t??\n\t4 +\n\t\t5\n\t\t\t*\n\t\t\t6 +\n\t\t7 ??\n\t8;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ObfuscateSelect) { + TCases cases = { + {"select 1;", + "SELECT\n\t0;\n"}, + {"select true;", + "SELECT\n\tFALSE;\n"}, + {"select 'foo';", + "SELECT\n\t'str';\n"}, + {"select 3.0;", + "SELECT\n\t0.0;\n"}, + {"select col;", + "SELECT\n\tid;\n"}, + {"select * from tab;", + "SELECT\n\t*\nFROM id;\n"}, + {"select cast(col as int32);", + "SELECT\n\tCAST(id AS int32);\n"}, + {"select func(col);", + "SELECT\n\tfunc(id);\n"}, + {"select mod::func(col);", + "SELECT\n\tmod::func(id);\n"}, + {"declare $a as int32;", + "DECLARE $id AS int32;\n"}, + {"select * from `logs/of/bob` where pwd='foo';", + "SELECT\n\t*\nFROM id\nWHERE id = 'str';\n"}, + {"select $f();", + "SELECT\n\t$id();\n"}, + }; + + TSetup setup; + setup.Run(cases, NSQLFormat::EFormatMode::Obfuscate); +} + +Y_UNIT_TEST(ObfuscatePragma) { + TCases cases = { + {"pragma a=1", + "PRAGMA id = 0;\n"}, + {"pragma a='foo';", + "PRAGMA id = 'str';\n"}, + {"pragma a=true;", + "PRAGMA id = FALSE;\n"}, + {"pragma a=$foo;", + "PRAGMA id = $id;\n"}, + {"pragma a=foo;", + "PRAGMA id = id;\n"}, + }; + + TSetup setup; + setup.Run(cases, NSQLFormat::EFormatMode::Obfuscate); +} + +Y_UNIT_TEST(CreateView) { + TCases cases = { + {"creAte vIEw TheView wiTh (security_invoker = trUE) As SELect 1", + "CREATE VIEW TheView WITH (security_invoker = TRUE) AS\nSELECT\n\t1;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(DropView) { + TCases cases = { + {"dRop viEW theVIEW", + "DROP VIEW theVIEW;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ResourcePoolOperations) { + TCases cases = { + {"creAte reSourCe poOl naMe With (a = \"b\")", + "CREATE RESOURCE POOL naMe WITH (a = \"b\");\n"}, + {"create resource pool eds with (a=\"a\",b=\"b\",c = true)", + "CREATE RESOURCE POOL eds WITH (\n\ta = \"a\",\n\tb = \"b\",\n\tc = 
TRUE\n);\n"}, + {"alTer reSOurcE poOl naMe resEt (b, c), seT (x=y, z=false)", + "ALTER RESOURCE POOL naMe\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"}, + {"alter resource pool eds reset (a), set (x=y)", + "ALTER RESOURCE POOL eds\n\tRESET (a),\n\tSET (x = y);\n"}, + {"dRop reSourCe poOl naMe", + "DROP RESOURCE POOL naMe;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(BackupCollectionOperations) { + TCases cases = { + {"creAte BackuP colLection `-naMe` wIth (a = \"b\")", + "CREATE BACKUP COLLECTION `-naMe` WITH (a = \"b\");\n"}, + {"creAte BackuP colLection `-naMe` DATabase wIth (a = \"b\")", + "CREATE BACKUP COLLECTION `-naMe` DATABASE WITH (a = \"b\");\n"}, + {"creAte BackuP colLection `-naMe` ( tabLe `tbl1` , TablE `tbl2`) wIth (a = \"b\")", + "CREATE BACKUP COLLECTION `-naMe` (TABLE `tbl1`, TABLE `tbl2`) WITH (a = \"b\");\n"}, + {"alTer bACKuP coLLECTION naMe resEt (b, c), seT (x=y, z=false)", + "ALTER BACKUP COLLECTION naMe\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"}, + {"alTer bACKuP coLLECTION naMe aDD DATAbase", + "ALTER BACKUP COLLECTION naMe\n\tADD DATABASE;\n"}, + {"alTer bACKuP coLLECTION naMe DRoP \n\n DaTAbase", + "ALTER BACKUP COLLECTION naMe\n\tDROP DATABASE;\n"}, + {"alTer bACKuP coLLECTION naMe add \n\n tablE\n\tsometable,drOp TABle `other`", + "ALTER BACKUP COLLECTION naMe\n\tADD TABLE sometable,\n\tDROP TABLE `other`;\n"}, + {"DROP backup collectiOn `/some/path`", + "DROP BACKUP COLLECTION `/some/path`;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Analyze) { + TCases cases = { + {"analyze table (col1, col2, col3)", + "ANALYZE table (col1, col2, col3);\n"}, + {"analyze table", + "ANALYZE table;\n"} + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(ResourcePoolClassifierOperations) { + TCases cases = { + {"creAte reSourCe poOl ClaSsiFIer naMe With (a = \"b\")", + "CREATE RESOURCE POOL CLASSIFIER naMe WITH (a = \"b\");\n"}, + {"create resource pool classifier eds with (a=\"a\",b=\"b\",c = 
true)", + "CREATE RESOURCE POOL CLASSIFIER eds WITH (\n\ta = \"a\",\n\tb = \"b\",\n\tc = TRUE\n);\n"}, + {"alTer reSOurcE poOl ClaSsiFIer naMe resEt (b, c), seT (x=y, z=false)", + "ALTER RESOURCE POOL CLASSIFIER naMe\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"}, + {"alter resource pool classifier eds reset (a), set (x=y)", + "ALTER RESOURCE POOL CLASSIFIER eds\n\tRESET (a),\n\tSET (x = y);\n"}, + {"dRop reSourCe poOl ClaSsiFIer naMe", + "DROP RESOURCE POOL CLASSIFIER naMe;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Backup) { + TCases cases = { + {"\tBaCKup\n\n TestCollection incremENTAl", + "BACKUP TestCollection INCREMENTAL;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + +Y_UNIT_TEST(Restore) { + TCases cases = { + {"resToRe\n\n\n TestCollection aT\n \t \n '2024-06-16_20-14-02'", + "RESTORE TestCollection AT '2024-06-16_20-14-02';\n"}, + }; + + TSetup setup; + setup.Run(cases); +} diff --git a/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp new file mode 100644 index 00000000000..63c08bb8b7e --- /dev/null +++ b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp @@ -0,0 +1,52 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "sql_format.h" + +#include <google/protobuf/arena.h> +#include <util/string/subst.h> +#include <util/string/join.h> + +namespace { + +using TCases = TVector<std::pair<TString, TString>>; + +struct TSetup { + TSetup() { + NSQLTranslation::TTranslationSettings settings; + settings.Arena = &Arena; + settings.Antlr4Parser = true; + Formatter = NSQLFormat::MakeSqlFormatter(settings); + } + + void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) { + for (const auto& c : cases) { + NYql::TIssues issues; + TString formatted; + auto res = Formatter->Format(c.first, formatted, issues, mode); + UNIT_ASSERT_C(res, issues.ToString()); + auto expected = c.second; + SubstGlobal(expected, "\t", 
TString(NSQLFormat::OneIndent, ' ')); + UNIT_ASSERT_NO_DIFF(formatted, expected); + + TString formatted2; + auto res2 = Formatter->Format(formatted, formatted2, issues); + UNIT_ASSERT_C(res2, issues.ToString()); + UNIT_ASSERT_NO_DIFF(formatted, formatted2); + + if (mode == NSQLFormat::EFormatMode::Pretty) { + auto mutatedQuery = NSQLFormat::MutateQuery(c.first); + auto res3 = Formatter->Format(mutatedQuery, formatted, issues); + UNIT_ASSERT_C(res3, issues.ToString()); + } + } + } + + google::protobuf::Arena Arena; + NSQLFormat::ISqlFormatter::TPtr Formatter; +}; + +} + +Y_UNIT_TEST_SUITE(CheckSqlFormatter) { + #include "sql_format_ut.h" +} diff --git a/yql/essentials/sql/v1/format/ut/ya.make b/yql/essentials/sql/v1/format/ut/ya.make new file mode 100644 index 00000000000..4c3ef65f965 --- /dev/null +++ b/yql/essentials/sql/v1/format/ut/ya.make @@ -0,0 +1,7 @@ +UNITTEST_FOR(yql/essentials/sql/v1/format) + +SRCS( + sql_format_ut.cpp +) + +END() diff --git a/yql/essentials/sql/v1/format/ut_antlr4/ya.make b/yql/essentials/sql/v1/format/ut_antlr4/ya.make new file mode 100644 index 00000000000..a0f9d710a8c --- /dev/null +++ b/yql/essentials/sql/v1/format/ut_antlr4/ya.make @@ -0,0 +1,7 @@ +UNITTEST_FOR(yql/essentials/sql/v1/format) + +SRCS( + sql_format_ut_antlr4.cpp +) + +END() diff --git a/yql/essentials/sql/v1/format/ya.make b/yql/essentials/sql/v1/format/ya.make new file mode 100644 index 00000000000..642addcb7b1 --- /dev/null +++ b/yql/essentials/sql/v1/format/ya.make @@ -0,0 +1,26 @@ +LIBRARY() + + +SRCS( + sql_format.cpp +) + +RESOURCE(DONT_PARSE yql/essentials/sql/v1/SQLv1.g.in SQLv1.g.in) +RESOURCE(DONT_PARSE yql/essentials/sql/v1/SQLv1Antlr4.g.in SQLv1Antlr4.g.in) + +PEERDIR( + yql/essentials/parser/lexer_common + yql/essentials/sql/settings + yql/essentials/sql/v1/lexer + yql/essentials/sql/v1/proto_parser + yql/essentials/core/sql_types + library/cpp/protobuf/util + library/cpp/resource +) + +END() + +RECURSE_FOR_TESTS( + ut + ut_antlr4 +) diff --git 
a/yql/essentials/sql/v1/insert.cpp b/yql/essentials/sql/v1/insert.cpp new file mode 100644 index 00000000000..181137457f1 --- /dev/null +++ b/yql/essentials/sql/v1/insert.cpp @@ -0,0 +1,443 @@ +#include "source.h" +#include "context.h" + +#include <yql/essentials/utils/yql_panic.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + /* Lookup table from the SQL-level write kind (ESQLWriteColumnMode) to EWriteColumnMode; note that InsertIntoWithTruncate maps to Renew. */ +static const TMap<ESQLWriteColumnMode, EWriteColumnMode> sqlIntoMode2WriteColumn = { + {ESQLWriteColumnMode::InsertInto, EWriteColumnMode::Insert}, + {ESQLWriteColumnMode::InsertOrAbortInto, EWriteColumnMode::InsertOrAbort}, + {ESQLWriteColumnMode::InsertOrIgnoreInto, EWriteColumnMode::InsertOrIgnore}, + {ESQLWriteColumnMode::InsertOrRevertInto, EWriteColumnMode::InsertOrRevert}, + {ESQLWriteColumnMode::UpsertInto, EWriteColumnMode::Upsert}, + {ESQLWriteColumnMode::ReplaceInto, EWriteColumnMode::Replace}, + {ESQLWriteColumnMode::InsertIntoWithTruncate, EWriteColumnMode::Renew}, + {ESQLWriteColumnMode::Update, EWriteColumnMode::Update}, + {ESQLWriteColumnMode::Delete, EWriteColumnMode::Delete}, +}; + /* Common base of the data-modification sources in this file: stores the target column names and rejects filtering, grouping and aggregation by reporting an error. */ +class TModifySourceBase: public ISource { +public: + /* pos is forwarded to ISource; columnsHint is copied into ColumnsHint. */ TModifySourceBase(TPosition pos, const TVector<TString>& columnsHint) + : ISource(pos) + , ColumnsHint(columnsHint) + { + } + + /* Always fails: reports that filtering is not allowed and returns false. */ bool AddFilter(TContext& ctx, TNodePtr filter) override { + Y_UNUSED(filter); + ctx.Error(Pos) << "Source does not allow filtering"; + return false; + } + + /* Always fails: reports that grouping is not allowed and returns false. */ bool AddGroupKey(TContext& ctx, const TString& column) override { + Y_UNUSED(column); + ctx.Error(Pos) << "Source does not allow grouping"; + return false; + } + + /* Always fails; the error is reported at the position of the aggregation node, hence the non-null check on aggr. */ bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override { + YQL_ENSURE(aggr); + ctx.Error(aggr->GetPos()) << "Source does not allow aggregation"; + return false; + } + + /* Never produces a filter node; both arguments are ignored. */ TNodePtr BuildFilter(TContext& ctx, const TString& label) override { + Y_UNUSED(ctx); + Y_UNUSED(label); + return nullptr; + } + + /* Never produces an aggregation node; both arguments are ignored. */ std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx) override { + Y_UNUSED(label); + Y_UNUSED(ctx); + 
return { nullptr, true }; + } + +protected: + /* Target column names, copied from the constructor argument. */ TVector<TString> ColumnsHint; + /* NOTE(review): the derived classes visible in this file each declare their own OperationHumanName, which hides this member; nothing visible here assigns it. */ TString OperationHumanName; +}; + /* Source holding one list of value expressions, one per hinted column. Build() yields a lambda over row whose body is AsStruct of (column, value) pairs. */ +class TUpdateByValues: public TModifySourceBase { +public: + TUpdateByValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values) + : TModifySourceBase(pos, columnsHint) + , OperationHumanName(operationHumanName) + , Values(values) + {} + + /* Fails with an error when the number of values differs from the number of hinted columns; otherwise initializes each value against src and stops at the first failure. */ bool DoInit(TContext& ctx, ISource* src) override { + if (ColumnsHint.size() != Values.size()) { + ctx.Error(Pos) << "VALUES have " << Values.size() << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size(); + return false; + } + for (auto& value: Values) { + if (!value->Init(ctx, src)) { + return false; + } + } + return true; + } + + /* Pairs ColumnsHint[i] with Values[i] positionally; the size equality checked in DoInit is asserted again here. */ TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + YQL_ENSURE(Values.size() == ColumnsHint.size()); + + auto structObj = Y("AsStruct"); + for (size_t i = 0; i < Values.size(); ++i) { + TString column = ColumnsHint[i]; + TNodePtr value = Values[i]; + + structObj = L(structObj, Q(Y(Q(column), value))); + } + + auto updateRow = BuildLambda(Pos, Y("row"), structObj); + return updateRow; + } + + /* Copy with the value nodes passed through CloneContainer. */ TNodePtr DoClone() const final { + return new TUpdateByValues(Pos, OperationHumanName, ColumnsHint, CloneContainer(Values)); + } +private: + /* Human-readable operation name used in the column-count error of DoInit. */ TString OperationHumanName; + +protected: + TVector<TNodePtr> Values; +}; + /* Source holding a list of VALUES rows, each row being a list of value expressions. */ +class TModifyByValues: public TModifySourceBase { +public: + /* Creates FakeSource, which DoInit uses to initialize the value expressions. */ TModifyByValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values) + : TModifySourceBase(pos, columnsHint) + , OperationHumanName(operationHumanName) + , Values(values) + { + FakeSource = BuildFakeSource(pos); + } + + /* Validates every row and keeps going after an error so that all rows get checked; returns false if any error was reported. The src argument is ignored. */ bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + bool hasError = false; + for (const auto& row: Values) { + if (ColumnsHint.empty()) { + ctx.Error(Pos) << OperationHumanName << " ... 
VALUES requires specification of table columns"; + hasError = true; + continue; + } + /* Each row must supply exactly one value per hinted column. */ if (ColumnsHint.size() != row.size()) { + ctx.Error(Pos) << "VALUES have " << row.size() << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size(); + hasError = true; + continue; + } + /* Values are initialized against FakeSource, not against src. */ for (auto& value: row) { + if (!value->Init(ctx, FakeSource.Get())) { + hasError = true; + continue; + } + } + } + return !hasError; + } + + /* Emits PersistableRepr over a quoted list of AsStruct rows; within a row, columns and values are paired positionally. */ TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + auto tuple = Y(); + for (const auto& row: Values) { + auto rowValues = Y("AsStruct"); // ordered struct + auto column = ColumnsHint.begin(); + for (auto value: row) { + rowValues = L(rowValues, Q(Y(BuildQuotedAtom(Pos, *column), value))); + ++column; + } + tuple = L(tuple, rowValues); + } + return Y("PersistableRepr", Q(tuple)); + } + + /* Clones each row through CloneContainer before constructing the copy. */ TNodePtr DoClone() const final { + TVector<TVector<TNodePtr>> clonedValues; + clonedValues.reserve(Values.size()); + for (auto cur: Values) { + clonedValues.push_back(CloneContainer(cur)); + } + return new TModifyByValues(Pos, OperationHumanName, ColumnsHint, clonedValues); + } + +private: + TString OperationHumanName; + TVector<TVector<TNodePtr>> Values; + TSourcePtr FakeSource; +}; + /* Source whose rows come from another source (Source); ColumnsHint, when non-empty, names the target columns. */ +class TModifyBySource: public TModifySourceBase { +public: + TModifyBySource(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, TSourcePtr source) + : TModifySourceBase(pos, columnsHint) + , OperationHumanName(operationHumanName) + , Source(std::move(source)) + {} + + /* Delegates to the wrapped source when it is set. */ void GetInputTables(TTableList& tableList) const override { + if (Source) { + return Source->GetInputTables(tableList); + } + } + + /* Initializes the wrapped source; when target columns were given and the source exposes an explicit column list, the column count and names are then checked. NOTE(review): Source is dereferenced unconditionally here although GetInputTables guards against null - confirm it can never be null. */ bool DoInit(TContext& ctx, ISource* src) override { + if (!Source->Init(ctx, src)) { + return false; + } + const size_t numColumns = ColumnsHint.size(); + if (numColumns) { + const auto sourceColumns = Source->GetColumns(); + /* Nothing to check when the columns are unavailable or flagged All / QualifiedAll. */ if (!sourceColumns || sourceColumns->All || sourceColumns->QualifiedAll) { + return true; + } + + if (numColumns != 
sourceColumns->List.size()) { + ctx.Error(Pos) << "SELECT have " << numColumns << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size(); + return false; + } + + TStringStream str; + bool mismatchFound = false; + for (size_t i = 0; i < numColumns; ++i) { + bool hasName = sourceColumns->NamedColumns[i]; + if (hasName) { + const auto& hintColumn = ColumnsHint[i]; + const auto& sourceColumn = sourceColumns->List[i]; + if (hintColumn != sourceColumn) { + if (!mismatchFound) { + str << "Column names in SELECT don't match column specification in parenthesis"; + mismatchFound = true; + } + str << ". \"" << hintColumn << "\" doesn't match \"" << sourceColumn << "\""; + } + } + } + if (mismatchFound) { + ctx.Warning(Pos, TIssuesIds::YQL_SOURCE_SELECT_COLUMN_MISMATCH) << str.Str(); + } + } + return true; + } + + TNodePtr Build(TContext& ctx) override { + auto input = Source->Build(ctx); + if (ColumnsHint.empty()) { + return input; + } + auto columns = Y(); + for (auto column: ColumnsHint) { + columns = L(columns, BuildQuotedAtom(Pos, column)); + } + const auto sourceColumns = Source->GetColumns(); + if (!sourceColumns || sourceColumns->All || sourceColumns->QualifiedAll || sourceColumns->HasUnnamed) { + // will try to resolve column mapping on type annotation stage + return Y("OrderedSqlRename", input, Q(columns)); + } + + YQL_ENSURE(sourceColumns->List.size() == ColumnsHint.size()); + auto srcColumn = Source->GetColumns()->List.begin(); + auto structObj = Y("AsStruct"); // ordered struct + for (auto column: ColumnsHint) { + structObj = L(structObj, Q(Y(BuildQuotedAtom(Pos, column), + Y("Member", "row", BuildQuotedAtom(Pos, *srcColumn)) + ))); + ++srcColumn; + } + return Y("AssumeColumnOrder", Y("OrderedMap", input, BuildLambda(Pos, Y("row"), structObj)), Q(columns)); + } + + TNodePtr DoClone() const final { + return new TModifyBySource(Pos, OperationHumanName, ColumnsHint, Source->CloneSource()); + } + + EOrderKind GetOrderKind() const final { + return 
// AST list node that assembles a write statement as a "block": it binds the
// data to the name "values", performs the write produced by BuildWriteTable,
// and returns "world".
//
// The same node serves several statements in this file: BuildWriteColumns
// passes a values source, while BuildUpdateColumns and BuildDelete construct
// it without values and attach a table source (and, for update, an update
// source) through ResetSource / ResetUpdate.
class TWriteColumnsNode: public TAstListNode {
public:
    TWriteColumnsNode(TPosition pos, TScopedStatePtr scoped,
                      const TTableRef& table, EWriteColumnMode mode, TSourcePtr values = nullptr, TNodePtr options = nullptr)
        : TAstListNode(pos)
        , Scoped(scoped)
        , Table(table)
        , Mode(mode)
        , Values(std::move(values))
        , Options(std::move(options))
    {
        // Used as the source when initializing the write-table node in DoInit.
        FakeSource = BuildFakeSource(pos);
    }

    // Sets the table source whose filter becomes the "filter" option.
    void ResetSource(TSourcePtr source) {
        TableSource = std::move(source);
    }

    // Sets the source whose Build() result becomes the "update" option.
    void ResetUpdate(TSourcePtr update) {
        Update = std::move(update);
    }

    // Initializes all attached parts and appends the resulting block to this
    // node. Returns false as soon as any part fails to initialize or build.
    bool DoInit(TContext& ctx, ISource* src) override {
        TTableList tableList;
        TNodePtr values;
        auto options = Y();
        if (Options) {
            if (!Options->Init(ctx, src)) {
                return false;
            }
            // User-supplied options become the base that "filter"/"update"
            // entries are appended to below.
            options = L(Options);
        }

        ISource* underlyingSrc = src;

        if (TableSource) {
            if (!TableSource->Init(ctx, src) || !TableSource->InitFilters(ctx)) {
                return false;
            }
            options = L(options, Q(Y(Q("filter"), TableSource->BuildFilterLambda())));
        }

        bool unordered = false;
        if (Values) {
            // Values are initialized against the table source (null when none
            // was attached), not against the outer src.
            if (!Values->Init(ctx, TableSource.Get())) {
                return false;
            }

            Values->GetInputTables(tableList);
            // From here on the input-tables node is initialized against the
            // values source instead of the outer one.
            underlyingSrc = Values.Get();
            values = Values->Build(ctx);
            if (!values) {
                return false;
            }
            unordered = (EOrderKind::None == Values->GetOrderKind());
        }

        TNodePtr node(BuildInputTables(Pos, tableList, false, Scoped));
        if (!node->Init(ctx, underlyingSrc)) {
            return false;
        }

        if (Update) {
            if (!Update->Init(ctx, TableSource.Get()) || !Update->InitFilters(ctx)) {
                return false;
            }
            options = L(options, Q(Y(Q("update"), Update->Build(ctx))));
        }

        // "values" is the name bound by the let-statements added below.
        auto write = BuildWriteTable(Pos, "values", Table, Mode, std::move(options), Scoped);
        if (!write->Init(ctx, FakeSource.Get())) {
            return false;
        }
        if (values) {
            node = L(node, Y("let", "values", values));
            // Wrap in Unordered only when the values source reported no order
            // and the context allows it for this table.
            if (unordered && ctx.UseUnordered(Table)) {
                node = L(node, Y("let", "values", Y("Unordered", "values")));
            }
        } else {
            // No data to write (update/delete forms): bind a Void placeholder.
            node = L(node, Y("let", "values", Y("Void")));
        }
        node = L(node, Y("let", "world", write));
        node = L(node, Y("return", "world"));

        Add("block", Q(node));
        return true;
    }

    // NOTE(review): returns an empty pointer, i.e. this node does not produce
    // a clone — confirm callers never rely on cloning a write node.
    TNodePtr DoClone() const final {
        return {};
    }

protected:
    TScopedStatePtr Scoped;
    TTableRef Table;
    TSourcePtr TableSource;  // optional; set via ResetSource
    EWriteColumnMode Mode;
    TSourcePtr Values;       // optional; data to write
    TSourcePtr Update;       // optional; set via ResetUpdate
    TSourcePtr FakeSource;
    TNodePtr Options;        // optional; user-supplied options
};
writeNode->ResetUpdate(std::move(values)); + return writeNode; +} + +TNodePtr BuildDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options) { + TIntrusivePtr<TWriteColumnsNode> writeNode = new TWriteColumnsNode(pos, scoped, table, EWriteColumnMode::Delete, nullptr, options); + writeNode->ResetSource(std::move(source)); + return writeNode; +} + + +class TEraseColumnsNode: public TAstListNode { +public: + TEraseColumnsNode(TPosition pos, const TVector<TString>& columns) + : TAstListNode(pos) + , Columns(columns) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(ctx); + Y_UNUSED(src); + + TNodePtr columnList = Y(); + for (const auto& column: Columns) { + columnList->Add(Q(column)); + } + + Add(Q(Y(Q("erase_columns"), Q(columnList)))); + + return true; + } + + TNodePtr DoClone() const final { + return new TEraseColumnsNode(GetPos(), Columns); + } + +private: + TVector<TString> Columns; +}; + + +TNodePtr BuildEraseColumns(TPosition pos, const TVector<TString>& columns) { + return new TEraseColumnsNode(pos, columns); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/join.cpp b/yql/essentials/sql/v1/join.cpp new file mode 100644 index 00000000000..de789569c76 --- /dev/null +++ b/yql/essentials/sql/v1/join.cpp @@ -0,0 +1,670 @@ +#include "source.h" +#include "context.h" + +#include <yql/essentials/utils/yql_panic.h> + +#include <library/cpp/charset/ci_string.h> +#include <util/generic/hash_set.h> +#include <util/string/cast.h> +#include <util/string/split.h> +#include <util/string/join.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +TString NormalizeJoinOp(const TString& joinOp) { + TVector<TString> joinOpsParts; + Split(joinOp, " ", joinOpsParts); + for (auto&x : joinOpsParts) { + x.to_title(); + } + + return JoinSeq("", joinOpsParts); +} + +struct TJoinDescr { + TString Op; + TJoinLinkSettings LinkSettings; + + struct TFullColumn { + ui32 Source; + TNodePtr 
// Resolves a column reference against the inputs of this join.
//
// Result:
//   true   - the column is artificial, is an asterisk, or was added to the
//            input whose label equals the column's source name;
//   false  - no input was selected by name, but at least one input accepted a
//            clone of the column (the column is marked as not reliable);
//   empty  - resolution failed: an asterisk used an unknown correlation name,
//            the selected input's AddColumn did not yield a value, or no
//            input accepted the column (an error is reported to ctx in the
//            first and last case).
TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
    ISource* srcByName = nullptr;
    if (column.IsArtificial()) {
        return true;
    }
    // Entered only when the source name converts to true (presumably: is
    // non-empty — confirm against TString's bool conversion).
    if (const auto sourceName = *column.GetSourceName()) {
        for (auto& source: Sources) {
            if (sourceName == source->GetLabel()) {
                srcByName = source.Get();
                break;
            }
        }
        if (!srcByName) {
            if (column.IsAsterisk()) {
                ctx.Error(column.GetPos()) << "Unknown correlation name for asterisk: " << sourceName;
                return {};
            }
            // \todo add warning, either mistake in correlation name, either it's a column
            // The unmatched source name is re-interpreted as the column name
            // itself, and the column is flagged accordingly.
            column.ResetColumn("", sourceName);
            column.SetUseSourceAsColumn();
            column.SetAsNotReliable();
        }
    }

    if (column.IsAsterisk()) {
        // With a count hint set, no input is asked to expose all columns.
        if (!column.GetCountHint()) {
            if (srcByName) {
                srcByName->AllColumns();
            } else {
                for (auto& source: Sources) {
                    source->AllColumns();
                }
            }
        }
        return true;
    }
    if (srcByName) {
        column.ResetAsReliable();
        if (!srcByName->AddColumn(ctx, column)) {
            return {};
        }
        // The asterisk case already returned above, so only KeysInitializing
        // decides here.
        if (!KeysInitializing && !column.IsAsterisk()) {
            column.SetUseSource();
        }
        return true;
    } else {
        // No input chosen by name: probe every input with a clone so the
        // original column node is not modified by the attempts.
        unsigned acceptedColumns = 0;
        TIntrusivePtr<TColumnNode> tryColumn = static_cast<TColumnNode*>(column.Clone().Get());
        tryColumn->SetAsNotReliable();
        // NOTE(review): assigned below but never read in this method.
        TString lastAcceptedColumnSource;
        for (auto& source: Sources) {
            if (source->AddColumn(ctx, *tryColumn)) {
                ++acceptedColumns;
                lastAcceptedColumnSource = source->GetLabel();
            }
        }
        if (!acceptedColumns) {
            TStringBuilder sb;
            const auto& fullColumnName = FullColumnName(column);
            sb << "Column " << fullColumnName << " is not fit to any source";
            // Append the first spelling suggestion any input can offer.
            for (auto& source: Sources) {
                if (const auto mistype = source->FindColumnMistype(fullColumnName)) {
                    sb << ". Did you mean " << mistype.GetRef() << "?";
                    break;
                }
            }
            ctx.Error(column.GetPos()) << sb;
            return {};
        } else {
            column.SetAsNotReliable();
        }
        return false;
    }
}
"And", expr, eq); + } else { + expr = eq; + } + } + if (expr && Sources.size() > 2) { + ctx.Error() << "Multi-way JOINs should be connected with ON clause instead of USING clause"; + return nullptr; + } + return expr; + } + + bool DoInit(TContext& ctx, ISource* src) override; + + void SetupJoin(const TString& opName, TNodePtr expr, const TJoinLinkSettings& linkSettings) override { + JoinOps.push_back(opName); + JoinExprs.push_back(expr); + JoinLinkSettings.push_back(linkSettings); + } + + bool IsStream() const override { + return AnyOf(Sources, [] (const TSourcePtr& s) { return s->IsStream(); }); + } + +protected: + static TString FullColumnName(const TColumnNode& column) { + auto sourceName = *column.GetSourceName(); + auto columnName = *column.GetColumnName(); + return sourceName ? DotJoin(sourceName, columnName) : columnName; + } + + bool InitKeysOrFilters(TContext& ctx, ui32 joinIdx, TNodePtr expr) { + const TString joinOp(JoinOps[joinIdx]); + const TJoinLinkSettings linkSettings(JoinLinkSettings[joinIdx]); + const TCallNode* op = nullptr; + if (expr) { + const TString opName(expr->GetOpName()); + if (opName != "==") { + ctx.Error(expr->GetPos()) << "JOIN ON expression must be a conjunction of equality predicates"; + return false; + } + + op = expr->GetCallNode(); + YQL_ENSURE(op, "Invalid JOIN equal operation node"); + YQL_ENSURE(op->GetArgs().size() == 2, "Invalid JOIN equal operation arguments"); + } + + ui32 idx = 0; + THashMap<TString, ui32> sources; + for (auto& source: Sources) { + auto label = source->GetLabel(); + if (!label) { + ctx.Error(source->GetPos()) << "JOIN: missing correlation name for source"; + return false; + } + sources.insert({ source->GetLabel(), idx }); + ++idx; + } + if (sources.size() != Sources.size()) { + ctx.Error(expr ? 
expr->GetPos() : Pos) << "JOIN: all correlation names must be different"; + return false; + } + + ui32 pos = 0; + ui32 leftArg = 0; + ui32 rightArg = 0; + ui32 leftSourceIdx = 0; + ui32 rightSourceIdx = 0; + const TString* leftSource = nullptr; + const TString* rightSource = nullptr; + const TString* sameColumnNamePtr = nullptr; + TSet<TString> joinedSources; + if (op) { + const TString* columnNamePtr = nullptr; + for (auto& arg : op->GetArgs()) { + const auto sourceNamePtr = arg->GetSourceName(); + if (!sourceNamePtr) { + ctx.Error(expr->GetPos()) << "JOIN: each equality predicate argument must depend on exactly one JOIN input"; + return false; + } + const auto sourceName = *sourceNamePtr; + if (sourceName.empty()) { + ctx.Error(expr->GetPos()) << "JOIN: column requires correlation name"; + return false; + } + auto it = sources.find(sourceName); + if (it != sources.end()) { + joinedSources.insert(sourceName); + if (it->second == joinIdx + 1) { + rightArg = pos; + rightSource = sourceNamePtr; + rightSourceIdx = it->second; + } + else if (it->second > joinIdx + 1) { + ctx.Error(expr->GetPos()) << "JOIN: can not use source: " << sourceName << " in equality predicate, it is out of current join scope"; + return false; + } + else { + leftArg = pos; + leftSource = sourceNamePtr; + leftSourceIdx = it->second; + } + } + else { + ctx.Error(expr->GetPos()) << "JOIN: unknown corellation name: " << sourceName; + return false; + } + if (!columnNamePtr) { + columnNamePtr = arg->GetColumnName(); + } else { + auto curColumnNamePtr = arg->GetColumnName(); + if (curColumnNamePtr && *curColumnNamePtr == *columnNamePtr) { + sameColumnNamePtr = columnNamePtr; + } + } + ++pos; + } + } else { + for (auto& x : sources) { + if (x.second == joinIdx) { + leftArg = pos; + leftSourceIdx = x.second; + joinedSources.insert(x.first); + } + else if (x.second = joinIdx + 1) { + rightArg = pos; + rightSourceIdx = x.second; + joinedSources.insert(x.first); + } + } + } + + if (joinedSources.size() == 
1) { + ctx.Error(expr ? expr->GetPos() : Pos) << "JOIN: different correlation names are required for joined tables"; + return false; + } + + if (op) { + if (joinedSources.size() != 2) { + ctx.Error(expr->GetPos()) << "JOIN ON expression must be a conjunction of equality predicates over at most two sources"; + return false; + } + if (!rightSource) { + ctx.Error(expr->GetPos()) << "JOIN ON equality predicate must have one of its arguments from the rightmost source"; + return false; + } + } + + KeysInitializing = true; + if (op) { + for (auto& arg : op->GetArgs()) { + if (!arg->Init(ctx, this)) { + return false; + } + } + + Y_DEBUG_ABORT_UNLESS(leftSource); + if (sameColumnNamePtr) { + SameKeyMap[*sameColumnNamePtr].insert(*leftSource); + SameKeyMap[*sameColumnNamePtr].insert(*rightSource); + } + } + + if (joinIdx == JoinDescrs.size()) { + TJoinDescr newDescr(joinOp); + newDescr.LinkSettings = linkSettings; + JoinDescrs.push_back(std::move(newDescr)); + } + + JoinDescrs.back().Keys.push_back({ { leftSourceIdx, op ? op->GetArgs()[leftArg] : nullptr}, + { rightSourceIdx, op ? 
op->GetArgs()[rightArg] : nullptr } }); + KeysInitializing = false; + return true; + } + + bool IsJoinKeysInitializing() const override { + return KeysInitializing; + } + +protected: + TVector<TString> JoinOps; + TVector<TNodePtr> JoinExprs; + TVector<TJoinLinkSettings> JoinLinkSettings; + TVector<TJoinDescr> JoinDescrs; + THashMap<TString, THashSet<TString>> SameKeyMap; + const TVector<TSourcePtr> Sources; + const TVector<bool> AnyFlags; + TColumns JoinedColumns; + bool KeysInitializing = false; + bool IsColumnDone = false; + + void FinishColumns() override { + if (IsColumnDone) { + return; + } + YQL_ENSURE(JoinOps.size()+1 == Sources.size()); + bool excludeNextSource = false; + decltype(JoinOps)::const_iterator opIter = JoinOps.begin(); + for (auto& src: Sources) { + if (excludeNextSource) { + excludeNextSource = false; + if (opIter != JoinOps.end()) { + ++opIter; + } + continue; + } + if (opIter != JoinOps.end()) { + auto joinOper = *opIter; + ++opIter; + if (joinOper == "LeftSemi" || joinOper == "LeftOnly") { + excludeNextSource = true; + } + if (joinOper == "RightSemi" || joinOper == "RightOnly") { + continue; + } + } + auto columnsPtr = src->GetColumns(); + if (!columnsPtr) { + continue; + } + TColumns upColumns; + upColumns.Merge(*columnsPtr); + upColumns.SetPrefix(src->GetLabel()); + JoinedColumns.Merge(upColumns); + } + IsColumnDone = true; + } +}; + +bool TJoinBase::DoInit(TContext& ctx, ISource* initSrc) { + for (auto& source: Sources) { + if (!source->Init(ctx, initSrc)) { + return false; + } + + auto src = source.Get(); + if (src->IsFlattenByExprs()) { + for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) { + if (!expr->Init(ctx, src)) { + return false; + } + } + } + } + + YQL_ENSURE(JoinOps.size() == JoinExprs.size(), "Invalid join exprs number"); + YQL_ENSURE(JoinOps.size() == JoinLinkSettings.size()); + + const TSet<TString> allowedJoinOps = {"Inner", "Left", "Right", "Full", "LeftOnly", "RightOnly", 
// Builds the "EquiJoin" call for this join.
//
// The result has the form
//   (EquiJoin '(<input> '<label>) ... <join tree> '(<options>))
// optionally wrapped in a Map/OrderedMap that removes helper columns which
// were added to the inputs for key expressions without a column name.
// Returns nullptr when an input, a flatten step or a key list fails to build.
TNodePtr Build(TContext& ctx) override {
    // (input label, generated column name) -> key expression; filled by
    // GetColumnNames for keys that are not plain columns.
    TMap<std::pair<TString, TString>, TNodePtr> extraColumns;
    TNodePtr joinTree;
    for (auto& descr: JoinDescrs) {
        // Each join nests the tree built so far as its left branch; the very
        // first join uses the label of its left input instead.
        auto leftBranch = joinTree;
        bool leftAny = false;
        if (!leftBranch) {
            leftBranch = BuildQuotedAtom(Pos, Sources[descr.Keys[0].first.Source]->GetLabel());
            leftAny = AnyFlags[descr.Keys[0].first.Source];
        }
        bool rightAny = AnyFlags[descr.Keys[0].second.Source];
        auto leftKeys = GetColumnNames(ctx, extraColumns, descr.Keys, true);
        auto rightKeys = GetColumnNames(ctx, extraColumns, descr.Keys, false);
        if (!leftKeys || !rightKeys) {
            return nullptr;
        }

        // Per-link options: at most one strategy entry, then ANY flags and
        // the compact flag.
        TNodePtr linkOptions = Y();
        if (TJoinLinkSettings::EStrategy::SortedMerge == descr.LinkSettings.Strategy) {
            linkOptions = L(linkOptions, Q(Y(Q("forceSortedMerge"))));
        } else if (TJoinLinkSettings::EStrategy::StreamLookup == descr.LinkSettings.Strategy) {
            linkOptions = L(linkOptions, Q(Y(Q("forceStreamLookup"))));
        } else if (TJoinLinkSettings::EStrategy::ForceMap == descr.LinkSettings.Strategy) {
            linkOptions = L(linkOptions, Q(Y(Q("join_algo"), Q("MapJoin"))));
        } else if (TJoinLinkSettings::EStrategy::ForceGrace == descr.LinkSettings.Strategy) {
            linkOptions = L(linkOptions, Q(Y(Q("join_algo"), Q("GraceJoin"))));
        }
        if (leftAny) {
            linkOptions = L(linkOptions, Q(Y(Q("left"), Q("any"))));
        }
        if (rightAny) {
            linkOptions = L(linkOptions, Q(Y(Q("right"), Q("any"))));
        }

        if (descr.LinkSettings.Compact) {
            linkOptions = L(linkOptions, Q(Y(Q("compact"))));
        }

        joinTree = Q(Y(
            Q(descr.Op),
            leftBranch,
            BuildQuotedAtom(Pos, Sources[descr.Keys[0].second.Source]->GetLabel()),
            leftKeys,
            rightKeys,
            Q(linkOptions)
        ));
    }

    TNodePtr equiJoin(Y("EquiJoin"));
    bool ordered = false;
    for (size_t i = 0; i < Sources.size(); ++i) {
        auto& source = Sources[i];
        auto sourceNode = source->Build(ctx);
        if (!sourceNode) {
            return nullptr;
        }
        // NOTE(review): the flag is named "ordered" but is taken from
        // ctx.UseUnordered(); it selects the Ordered* callables below —
        // confirm the intended polarity.
        const bool useOrderedForSource = ctx.UseUnordered(*source);
        ordered = ordered || useOrderedForSource;
        if (source->IsFlattenByColumns() || source->IsFlattenColumns()) {
            auto flatten = source->IsFlattenByColumns() ?
                source->BuildFlattenByColumns("row") :
                source->BuildFlattenColumns("row");

            if (!flatten) {
                return nullptr;
            }
            // The flatten steps are chained by rebinding the name "flatten".
            auto block = Y(Y("let", "flatten", sourceNode));

            if (source->IsFlattenByExprs()) {
                auto premap = source->BuildPreFlattenMap(ctx);
                if (!premap) {
                    return nullptr;
                }

                block = L(block, Y("let", "flatten", Y(useOrderedForSource ? "OrderedFlatMap" : "FlatMap", "flatten", BuildLambda(Pos, Y("row"), premap))));
            }

            block = L(block, Y("let", "flatten", Y(useOrderedForSource ? "OrderedFlatMap" : "FlatMap", "flatten", BuildLambda(Pos, Y("row"), flatten, "res"))));
            sourceNode = Y("block", Q(L(block, Y("return", "flatten"))));
        }
        // Add the helper key columns registered for this input. The map is
        // ordered by (label, name), so this input's entries are contiguous
        // starting at lower_bound({label, ""}).
        TNodePtr extraMembers;
        for (auto it = extraColumns.lower_bound({ source->GetLabel(), "" }); it != extraColumns.end(); ++it) {
            if (it->first.first != source->GetLabel()) {
                break;
            }
            if (!extraMembers) {
                extraMembers = Y();
            }
            extraMembers = L(
                extraMembers,
                Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(it->second->GetPos(), it->first.second), it->second))
            );
        }
        if (extraMembers) {
            sourceNode = Y(useOrderedForSource ? "OrderedMap" : "Map", sourceNode, BuildLambda(Pos, Y("row"), extraMembers, "row"));
        }
        sourceNode = Y("RemoveSystemMembers", sourceNode);
        equiJoin = L(equiJoin, Q(Y(sourceNode, BuildQuotedAtom(source->GetPos(), source->GetLabel()))));
    }
    // After the join, strip every helper column again; in the joined row it
    // is addressed by DotJoin(label, name).
    TNodePtr removeMembers;
    for(auto it: extraColumns) {
        if (!removeMembers) {
            removeMembers = Y();
        }
        removeMembers = L(
            removeMembers,
            Y("let", "row", Y("ForceRemoveMember", "row", BuildQuotedAtom(Pos, DotJoin(it.first.first, it.first.second))))
        );
    }
    auto options = Y();
    if (StrictJoinKeyTypes) {
        options = L(options, Q(Y(Q("strict_keys"))));
    }
    equiJoin = L(equiJoin, joinTree, Q(options));
    if (removeMembers) {
        equiJoin = Y(ordered ? "OrderedMap" : "Map", equiJoin, BuildLambda(Pos, Y("row"), removeMembers, "row"));
    }
    return equiJoin;
}
it.first.Column : it.second.Column; + if (!column) { + continue; + } + + if (column->GetColumnName()) { + columnName = *column->GetColumnName(); + } else { + TStringStream str; + str << "_equijoin_column_" << extraColumns.size(); + columnName = str.Str(); + extraColumns.insert({ std::make_pair(tableName, columnName), column }); + } + + res = L(res, BuildQuotedAtom(Pos, tableName)); + res = L(res, BuildQuotedAtom(Pos, columnName)); + } + + return Q(res); + } + + const bool StrictJoinKeyTypes; +}; + +TSourcePtr BuildEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes) { + return new TEquiJoin(pos, std::move(sources), std::move(anyFlags), strictJoinKeyTypes); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp new file mode 100644 index 00000000000..b6d2362f21f --- /dev/null +++ b/yql/essentials/sql/v1/lexer/lexer.cpp @@ -0,0 +1,77 @@ +#include "lexer.h" + +#include <yql/essentials/public/issue/yql_issue.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h> +#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> + +#if defined(_tsan_enabled_) +#include <util/system/mutex.h> +#endif + +namespace NALPDefault { +extern ANTLR_UINT8 *SQLv1ParserTokenNames[]; +} + +namespace NALPAnsi { +extern ANTLR_UINT8 *SQLv1ParserTokenNames[]; +} + + +namespace NSQLTranslationV1 { + +namespace { + +#if defined(_tsan_enabled_) +TMutex SanitizerSQLTranslationMutex; +#endif + +using NSQLTranslation::ILexer; + +class TV1Lexer : public ILexer { +public: + explicit 
TV1Lexer(bool ansi, bool antlr4) + : Ansi(ansi), Antlr4(antlr4) + { + } + + bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) override { + NYql::TIssues newIssues; +#if defined(_tsan_enabled_) + TGuard<TMutex> grd(SanitizerSQLTranslationMutex); +#endif + NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, ""); + if (Ansi && !Antlr4) { + NProtoAST::TLexerTokensCollector3<NALPAnsi::SQLv1Lexer> tokensCollector(query, (const char**)NALPAnsi::SQLv1ParserTokenNames, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + } else if (!Ansi && !Antlr4) { + NProtoAST::TLexerTokensCollector3<NALPDefault::SQLv1Lexer> tokensCollector(query, (const char**)NALPDefault::SQLv1ParserTokenNames, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + } else if (Ansi && Antlr4) { + NProtoAST::TLexerTokensCollector4<NALPAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + } else { + NProtoAST::TLexerTokensCollector4<NALPDefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName); + tokensCollector.CollectTokens(collector, onNextToken); + } + + issues.AddIssues(newIssues); + return !AnyOf(newIssues.begin(), newIssues.end(), [](auto issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; }); + } + +private: + const bool Ansi; + const bool Antlr4; +}; + +} // namespace + +NSQLTranslation::ILexer::TPtr MakeLexer(bool ansi, bool antlr4) { + return NSQLTranslation::ILexer::TPtr(new TV1Lexer(ansi, antlr4)); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h new file mode 100644 index 00000000000..fe0102be79c --- /dev/null +++ b/yql/essentials/sql/v1/lexer/lexer.h @@ -0,0 +1,9 @@ +#pragma once + +#include <yql/essentials/parser/lexer_common/lexer.h> + +namespace 
NSQLTranslationV1 { + +NSQLTranslation::ILexer::TPtr MakeLexer(bool ansi, bool antlr4); + +} diff --git a/yql/essentials/sql/v1/lexer/tsan.supp b/yql/essentials/sql/v1/lexer/tsan.supp new file mode 100644 index 00000000000..d8a9765b09e --- /dev/null +++ b/yql/essentials/sql/v1/lexer/tsan.supp @@ -0,0 +1 @@ +race:NALPDefault::SQLv1LexerCyclicDFA33::specialStateTransition diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make new file mode 100644 index 00000000000..5174f6f595b --- /dev/null +++ b/yql/essentials/sql/v1/lexer/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +PEERDIR( + yql/essentials/core/issue/protos + yql/essentials/parser/proto_ast/gen/v1 + yql/essentials/parser/proto_ast/gen/v1_ansi + yql/essentials/parser/proto_ast/gen/v1_antlr4 + yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4 +) + +SRCS( + lexer.cpp +) + +SUPPRESSIONS( + tsan.supp +) + +END() diff --git a/yql/essentials/sql/v1/list_builtin.cpp b/yql/essentials/sql/v1/list_builtin.cpp new file mode 100644 index 00000000000..c059768cb4d --- /dev/null +++ b/yql/essentials/sql/v1/list_builtin.cpp @@ -0,0 +1,142 @@ +#include "list_builtin.h" + +using namespace NYql; + +namespace NSQLTranslationV1 { + +TAstNode* TListBuiltin::Translate(TContext& ctx) const { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); +} + +TNodePtr TListBuiltin::GetIdentityLambda() { + return BuildLambda(Pos, Y("arg"), Y(), "arg"); +} + +bool TListSortBuiltin::DoInit(TContext& ctx, ISource* src) { + if (Args.size() < 1 || Args.size() > 2) { + ctx.Error(Pos) << OpName << " requires one or two parameters."; + return false; + } + if (!Args[0]->Init(ctx, src)) { + return false; + } + if (Args.size() == 2) { + if (!Args[1]->Init(ctx, src)) { + return false; + } + } else { + Args.push_back(GetIdentityLambda()); + } + Node = Y(OpName, Args[0], Y("Bool", Q(Asc ? 
"true" : "false")), Args[1]); + return true; +} + +bool TListExtractBuiltin::DoInit(TContext& ctx, ISource* src) { + if (Args.size() != 2) { + ctx.Error(Pos) << OpName << " requires exactly two parameters."; + return false; + } + + for (const auto& arg : Args) { + if (!arg->Init(ctx, src)) { + return false; + } + } + + Args[1] = MakeAtomFromExpression(Pos, ctx, Args[1]).Build(); + Node = Y(OpName, Args[0], Args[1]); + return true; +} + +bool TListProcessBuiltin::CheckArgs(TContext& ctx, ISource* src) { + if (Args.size() != 2 ) { + ctx.Error(Pos) << OpName << " requires exactly two parameters"; + return false; + } + + for (const auto& arg : Args) { + if (!arg->Init(ctx, src)) { + return false; + } + } + + return true; +} + +bool TListMapBuiltin::DoInit(TContext& ctx, ISource* src) { + if (!CheckArgs(ctx, src)) { + return false; + }; + Node = Y(OpName, Args[0], Args[1]); + + return true; +} + +bool TListFilterBuiltin::DoInit(TContext& ctx, ISource* src) { + if (!CheckArgs(ctx, src)) { + return false; + }; + Node = Y(OpName, Args[0], GetFilterLambda()); + return true; +} + +TNodePtr TListFilterBuiltin::GetFilterLambda() { + return BuildLambda(Pos, Y("item"), Y("Coalesce", Y("Apply", Args[1], "item"), Y("Bool", Q("false")))); +} + +bool TListCreateBuiltin::DoInit(TContext& ctx, ISource* src) { + if (Args.size() != 1) { + ctx.Error(Pos) << OpName << " requires only one parameter"; + return false; + } + if (!Args[0]->Init(ctx, src)) { + return false; + } + Node = Y("List", Y("ListType", Args[0])); + return true; +} + +void TListCreateBuiltin::DoUpdateState() const { + State.Set(ENodeState::Const); +} + +bool TDictCreateBuiltin::DoInit(TContext& ctx, ISource* src) { + if (Args.size() != 2) { + ctx.Error(Pos) << OpName << " requires two parameters"; + return false; + } + + for (ui32 i = 0; i < 2; ++i) { + if (!Args[i]->Init(ctx, src)) { + return false; + } + } + + Node = Y("Dict", Y("DictType", Args[0], Args[1])); + return true; +} + +void 
TDictCreateBuiltin::DoUpdateState() const { + State.Set(ENodeState::Const); +} + +bool TSetCreateBuiltin::DoInit(TContext& ctx, ISource* src) { + if (Args.size() != 1) { + ctx.Error(Pos) << OpName << " requires one parameter"; + return false; + } + + if (!Args[0]->Init(ctx, src)) { + return false; + } + + Node = Y("Dict", Y("DictType", Args[0], Y("VoidType"))); + return true; +} + +void TSetCreateBuiltin::DoUpdateState() const { + State.Set(ENodeState::Const); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/list_builtin.h b/yql/essentials/sql/v1/list_builtin.h new file mode 100644 index 00000000000..61646498b7b --- /dev/null +++ b/yql/essentials/sql/v1/list_builtin.h @@ -0,0 +1,160 @@ +#pragma once + +#include "node.h" +#include "context.h" + +#include <yql/essentials/ast/yql_type_string.h> + +#include <library/cpp/charset/ci_string.h> +#include <util/string/builder.h> +#include <util/string/cast.h> +#include <util/string/util.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +class TListBuiltin: public TCallNode { +public: + TListBuiltin(TPosition pos, + const TString& opName, + const TVector<TNodePtr>& args) + : TCallNode(pos, opName, args.size(), args.size(), args) + , OpName(opName) + , Args(args) + {} + + bool DoInit(TContext& ctx, ISource* src) override = 0; + + TAstNode* Translate(TContext& ctx) const override; + +protected: + const TString OpName; + TVector<TNodePtr> Args; + TNodePtr Node; + + inline TNodePtr GetIdentityLambda(); +}; + +class TListSortBuiltin final: public TListBuiltin { +public: + TListSortBuiltin(TPosition pos, const TVector<TNodePtr>& args, bool asc) + : TListBuiltin(pos, "ListSort", args) + , Asc(asc) + {} + + bool DoInit(TContext& ctx, ISource* src) override; + + TNodePtr DoClone() const final { + return new TListSortBuiltin(Pos, CloneContainer(Args), Asc); + } + +private: + const bool Asc; +}; + +class TListExtractBuiltin final: public TListBuiltin { +public: + TListExtractBuiltin(TPosition pos, 
const TVector<TNodePtr>& args) + : TListBuiltin(pos, "ListExtract", args) + {} + + bool DoInit(TContext& ctx, ISource* src) override; + + TNodePtr DoClone() const final { + return new TListExtractBuiltin(Pos, CloneContainer(Args)); + } +}; + +class TListProcessBuiltin: public TListBuiltin { +protected: + TListProcessBuiltin(TPosition pos, + const TString& opName, + const TVector<TNodePtr>& args) + : TListBuiltin(pos, opName, args) + {} + + bool CheckArgs(TContext& ctx, ISource* src); +}; + +class TListMapBuiltin final: public TListProcessBuiltin { +public: + TListMapBuiltin(TPosition pos, + const TVector<TNodePtr>& args, + bool flat) + : TListProcessBuiltin(pos, flat ? "ListFlatMap" : "ListMap", args) + , Flat(flat) + {} + + bool DoInit(TContext& ctx, ISource* src) override; + + TNodePtr DoClone() const final { + return new TListMapBuiltin(Pos, CloneContainer(Args), Flat); + } +private: + bool Flat; +}; + +class TListFilterBuiltin final: public TListProcessBuiltin { +public: + TListFilterBuiltin(TPosition pos, const TString& opName, + const TVector<TNodePtr>& args) + : TListProcessBuiltin(pos, opName, args) + {} + + + bool DoInit(TContext& ctx, ISource* src) override; + + TNodePtr DoClone() const final { + return new TListFilterBuiltin(Pos, OpName, CloneContainer(Args)); + } +protected: + virtual TNodePtr GetFilterLambda(); +}; + +class TListCreateBuiltin final: public TListBuiltin { +public: + TListCreateBuiltin(TPosition pos, + const TVector<TNodePtr>& args) + : TListBuiltin(pos, "ListCreate", args) + {} + + bool DoInit(TContext& ctx, ISource* src) override; + void DoUpdateState() const override; + + TNodePtr DoClone() const final { + return new TListCreateBuiltin(Pos, CloneContainer(Args)); + } +}; + +class TDictCreateBuiltin final: public TListBuiltin { +public: + TDictCreateBuiltin(TPosition pos, + const TVector<TNodePtr>& args) + : TListBuiltin(pos, "DictCreate", args) + {} + + bool DoInit(TContext& ctx, ISource* src) override; + void DoUpdateState() const 
override; + + TNodePtr DoClone() const final { + return new TDictCreateBuiltin(Pos, CloneContainer(Args)); + } +}; + +class TSetCreateBuiltin final: public TListBuiltin { +public: + TSetCreateBuiltin(TPosition pos, + const TVector<TNodePtr>& args) + : TListBuiltin(pos, "SetCreate", args) + {} + + bool DoInit(TContext& ctx, ISource* src) override; + void DoUpdateState() const override; + + TNodePtr DoClone() const final { + return new TSetCreateBuiltin(Pos, CloneContainer(Args)); + } +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/match_recognize.cpp b/yql/essentials/sql/v1/match_recognize.cpp new file mode 100644 index 00000000000..47055e2f3d7 --- /dev/null +++ b/yql/essentials/sql/v1/match_recognize.cpp @@ -0,0 +1,254 @@ +#include "match_recognize.h" +#include "source.h" +#include "context.h" + +namespace NSQLTranslationV1 { + +namespace { + +const auto VarDataName = "data"; +const auto VarMatchedVarsName = "vars"; +const auto VarLastRowIndexName = "lri"; + +} //namespace { + +class TMatchRecognize: public TAstListNode { +public: + TMatchRecognize( + TPosition pos, + ISource* source, + const TString& inputTable, + std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, + std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, + std::pair<TPosition, TVector<TNamedFunction>>&& measures, + std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, + std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo, + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, + std::pair<TPosition, TNodePtr>&& subset, + std::pair<TPosition, TVector<TNamedFunction>>&& definitions + ): TAstListNode(pos, {BuildAtom(pos, "block")}) + { + Add(BuildBlockStatements( + pos, + source, + inputTable, + std::move(partitioners), + std::move(sortSpecs), + std::move(measures), + std::move(rowsPerMatch), + std::move(skipTo), + std::move(pattern), + std::move(subset), + std::move(definitions) + )); + } +private: + 
TMatchRecognize(const TMatchRecognize& other) + : TAstListNode(other.Pos) + { + Nodes = CloneContainer(other.Nodes); + } + + TNodePtr BuildBlockStatements( + TPosition pos, + ISource* source, + const TString& inputTable, + std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, + std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, + std::pair<TPosition, TVector<TNamedFunction>>&& measures, + std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, + std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo, + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, + std::pair<TPosition, TNodePtr>&& subset, + std::pair<TPosition, TVector<TNamedFunction>>&& definitions + ) { + Y_UNUSED(pos); + + auto inputRowType = Y("ListItemType",Y("TypeOf", inputTable)); + + auto patternNode = Pattern(pattern.first, pattern.second); + + auto partitionColumns = Y(); + for (const auto& p: partitioners.second){ + partitionColumns->Add(BuildQuotedAtom(p.callable->GetPos(), p.name)); + } + partitionColumns = Q(partitionColumns); + auto partitionKeySelector = Y(); + for (const auto& p: partitioners.second){ + partitionKeySelector->Add(p.callable); + } + partitionKeySelector = BuildLambda(partitioners.first, Y("row"), Q(partitionKeySelector)); + + auto measureNames = Y(); + for (const auto& m: measures.second){ + measureNames->Add(BuildQuotedAtom(m.callable->GetPos(), m.name)); + } + TNodePtr measuresNode = Y("MatchRecognizeMeasures", inputRowType, patternNode, Q(measureNames)); + for (const auto& m: measures.second){ + measuresNode->Add(BuildLambda(m.callable->GetPos(), Y(VarDataName, VarMatchedVarsName), m.callable)); + } + auto defineNames = Y(); + for (const auto& d: definitions.second) { + defineNames->Add(BuildQuotedAtom(d.callable->GetPos(), d.name)); + } + + TNodePtr defineNode = Y("MatchRecognizeDefines", inputRowType, patternNode, Q(defineNames)); + for (const auto& d: definitions.second) { + 
defineNode->Add(BuildLambda(d.callable->GetPos(), Y(VarDataName, VarMatchedVarsName, VarLastRowIndexName), d.callable)); + } + + return Q(Y( + Y("let", "input", inputTable), + Y("let", "partitionKeySelector", partitionKeySelector), + Y("let", "partitionColumns", partitionColumns), + Y("let", "sortTraits", sortSpecs.second.empty()? Y("Void") : source->BuildSortSpec(sortSpecs.second, inputTable, true, false)), + Y("let", "measures", measuresNode), + Y("let", "rowsPerMatch", BuildQuotedAtom(rowsPerMatch.first, "RowsPerMatch_" + ToString(rowsPerMatch.second))), + Y("let", "skipTo", BuildTuple(skipTo.first, {Q("AfterMatchSkip_" + ToString(skipTo.second.To)), Q(ToString(skipTo.second.Var))})), + Y("let", "pattern", patternNode), + Y("let", "subset", subset.second ? subset.second : Q("")), + Y("let", "define", defineNode), + Y("let", "res", Y("MatchRecognize", + "input", + "partitionKeySelector", + "partitionColumns", + "sortTraits", + Y("MatchRecognizeParams", + "measures", + "rowsPerMatch", + "skipTo", + "pattern", + "define" + ) + )), + Y("return", "res") + )); + } + + TPtr PatternFactor(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternFactor& factor) { + return BuildTuple(pos, { + factor.Primary.index() == 0 ? 
+ BuildQuotedAtom(pos, std::get<0>(factor.Primary)) : + Pattern(pos, std::get<1>(factor.Primary)), + BuildQuotedAtom(pos, ToString(factor.QuantityMin)), + BuildQuotedAtom(pos, ToString(factor.QuantityMax)), + BuildQuotedAtom(pos, ToString(factor.Greedy)), + BuildQuotedAtom(pos, ToString(factor.Output)), + BuildQuotedAtom(pos, ToString(factor.Unused)) + }); + } + + + TPtr PatternTerm(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternTerm& term) { + auto factors = Y(); + for (const auto& f: term) + factors->Add(PatternFactor(pos, f)); + return Q(std::move(factors)); + } + + TPtr Pattern(const TPosition& pos, const NYql::NMatchRecognize::TRowPattern& pattern) { + TNodePtr patternNode = Y("MatchRecognizePattern"); + for (const auto& t: pattern) { + patternNode->Add(PatternTerm(pos, t)); + } + return patternNode; + } + + TPtr DoClone() const final{ + return new TMatchRecognize(*this); + } +}; + +TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISource* source){ + TNodePtr node = new TMatchRecognize( + Pos, + source, + std::move(inputTable), + std::move(Partitioners), + std::move(SortSpecs), + std::move(Measures), + std::move(RowsPerMatch), + std::move(SkipTo), + std::move(Pattern), + std::move(Subset), + std::move(Definitions) + ); + if (!node->Init(ctx, source)) + return nullptr; + return node; +} + +namespace { +const auto DefaultNavigatingFunction = "MatchRecognizeDefaultNavigating"; +} + +bool TMatchRecognizeVarAccessNode::DoInit(TContext& ctx, ISource* src) { + //If referenced var is the var that is currently being defined + //then it's a reference to the last row in a partition + Node = new TMatchRecognizeNavigate(ctx.Pos(), DefaultNavigatingFunction, TVector<TNodePtr>{this->Clone()}); + return Node->Init(ctx, src); +} + +bool TMatchRecognizeNavigate::DoInit(TContext& ctx, ISource* src) { + Y_UNUSED(src); + if (Args.size() != 1) { + ctx.Error(Pos) << "Exactly one argument is required in MATCH_RECOGNIZE navigation function"; + 
return false; + } + const auto varColumn = dynamic_cast<TMatchRecognizeVarAccessNode *>(Args[0].Get()); + if (not varColumn) { + ctx.Error(Pos) << "Row pattern navigation operations are applicable to row pattern variable only"; + return false; + } + const auto varData = BuildAtom(ctx.Pos(), VarDataName); + const auto varMatchedVars = BuildAtom(ctx.Pos(), VarMatchedVarsName); + const auto varLastRowIndex = BuildAtom(ctx.Pos(), VarLastRowIndexName); + + const auto matchedRanges = Y("Member", varMatchedVars, Q(varColumn->GetVar())); + TNodePtr navigatedRowIndex; + if (DefaultNavigatingFunction == Name) { + if (not varColumn->IsTheSameVar()) { + ctx.Error(Pos) << "Row pattern navigation function is required"; + return false; + } + navigatedRowIndex = varLastRowIndex; + } + else if ("PREV" == Name) { + if (not varColumn->IsTheSameVar()) { + ctx.Error(Pos) << "PREV relative to matched vars is not implemented yet"; + return false; + } + navigatedRowIndex = Y( + "-", + varLastRowIndex, + Y("Uint64", Q("1")) + ); + } else if ("FIRST" == Name) { + navigatedRowIndex = Y( + "Member", + Y("Head", matchedRanges), + Q("From") + ); + } else if ("LAST" == Name) { + navigatedRowIndex = Y( + "Member", + Y("Last", matchedRanges), + Q("To") + ); + } else { + ctx.Error(Pos) << "Internal logic error"; + return false; + } + Add("Member"); + Add( + Y( + "Lookup", + Y("ToIndexDict", varData), + navigatedRowIndex + ) + ), + Add(Q(varColumn->GetColumn())); + return true; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/match_recognize.h b/yql/essentials/sql/v1/match_recognize.h new file mode 100644 index 00000000000..b78c0faf65e --- /dev/null +++ b/yql/essentials/sql/v1/match_recognize.h @@ -0,0 +1,130 @@ +#pragma once +#include "node.h" +#include <yql/essentials/core/sql_types/match_recognize.h> +#include <util/generic/ptr.h> + +namespace NSQLTranslationV1 { + +struct TNamedFunction { + TNodePtr callable; //Callable with some free args + TString name; +}; + +enum 
class ERowsPerMatch { + OneRow, + AllRows +}; + +/* Holds the parsed parts of a MATCH_RECOGNIZE clause, each paired with its source position, until Build() is called. All parts are taken by rvalue reference and moved into the members. */ +class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> { +public: + TMatchRecognizeBuilder( + TPosition clausePos, + std::pair<TPosition, TVector<TNamedFunction>>&& partitioners, + std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs, + std::pair<TPosition, TVector<TNamedFunction>>&& measures, + std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch, + std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo, + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern, + std::pair<TPosition, TNodePtr>&& subset, + std::pair<TPosition, TVector<TNamedFunction>>&& definitions + ) + : Pos(clausePos) + , Partitioners(std::move(partitioners)) + , SortSpecs(std::move(sortSpecs)) + , Measures(std::move(measures)) + , RowsPerMatch(std::move(rowsPerMatch)) + , SkipTo(std::move(skipTo)) + , Pattern(std::move(pattern)) + , Subset(std::move(subset)) + /* A named rvalue reference is an lvalue, so std::move is needed here too; without it the vector is copied. */ + , Definitions(std::move(definitions)) + + {} + TNodePtr Build(TContext& ctx, TString&& inputTable, ISource* source); +private: + TPosition Pos; + std::pair<TPosition, TVector<TNamedFunction>> Partitioners; + std::pair<TPosition, TVector<TSortSpecificationPtr>> SortSpecs; + std::pair<TPosition, TVector<TNamedFunction>> Measures; + std::pair<TPosition, ERowsPerMatch> RowsPerMatch; + std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> SkipTo; + std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern; + std::pair<TPosition, TNodePtr> Subset; + std::pair<TPosition, TVector<TNamedFunction>> Definitions; +}; + +using TMatchRecognizeBuilderPtr=TIntrusivePtr<TMatchRecognizeBuilder> ; + +class TMatchRecognizeVarAccessNode: public INode { +public: + TMatchRecognizeVarAccessNode(TPosition pos, const TString& var, const TString& column, bool theSameVar) + : INode(pos) + , Var(var) + , TheSameVar(theSameVar) + , Column(column) + { + } + + TString GetVar() const { + return Var; + } + + bool IsTheSameVar() const { + return TheSameVar; + 
} + + TString GetColumn() const { + return Column; + } + + bool DoInit(TContext& ctx, ISource* src) override; + + TAstNode* Translate(TContext& ctx) const override { + return Node->Translate(ctx); + } + + TPtr DoClone() const override { + YQL_ENSURE(!Node, "TMatchRecognizeVarAccessNode::Clone: Node must not be initialized"); + auto copy = new TMatchRecognizeVarAccessNode(Pos, Var, Column, TheSameVar); + return copy; + } + +protected: + void DoUpdateState() const override { + YQL_ENSURE(Node); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_DEBUG_ABORT_UNLESS(Node); + Node->VisitTree(func, visited); + } + +private: + TNodePtr Node; + const TString Var; + const bool TheSameVar; //reference the same var as being defined by this expression; + const TString Column; +}; + +class TMatchRecognizeNavigate: public TAstListNode { +public: + TMatchRecognizeNavigate(TPosition pos, const TString& name, const TVector<TNodePtr>& args) + : TAstListNode(pos) + , Name(name) + , Args(args) + { + } + +private: + TNodePtr DoClone() const override { + return new TMatchRecognizeNavigate(GetPos(), Name, CloneContainer(Args)); + } + + bool DoInit(TContext& ctx, ISource* src) override; + +private: + const TString Name; + const TVector<TNodePtr> Args; +}; + +} // namespace NSQLTranslationV1 + diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp new file mode 100644 index 00000000000..c7cafda7a52 --- /dev/null +++ b/yql/essentials/sql/v1/node.cpp @@ -0,0 +1,3477 @@ +#include "node.h" +#include "source.h" +#include "context.h" + +#include <yql/essentials/ast/yql_ast_escaping.h> +#include <yql/essentials/ast/yql_expr.h> +#include <yql/essentials/core/sql_types/simple_types.h> +#include <yql/essentials/minikql/mkql_type_ops.h> +#include <yql/essentials/parser/pg_catalog/catalog.h> +#include <yql/essentials/utils/yql_panic.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include 
<library/cpp/charset/ci_string.h> +#include <util/generic/hash_set.h> +#include <util/stream/str.h> +#include <util/string/cast.h> +#include <util/string/escape.h> +#include <util/string/subst.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +TString ErrorDistinctWithoutCorrelation(const TString& column) { + return TStringBuilder() << "DISTINCT columns for JOIN in SELECT should have table aliases (correlation name)," + " add it if necessary to FROM section over 'AS <alias>' keyword and put it like '<alias>." << column << "'"; +} + +TString ErrorDistinctByGroupKey(const TString& column) { + return TStringBuilder() << "Unable to use DISTINCT by grouping column: " << column << ". You should leave one of them."; +} + +TTopicRef::TTopicRef(const TString& refName, const TDeferredAtom& cluster, TNodePtr keys) + : RefName(refName) + , Cluster(cluster) + , Keys(keys) +{ +} + +TColumnConstraints::TColumnConstraints(TNodePtr defaultExpr, bool nullable) + : DefaultExpr(defaultExpr) + , Nullable(nullable) +{ +} + + +TColumnSchema::TColumnSchema(TPosition pos, const TString& name, const TNodePtr& type, bool nullable, + TVector<TIdentifier> families, bool serial, TNodePtr defaultExpr, ETypeOfChange typeOfChange) + : Pos(pos) + , Name(name) + , Type(type) + , Nullable(nullable) + , Families(families) + , Serial(serial) + , DefaultExpr(defaultExpr) + , TypeOfChange(typeOfChange) +{ +} + +INode::INode(TPosition pos) + : Pos(pos) +{ +} + +INode::~INode() +{ +} + +TPosition INode::GetPos() const { + return Pos; +} + +const TString& INode::GetLabel() const { + return Label; +} + +TMaybe<TPosition> INode::GetLabelPos() const { + return LabelPos; +} + +void INode::SetLabel(const TString& label, TMaybe<TPosition> pos) { + Label = label; + LabelPos = pos; +} + +bool INode::IsImplicitLabel() const { + return ImplicitLabel; +} + +void INode::MarkImplicitLabel(bool isImplicitLabel) { + ImplicitLabel = isImplicitLabel; +} + +void INode::SetCountHint(bool isCount) { + 
State.Set(ENodeState::CountHint, isCount); +} + +bool INode::GetCountHint() const { + return State.Test(ENodeState::CountHint); +} + +bool INode::IsConstant() const { + return HasState(ENodeState::Const); +} + +bool INode::MaybeConstant() const { + return HasState(ENodeState::MaybeConst); +} + +bool INode::IsAggregated() const { + return HasState(ENodeState::Aggregated); +} + +bool INode::IsAggregationKey() const { + return HasState(ENodeState::AggregationKey); +} + +bool INode::IsOverWindow() const { + return HasState(ENodeState::OverWindow); +} + +bool INode::IsOverWindowDistinct() const { + return HasState(ENodeState::OverWindowDistinct); +} + +bool INode::IsNull() const { + return false; +} + +bool INode::IsLiteral() const { + return false; +} + +TString INode::GetLiteralType() const { + return ""; +} + +TString INode::GetLiteralValue() const { + return ""; +} + +bool INode::IsIntegerLiteral() const { + return false; +} + +INode::TPtr INode::ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const { + Y_UNUSED(ctx); + if (IsNull()) { + return BuildLiteralNull(pos); + } + return new TCallNodeImpl(pos, opName, { Clone() }); +} + +bool INode::IsAsterisk() const { + return false; +} + +const TString* INode::SubqueryAlias() const { + return nullptr; +} + +TString INode::GetOpName() const { + return TString(); +} + +const TString* INode::GetLiteral(const TString& type) const { + Y_UNUSED(type); + return nullptr; +} + +const TString* INode::GetColumnName() const { + return nullptr; +} + +void INode::AssumeColumn() { +} + +const TString* INode::GetSourceName() const { + return nullptr; +} + +const TString* INode::GetAtomContent() const { + return nullptr; +} + +bool INode::IsOptionalArg() const { + return false; +} + +size_t INode::GetTupleSize() const { + return 0; +} + +INode::TPtr INode::GetTupleElement(size_t index) const { + Y_UNUSED(index); + return nullptr; +} + +ITableKeys* INode::GetTableKeys() { + return nullptr; +} + +ISource* 
INode::GetSource() { + return nullptr; +} + +TVector<TNodePtr>* INode::ContentListPtr() { + return nullptr; +} + +bool INode::Init(TContext& ctx, ISource* src) { + if (State.Test(ENodeState::Failed)) { + return false; + } + + if (!State.Test(ENodeState::Initialized)) { + if (!DoInit(ctx, src)) { + State.Set(ENodeState::Failed); + return false; + } + State.Set(ENodeState::Initialized); + } + return true; +} + +bool INode::InitReference(TContext& ctx) { + Y_UNUSED(ctx); + return true; +} + +bool INode::DoInit(TContext& ctx, ISource* src) { + Y_UNUSED(ctx); + Y_UNUSED(src); + return true; +} + +TNodePtr INode::AstNode() const { + return new TAstListNodeImpl(Pos); +} + +TNodePtr INode::AstNode(TNodePtr node) const { + return node; +} + +TNodePtr INode::AstNode(const TString& str) const { + return new TAstAtomNodeImpl(Pos, str, TNodeFlags::Default); +} + +TNodePtr INode::AstNode(TAstNode* node) const { + return new TAstDirectNode(node); +} + +TNodePtr INode::Clone() const { + TNodePtr clone = DoClone(); + if (!clone) { + clone = const_cast<INode*>(this); + } else { + YQL_ENSURE(!State.Test(ENodeState::Initialized), "Clone should be for uninitialized or persistent node"); + clone->SetLabel(Label, LabelPos); + clone->MarkImplicitLabel(ImplicitLabel); + } + return clone; +} + +TAggregationPtr INode::GetAggregation() const { + return {}; +} + +void INode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) { + Y_UNUSED(ctx); + Y_UNUSED(src); + Y_UNUSED(exprs); +} + +INode::TPtr INode::WindowSpecFunc(const TPtr& type) const { + Y_UNUSED(type); + return {}; +} + +bool INode::SetViewName(TContext& ctx, TPosition pos, const TString& view) { + Y_UNUSED(pos); + Y_UNUSED(view); + ctx.Error() << "Node not support views"; + return false; +} + +bool INode::SetPrimaryView(TContext& ctx, TPosition pos) { + Y_UNUSED(pos); + ctx.Error() << "Node not support primary views"; + return false; +} + +void INode::UseAsInner() { + AsInner = true; +} + +void 
INode::DisableSort() { + DisableSort_ = true; +} + +bool INode::UsedSubquery() const { + return false; +} + +bool INode::IsSelect() const { + return false; +} + +bool INode::HasSelectResult() const { + return false; +} + +const TString* INode::FuncName() const { + return nullptr; +} + +const TString* INode::ModuleName() const { + return nullptr; +} + +bool INode::HasSkip() const { + return false; +} + +TColumnNode* INode::GetColumnNode() { + return nullptr; +} + +const TColumnNode* INode::GetColumnNode() const { + return nullptr; +} + +TTupleNode* INode::GetTupleNode() { + return nullptr; +} + +const TTupleNode* INode::GetTupleNode() const { + return nullptr; +} + +TCallNode* INode::GetCallNode() { + return nullptr; +} + +const TCallNode* INode::GetCallNode() const { + return nullptr; +} + +TStructNode* INode::GetStructNode() { + return nullptr; +} + +const TStructNode* INode::GetStructNode() const { + return nullptr; +} + +TAccessNode* INode::GetAccessNode() { + return nullptr; +} + +const TAccessNode* INode::GetAccessNode() const { + return nullptr; +} + +TLambdaNode* INode::GetLambdaNode() { + return nullptr; +} + +const TLambdaNode* INode::GetLambdaNode() const { + return nullptr; +} + +TUdfNode* INode::GetUdfNode() { + return nullptr; +} + +const TUdfNode* INode::GetUdfNode() const { + return nullptr; +} + +void INode::VisitTree(const TVisitFunc& func) const { + TVisitNodeSet visited; + VisitTree(func, visited); +} + +void INode::VisitTree(const TVisitFunc& func, TVisitNodeSet& visited) const { + if (visited.emplace(this).second && HasState(ENodeState::Initialized) && func(*this)) { + DoVisitChildren(func, visited); + } +} + +TNodePtr INode::ShallowCopy() const { + Y_DEBUG_ABORT_UNLESS(false, "Node is not copyable"); + return nullptr; +} + +void INode::DoUpdateState() const { +} + +void INode::PrecacheState() const { + if (State.Test(ENodeState::Failed)) { + return; + } + + /// Not work right now! 
It's better use Init at first, because some kind of update depend on it + /// \todo turn on and remove all issues + //Y_DEBUG_ABORT_UNLESS(State.Test(ENodeState::Initialized)); + if (State.Test(ENodeState::Precached)) { + return; + } + DoUpdateState(); + State.Set(ENodeState::Precached); +} + +void INode::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const { + Y_UNUSED(func); + Y_UNUSED(visited); +} + +void INode::DoAdd(TNodePtr node) { + Y_UNUSED(node); + Y_DEBUG_ABORT_UNLESS(false, "Node is not expandable"); +} + +bool IProxyNode::IsNull() const { + return Inner->IsNull(); +} + +/* Forwards the literal check to the wrapped node. It must call IsLiteral(), not IsNull(), so that a proxy over a literal reports itself as a literal. */ +bool IProxyNode::IsLiteral() const { + return Inner->IsLiteral(); +} + +TString IProxyNode::GetLiteralType() const { + return Inner->GetLiteralType(); +} + +TString IProxyNode::GetLiteralValue() const { + return Inner->GetLiteralValue(); +} + +bool IProxyNode::IsIntegerLiteral() const { + return Inner->IsIntegerLiteral(); +} + +INode::TPtr IProxyNode::ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const { + return Inner->ApplyUnaryOp(ctx, pos, opName); +} + +bool IProxyNode::IsAsterisk() const { + return Inner->IsAsterisk(); +} + +const TString* IProxyNode::SubqueryAlias() const { + return Inner->SubqueryAlias(); +} + +TString IProxyNode::GetOpName() const { + return Inner->GetOpName(); +} + +const TString* IProxyNode::GetLiteral(const TString& type) const { + return Inner->GetLiteral(type); +} + +const TString* IProxyNode::GetColumnName() const { + return Inner->GetColumnName(); +} + +void IProxyNode::AssumeColumn() { + Inner->AssumeColumn(); +} + +const TString* IProxyNode::GetSourceName() const { + return Inner->GetSourceName(); +} + +const TString* IProxyNode::GetAtomContent() const { + return Inner->GetAtomContent(); +} + +bool IProxyNode::IsOptionalArg() const { + return Inner->IsOptionalArg(); +} + +size_t IProxyNode::GetTupleSize() const { + return Inner->GetTupleSize(); +} + +INode::TPtr IProxyNode::GetTupleElement(size_t index) const { + return 
Inner->GetTupleElement(index); +} + +ITableKeys* IProxyNode::GetTableKeys() { + return Inner->GetTableKeys(); +} + +ISource* IProxyNode::GetSource() { + return Inner->GetSource(); +} + +TVector<INode::TPtr>* IProxyNode::ContentListPtr() { + return Inner->ContentListPtr(); +} + +TAggregationPtr IProxyNode::GetAggregation() const { + return Inner->GetAggregation(); +} + +void IProxyNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) { + Inner->CollectPreaggregateExprs(ctx, src, exprs); +} + +INode::TPtr IProxyNode::WindowSpecFunc(const TPtr& type) const { + return Inner->WindowSpecFunc(type); +} + +bool IProxyNode::SetViewName(TContext& ctx, TPosition pos, const TString& view) { + return Inner->SetViewName(ctx, pos, view); +} + +bool IProxyNode::SetPrimaryView(TContext& ctx, TPosition pos) { + return Inner->SetPrimaryView(ctx, pos); +} + +bool IProxyNode::UsedSubquery() const { + return Inner->UsedSubquery(); +} + +bool IProxyNode::IsSelect() const { + return Inner->IsSelect(); +} + +bool IProxyNode::HasSelectResult() const { + return Inner->HasSelectResult(); +} + +const TString* IProxyNode::FuncName() const { + return Inner->FuncName(); +} + +const TString* IProxyNode::ModuleName() const { + return Inner->ModuleName(); +} + +bool IProxyNode::HasSkip() const { + return Inner->HasSkip(); +} + +TColumnNode* IProxyNode::GetColumnNode() { + return Inner->GetColumnNode(); +} + +const TColumnNode* IProxyNode::GetColumnNode() const { + return static_cast<const INode*>(Inner.Get())->GetColumnNode(); +} + +TTupleNode* IProxyNode::GetTupleNode() { + return Inner->GetTupleNode(); +} + +const TTupleNode* IProxyNode::GetTupleNode() const { + return static_cast<const INode*>(Inner.Get())->GetTupleNode(); +} + +TCallNode* IProxyNode::GetCallNode() { + return Inner->GetCallNode(); +} + +const TCallNode* IProxyNode::GetCallNode() const { + return static_cast<const INode*>(Inner.Get())->GetCallNode(); +} + +TStructNode* IProxyNode::GetStructNode() 
{ + return Inner->GetStructNode(); +} + +const TStructNode* IProxyNode::GetStructNode() const { + return static_cast<const INode*>(Inner.Get())->GetStructNode(); +} + +TAccessNode* IProxyNode::GetAccessNode() { + return Inner->GetAccessNode(); +} + +const TAccessNode* IProxyNode::GetAccessNode() const { + return static_cast<const INode*>(Inner.Get())->GetAccessNode(); +} + +TLambdaNode* IProxyNode::GetLambdaNode() { + return Inner->GetLambdaNode(); +} + +const TLambdaNode* IProxyNode::GetLambdaNode() const { + return static_cast<const INode*>(Inner.Get())->GetLambdaNode(); +} + +TUdfNode* IProxyNode::GetUdfNode() { + return Inner->GetUdfNode(); +} + +const TUdfNode* IProxyNode::GetUdfNode() const { + return static_cast<const INode*>(Inner.Get())->GetUdfNode(); +} + +void IProxyNode::DoUpdateState() const { + static_assert(static_cast<int>(ENodeState::End) == 10, "Need to support new states here"); + State.Set(ENodeState::CountHint, Inner->GetCountHint()); + State.Set(ENodeState::Const, Inner->IsConstant()); + State.Set(ENodeState::MaybeConst, Inner->MaybeConstant()); + State.Set(ENodeState::Aggregated, Inner->IsAggregated()); + State.Set(ENodeState::AggregationKey, Inner->IsAggregationKey()); + State.Set(ENodeState::OverWindow, Inner->IsOverWindow()); + State.Set(ENodeState::OverWindowDistinct, Inner->IsOverWindowDistinct()); +} + +void IProxyNode::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const { + Inner->VisitTree(func, visited); +} + +bool IProxyNode::InitReference(TContext& ctx) { + return Inner->InitReference(ctx); +} + +bool IProxyNode::DoInit(TContext& ctx, ISource* src) { + return Inner->Init(ctx, src); +} + +void IProxyNode::DoAdd(TPtr node) { + Inner->Add(node); +} + +void MergeHints(TTableHints& base, const TTableHints& overrides) { + for (auto& i : overrides) { + base[i.first] = i.second; + } +} + +TTableHints CloneContainer(const TTableHints& hints) { + TTableHints result; + for (auto& [name, nodes] : hints) { + 
result.emplace(std::make_pair(name, CloneContainer(nodes))); + } + return result; +} + +TAstAtomNode::TAstAtomNode(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg) + : INode(pos) + , Content(content) + , Flags(flags) + , IsOptionalArg_(isOptionalArg) +{ +} + +TAstAtomNode::~TAstAtomNode() +{ +} + +void TAstAtomNode::DoUpdateState() const { + State.Set(ENodeState::Const); +} + +TAstNode* TAstAtomNode::Translate(TContext& ctx) const { + return TAstNode::NewAtom(Pos, Content, *ctx.Pool, Flags); +} + +const TString* TAstAtomNode::GetAtomContent() const { + return &Content; +} + +bool TAstAtomNode::IsOptionalArg() const { + return IsOptionalArg_; +} + +TAstDirectNode::TAstDirectNode(TAstNode* node) + : INode(node->GetPosition()) + , Node(node) +{ +} + +TAstNode* TAstDirectNode::Translate(TContext& ctx) const { + Y_UNUSED(ctx); + return Node; +} + +TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg) { + return new TAstAtomNodeImpl(pos, content, flags, isOptionalArg); +} + +TAstListNode::TAstListNode(TPosition pos) + : INode(pos) +{ +} + +TAstListNode::~TAstListNode() +{ +} + +bool TAstListNode::DoInit(TContext& ctx, ISource* src) { + for (auto& node: Nodes) { + if (!node->Init(ctx, src)) { + return false; + } + } + return true; +} + +TAstNode* TAstListNode::Translate(TContext& ctx) const { + TSmallVec<TAstNode*> children; + children.reserve(Nodes.size()); + auto listPos = Pos; + for (auto& node: Nodes) { + if (node) { + auto astNode = node->Translate(ctx); + if (!astNode) { + return nullptr; + } + children.push_back(astNode); + } else { + ctx.Error(Pos) << "Translation error: encountered empty TNodePtr"; + return nullptr; + } + } + + return TAstNode::NewList(listPos, children.data(), children.size(), *ctx.Pool); +} + +void TAstListNode::UpdateStateByListNodes(const TVector<TNodePtr>& nodes) const { + bool isConst = true; + struct TAttributesFlags { + bool has = false; + bool all = true; + }; + 
std::array<ENodeState, 3> checkStates = {{ENodeState::Aggregated, ENodeState::AggregationKey, ENodeState::OverWindow}}; + std::map<ENodeState, TAttributesFlags> flags; + for (auto& node: nodes) { + const bool isNodeConst = node->IsConstant(); + const bool isNodeMaybeConst = node->MaybeConstant(); + for (auto state: checkStates) { + if (node->HasState(state)) { + flags[state].has = true; + } else if (!isNodeConst && !isNodeMaybeConst) { + flags[state].all = false; + } + + if (!isNodeConst) { + isConst = false; + } + } + } + State.Set(ENodeState::Const, isConst); + for (auto& flag: flags) { + State.Set(flag.first, flag.second.has && flag.second.all); + } + State.Set(ENodeState::MaybeConst, !isConst && AllOf(nodes, [](const auto& node) { return node->IsConstant() || node->MaybeConstant(); })); +} + +void TAstListNode::DoUpdateState() const { + UpdateStateByListNodes(Nodes); +} + +void TAstListNode::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const { + for (auto& node : Nodes) { + node->VisitTree(func, visited); + } +} + +TAstListNode::TAstListNode(const TAstListNode& node) + : INode(node.Pos) + , Nodes(node.Nodes) +{ + Label = node.Label; + State = node.State; +} + +TAstListNode::TAstListNode(TPosition pos, TVector<TNodePtr>&& nodes) + : INode(pos) + , Nodes(std::move(nodes)) +{ + for (const auto& node: Nodes) { + YQL_ENSURE(node, "Null ptr passed as list element"); + } +} + +TNodePtr TAstListNode::ShallowCopy() const { + return new TAstListNodeImpl(Pos, Nodes); +} + +void TAstListNode::DoAdd(TNodePtr node) { + Y_DEBUG_ABORT_UNLESS(node); + Y_DEBUG_ABORT_UNLESS(node.Get() != this); + Nodes.push_back(node); +} + +TAstListNodeImpl::TAstListNodeImpl(TPosition pos) + : TAstListNode(pos) +{} + +TAstListNodeImpl::TAstListNodeImpl(TPosition pos, TVector<TNodePtr> nodes) + : TAstListNode(pos) +{ + for (const auto& node: nodes) { + YQL_ENSURE(node, "Null ptr passed as list element"); + } + Nodes.swap(nodes); +} + +void 
TAstListNodeImpl::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) { + for (auto& node : Nodes) { + node->CollectPreaggregateExprs(ctx, src, exprs); + } +} + +TNodePtr TAstListNodeImpl::DoClone() const { + return new TAstListNodeImpl(Pos, CloneContainer(Nodes)); +} + +TCallNode::TCallNode(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TAstListNode(pos) + , OpName(opName) + , MinArgs(minArgs) + , MaxArgs(maxArgs) + , Args(args) +{ + for (const auto& arg: Args) { + YQL_ENSURE(arg, "Null ptr passed as call argument"); + } +} + +TString TCallNode::GetOpName() const { + return OpName; +} + +const TString* DeriveCommonSourceName(const TVector<TNodePtr> &nodes) { + const TString* name = nullptr; + for (auto& node: nodes) { + auto n = node->GetSourceName(); + if (!n) { + continue; + } + if (name && *n != *name) { + return nullptr; + } + name = n; + } + return name; +} + + +const TString* TCallNode::GetSourceName() const { + return DeriveCommonSourceName(Args); +} + +const TVector<TNodePtr>& TCallNode::GetArgs() const { + return Args; +} + +void TCallNode::DoUpdateState() const { + UpdateStateByListNodes(Args); +} + +TString TCallNode::GetCallExplain() const { + auto derivedName = GetOpName(); + TStringBuilder sb; + sb << derivedName << "()"; + if (derivedName != OpName) { + sb << ", converted to " << OpName << "()"; + } + return std::move(sb); +} + +void TCallNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) { + for (auto& arg : Args) { + arg->CollectPreaggregateExprs(ctx, src, exprs); + } +} + +bool TCallNode::ValidateArguments(TContext& ctx) const { + const auto argsCount = static_cast<i32>(Args.size()); + if (MinArgs >= 0 && MaxArgs == MinArgs && argsCount != MinArgs) { + ctx.Error(Pos) << GetCallExplain() << " requires exactly " << MinArgs << " arguments, given: " << Args.size(); + return false; + } + + if (MinArgs >= 0 && argsCount < MinArgs) { 
+ ctx.Error(Pos) << GetCallExplain() << " requires at least " << MinArgs << " arguments, given: " << Args.size(); + return false; + } + + if (MaxArgs >= 0 && argsCount > MaxArgs) { + ctx.Error(Pos) << GetCallExplain() << " requires at most " << MaxArgs << " arguments, given: " << Args.size(); + return false; + } + + return true; +} + +bool TCallNode::DoInit(TContext& ctx, ISource* src) { + if (!ValidateArguments(ctx)) { + return false; + } + + bool hasError = false; + for (auto& arg: Args) { + if (!arg->Init(ctx, src)) { + hasError = true; + continue; + } + } + + if (hasError) { + return false; + } + + Nodes.push_back(BuildAtom(Pos, OpName, + OpName.cend() == std::find_if_not(OpName.cbegin(), OpName.cend(), [](char c) { return bool(std::isalnum(c)); }) ? TNodeFlags::Default : TNodeFlags::ArbitraryContent)); + Nodes.insert(Nodes.end(), Args.begin(), Args.end()); + return true; +} + +TCallNode* TCallNode::GetCallNode() { + return this; +} + +const TCallNode* TCallNode::GetCallNode() const { + return this; +} + +TCallNodeImpl::TCallNodeImpl(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, minArgs, maxArgs, args) +{} + +TCallNodeImpl::TCallNodeImpl(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, args.size(), args.size(), args) +{} + +TCallNode::TPtr TCallNodeImpl::DoClone() const { + return new TCallNodeImpl(GetPos(), OpName, MinArgs, MaxArgs, CloneContainer(Args)); +} + +TFuncNodeImpl::TFuncNodeImpl(TPosition pos, const TString& opName) + : TCallNode(pos, opName, 0, 0, {}) +{} + +TCallNode::TPtr TFuncNodeImpl::DoClone() const { + return new TFuncNodeImpl(GetPos(), OpName); +} + +const TString* TFuncNodeImpl::FuncName() const { + return &OpName; +} + +TCallNodeDepArgs::TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, minArgs, maxArgs, args) + 
, ReqArgsCount(reqArgsCount) +{} + +TCallNodeDepArgs::TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, args.size(), args.size(), args) + , ReqArgsCount(reqArgsCount) +{} + +TCallNode::TPtr TCallNodeDepArgs::DoClone() const { + return new TCallNodeDepArgs(ReqArgsCount, GetPos(), OpName, MinArgs, MaxArgs, CloneContainer(Args)); +} + +bool TCallNodeDepArgs::DoInit(TContext& ctx, ISource* src) { + if (!TCallNode::DoInit(ctx, src)) { + return false; + } + + for (ui32 i = 1 + ReqArgsCount; i < Nodes.size(); ++i) { + Nodes[i] = Y("DependsOn", Nodes[i]); + } + return true; +} + +TCallDirectRow::TPtr TCallDirectRow::DoClone() const { + return new TCallDirectRow(Pos, OpName, CloneContainer(Args)); +} + +TCallDirectRow::TCallDirectRow(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, minArgs, maxArgs, args) +{} + +TCallDirectRow::TCallDirectRow(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, 0, 0, args) +{} + +bool TCallDirectRow::DoInit(TContext& ctx, ISource* src) { + if (!src || (ctx.CompactNamedExprs && src->IsFake())) { + ctx.Error(Pos) << "Unable to use function: " << OpName << " without source"; + return false; + } + if (src->IsCompositeSource() || src->GetJoin() || src->HasAggregations() || src->IsFlattenByColumns() || src->IsOverWindowSource()) { + ctx.Error(Pos) << "Failed to use function: " << OpName << " with aggregation, join, flatten by or window functions"; + return false; + } + if (!TCallNode::DoInit(ctx, src)) { + return false; + } + Nodes.push_back(Y("DependsOn", "row")); + return true; +} + +void TCallDirectRow::DoUpdateState() const { + State.Set(ENodeState::Const, false); +} + +void TWinAggrEmulation::DoUpdateState() const { + State.Set(ENodeState::OverWindow, true); +} + +bool TWinAggrEmulation::DoInit(TContext& ctx, ISource* src) { + if (!src) { + 
ctx.Error(Pos) << "Unable to use window function " << OpName << " without source"; + return false; + } + + if (!src->IsOverWindowSource()) { + ctx.Error(Pos) << "Failed to use window function " << OpName << " without window specification"; + return false; + } + if (!src->AddFuncOverWindow(ctx, this)) { + ctx.Error(Pos) << "Failed to use window function " << OpName << " without window specification or in wrong place"; + return false; + } + + FuncAlias = "_yql_" + src->MakeLocalName(OpName); + src->AddTmpWindowColumn(FuncAlias); + if (!TCallNode::DoInit(ctx, src)) { + return false; + } + Nodes.clear(); + Add("Member", "row", Q(FuncAlias)); + return true; +} + +INode::TPtr TWinAggrEmulation::WindowSpecFunc(const TPtr& type) const { + auto result = Y(OpName, type); + for (const auto& arg: Args) { + result = L(result, arg); + } + return Q(Y(Q(FuncAlias), result)); +} + +TWinAggrEmulation::TWinAggrEmulation(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, minArgs, maxArgs, args) + , FuncAlias(opName) +{} + +TWinRowNumber::TWinRowNumber(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args) +{} + +TWinCumeDist::TWinCumeDist(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args) +{} + +bool TWinCumeDist::DoInit(TContext& ctx, ISource* src) { + if (!ValidateArguments(ctx)) { + return false; + } + + YQL_ENSURE(Args.size() == 0); + TVector<TNodePtr> optionsElements; + if (ctx.AnsiCurrentRow) { + optionsElements.push_back(BuildTuple(Pos, { BuildQuotedAtom(Pos, "ansi", NYql::TNodeFlags::Default) })); + } + Args.push_back(BuildTuple(Pos, optionsElements)); + + MinArgs = MaxArgs = 1; + if (!TWinAggrEmulation::DoInit(ctx, src)) { + return false; + } + + YQL_ENSURE(Args.size() == 1); + return true; +} + 
+TWinNTile::TWinNTile(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args) +{ + FakeSource = BuildFakeSource(pos); +} + +bool TWinNTile::DoInit(TContext& ctx, ISource* src) { + if (Args.size() >= 1 && !Args[0]->Init(ctx, FakeSource.Get())) { + return false; + } + + if (!TWinAggrEmulation::DoInit(ctx, src)) { + return false; + } + return true; +} + +TWinLeadLag::TWinLeadLag(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args) +{} + +bool TWinLeadLag::DoInit(TContext& ctx, ISource* src) { + if (Args.size() >= 2) { + if (!Args[1]->IsIntegerLiteral()) { + ctx.Error(Args[1]->GetPos()) << "Expected integer literal as second parameter of " << OpName << "( ) function"; + return false; + } + } + if (!TWinAggrEmulation::DoInit(ctx, src)) { + return false; + } + if (Args.size() >= 1) { + Args[0] = BuildLambda(Pos, Y("row"), Args[0]); + } + return true; +} + +TWinRank::TWinRank(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args) + : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args) +{ + +} + +bool TExternalFunctionConfig::DoInit(TContext& ctx, ISource* src) { + for (auto& param: Config) { + auto paramName = Y(BuildQuotedAtom(Pos, param.first)); + if (!param.second->Init(ctx, src)) { + return false; + } + Nodes.push_back(Q(L(paramName, param.second))); + } + return true; +} + +INode::TPtr TExternalFunctionConfig::DoClone() const { + TFunctionConfig cloned; + for (auto& [name, node] : Config) { + cloned[name] = SafeClone(node); + } + + return new TExternalFunctionConfig(GetPos(), cloned); +} + +bool TWinRank::DoInit(TContext& ctx, ISource* src) { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!src) { + ctx.Error(Pos) << "Unable to use window function: " << OpName << " without source"; + return false; + } + + auto 
winNamePtr = src->GetWindowName(); + if (!winNamePtr) { + ctx.Error(Pos) << "Failed to use window function: " << OpName << " without window"; + return false; + } + + auto winSpecPtr = src->FindWindowSpecification(ctx, *winNamePtr); + if (!winSpecPtr) { + return false; + } + + const auto& orderSpec = winSpecPtr->OrderBy; + if (orderSpec.empty()) { + if (Args.empty()) { + ctx.Warning(GetPos(), TIssuesIds::YQL_RANK_WITHOUT_ORDER_BY) << + OpName << "() is used with unordered window - all rows will be considered equal to each other"; + } else { + ctx.Warning(GetPos(), TIssuesIds::YQL_RANK_WITHOUT_ORDER_BY) << + OpName << "(<expression>) is used with unordered window - the result is likely to be undefined"; + } + } + + if (Args.empty()) { + for (const auto& spec: orderSpec) { + Args.push_back(spec->Clone()->OrderExpr); + } + + if (Args.size() != 1) { + Args = {BuildTuple(GetPos(), Args)}; + } + } + + YQL_ENSURE(Args.size() == 1); + + TVector<TNodePtr> optionsElements; + if (!ctx.AnsiRankForNullableKeys.Defined()) { + optionsElements.push_back(BuildTuple(Pos, { BuildQuotedAtom(Pos, "warnNoAnsi", NYql::TNodeFlags::Default) })); + } else if (*ctx.AnsiRankForNullableKeys) { + optionsElements.push_back(BuildTuple(Pos, { BuildQuotedAtom(Pos, "ansi", NYql::TNodeFlags::Default) })); + } + Args.push_back(BuildTuple(Pos, optionsElements)); + + MinArgs = MaxArgs = 2; + if (!TWinAggrEmulation::DoInit(ctx, src)) { + return false; + } + + YQL_ENSURE(Args.size() == 2); + Args[0] = BuildLambda(Pos, Y("row"), Args[0]); + return true; +} + +class TQuotedAtomNode: public TAstListNode { +public: + TQuotedAtomNode(TPosition pos, const TString& content, ui32 flags) + : TAstListNode(pos) + { + Add("quote", BuildAtom(pos, content, flags)); + } + +protected: + TQuotedAtomNode(const TQuotedAtomNode& other) + : TAstListNode(other.Pos) + { + Nodes = CloneContainer(other.Nodes); + } + TPtr DoClone() const final { + return new TQuotedAtomNode(*this); + } +}; + +TNodePtr BuildQuotedAtom(TPosition pos, 
const TString& content, ui32 flags) { + return new TQuotedAtomNode(pos, content, flags); +} + + +TNodePtr ITableKeys::AddView(TNodePtr key, const TViewDescription& view) { + if (view.PrimaryFlag) { + return L(key, Q(Y(Q("primary_view")))); + } else if (!view.empty()) { + return L(key, Q(Y(Q("view"), Y("String", BuildQuotedAtom(Pos, view.ViewName))))); + } else { + return key; + } +} + +TString TColumns::AddUnnamed() { + TString desiredResult = TStringBuilder() << "column" << List.size(); + if (!All) { + HasUnnamed = true; + List.emplace_back(); + NamedColumns.push_back(false); + } + return desiredResult; +} + +bool TColumns::Add(const TString* column, bool countHint, bool isArtificial, bool isReliable) { + if (!column || *column == "*") { + if (!countHint) { + SetAll(); + } + } else if (!All) { + if (column->EndsWith('*')) { + QualifiedAll = true; + } + + bool inserted = false; + if (isArtificial) { + inserted = Artificial.insert(*column).second; + } else { + inserted = Real.insert(*column).second; + } + if (!isReliable) { + HasUnreliable = true; + } + if (std::find(List.begin(), List.end(), *column) == List.end()) { + List.push_back(*column); + NamedColumns.push_back(true); + } + return inserted; + } + return All; +} + +void TColumns::Merge(const TColumns& columns) { + if (columns.All) { + SetAll(); + } else { + YQL_ENSURE(columns.List.size() == columns.NamedColumns.size()); + size_t myUnnamed = NamedColumns.size() - std::accumulate(NamedColumns.begin(), NamedColumns.end(), 0); + size_t otherUnnamed = 0; + for (size_t i = 0; i < columns.List.size(); ++i) { + auto& c = columns.List[i]; + if (!columns.NamedColumns[i]) { + if (++otherUnnamed > myUnnamed) { + AddUnnamed(); + ++myUnnamed; + } + continue; + } + if (columns.Real.contains(c)) { + Add(&c, false, false); + } + if (columns.Artificial.contains(c)) { + Add(&c, false, true); + } + } + HasUnreliable |= columns.HasUnreliable; + HasUnnamed |= columns.HasUnnamed; + } +} + +void TColumns::SetPrefix(const TString& 
prefix) { + Y_DEBUG_ABORT_UNLESS(!prefix.empty()); + auto addPrefixFunc = [&prefix](const TString& str) { + return prefix + "." + str; + }; + TSet<TString> newReal; + TSet<TString> newArtificial; + TVector<TString> newList; + std::transform(Real.begin(), Real.end(), std::inserter(newReal, newReal.begin()), addPrefixFunc); + std::transform(Artificial.begin(), Artificial.end(), std::inserter(newArtificial, newArtificial.begin()), addPrefixFunc); + std::transform(List.begin(), List.end(), std::back_inserter(newList), addPrefixFunc); + newReal.swap(Real); + newArtificial.swap(Artificial); + newList.swap(List); +} + +void TColumns::SetAll() { + All = true; + QualifiedAll = false; + Real.clear(); + List.clear(); + Artificial.clear(); + NamedColumns.clear(); + HasUnnamed = HasUnreliable = false; +} + +namespace { + +bool MaybeAutogenerated(const TString& name) { + TStringBuf prefix = "column"; + if (!name.StartsWith(prefix)) { + return false; + } + + TString suffix = name.substr(prefix.size()); + return !suffix.empty() && AllOf(suffix, [](const auto c) { return std::isdigit(c); }); +} + +bool MatchDotSuffix(const TSet<TString>& columns, const TString& column) { + for (const auto& col: columns) { + const auto pos = col.find_first_of("."); + if (pos == TString::npos) { + continue; + } + if (column == col.substr(pos + 1)) { + return true; + } + } + return false; +} + +} + +bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) const { + if (All || Real.contains(name) || Artificial.contains(name)) { + return true; + } + + if (ctx.SimpleColumns && !name.Contains('.') && (MatchDotSuffix(Real, name) || MatchDotSuffix(Artificial, name))) { + return true; + } + + if (QualifiedAll) { + if (ctx.SimpleColumns) { + return true; + } + if (HasUnnamed) { + const auto dotPos = name.find_first_of("."); + TString suffix = (dotPos == TString::npos) ? 
name : name.substr(dotPos + 1); + if (MaybeAutogenerated(suffix)) { + return true; + } + } + for (const auto& real: Real) { + const auto pos = real.find_first_of("*"); + if (pos == TString::npos) { + continue; + } + if (name.StartsWith(real.substr(0, pos))) { + return true; + } + } + } else if (HasUnnamed && MaybeAutogenerated(name)) { + return true; + } + return false; +} + +TSortSpecification::TSortSpecification(const TNodePtr& orderExpr, bool ascending) + : OrderExpr(orderExpr->Clone()) + , Ascending(ascending) + , CleanOrderExpr(orderExpr->Clone()) +{ +} + +TSortSpecificationPtr TSortSpecification::Clone() const { + return MakeIntrusive<TSortSpecification>(CleanOrderExpr, Ascending); +} + +TFrameBoundPtr TFrameBound::Clone() const { + auto res = MakeIntrusive<TFrameBound>(); + res->Pos = Pos; + res->Bound = SafeClone(Bound); + res->Settings = Settings; + return res; +} + +TFrameSpecificationPtr TFrameSpecification::Clone() const { + YQL_ENSURE(FrameBegin); + YQL_ENSURE(FrameEnd); + auto res = MakeIntrusive<TFrameSpecification>(); + res->FrameType = FrameType; + res->FrameBegin = FrameBegin->Clone(); + res->FrameEnd = FrameEnd->Clone(); + res->FrameExclusion = FrameExclusion; + return res; +} + +TWindowSpecificationPtr TWindowSpecification::Clone() const { + YQL_ENSURE(Frame); + auto res = MakeIntrusive<TWindowSpecification>(); + res->ExistingWindowName = ExistingWindowName; + res->Partitions = CloneContainer(Partitions); + res->IsCompact = IsCompact; + res->OrderBy = CloneContainer(OrderBy); + res->Session = SafeClone(Session); + res->Frame = Frame->Clone(); + return res; +} + +TWinSpecs CloneContainer(const TWinSpecs& specs) { + TWinSpecs newSpecs; + for (auto cur: specs) { + newSpecs.emplace(cur.first, cur.second->Clone()); + } + return newSpecs; +} + +TLegacyHoppingWindowSpecPtr TLegacyHoppingWindowSpec::Clone() const { + auto res = MakeIntrusive<TLegacyHoppingWindowSpec>(); + res->TimeExtractor = TimeExtractor->Clone(); + res->Hop = Hop->Clone(); + 
res->Interval = Interval->Clone(); + res->Delay = Delay->Clone(); + res->DataWatermarks = DataWatermarks; + return res; +} + +TColumnNode::TColumnNode(TPosition pos, const TString& column, const TString& source, bool maybeType) + : INode(pos) + , ColumnName(column) + , Source(source) + , MaybeType(maybeType) +{ +} + +TColumnNode::TColumnNode(TPosition pos, const TNodePtr& column, const TString& source) + : INode(pos) + , ColumnExpr(column) + , Source(source) +{ +} + +TColumnNode::~TColumnNode() +{ +} + +bool TColumnNode::IsAsterisk() const { + return ColumnName == "*"; +} + +bool TColumnNode::IsArtificial() const { + return Artificial; +} + +const TString* TColumnNode::GetColumnName() const { + return UseSourceAsColumn ? &Source : (ColumnExpr ? nullptr : &ColumnName); +} + +const TString* TColumnNode::GetSourceName() const { + return UseSourceAsColumn ? &Empty : &Source; +} + +TColumnNode* TColumnNode::GetColumnNode() { + return this; +} + +const TColumnNode* TColumnNode::GetColumnNode () const { + return this; +} + +bool TColumnNode::DoInit(TContext& ctx, ISource* src) { + if (src) { + YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized or Aggregated already invalid + if (src->ShouldUseSourceAsColumn(*GetSourceName())) { + if (!IsAsterisk() && IsReliable()) { + SetUseSourceAsColumn(); + } + } + + if (GetColumnName()) { + auto fullName = Source ? 
DotJoin(Source, *GetColumnName()) : *GetColumnName(); + auto alias = src->GetGroupByColumnAlias(fullName); + if (alias) { + ResetColumn(alias, {}); + } + Artificial = !Source && src->IsExprAlias(*GetColumnName()); + } + + if (!src->AddColumn(ctx, *this)) { + return false; + } + if (GetColumnName()) { + if (src->GetJoin() && Source) { + GroupKey = src->IsGroupByColumn(DotJoin(Source, *GetColumnName())); + } else { + GroupKey = src->IsGroupByColumn(*GetColumnName()) || src->IsAlias(EExprSeat::GroupBy, *GetColumnName()); + } + } + } + if (IsAsterisk()) { + Node = AstNode("row"); + } else { + TString callable; + if (MaybeType) { + callable = Reliable && !UseSource ? "SqlPlainColumnOrType" : "SqlColumnOrType"; + } else { + // TODO: consider replacing Member -> SqlPlainColumn + callable = Reliable && !UseSource ? "Member" : "SqlColumn"; + } + Node = Y(callable, "row", ColumnExpr ? Y("EvaluateAtom", ColumnExpr) : BuildQuotedAtom(Pos, *GetColumnName())); + if (UseSource) { + YQL_ENSURE(Source); + Node = L(Node, BuildQuotedAtom(Pos, Source)); + } + } + return Node->Init(ctx, src); +} + +void TColumnNode::SetUseSourceAsColumn() { + YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized or Aggregated already invalid + YQL_ENSURE(!IsAsterisk()); + UseSourceAsColumn = true; +} + +void TColumnNode::ResetAsReliable() { + Reliable = true; +} + +void TColumnNode::SetAsNotReliable() { + Reliable = false; +} + +void TColumnNode::SetUseSource() { + UseSource = true; +} + +bool TColumnNode::IsUseSourceAsColumn() const { + return UseSourceAsColumn; +} + +bool TColumnNode::IsUseSource() const { + return UseSource; +} + +bool TColumnNode::IsReliable() const { + return Reliable; +} + +bool TColumnNode::CanBeType() const { + return MaybeType; +} + +TNodePtr TColumnNode::DoClone() const { + YQL_ENSURE(!Node, "TColumnNode::Clone: Node should not be initialized"); + auto copy = ColumnExpr ? 
new TColumnNode(Pos, ColumnExpr, Source) : new TColumnNode(Pos, ColumnName, Source, MaybeType); + copy->GroupKey = GroupKey; + copy->Artificial = Artificial; + copy->Reliable = Reliable; + copy->UseSource = UseSource; + copy->UseSourceAsColumn = UseSourceAsColumn; + return copy; +} + +void TColumnNode::DoUpdateState() const { + State.Set(ENodeState::Const, false); + State.Set(ENodeState::MaybeConst, MaybeType); + State.Set(ENodeState::Aggregated, GroupKey); + State.Set(ENodeState::AggregationKey, GroupKey); +} + +TAstNode* TColumnNode::Translate(TContext& ctx) const { + return Node->Translate(ctx); +} + +void TColumnNode::ResetColumn(const TString& column, const TString& source) { + YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized + Reliable = true; + UseSource = false; + UseSourceAsColumn = false; + ColumnName = column; + ColumnExpr = nullptr; + Source = source; +} + +void TColumnNode::ResetColumn(const TNodePtr& column, const TString& source) { + YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized + Reliable = true; + UseSource = false; + UseSourceAsColumn = false; + ColumnName = ""; + ColumnExpr = column; + Source = source; +} + +const TString TColumnNode::Empty; + +TNodePtr BuildColumn(TPosition pos, const TString& column, const TString& source) { + bool maybeType = false; + return new TColumnNode(pos, column, source, maybeType); +} + +TNodePtr BuildColumn(TPosition pos, const TNodePtr& column, const TString& source) { + return new TColumnNode(pos, column, source); +} + +TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source) { + return column.GetLiteral() ? 
BuildColumn(pos, *column.GetLiteral(), source) : BuildColumn(pos, column.Build(), source); +} + +TNodePtr BuildColumnOrType(TPosition pos, const TString& column) { + TString source = ""; + bool maybeType = true; + return new TColumnNode(pos, column, source, maybeType); +} + +ITableKeys::ITableKeys(TPosition pos) + : INode(pos) +{ +} + +const TString* ITableKeys::GetTableName() const { + return nullptr; +} + +ITableKeys* ITableKeys::GetTableKeys() { + return this; +} + +TAstNode* ITableKeys::Translate(TContext& ctx) const { + Y_DEBUG_ABORT_UNLESS(false); + Y_UNUSED(ctx); + return nullptr; +} + +bool IAggregation::IsDistinct() const { + return !DistinctKey.empty(); +} + +void IAggregation::DoUpdateState() const { + State.Set(ENodeState::Aggregated, AggMode == EAggregateMode::Normal); + State.Set(ENodeState::OverWindow, AggMode == EAggregateMode::OverWindow); + State.Set(ENodeState::OverWindowDistinct, AggMode == EAggregateMode::OverWindowDistinct); +} + +const TString* IAggregation::GetGenericKey() const { + return nullptr; +} + +void IAggregation::Join(IAggregation*) { + YQL_ENSURE(false, "Should not be called"); +} + +const TString& IAggregation::GetName() const { + return Name; +} + +EAggregateMode IAggregation::GetAggregationMode() const { + return AggMode; +} + +void IAggregation::MarkKeyColumnAsGenerated() { + IsGeneratedKeyColumn = true; +} + +IAggregation::IAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode) + : INode(pos), Name(name), Func(func), AggMode(aggMode) +{} + +TAstNode* IAggregation::Translate(TContext& ctx) const { + Y_DEBUG_ABORT_UNLESS(false); + Y_UNUSED(ctx); + return nullptr; +} + +std::pair<TNodePtr, bool> IAggregation::AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const { + const bool distinct = AggMode == EAggregateMode::Distinct; + const auto listType = distinct ? 
Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type; + auto apply = GetApply(listType, many, allowAggApply, ctx); + if (!apply) { + return { nullptr, false }; + } + + auto wrapped = WrapIfOverState(apply, overState, many, ctx); + if (!wrapped) { + return { nullptr, false }; + } + + return { distinct ? + Q(Y(Q(Name), wrapped, BuildQuotedAtom(Pos, DistinctKey))) : + Q(Y(Q(Name), wrapped)), true }; +} + +TNodePtr IAggregation::WrapIfOverState(const TNodePtr& input, bool overState, bool many, TContext& ctx) const { + if (!overState) { + return input; + } + + auto extractor = GetExtractor(many, ctx); + if (!extractor) { + return nullptr; + } + + return Y(ToString("AggOverState"), extractor, BuildLambda(Pos, Y(), input)); +} + +void IAggregation::AddFactoryArguments(TNodePtr& apply) const { + Y_UNUSED(apply); +} + +std::vector<ui32> IAggregation::GetFactoryColumnIndices() const { + return {0u}; +} + +TNodePtr IAggregation::WindowTraits(const TNodePtr& type, TContext& ctx) const { + YQL_ENSURE(AggMode == EAggregateMode::OverWindow || AggMode == EAggregateMode::OverWindowDistinct, "Windows traits is unavailable"); + + const bool distinct = AggMode == EAggregateMode::OverWindowDistinct; + const auto listType = distinct ? 
Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type; + auto traits = Y(Q(Name), GetApply(listType, false, false, ctx)); + if (AggMode == EAggregateMode::OverWindowDistinct) { + traits->Add(BuildQuotedAtom(Pos, DistinctKey)); + } + + return Q(traits); +} + +namespace { +bool UnescapeQuoted(const TString& str, TPosition& pos, char quoteChar, TString& result, TString& error, bool utf8Aware) { + result = error = {}; + + size_t readBytes = 0; + TStringBuf atom(str); + TStringOutput sout(result); + atom.Skip(1); + result.reserve(str.size()); + + auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes); + if (unescapeResult != EUnescapeResult::OK) { + TTextWalker walker(pos, utf8Aware); + walker.Advance(atom.Trunc(readBytes)); + error = UnescapeResultToString(unescapeResult); + return false; + } + return true; +} + +TString UnescapeAnsiQuoted(const TString& str) { + YQL_ENSURE(str.length() >= 2); + YQL_ENSURE(str[0] == str[str.length() - 1]); + YQL_ENSURE(str[0] == '\'' || str[0] == '"'); + + TString quote(1, str[0]); + TString replace(2, str[0]); + + TString result = str.substr(1, str.length() - 2); + SubstGlobal(result, replace, quote); + return result; +} + +enum class EStringContentMode : int { + Default = 0, + AnsiIdent, + TypedStringLiteral, +}; + +TMaybe<TStringContent> +StringContentInternal(TContext& ctx, TPosition pos, const TString& input, EStringContentMode mode) { + TStringContent result; + if (mode == EStringContentMode::AnsiIdent) { + if (!(input.size() >= 2 && input.StartsWith('"') && input.EndsWith('"'))) { + ctx.Error(pos) << "Expected double quoted identifier, got string literal"; + return {}; + } + + result.Flags = NYql::TNodeFlags::ArbitraryContent; + result.Content = UnescapeAnsiQuoted(input); + return result; + } + + TString str = input; + if (mode == EStringContentMode::TypedStringLiteral) { + auto lower = to_lower(str); + if (lower.EndsWith("y")) { + str = str.substr(0, 
str.size() - 1); + result.Type = NKikimr::NUdf::EDataSlot::Yson; + } else if (lower.EndsWith("j")) { + str = str.substr(0, str.size() - 1); + result.Type = NKikimr::NUdf::EDataSlot::Json; + } else if (lower.EndsWith("p")) { + str = str.substr(0, str.size() - 1); + result.PgType = "PgText"; + } else if (lower.EndsWith("pt")) { + str = str.substr(0, str.size() - 2); + result.PgType = "PgText"; + } else if (lower.EndsWith("pb")) { + str = str.substr(0, str.size() - 2); + result.PgType = "PgBytea"; + } else if (lower.EndsWith("pv")) { + str = str.substr(0, str.size() - 2); + result.PgType = "PgVarchar"; + } else if (lower.EndsWith("s")) { + str = str.substr(0, str.size() - 1); + result.Type = NKikimr::NUdf::EDataSlot::String; + } else if (lower.EndsWith("u")) { + str = str.substr(0, str.size() - 1); + result.Type = NKikimr::NUdf::EDataSlot::Utf8; + } else { + if (ctx.Scoped->WarnUntypedStringLiterals) { + ctx.Warning(pos, TIssuesIds::YQL_UNTYPED_STRING_LITERALS) + << "Please add suffix u for Utf8 strings or s for arbitrary binary strings"; + } + + if (ctx.Scoped->UnicodeLiterals) { + result.Type = NKikimr::NUdf::EDataSlot::Utf8; + } + } + } + + if (mode == EStringContentMode::Default && (result.Type != NKikimr::NUdf::EDataSlot::String || result.PgType)) { + ctx.Error(pos) << "Type suffix is not allowed here"; + return {}; + } + + bool doubleQuoted = (str.StartsWith('"') && str.EndsWith('"')); + bool singleQuoted = !doubleQuoted && (str.StartsWith('\'') && str.EndsWith('\'')); + + if (str.size() >= 2 && (doubleQuoted || singleQuoted)) { + result.Flags = NYql::TNodeFlags::ArbitraryContent; + if (ctx.Settings.AnsiLexer) { + YQL_ENSURE(singleQuoted); + result.Content = UnescapeAnsiQuoted(str); + } else { + TString error; + if (!UnescapeQuoted(str, pos, str[0], result.Content, error, ctx.Settings.Antlr4Parser)) { + ctx.Error(pos) << "Failed to parse string literal: " << error; + return {}; + } + } + } else if (str.size() >= 4 && str.StartsWith("@@") && str.EndsWith("@@")) { 
+        result.Flags = TNodeFlags::MultilineContent;
+        TString s = str.substr(2, str.length() - 4); // drop the leading and trailing "@@"
+        SubstGlobal(s, "@@@@", "@@"); // every "@@@@" inside the body is replaced by "@@"
+        result.Content.swap(s);
+    } else {
+        ctx.Error(pos) << "Invalid string literal: " << EscapeC(str);
+        return {};
+    }
+
+    if (!result.PgType.Defined() && !NKikimr::NMiniKQL::IsValidStringValue(result.Type, result.Content)) { // value validation is skipped when a Pg type suffix was given
+        ctx.Error() << "Invalid value " << result.Content.Quote() << " for type " << result.Type;
+        return {};
+    }
+
+    return result;
+}
+} // namespace
+// Parses `input` through StringContentInternal in Default mode; with AnsiQuotedIdentifiers set, an input starting with '"' is reported as an error and an empty TMaybe is returned.
+TMaybe<TStringContent> StringContent(TContext& ctx, TPosition pos, const TString& input) {
+    if (ctx.AnsiQuotedIdentifiers && input.StartsWith('"')) {
+        ctx.Error() << "Expected string literal, got quoted identifier";
+        return {};
+    }
+
+    return StringContentInternal(ctx, pos, input, EStringContentMode::Default);
+}
+// Same parser as StringContent, but a double-quoted input under AnsiQuotedIdentifiers is parsed in AnsiIdent mode instead of being rejected.
+TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, const TString& input) {
+    return StringContentInternal(ctx, pos, input,
+        (ctx.AnsiQuotedIdentifiers && input.StartsWith('"'))? 
EStringContentMode::AnsiIdent : EStringContentMode::Default);
+}
+// Stores the column name, expression and optional column unit exactly as passed in.
+TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit)
+    : ColumnName(columnName)
+    , Expr(expr)
+    , ColumnUnit(columnUnit)
+{
+}
+// Returns the identifier text: inputs not starting with a backtick are returned unchanged, backtick-quoted ones are unescaped; on failure an error is reported via ctx and an empty string is returned.
+TString IdContent(TContext& ctx, const TString& s) {
+    YQL_ENSURE(!s.empty(), "Empty identifier not expected");
+    if (!s.StartsWith('`')) {
+        return s;
+    }
+    auto endSym = '`';
+    if (s.size() < 2 || !s.EndsWith(endSym)) {
+        ctx.Error() << "The identifier that starts with: '" << s[0] << "' should ends with: '" << endSym << "'";
+        return {};
+    }
+    size_t skipSymbols = 1;
+
+    TStringBuf atom(s.data() + skipSymbols, s.size() - 2 * skipSymbols + 1); // skips the opening backtick; the length (s.size() - 1) keeps the closing backtick in the view
+    TString unescapedStr;
+    TStringOutput sout(unescapedStr);
+    unescapedStr.reserve(s.size());
+
+    size_t readBytes = 0;
+    TPosition pos = ctx.Pos();
+    pos.Column += skipSymbols - 1;
+
+    auto unescapeResult = UnescapeArbitraryAtom(atom, endSym, &sout, &readBytes);
+    if (unescapeResult != EUnescapeResult::OK) {
+        TTextWalker walker(pos, ctx.Settings.Antlr4Parser); // NOTE(review): presumably the walker moves `pos` to the failing offset before it is reported - confirm against TTextWalker
+        walker.Advance(atom.Trunc(readBytes));
+        ctx.Error(pos) << "Cannot parse broken identifier: " << UnescapeResultToString(unescapeResult);
+        return {};
+    }
+
+    if (readBytes != atom.size()) { // the unescaper stopped before the end of the atom
+        ctx.Error() << "The identifier not parsed completely";
+        return {};
+    }
+
+    return unescapedStr;
+}
+// Parses a string token as an identifier (AnsiIdent mode); allowed only when AnsiQuotedIdentifiers is set, otherwise reports an error and returns an empty string.
+TString IdContentFromString(TContext& ctx, const TString& str) {
+    if (!ctx.AnsiQuotedIdentifiers) {
+        ctx.Error() << "String literal can not be used here";
+        return {};
+    }
+    auto parsed = StringContentInternal(ctx, ctx.Pos(), str, EStringContentMode::AnsiIdent);
+    if (!parsed) {
+        return {};
+    }
+
+    return parsed->Content;
+}
+
+// Placeholder node for a literal that failed to parse: DoInit always returns false and Translate returns nullptr.
+namespace {
+class TInvalidLiteralNode final: public INode {
+public:
+    TInvalidLiteralNode(TPosition pos)
+        : INode(pos)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* source) override {
+        Y_UNUSED(ctx);
+        Y_UNUSED(source);
+        return false;
+    }
+
+    TAstNode* Translate(TContext& ctx) const 
override {
+        Y_UNUSED(ctx);
+        return nullptr;
+    }
+
+    TPtr DoClone() const override {
+        return new TInvalidLiteralNode(GetPos());
+    }
+};
+
+}
+// Builds the Null literal (isNull == true) or the Void literal (isNull == false).
+TLiteralNode::TLiteralNode(TPosition pos, bool isNull)
+    : TAstListNode(pos)
+    , Null(isNull)
+    , Void(!isNull)
+{
+    Add(isNull ? "Null" : "Void");
+}
+// Typed literal: types starting with "Pg" become (PgConst value (PgType <lower-cased name without "Pg">)), all others become (Type value).
+TLiteralNode::TLiteralNode(TPosition pos, const TString& type, const TString& value)
+    : TAstListNode(pos)
+    , Null(false)
+    , Void(false)
+    , Type(type)
+    , Value(value)
+{
+    if (Type.StartsWith("Pg")) {
+        Add("PgConst", BuildQuotedAtom(Pos, Value), Y("PgType", Q(to_lower(Type.substr(2)))));
+    } else {
+        Add(Type, BuildQuotedAtom(Pos, Value));
+    }
+}
+// String literal whose atom is built with the given node flags.
+TLiteralNode::TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags)
+    : TAstListNode(pos)
+    , Null(false)
+    , Void(false)
+    , Type("String")
+    , Value(value)
+{
+    Add(Type, BuildQuotedAtom(pos, Value, nodeFlags));
+}
+// Typed literal with explicit node flags; "Pg"-prefixed types are handled the same way as in the (type, value) constructor.
+TLiteralNode::TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags, const TString& type)
+    : TAstListNode(pos)
+    , Null(false)
+    , Void(false)
+    , Type(type)
+    , Value(value)
+{
+    if (Type.StartsWith("Pg")) {
+        Add("PgConst", BuildQuotedAtom(Pos, Value, nodeFlags), Y("PgType", Q(to_lower(Type.substr(2)))));
+    } else {
+        Add(Type, BuildQuotedAtom(pos, Value, nodeFlags));
+    }
+}
+
+bool TLiteralNode::IsNull() const {
+    return Null;
+}
+// Returns a pointer to the stored value only when `type` equals this literal's type, nullptr otherwise.
+const TString* TLiteralNode::GetLiteral(const TString& type) const {
+    return type == Type ? &Value : nullptr;
+}
+
+bool TLiteralNode::IsLiteral() const {
+    return true;
+}
+
+TString TLiteralNode::GetLiteralType() const {
+    return Type;
+}
+
+TString TLiteralNode::GetLiteralValue() const {
+    return Value;
+}
+// A literal is always marked as a constant.
+void TLiteralNode::DoUpdateState() const {
+    State.Set(ENodeState::Const);
+}
+// Clones via the (pos, isNull) constructor for Null/Void and via (pos, type, value) otherwise; the child node list is then copied over.
+TNodePtr TLiteralNode::DoClone() const {
+    auto res = (Null || Void) ? 
MakeIntrusive<TLiteralNode>(Pos, Null) : MakeIntrusive<TLiteralNode>(Pos, Type, Value);
+    res->Nodes = Nodes;
+    return res;
+}
+// Numeric literal; `implicitType` is stored and consulted by ApplyUnaryOp when negating.
+template<typename T>
+TLiteralNumberNode<T>::TLiteralNumberNode(TPosition pos, const TString& type, const TString& value, bool implicitType)
+    : TLiteralNode(pos, type, value)
+    , ImplicitType(implicitType)
+{}
+
+template<typename T>
+TNodePtr TLiteralNumberNode<T>::DoClone() const {
+    return new TLiteralNumberNode<T>(Pos, Type, Value, ImplicitType);
+}
+// Checks that the textual value parses as T; reports an error at Pos and returns false otherwise.
+template<typename T>
+bool TLiteralNumberNode<T>::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    T val;
+    if (!TryFromString(Value, val)) {
+        ctx.Error(Pos) << "Failed to parse " << Value << " as integer literal of " << Type << " type: value out of range for " << Type;
+        return false;
+    }
+    return true;
+}
+
+template<typename T>
+bool TLiteralNumberNode<T>::IsIntegerLiteral() const {
+    return std::numeric_limits<T>::is_integer;
+}
+// Folds unary "Minus" into a non-negative integer literal; every other case is delegated to INode::ApplyUnaryOp.
+template<typename T>
+TNodePtr TLiteralNumberNode<T>::ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const {
+    YQL_ENSURE(!Value.empty());
+    if (opName == "Minus" && IsIntegerLiteral() && Value[0] != '-') {
+        if (ImplicitType) { // implicit type: pick Int32, then Int64, whichever can hold the negated value
+            ui64 val = FromString<ui64>(Value);
+            TString negated = "-" + Value;
+            if (val <= ui64(std::numeric_limits<i32>::max()) + 1) {
+                // negated value fits in Int32
+                i32 v;
+                YQL_ENSURE(TryFromString(negated, v));
+                return new TLiteralNumberNode<i32>(pos, Type.StartsWith("Pg") ? "PgInt4" : "Int32", negated);
+            }
+            if (val <= ui64(std::numeric_limits<i64>::max()) + 1) {
+                // negated value fits in Int64
+                i64 v;
+                YQL_ENSURE(TryFromString(negated, v));
+                return new TLiteralNumberNode<i64>(pos, Type.StartsWith("Pg") ? 
"PgInt8" : "Int64", negated);
+            }
+
+            ctx.Error(pos) << "Failed to parse negative integer: " << negated << ", number limit overflow";
+            return {};
+        }
+
+        if (std::numeric_limits<T>::is_signed) { // explicit signed type: keep the type and prepend the sign
+            return new TLiteralNumberNode<T>(pos, Type, "-" + Value);
+        }
+    }
+    return INode::ApplyUnaryOp(ctx, pos, opName);
+}
+
+// Explicit instantiations for the numeric types listed below.
+template class TLiteralNumberNode<i32>;
+template class TLiteralNumberNode<i64>;
+template class TLiteralNumberNode<ui32>;
+template class TLiteralNumberNode<ui64>;
+template class TLiteralNumberNode<float>;
+template class TLiteralNumberNode<double>;
+template class TLiteralNumberNode<ui8>;
+template class TLiteralNumberNode<i8>;
+template class TLiteralNumberNode<ui16>;
+template class TLiteralNumberNode<i16>;
+
+TNodePtr BuildLiteralNull(TPosition pos) {
+    return new TLiteralNode(pos, true);
+}
+
+TNodePtr BuildLiteralVoid(TPosition pos) {
+    return new TLiteralNode(pos, false);
+}
+// Builds a String literal from a raw token; returns a TInvalidLiteralNode when the token cannot be parsed.
+TNodePtr BuildLiteralSmartString(TContext& ctx, const TString& value) {
+    auto unescaped = StringContent(ctx, ctx.Pos(), value);
+    if (!unescaped) {
+        return new TInvalidLiteralNode(ctx.Pos());
+    }
+
+    YQL_ENSURE(unescaped->Type == NKikimr::NUdf::EDataSlot::String);
+    return new TLiteralNode(ctx.Pos(), unescaped->Content, unescaped->Flags, "String");
+}
+// Returns an identifier for a double-quoted token under AnsiQuotedIdentifiers, otherwise a typed string literal expression; empty TMaybe on parse failure.
+TMaybe<TExprOrIdent> BuildLiteralTypedSmartStringOrId(TContext& ctx, const TString& value) {
+    TExprOrIdent result;
+    if (ctx.AnsiQuotedIdentifiers && value.StartsWith('"')) {
+        auto unescaped = StringContentInternal(ctx, ctx.Pos(), value, EStringContentMode::AnsiIdent);
+        if (!unescaped) {
+            return {};
+        }
+        result.Ident = unescaped->Content;
+        return result;
+    }
+    auto unescaped = StringContentInternal(ctx, ctx.Pos(), value, EStringContentMode::TypedStringLiteral);
+    if (!unescaped) {
+        return {};
+    }
+
+    TString type = unescaped->PgType ? 
*unescaped->PgType : ToString(unescaped->Type);
+    result.Expr = new TLiteralNode(ctx.Pos(), unescaped->Content, unescaped->Flags, type);
+    return result;
+}
+
+// Builds a Utf8 or String literal from an already unescaped value.
+TNodePtr BuildLiteralRawString(TPosition pos, const TString& value, bool isUtf8) {
+    return new TLiteralNode(pos, isUtf8 ? "Utf8" : "String", value);
+}
+
+TNodePtr BuildLiteralBool(TPosition pos, bool value) {
+    return new TLiteralNode(pos, "Bool", value ? "true" : "false");
+}
+
+TAsteriskNode::TAsteriskNode(TPosition pos)
+    : INode(pos)
+{}
+
+bool TAsteriskNode::IsAsterisk() const {
+    return true;
+};
+
+TNodePtr TAsteriskNode::DoClone() const {
+    return new TAsteriskNode(Pos);
+}
+// An asterisk cannot be translated directly: reports an error and returns nullptr.
+TAstNode* TAsteriskNode::Translate(TContext& ctx) const {
+    ctx.Error(Pos) << "* is not allowed here";
+    return nullptr;
+}
+// Builds a one-argument lambda whose body is its own argument atom "x".
+TNodePtr BuildEmptyAction(TPosition pos) {
+    TNodePtr params = new TAstListNodeImpl(pos);
+    TNodePtr arg = new TAstAtomNodeImpl(pos, "x", TNodeFlags::Default);
+    params->Add(arg);
+    return BuildLambda(pos, params, arg);
+}
+
+TDeferredAtom::TDeferredAtom()
+{}
+// Atom with a known literal value: the node, the explicit value and the representation are all derived from `str`.
+TDeferredAtom::TDeferredAtom(TPosition pos, const TString& str)
+{
+    Node = BuildQuotedAtom(pos, str);
+    Explicit = str;
+    Repr = str;
+}
+// Atom backed by an arbitrary node: no explicit literal is set, and the representation is a name generated by ctx.MakeName.
+TDeferredAtom::TDeferredAtom(TNodePtr node, TContext& ctx)
+{
+    Node = node;
+    Repr = ctx.MakeName("DeferredAtom");
+}
+
+const TString* TDeferredAtom::GetLiteral() const {
+    return Explicit.Get();
+}
+// Copies the explicit literal into `value`; when there is none, reports an error (at the node position if a node exists) and returns false.
+bool TDeferredAtom::GetLiteral(TString& value, TContext& ctx) const {
+    if (Explicit) {
+        value = *Explicit;
+        return true;
+    }
+
+    ctx.Error(Node ? 
Node->GetPos() : ctx.Pos()) << "Expected literal value";
+    return false;
+}
+
+TNodePtr TDeferredAtom::Build() const {
+    return Node;
+}
+
+TString TDeferredAtom::GetRepr() const {
+    return Repr;
+}
+// Empty when there is no node or the representation string is empty.
+bool TDeferredAtom::Empty() const {
+    return !Node || Repr.empty();
+}
+
+bool TDeferredAtom::HasNode() const {
+    return !!Node;
+}
+// Tuple node over the given element expressions.
+TTupleNode::TTupleNode(TPosition pos, const TVector<TNodePtr>& exprs)
+    : TAstListNode(pos)
+    , Exprs(exprs)
+{}
+
+bool TTupleNode::IsEmpty() const {
+    return Exprs.empty();
+}
+
+const TVector<TNodePtr>& TTupleNode::Elements() const {
+    return Exprs;
+}
+
+TTupleNode* TTupleNode::GetTupleNode() {
+    return this;
+}
+
+const TTupleNode* TTupleNode::GetTupleNode() const {
+    return this;
+}
+// Emits (quote (<elements...>)); a labeled element is rejected with an error.
+bool TTupleNode::DoInit(TContext& ctx, ISource* src) {
+    auto node(Y());
+    for (auto& expr: Exprs) {
+        if (expr->GetLabel()) {
+            ctx.Error(expr->GetPos()) << "Tuple does not allow named members";
+            return false;
+        }
+        node = L(node, expr);
+    }
+    Add("quote", node);
+    return TAstListNode::DoInit(ctx, src);
+}
+
+size_t TTupleNode::GetTupleSize() const {
+    return Exprs.size();
+}
+// No bounds check is performed here: `index` must be below GetTupleSize().
+TNodePtr TTupleNode::GetTupleElement(size_t index) const {
+    return Exprs[index];
+}
+
+TNodePtr TTupleNode::DoClone() const {
+    return new TTupleNode(Pos, CloneContainer(Exprs));
+}
+// Forwards the collection request to every element.
+void TTupleNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+    for (auto& expr : Exprs) {
+        expr->CollectPreaggregateExprs(ctx, src, exprs);
+    }
+}
+
+const TString* TTupleNode::GetSourceName() const {
+    return DeriveCommonSourceName(Exprs);
+}
+
+TNodePtr BuildTuple(TPosition pos, const TVector<TNodePtr>& exprs) {
+    return new TTupleNode(pos, exprs);
+}
+// Struct node; `labels` must be empty (member names are then taken from the expressions' own labels in DoInit) or have one entry per expression.
+TStructNode::TStructNode(TPosition pos, const TVector<TNodePtr>& exprs, const TVector<TNodePtr>& labels, bool ordered)
+    : TAstListNode(pos)
+    , Exprs(exprs)
+    , Labels(labels)
+    , Ordered(ordered)
+{
+    YQL_ENSURE(Labels.empty() || Labels.size() == Exprs.size());
+}
+
+bool 
TStructNode::DoInit(TContext& ctx, ISource* src) {
+    Nodes.push_back(BuildAtom(Pos, (Ordered || Exprs.size() < 2) ? "AsStruct" : "AsStructUnordered", TNodeFlags::Default)); // the unordered form is used only for non-ordered structs with two or more members
+    size_t i = 0;
+    for (const auto& expr : Exprs) {
+        TNodePtr label;
+        if (Labels.empty()) { // no explicit labels: every member must carry its own label
+            if (!expr->GetLabel()) {
+                ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members";
+                return false;
+            }
+            label = BuildQuotedAtom(expr->GetPos(), expr->GetLabel());
+        } else {
+            label = Labels[i++];
+        }
+        Nodes.push_back(Q(Y(label, expr)));
+    }
+    return TAstListNode::DoInit(ctx, src);
+}
+
+TNodePtr TStructNode::DoClone() const {
+    return new TStructNode(Pos, CloneContainer(Exprs), CloneContainer(Labels), Ordered);
+}
+
+TStructNode* TStructNode::GetStructNode() {
+    return this;
+}
+
+const TStructNode* TStructNode::GetStructNode() const {
+    return this;
+}
+// Forwards the collection request to every member expression.
+void TStructNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+    for (auto& expr : Exprs) {
+        expr->CollectPreaggregateExprs(ctx, src, exprs);
+    }
+}
+
+const TString* TStructNode::GetSourceName() const {
+    return DeriveCommonSourceName(Exprs);
+}
+// Unordered struct whose member names come from the expressions' labels.
+TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprs) {
+    bool ordered = false;
+    return new TStructNode(pos, exprs, {}, ordered);
+}
+// Unordered struct with explicitly supplied label nodes.
+TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels) {
+    bool ordered = false;
+    return new TStructNode(pos, exprsUnlabeled, labels, ordered);
+}
+// Ordered struct with explicitly supplied label nodes.
+TNodePtr BuildOrderedStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels) {
+    bool ordered = true;
+    return new TStructNode(pos, exprsUnlabeled, labels, ordered);
+}
+// Container node that owns a list of expressions; it is not meant to be translated itself.
+TListOfNamedNodes::TListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs)
+    : INode(pos)
+    , Exprs(std::move(exprs))
+{}
+
+TVector<TNodePtr>* TListOfNamedNodes::ContentListPtr() {
+    return &Exprs;
+}
+
+TAstNode* TListOfNamedNodes::Translate(TContext& ctx) const {
+    
YQL_ENSURE(!"Unexpected usage"); // the negated string literal is always false, so this check fails whenever Translate is reached
+    Y_UNUSED(ctx);
+    return nullptr;
+}
+
+TNodePtr TListOfNamedNodes::DoClone() const {
+    return new TListOfNamedNodes(GetPos(), CloneContainer(Exprs));
+}
+
+void TListOfNamedNodes::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const {
+    for (auto& expr : Exprs) {
+        expr->VisitTree(func, visited);
+    }
+}
+
+TNodePtr BuildListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs) {
+    return new TListOfNamedNodes(pos, std::move(exprs));
+}
+// Named placeholder node; DoInit always fails with an error, so it cannot be part of an expression.
+TArgPlaceholderNode::TArgPlaceholderNode(TPosition pos, const TString &name) :
+    INode(pos),
+    Name(name)
+{
+}
+
+bool TArgPlaceholderNode::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    ctx.Error(Pos) << Name << " can't be used as a part of expression.";
+    return false;
+}
+
+TAstNode* TArgPlaceholderNode::Translate(TContext& ctx) const {
+    Y_UNUSED(ctx);
+    return nullptr;
+}
+
+TString TArgPlaceholderNode::GetName() const {
+    return Name;
+}
+
+TNodePtr TArgPlaceholderNode::DoClone() const {
+    return new TArgPlaceholderNode(GetPos(), Name);
+}
+
+TNodePtr BuildArgPlaceholder(TPosition pos, const TString& name) {
+    return new TArgPlaceholderNode(pos, name);
+}
+// Access chain (Ids[0].Expr followed by name / expression parts); DoInit turns the trailing parts into nested SqlAccess calls.
+class TAccessNode: public INode {
+public:
+    TAccessNode(TPosition pos, const TVector<TIdPart>& ids, bool isLookup)
+        : INode(pos)
+        , Ids(ids)
+        , IsLookup(isLookup)
+        , ColumnOnly(false)
+        , IsColumnRequired(false)
+        , AccessOpName("AccessNode")
+    {
+        Y_DEBUG_ABORT_UNLESS(Ids.size() > 1);
+        Y_DEBUG_ABORT_UNLESS(Ids[0].Expr);
+        auto column = Ids[0].Expr->GetColumnNode();
+        if (column) { // head is a column node: fold the first one or two parts into it
+            ui32 idx = 1;
+            TString source;
+            if (Ids.size() > 2) { // with more than two parts, Ids[1] is taken as the source name
+                source = Ids[idx].Name;
+                ++idx;
+            }
+
+            ColumnOnly = !IsLookup && Ids.size() < 4;
+            if (ColumnOnly && Ids[idx].Expr) {
+                column->ResetColumn(Ids[idx].Expr, source);
+            } else {
+                column->ResetColumn(Ids[idx].Name, source);
+            }
+        }
+    }
+
+    void AssumeColumn() override {
+        IsColumnRequired = true;
+    }
+    // Returns Ids[1].Name when this is a column-only access with at most two parts, Nothing() otherwise.
+    TMaybe<TString> TryMakeTable() {
+        if (!ColumnOnly) {
+            return 
Nothing();
+        }
+
+        ui32 idx = 1;
+        if (Ids.size() > 2) {
+            return Nothing();
+        }
+
+        return Ids[idx].Name;
+    }
+    // The column name is exposed only for a column-only access.
+    const TString* GetColumnName() const override {
+        return ColumnOnly ? Ids[0].Expr->GetColumnName() : nullptr;
+    }
+
+    const TString* GetSourceName() const override {
+        return Ids[0].Expr->GetSourceName();
+    }
+
+    TAccessNode* GetAccessNode() override {
+        return this;
+    }
+
+    const TAccessNode* GetAccessNode() const override {
+        return this;
+    }
+    // Initializes the head and every part expression, validates the parts consumed by the column node, then wraps the remaining parts into SqlAccess calls stored in Node.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto expr = Ids[0].Expr;
+        const TPosition pos(expr->GetPos());
+        if (expr->IsAsterisk()) {
+            ctx.Error(pos) << "Asterisk column does not allow any access";
+            return false;
+        }
+        if (!expr->Init(ctx, src)) {
+            return false;
+        }
+        for (auto& id: Ids) { // NOTE(review): this loop calls Init on Ids[0].Expr a second time - presumably harmless, confirm
+            if (id.Expr && !id.Expr->Init(ctx, src)) {
+                return false;
+            }
+        }
+        ui32 idx = 1;
+        auto column = expr->GetColumnNode();
+        if (column) {
+            const bool useSourceAsColumn = column->IsUseSourceAsColumn();
+            ColumnOnly &= !useSourceAsColumn;
+            if (IsColumnRequired && !ColumnOnly) {
+                ctx.Error(pos) << "Please use a full form (corellation.struct.field) or an alias (struct.field as alias) to access struct's field in the GROUP BY";
+                return false;
+            }
+
+            if (Ids.size() > 2) {
+                if (!CheckColumnId(pos, ctx, Ids[idx], ColumnOnly ? "Correlation" : "Column", true)) {
+                    return false;
+                }
+                ++idx;
+            }
+            if (!useSourceAsColumn) {
+                if (!IsLookup && !CheckColumnId(pos, ctx, Ids[idx], ColumnOnly ? "Column" : "Member", false)) {
+                    return false;
+                }
+                ++idx;
+            }
+        }
+        for (; idx < Ids.size(); ++idx) { // the parts not consumed above become struct / dict accesses
+            const auto& id = Ids[idx];
+            if (!id.Name.empty()) {
+                expr = Y("SqlAccess", Q("struct"), expr, id.Expr ? 
Y("EvaluateAtom", id.Expr) : BuildQuotedAtom(Pos, id.Name));
+                AccessOpName = "AccessStructMember";
+            } else if (id.Expr) {
+                expr = Y("SqlAccess", Q("dict"), expr, id.Expr);
+                AccessOpName = "AccessDictMember";
+            } else {
+                continue;
+            }
+
+            if (ctx.PragmaYsonAutoConvert || ctx.PragmaYsonStrict || ctx.PragmaYsonFast) { // append the enabled Yson pragmas as an options list to this SqlAccess call
+                auto ysonOptions = Y();
+                if (ctx.PragmaYsonAutoConvert) {
+                    ysonOptions->Add(BuildQuotedAtom(Pos, "yson_auto_convert"));
+                }
+                if (ctx.PragmaYsonStrict) {
+                    ysonOptions->Add(BuildQuotedAtom(Pos, "yson_strict"));
+                }
+                if (ctx.PragmaYsonFast) {
+                    ysonOptions->Add(BuildQuotedAtom(Pos, "yson_fast"));
+                }
+                expr->Add(Q(ysonOptions));
+            }
+        }
+        Node = expr;
+        return true;
+    }
+    // Translates the expression built by DoInit.
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+    // Cloning is allowed only before DoInit has built Node; the ColumnOnly flag is carried over to the copy.
+    TPtr DoClone() const override {
+        YQL_ENSURE(!Node, "TAccessNode::Clone: Node should not be initialized");
+        TVector<TIdPart> cloneIds;
+        cloneIds.reserve(Ids.size());
+        for (const auto& id: Ids) {
+            cloneIds.emplace_back(id.Clone());
+        }
+        auto copy = new TAccessNode(Pos, cloneIds, IsLookup);
+        copy->ColumnOnly = ColumnOnly;
+        return copy;
+    }
+
+    const TVector<TIdPart>& GetParts() const {
+        return Ids;
+    }
+
+protected:
+    void DoUpdateState() const override { // mirrors the state flags of the built Node
+        YQL_ENSURE(Node);
+        State.Set(ENodeState::Const, Node->IsConstant());
+        State.Set(ENodeState::MaybeConst, Node->MaybeConstant());
+        State.Set(ENodeState::Aggregated, Node->IsAggregated());
+        State.Set(ENodeState::AggregationKey, Node->HasState(ENodeState::AggregationKey));
+        State.Set(ENodeState::OverWindow, Node->IsOverWindow());
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+    // Validates one id part: the name must be non-empty and, when checkLookup is set, the part must not carry an expression.
+    bool CheckColumnId(TPosition pos, TContext& ctx, const TIdPart& id, const TString& where, bool checkLookup) {
+        if (id.Name.empty()) {
+            ctx.Error(pos) << where << " name can not be empty";
+            return false;
+        }
+        
if (checkLookup && id.Expr) {
+            ctx.Error(pos) << where << " name does not allow dict lookup";
+            return false;
+        }
+        return true;
+    }
+
+    TString GetOpName() const override {
+        return AccessOpName;
+    }
+    // Forwards the collection request to every part that has an expression.
+    void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override {
+        for (auto& id : Ids) {
+            if (id.Expr) {
+                id.Expr->CollectPreaggregateExprs(ctx, src, exprs);
+            }
+        }
+    }
+
+private:
+    TNodePtr Node;
+    TVector<TIdPart> Ids;
+    bool IsLookup;
+    bool ColumnOnly;
+    bool IsColumnRequired;
+    TString AccessOpName;
+};
+
+TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup) {
+    return new TAccessNode(pos, ids, isLookup);
+}
+
+TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar) {
+    return new TMatchRecognizeVarAccessNode(pos, var, column, theSameVar);
+}
+// Emits a pair of warnings (at most once) when a GROUP BY term refers, without a source name, to a column whose name equals a non-trivial SELECT alias.
+void WarnIfAliasFromSelectIsUsedInGroupBy(TContext& ctx, const TVector<TNodePtr>& selectTerms, const TVector<TNodePtr>& groupByTerms,
+    const TVector<TNodePtr>& groupByExprTerms)
+{
+    THashMap<TString, TNodePtr> termsByLabel; // candidate aliases: labeled, not over a window, not a plain rename, no aggregation inside
+    for (auto& term : selectTerms) {
+        auto label = term->GetLabel();
+        if (!label || term->IsOverWindow()) {
+            continue;
+        }
+
+        auto column = term->GetColumnName();
+
+        // do not warn for trivial renaming such as '[X.]foo AS foo'
+        if (column && *column == label) {
+            continue;
+        }
+
+        // skip terms with aggregation functions inside
+        bool hasAggregationFunction = false;
+        auto visitor = [&](const INode& current) {
+            hasAggregationFunction = hasAggregationFunction || current.GetAggregation();
+            return !hasAggregationFunction;
+        };
+
+        term->VisitTree(visitor);
+        if (!hasAggregationFunction) {
+            termsByLabel[label] = term;
+        }
+    }
+
+    if (termsByLabel.empty()) {
+        return;
+    }
+
+    bool found = false;
+    auto visitor = [&](const INode& current) {
+        if (found) {
+            return false;
+        }
+
+        if (auto columnName = current.GetColumnName()) {
+            // do not warn if source name is set
+            
auto src = current.GetSourceName();
+            if (src && *src) {
+                return true;
+            }
+            auto it = termsByLabel.find(*columnName);
+            if (it != termsByLabel.end()) {
+                found = true; // warn once: first at the GROUP BY reference, then at the SELECT term that owns the alias
+                ctx.Warning(current.GetPos(), TIssuesIds::YQL_PROJECTION_ALIAS_IS_REFERENCED_IN_GROUP_BY)
+                    << "GROUP BY will aggregate by column `" << *columnName << "` instead of aggregating by SELECT expression with same alias";
+                ctx.Warning(it->second->GetPos(), TIssuesIds::YQL_PROJECTION_ALIAS_IS_REFERENCED_IN_GROUP_BY)
+                    << "You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details";
+                return false;
+            }
+        }
+
+        return true;
+    };
+
+    TVector<TNodePtr> originalGroupBy;
+    {
+        THashSet<TString> groupByExprLabels;
+        for (auto& expr : groupByExprTerms) {
+            auto label = expr->GetLabel();
+            YQL_ENSURE(label);
+            groupByExprLabels.insert(label);
+        }
+
+        originalGroupBy = groupByTerms;
+        EraseIf(originalGroupBy, [&](const TNodePtr& node) { // drop unqualified column terms whose name is a GROUP BY expression label; the expressions themselves are appended below
+            auto column = node->GetColumnName();
+            auto src = node->GetSourceName();
+
+            return (!src || src->empty()) && column && groupByExprLabels.contains(*column);
+        });
+
+        originalGroupBy.insert(originalGroupBy.end(), groupByExprTerms.begin(), groupByExprTerms.end());
+    }
+
+    for (auto& groupByTerm : originalGroupBy) {
+        groupByTerm->VisitTree(visitor);
+        if (found) {
+            return;
+        }
+    }
+}
+// Returns false, after reporting an error, for the first initialized non-constant node that is neither aggregated nor computed over a window.
+bool ValidateAllNodesForAggregation(TContext& ctx, const TVector<TNodePtr>& nodes) {
+    for (auto& node: nodes) {
+        if (!node->HasState(ENodeState::Initialized) || node->IsConstant() || node->MaybeConstant()) {
+            continue;
+        }
+        // TODO: "!node->IsOverWindow()" doesn't look right here
+        if (!node->IsAggregated() && !node->IsOverWindow() && !node->IsOverWindowDistinct()) {
+            // locate column which is not a key column and not aggregated
+            const INode* found = nullptr;
+            auto visitor = [&found](const INode& current) {
+                if (found || current.IsAggregated() || current.IsOverWindow() || current.IsOverWindowDistinct()) {
+                    return false;
+                }
+
+                if 
(current.GetColumnNode() || current.GetAccessNode()) { + found = ¤t; + return false; + } + return true; + }; + + node->VisitTree(visitor); + if (found) { + TString columnName; + if (auto col = found->GetColumnName(); col && *col) { + columnName = "`"; + if (auto src = found->GetSourceName(); src && *src) { + columnName += DotJoin(*src, *col); + } else { + columnName += *col; + } + columnName += "` "; + } + ctx.Error(found->GetPos()) << "Column " << columnName << "must either be a key column in GROUP BY or it should be used in aggregation function"; + } else { + ctx.Error(node->GetPos()) << "Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery"; + } + + return false; + } + } + return true; +} + +class TBindNode: public TAstListNode { +public: + TBindNode(TPosition pos, const TString& module, const TString& alias) + : TAstListNode(pos) + { + Add("bind", AstNode(module), BuildQuotedAtom(pos, alias)); + } +private: + TBindNode(const TBindNode& other) + : TAstListNode(other.GetPos()) + { + Nodes = CloneContainer(other.Nodes); + } + + TPtr DoClone() const final { + return new TBindNode(*this); + } +}; + +TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias) { + return new TBindNode(pos, module, alias); +} + +class TLambdaNode: public TAstListNode { +public: + TLambdaNode(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName) + : TAstListNode(pos) + { + if (!resName.empty()) { + body = Y("block", Q(L(body, Y("return", resName)))); + } + Add("lambda", Q(params), body); + } + + TLambdaNode(TPosition pos, TNodePtr params, TVector<TNodePtr> bodies) + : TAstListNode(pos) + { + Add("lambda", Q(params)); + for (const auto& b : bodies) { + Add(b); + } + } + + TLambdaNode* GetLambdaNode() override { + return this; + } + + const TLambdaNode* GetLambdaNode() const override { + return this; + } + +private: + TLambdaNode(const TLambdaNode& other) + : TAstListNode(other.GetPos()) + { 
+        Nodes = CloneContainer(other.Nodes);
+    }
+
+    TPtr DoClone() const final {
+        return new TLambdaNode(*this);
+    }
+
+    void DoUpdateState() const final {
+        State.Set(ENodeState::Const);
+    }
+};
+
+TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName) {
+    return new TLambdaNode(pos, params, body, resName);
+}
+
+TNodePtr BuildLambda(TPosition pos, TNodePtr params, const TVector<TNodePtr>& bodies) {
+    return new TLambdaNode(pos, params, bodies);
+}
+
+TNodePtr BuildDataType(TPosition pos, const TString& typeName) {
+    return new TCallNodeImpl(pos, "DataType", {BuildQuotedAtom(pos, typeName, TNodeFlags::Default)});
+}
+// Resolves a type alias: returns the SQL simple type name, or a lower-cased "pg..." / "_pg..." name for a known pg type; empty TMaybe when the alias is unknown.
+TMaybe<TString> LookupSimpleType(const TStringBuf& alias, bool flexibleTypes, bool isPgType) {
+    TString normalized = to_lower(TString(alias));
+    if (isPgType) {
+        // expecting original pg type (like _int4 or varchar) with optional pg prefix (i.e. _pgint4, pgvarchar)
+        if (normalized.StartsWith("pg")) {
+            normalized = normalized.substr(2);
+        } else if (normalized.StartsWith("_pg")) {
+            normalized = "_" + normalized.substr(3);
+        }
+
+        if (!NPg::HasType(normalized)) {
+            return {};
+        }
+
+        if (normalized.StartsWith("_")) { // re-attach the prefix in canonical form: "_x" -> "_pgx", "x" -> "pgx"
+            return "_pg" + normalized.substr(1);
+        }
+        return "pg" + normalized;
+    }
+
+    if (auto sqlAlias = LookupSimpleTypeBySqlAlias(alias, flexibleTypes)) {
+        return TString(*sqlAlias);
+    }
+
+    TString pgType;
+    if (normalized.StartsWith("_pg")) {
+        pgType = normalized.substr(3); // NOTE(review): unlike the isPgType branch, the leading "_" is not re-added before HasType - confirm this is intended
+    } else if (normalized.StartsWith("pg")) {
+        pgType = normalized.substr(2);
+    } else {
+        return {};
+    }
+
+    if (NPg::HasType(pgType)) {
+        return normalized;
+    }
+
+    return {};
+}
+// Builds the type node for a simple type name: "<X>Type" call for Void/Unit/Generic/EmptyList/EmptyDict, PgType for pg names, DataType otherwise; reports an error and returns an empty pointer on failure.
+TNodePtr BuildSimpleType(TContext& ctx, TPosition pos, const TString& typeName, bool dataOnly) {
+    bool explicitPgType = ctx.GetColumnReferenceState() == EColumnRefState::AsPgType;
+    auto found = LookupSimpleType(typeName, ctx.FlexibleTypes, explicitPgType);
+    if (!found) {
+        ctx.Error(pos) << "Unknown " << (explicitPgType ? 
"pg" : "simple") << " type '" << typeName << "'";
+        return {};
+    }
+
+    auto type = *found;
+    if (type == "Void" || type == "Unit" || type == "Generic" || type == "EmptyList" || type == "EmptyDict") {
+        if (dataOnly) { // these names are rejected when only data types are allowed
+            ctx.Error(pos) << "Only data types are allowed here, but got: '" << typeName << "'";
+            return {};
+        }
+        type += "Type";
+        return new TCallNodeImpl(pos, type, {});
+    }
+
+    if (type.StartsWith("_pg") || type.StartsWith("pg")) {
+        TString pgType;
+        if (type.StartsWith("_pg")) { // "_pgx" -> "_x"
+            pgType = "_" + type.substr(3);
+        } else {
+            pgType = type.substr(2);
+        }
+        return new TCallNodeImpl(pos, "PgType", { BuildQuotedAtom(pos, pgType, TNodeFlags::Default) });
+    }
+
+    return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, type, TNodeFlags::Default) });
+}
+// Maps a few SQL-style aliases (compared via TCiString) to YQL type names; other aliases pass through unchanged. The result goes through NormalizeTypeString when `normalize` is set.
+TString TypeByAlias(const TString& alias, bool normalize) {
+    TString type(alias);
+    TCiString typeAlias(alias);
+    if (typeAlias.StartsWith("varchar")) {
+        type = "String";
+    } else if (typeAlias == "tinyint") {
+        type = "Int8";
+    } else if (typeAlias == "byte") {
+        type = "Uint8";
+    } else if (typeAlias == "smallint") {
+        type = "Int16";
+    } else if (typeAlias == "int" || typeAlias == "integer") {
+        type = "Int32";
+    } else if (typeAlias == "bigint") {
+        type = "Int64";
+    }
+    return normalize ? NormalizeTypeString(type) : type;
+}
+// IS NULL: a Null literal folds to the Bool literal true, a null pointer is passed through, anything else becomes (Not (Exists a)).
+TNodePtr BuildIsNullOp(TPosition pos, TNodePtr a) {
+    if (!a) {
+        return nullptr;
+    }
+    if (a->IsNull()) {
+        return BuildLiteralBool(pos, true);
+    }
+    return new TCallNodeImpl(pos, "Not", {new TCallNodeImpl(pos, "Exists", {a})});
+}
+
+
+
+TUdfNode::TUdfNode(TPosition pos, const TVector<TNodePtr>& args)
+    : INode(pos)
+    , Args(args)
+{
+    if (Args.size()) {
+        // If there aren't any named args, args are passed as vector of positional args,
+        // else Args has length 2: tuple for positional args and struct for named args,
+        // so let's construct tuple of args there. Other type checks will happen within the DoInit call.
+        if (!Args[0]->GetTupleNode()) {
+            Args = {BuildTuple(pos, args)};
+        }
+    }
+}
+// Extracts function / module names from the first tuple element, builds the external TupleType from the rest, and reads TypeConfig / RunConfig from the optional named-args struct.
+bool TUdfNode::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    if (Args.size() < 1) {
+        ctx.Error(Pos) << "Udf: expected at least one argument";
+        return false;
+    }
+
+    TTupleNode* as_tuple = Args[0]->GetTupleNode();
+
+    if (!as_tuple || as_tuple->GetTupleSize() < 1) {
+        ctx.Error(Pos) << "Udf: first argument must be a callable, like Foo::Bar";
+        return false;
+    }
+
+    TNodePtr function = as_tuple->GetTupleElement(0);
+
+    if (!function || !function->FuncName()) {
+        ctx.Error(Pos) << "Udf: first argument must be a callable, like Foo::Bar";
+        return false;
+    }
+
+    FunctionName = function->FuncName();
+    ModuleName = function->ModuleName();
+    TVector<TNodePtr> external;
+    external.reserve(as_tuple->GetTupleSize() - 1); // tuple size is at least 1 here, so this cannot underflow
+
+    for (size_t i = 1; i < as_tuple->GetTupleSize(); ++i) {
+        // TODO(): support named args in GetFunctionArgColumnStatus
+        TNodePtr current = as_tuple->GetTupleElement(i);
+        if (TAccessNode* as_access = current->GetAccessNode(); as_access) { // an access node is replaced by (DataType '<name of its second part>)
+            external.push_back(Y("DataType", Q(as_access->GetParts()[1].Name)));
+            continue;
+        }
+        external.push_back(current);
+    }
+
+    ExternalTypesTuple = new TCallNodeImpl(Pos, "TupleType", external);
+
+    if (Args.size() == 1) {
+        return true;
+    }
+
+    if (TStructNode* named_args = Args[1]->GetStructNode(); named_args) {
+        for (const auto &arg: named_args->GetExprs()) { // only the TypeConfig and RunConfig labels are read; others are ignored
+            if (arg->GetLabel() == "TypeConfig") {
+                TypeConfig = MakeAtomFromExpression(Pos, ctx, arg);
+            } else if (arg->GetLabel() == "RunConfig") {
+                RunConfig = arg;
+            }
+        }
+    }
+
+    return true;
+}
+
+const TNodePtr TUdfNode::GetExternalTypes() const {
+    return ExternalTypesTuple;
+}
+// Dereferences FunctionName unconditionally; it is assigned in DoInit.
+const TString& TUdfNode::GetFunction() const {
+    return *FunctionName;
+}
+// Dereferences ModuleName unconditionally; it is assigned in DoInit.
+const TString& TUdfNode::GetModule() const {
+    return *ModuleName;
+}
+
+TNodePtr TUdfNode::GetRunConfig() const {
+    return RunConfig;
+}
+
+const TDeferredAtom& TUdfNode::GetTypeConfig() const {
+    return TypeConfig;
+}
+
+TUdfNode* TUdfNode::GetUdfNode() {
+    return this;
+}
+
+const TUdfNode* TUdfNode::GetUdfNode() const {
+    return this;
+}
+// The Udf node cannot be translated on its own: reports an error and returns nullptr.
+TAstNode* TUdfNode::Translate(TContext& ctx) const {
+    ctx.Error(Pos) << "Abstract Udf Node can't be used as a part of expression.";
+    return nullptr;
+}
+
+TNodePtr TUdfNode::DoClone() const {
+    return new TUdfNode(Pos, CloneContainer(Args));
+}
+
+// Call node constructed with exactly two arguments.
+class TBinaryOpNode final: public TCallNode {
+public:
+    TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b);
+
+    TNodePtr DoClone() const final {
+        YQL_ENSURE(Args.size() == 2);
+        return new TBinaryOpNode(Pos, OpName, Args[0]->Clone(), Args[1]->Clone());
+    }
+};
+
+TBinaryOpNode::TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b)
+    : TCallNode(pos, opName, 2, 2, { a, b })
+{
+}
+// Builds a binary operation node (nullptr if either operand is null) and warns when a Null literal operand makes the result NULL.
+TNodePtr BuildBinaryOp(TContext& ctx, TPosition pos, const TString& opName, TNodePtr a, TNodePtr b) {
+    if (!a || !b) {
+        return nullptr;
+    }
+
+    static const THashSet<TStringBuf> nullSafeOps = {"IsDistinctFrom", "IsNotDistinctFrom"};
+    if (!nullSafeOps.contains(opName)) {
+        const bool bothArgNull = a->IsNull() && b->IsNull();
+        const bool oneArgNull = a->IsNull() || b->IsNull();
+
+        if (bothArgNull || (oneArgNull && opName != "Or" && opName != "And")) { // Or / And warn only when both operands are Null
+            ctx.Warning(pos, TIssuesIds::YQL_OPERATION_WILL_RETURN_NULL) << "Binary operation "
+                << opName.substr(0, opName.size() - 7 * opName.EndsWith("MayWarn")) // strips a trailing "MayWarn" (7 characters) from the reported name
+                << " will return NULL here";
+        }
+    }
+
+    return new TBinaryOpNode(pos, opName, a, b);
+}
+// Same as BuildBinaryOp but without the NULL warning.
+TNodePtr BuildBinaryOpRaw(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b) {
+    if (!a || !b) {
+        return nullptr;
+    }
+
+    return new TBinaryOpNode(pos, opName, a, b);
+}
+// Wraps a function node so that it is initialized against an over-window source for the given window name.
+class TCalcOverWindow final: public INode {
+public:
+    TCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr node)
+        : INode(pos)
+        , WindowName(windowName)
+        , FuncNode(node)
+    {}
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return FuncNode->Translate(ctx);
+    }
+ + /* Requires a non-null src; FuncNode is initialized against an over-window source built from ctx.Pos(), WindowName and src. */bool DoInit(TContext& ctx, ISource* src) override { + YQL_ENSURE(src); + TSourcePtr overWindowSource = BuildOverWindowSource(ctx.Pos(), WindowName, src); + if (!FuncNode->Init(ctx, overWindowSource.Get())) { + return false; + } + return true; + } + + TPtr DoClone() const final { + return new TCalcOverWindow(Pos, WindowName, SafeClone(FuncNode)); + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, FuncNode->IsConstant()); + State.Set(ENodeState::MaybeConst, FuncNode->MaybeConstant()); + State.Set(ENodeState::Aggregated, FuncNode->IsAggregated()); + State.Set(ENodeState::OverWindow, true); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_DEBUG_ABORT_UNLESS(FuncNode); + FuncNode->VisitTree(func, visited); + } + + /* Descends into FuncNode only when ctx.DistinctOverWindow is set; otherwise the INode base implementation is used. */void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override { + if (ctx.DistinctOverWindow) { + FuncNode->CollectPreaggregateExprs(ctx, src, exprs); + } else { + INode::CollectPreaggregateExprs(ctx, src, exprs); + } + } +protected: + const TString WindowName; + TNodePtr FuncNode; +}; + +TNodePtr BuildCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr call) { + return new TCalcOverWindow(pos, windowName, call); +} + /* Constant node that applies the Yson2.Options (Fast) or Yson.Options udf through NamedApply, passing AutoConvert and Strict as labelled bool literals. */+template<bool Fast> +class TYsonOptionsNode final: public INode { +public: + TYsonOptionsNode(TPosition pos, bool autoConvert, bool strict) + : INode(pos) + , AutoConvert(autoConvert) + , Strict(strict) + { + auto udf = Y("Udf", Q(Fast ? 
"Yson2.Options" : "Yson.Options")); + auto autoConvertNode = BuildLiteralBool(pos, autoConvert); + autoConvertNode->SetLabel("AutoConvert"); + auto strictNode = BuildLiteralBool(pos, strict); + strictNode->SetLabel("Strict"); + Node = Y("NamedApply", udf, Q(Y()), BuildStructure(pos, { autoConvertNode, strictNode })); + } + + TAstNode* Translate(TContext& ctx) const override { + return Node->Translate(ctx); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!Node->Init(ctx, src)) { + return false; + } + return true; + } + + TPtr DoClone() const final { + return new TYsonOptionsNode(Pos, AutoConvert, Strict); + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, true); + } + +protected: + TNodePtr Node; + const bool AutoConvert; + const bool Strict; +}; + /* Picks the TYsonOptionsNode instantiation according to fastYson. */+TNodePtr BuildYsonOptionsNode(TPosition pos, bool autoConvert, bool strict, bool fastYson) { + if (fastYson) + return new TYsonOptionsNode<true>(pos, autoConvert, strict); + else + return new TYsonOptionsNode<false>(pos, autoConvert, strict); +} + /* Wraps a node and initializes it against its own fake source; the source passed to DoInit is ignored. */+class TDoCall final : public INode { +public: + TDoCall(TPosition pos, const TNodePtr& node) + : INode(pos) + , Node(node) + { + FakeSource = BuildFakeSource(pos); + } + + ISource* GetSource() final { + return FakeSource.Get(); + } + + bool DoInit(TContext& ctx, ISource* src) final { + Y_UNUSED(src); + if (!Node->Init(ctx, FakeSource.Get())) { + return false; + } + + return true; + } + + TAstNode* Translate(TContext& ctx) const final { + return Node->Translate(ctx); + } + + TPtr DoClone() const final { + return new TDoCall(Pos, Node->Clone()); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_DEBUG_ABORT_UNLESS(Node); + Node->VisitTree(func, visited); + } +private: + TNodePtr Node; + TSourcePtr FakeSource; +}; + +TNodePtr BuildDoCall(TPosition pos, const TNodePtr& node) { + return new TDoCall(pos, node); +} + /* Parses an Int32 or Uint32 literal node into to; returns false when from is neither kind of literal or TryFromString fails. */+bool Parseui32(TNodePtr from, ui32& to) { + const TString* val; + + if (!(val = 
from->GetLiteral("Int32"))) { + if (!(val = from->GetLiteral("Uint32"))) { + return false; + } + } + + return TryFromString(*val, to); +} + /* With a non-null ground, returns a block whose statement list is a shallow copy of ground extended with (return expr); otherwise returns expr unchanged. */+TNodePtr GroundWithExpr(const TNodePtr& ground, const TNodePtr& expr) { + return ground ? expr->Y("block", expr->Q(expr->L(ground, expr->Y("return", expr)))) : expr; +} + /* Builds a table source from an expression. Fails with an error and nullptr when currCluster is empty or node is a lambda; a String literal is used as the table name directly; any other node is wrapped in an EvaluateAtom list and passed on as a deferred atom. */+TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster, + TNodePtr node, const TString& view) { + if (currCluster.Empty()) { + ctx.Error() << "No cluster name given and no default cluster is selected"; + return nullptr; + } + + if (auto literal = node->GetLiteral("String")) { + TNodePtr tableKey = BuildTableKey(node->GetPos(), currService, currCluster, TDeferredAtom(node->GetPos(), *literal), {view}); + TTableRef table(ctx.MakeName("table"), currService, currCluster, tableKey); + table.Options = BuildInputOptions(node->GetPos(), GetContextHints(ctx)); + return BuildTableSource(node->GetPos(), table); + } + + if (node->GetLambdaNode()) { + ctx.Error() << "Lambda is not allowed to be used as source. 
Did you forget to call a subquery template?"; + return nullptr; + } + + auto wrappedNode = new TAstListNodeImpl(pos, { + new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default), + node + }); + + TNodePtr tableKey = BuildTableKey(node->GetPos(), currService, currCluster, TDeferredAtom(wrappedNode, ctx), {view}); + TTableRef table(ctx.MakeName("table"), currService, currCluster, tableKey); + table.Options = BuildInputOptions(node->GetPos(), GetContextHints(ctx)); + return BuildTableSource(node->GetPos(), table); +} + +void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix) { + if (auto literal = node->GetLiteral("String")) { + table = TDeferredAtom(node->GetPos(), prefix + *literal); + return; + } + + if (auto access = node->GetAccessNode()) { + auto ret = access->TryMakeTable(); + if (ret) { + table = TDeferredAtom(node->GetPos(), prefix + *ret); + return; + } + } + + if (!prefix.empty()) { + node = node->Y("Concat", node->Y("String", node->Q(prefix)), node); + } + + auto wrappedNode = new TAstListNodeImpl(pos, { + new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default), + node + }); + + table = TDeferredAtom(wrappedNode, ctx); +} + +TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node, const TString& prefix) { + if (auto literal = node->GetLiteral("String")) { + return TDeferredAtom(node->GetPos(), prefix + *literal); + } + + if (!prefix.empty()) { + node = node->Y("Concat", node->Y("String", node->Q(prefix)), node); + } + + auto wrappedNode = new TAstListNodeImpl(pos, { + new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default), + node + }); + + return TDeferredAtom(wrappedNode, ctx); +} + +class TTupleResultNode: public INode { +public: + TTupleResultNode(TNodePtr&& tuple, size_t ensureTupleSize) + : INode(tuple->GetPos()) + , Node(std::move(tuple)) + , EnsureTupleSize(ensureTupleSize) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + if 
(!Node->Init(ctx, src)) { + return false; + } + + /* Once the wrapped tuple has initialized, Node is replaced by an EnsureTupleSize call over it. */Node = Y("EnsureTupleSize", Node, Q(ToString(EnsureTupleSize))); + + return true; + } + + TAstNode* Translate(TContext& ctx) const override { + return Node->Translate(ctx); + } + + TPtr DoClone() const final { + return new TTupleResultNode(Node->Clone(), EnsureTupleSize); + } + + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { + Y_DEBUG_ABORT_UNLESS(Node); + Node->VisitTree(func, visited); + } +protected: + TNodePtr Node; + const size_t EnsureTupleSize; +}; + +TNodePtr BuildTupleResult(TNodePtr tuple, size_t ensureTupleSize) { + return new TTupleResultNode(std::move(tuple), ensureTupleSize); +} + /* Reference to a named expression. DoInit runs the proxy init with a null source and then InitReference() on the referenced node; the node translates to an atom holding Name, wrapped in Nth when TupleIndex is defined. */+class TNamedExprReferenceNode: public IProxyNode { +public: + TNamedExprReferenceNode(TNodePtr parent, const TString& name, TMaybe<size_t> tupleIndex) + : IProxyNode(parent->GetPos(), parent) + , Name(name) + , TupleIndex(tupleIndex) + { + } + + bool DoInit(TContext& ctx, ISource* src) final { + Y_UNUSED(src); + if (!IProxyNode::DoInit(ctx, nullptr) || !IProxyNode::InitReference(ctx)) { + return false; + } + + Node = BuildAtom(GetPos(), Name, TNodeFlags::Default); + if (TupleIndex.Defined()) { + Node = Y("Nth", Node, Q(ToString(*TupleIndex))); + } + + return true; + } + + TAstNode* Translate(TContext& ctx) const override { + YQL_ENSURE(Node, "Init() should be done before Translate()"); + return Node->Translate(ctx); + } + + TPtr DoClone() const final { + // do not clone Inner here + return new TNamedExprReferenceNode(Inner, Name, TupleIndex); + } + +private: + const TString Name; + const TMaybe<size_t> TupleIndex; + TNodePtr Node; +}; + +TNodePtr BuildNamedExprReference(TNodePtr parent, const TString& name, TMaybe<size_t> tupleIndex) { + YQL_ENSURE(parent); + return new TNamedExprReferenceNode(parent, name, tupleIndex); +} + /* Named expression whose inner node is initialized against a fake source, either eagerly (ctx.ValidateUnusedExprs) or on first reference. */+class TNamedExprNode: public IProxyNode { +public: + TNamedExprNode(TNodePtr parent) + : IProxyNode(parent->GetPos(), parent) + , 
FakeSource(BuildFakeSource(parent->GetPos())) + , Referenced(false) + { + } + + bool DoInit(TContext& ctx, ISource* src) final { + YQL_ENSURE(!Referenced, "Refrence is initialized before named expr itself"); + Y_UNUSED(src); + if (ctx.ValidateUnusedExprs) { + return IProxyNode::DoInit(ctx, FakeSource.Get()); + } + // do actual init in InitReference() + return true; + } + + bool InitReference(TContext& ctx) final { + Referenced = true; + return IProxyNode::DoInit(ctx, FakeSource.Get()); + } + + TAstNode* Translate(TContext& ctx) const override { + if (ctx.ValidateUnusedExprs || Referenced) { + return Inner->Translate(ctx); + } + auto unused = BuildQuotedAtom(GetPos(), "unused", TNodeFlags::Default); + return unused->Translate(ctx); + } + + TPtr DoClone() const final { + return new TNamedExprNode(Inner->Clone()); + } + +private: + const TSourcePtr FakeSource; + bool Referenced; +}; + +TNodePtr BuildNamedExpr(TNodePtr parent) { + YQL_ENSURE(parent); + return new TNamedExprNode(parent); +} + +bool TVectorIndexSettings::Validate(TContext& ctx) const { + if (!Distance && !Similarity) { + ctx.Error() << "either distance or similarity should be set"; + return false; + } + if (!VectorType) { + ctx.Error() << "vector_type should be set"; + return false; + } + if (!VectorDimension) { + ctx.Error() << "vector_dimension should be set"; + return false; + } + return true; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h new file mode 100644 index 00000000000..d9eb154031e --- /dev/null +++ b/yql/essentials/sql/v1/node.h @@ -0,0 +1,1567 @@ +#pragma once + +#include <google/protobuf/message.h> +#include <yql/essentials/utils/resetable_setting.h> +#include <yql/essentials/parser/proto_ast/common.h> +#include <yql/essentials/public/udf/udf_data_type.h> +#include <yql/essentials/ast/yql_ast.h> +#include <yql/essentials/ast/yql_expr.h> +#include <util/generic/vector.h> +#include <util/generic/set.h> +#include 
<util/generic/map.h> +#include <util/generic/hash.h> +#include <util/generic/hash_set.h> +#include <util/generic/maybe.h> +#include <util/string/builder.h> + +#include <library/cpp/enumbitset/enumbitset.h> + +#include <array> +#include <functional> +#include <variant> + +namespace NSQLTranslationV1 { + constexpr const size_t SQL_MAX_INLINE_SCRIPT_LEN = 24; + + using NYql::TPosition; + using NYql::TAstNode; + + /* Node state flags stored in TNodeState; Begin and End delimit the bitset range and Precached shares the value of Begin. */enum class ENodeState { + Begin, + Precached = Begin, + Initialized, + CountHint, + Const, + MaybeConst, + Aggregated, + AggregationKey, + OverWindow, + OverWindowDistinct, + Failed, + End, + }; + typedef TEnumBitSet<ENodeState, static_cast<int>(ENodeState::Begin), static_cast<int>(ENodeState::End)> TNodeState; + + enum class ESQLWriteColumnMode { + InsertInto, + InsertOrAbortInto, + InsertOrIgnoreInto, + InsertOrRevertInto, + UpsertInto, + ReplaceInto, + InsertIntoWithTruncate, + Update, + Delete, + }; + + enum class EWriteColumnMode { + Default, + Insert, + InsertOrAbort, + InsertOrIgnore, + InsertOrRevert, + Upsert, + Replace, + Renew, + Update, + UpdateOn, + Delete, + DeleteOn, + }; + + /* NOTE(review): Intentnt looks like a misspelling of Intent; renaming would touch every user of the enum, so the name is left unchanged here. */enum class EAlterTableIntentnt { + AddColumn, + DropColumn + }; + + enum class ETableType { + Table, + TableStore, + ExternalTable + }; + + class TContext; + class ITableKeys; + class ISource; + class IAggregation; + class TObjectOperatorContext; + typedef TIntrusivePtr<IAggregation> TAggregationPtr; + class TColumnNode; + class TTupleNode; + class TCallNode; + class TStructNode; + class TAccessNode; + class TLambdaNode; + class TUdfNode; + typedef TIntrusivePtr<ISource> TSourcePtr; + + struct TScopedState; + typedef TIntrusivePtr<TScopedState> TScopedStatePtr; + + /* Joins lhs and rhs with a single dot. */inline TString DotJoin(const TString& lhs, const TString& rhs) { + TStringBuilder sb; + sb << lhs << "." 
<< rhs; + return sb; + } + + TString ErrorDistinctByGroupKey(const TString& column); + TString ErrorDistinctWithoutCorrelation(const TString& column); + + /* Ref-counted base class of the nodes declared in this header; concrete nodes must implement Translate() and DoClone(). */class INode: public TSimpleRefCount<INode> { + public: + typedef TIntrusivePtr<INode> TPtr; + + /* One identifier part, holding a name or an expression; Clone() copies Name and clones Expr when it is set. */struct TIdPart { + TString Name; + TPtr Expr; + + TIdPart(const TString& name) + : Name(name) + { + } + TIdPart(TPtr expr) + : Expr(expr) + { + } + TIdPart Clone() const { + TIdPart res(Name); + res.Expr = Expr ? Expr->Clone() : nullptr; + return res; + } + }; + + public: + INode(TPosition pos); + virtual ~INode(); + + TPosition GetPos() const; + const TString& GetLabel() const; + TMaybe<TPosition> GetLabelPos() const; + void SetLabel(const TString& label, TMaybe<TPosition> pos = {}); + bool IsImplicitLabel() const; + void MarkImplicitLabel(bool isImplicitLabel); + + void SetCountHint(bool isCount); + bool GetCountHint() const; + bool Init(TContext& ctx, ISource* src); + virtual bool InitReference(TContext& ctx); + + bool IsConstant() const; + bool MaybeConstant() const; + bool IsAggregated() const; + bool IsAggregationKey() const; + bool IsOverWindow() const; + bool IsOverWindowDistinct() const; + /* Calls PrecacheState() before testing the requested flag. */bool HasState(ENodeState state) const { + PrecacheState(); + return State.Test(state); + } + + virtual bool IsNull() const; + virtual bool IsLiteral() const; + virtual TString GetLiteralType() const; + virtual TString GetLiteralValue() const; + virtual bool IsIntegerLiteral() const; + virtual TPtr ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const; + virtual bool IsAsterisk() const; + virtual const TString* SubqueryAlias() const; + virtual TString GetOpName() const; + virtual const TString* GetLiteral(const TString& type) const; + virtual const TString* GetColumnName() const; + virtual void AssumeColumn(); + virtual const TString* GetSourceName() const; + virtual const TString* GetAtomContent() const; + virtual bool IsOptionalArg() const; + virtual size_t GetTupleSize() const; + virtual TPtr 
GetTupleElement(size_t index) const; + virtual ITableKeys* GetTableKeys(); + virtual ISource* GetSource(); + virtual TVector<INode::TPtr>* ContentListPtr(); + virtual TAstNode* Translate(TContext& ctx) const = 0; + virtual TAggregationPtr GetAggregation() const; + virtual void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs); + virtual TPtr WindowSpecFunc(const TPtr& type) const; + virtual bool SetViewName(TContext& ctx, TPosition pos, const TString& view); + virtual bool SetPrimaryView(TContext& ctx, TPosition pos); + void UseAsInner(); + void DisableSort(); + virtual bool UsedSubquery() const; + virtual bool IsSelect() const; + virtual bool HasSelectResult() const; + virtual const TString* FuncName() const; + virtual const TString* ModuleName() const; + virtual bool HasSkip() const; + + /* Typed accessors. TUdfNode overrides GetUdfNode() to return this; presumably the other node types do the same for their accessor and the base versions return nullptr - the base definitions are not in this chunk, confirm. */virtual TColumnNode* GetColumnNode(); + virtual const TColumnNode* GetColumnNode() const; + + virtual TTupleNode* GetTupleNode(); + virtual const TTupleNode* GetTupleNode() const; + + virtual TCallNode* GetCallNode(); + virtual const TCallNode* GetCallNode() const; + + virtual TStructNode* GetStructNode(); + virtual const TStructNode* GetStructNode() const; + + virtual TAccessNode* GetAccessNode(); + virtual const TAccessNode* GetAccessNode() const; + + virtual TLambdaNode* GetLambdaNode(); + virtual const TLambdaNode* GetLambdaNode() const; + + virtual TUdfNode* GetUdfNode(); + virtual const TUdfNode* GetUdfNode() const; + + /* NOTE(review): std::unordered_set is used below, but <unordered_set> is not among the includes visible at the top of this header; presumably it arrives transitively - confirm. */using TVisitFunc = std::function<bool (const INode&)>; + using TVisitNodeSet = std::unordered_set<const INode*>; + + void VisitTree(const TVisitFunc& func) const; + void VisitTree(const TVisitFunc& func, TVisitNodeSet& visited) const; + + TPtr AstNode() const; + TPtr AstNode(TAstNode* node) const; + TPtr AstNode(TPtr node) const; + TPtr AstNode(const TString& str) const; + + /* Variadic Add: converts the first value with AstNode(), appends it through DoAdd(), then recurses on the remaining values. */template <typename TVal, typename... TVals> + void Add(TVal val, TVals... 
vals) { + DoAdd(AstNode(val)); + Add(vals...); + } + + /* Recursion terminator for the variadic Add. The helpers that follow build list nodes: Y from its arguments, Q as (quote a), L by appending to a shallow copy of list. */void Add() {} + + // Y() Q() L() + TPtr Y() const { + return AstNode(); + } + + template <typename... TVals> + TPtr Y(TVals... vals) const { + TPtr node(AstNode()); + node->Add(vals...); + return node; + } + + template <typename T> + TPtr Q(T a) const { + return Y("quote", a); + } + + template <typename... TVals> + TPtr L(TPtr list, TVals... vals) const { + Y_DEBUG_ABORT_UNLESS(list); + auto copy = list->ShallowCopy(); + copy->Add(vals...); + return copy; + } + + TPtr Clone() const; + protected: + virtual TPtr ShallowCopy() const; + virtual void DoUpdateState() const; + virtual TPtr DoClone() const = 0; + void PrecacheState() const; + + virtual void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const; + private: + virtual bool DoInit(TContext& ctx, ISource* src); + virtual void DoAdd(TPtr node); + + protected: + TPosition Pos; + TString Label; + TMaybe<TPosition> LabelPos; + bool ImplicitLabel = false; + mutable TNodeState State; + bool AsInner = false; + bool DisableSort_ = false; + }; + typedef INode::TPtr TNodePtr; + + /* Node that wraps another node (Inner) and overrides the INode virtual interface; only declarations are visible here, presumably each override forwards to Inner - confirm against the definitions. */class IProxyNode : public INode { + public: + IProxyNode(TPosition pos, const TNodePtr& parent) + : INode(pos) + , Inner(parent) + {} + + protected: + virtual bool IsNull() const override; + virtual bool IsLiteral() const override; + virtual TString GetLiteralType() const override; + virtual TString GetLiteralValue() const override; + virtual bool IsIntegerLiteral() const override; + virtual TPtr ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const override; + virtual bool IsAsterisk() const override; + virtual const TString* SubqueryAlias() const override; + virtual TString GetOpName() const override; + virtual const TString* GetLiteral(const TString &type) const override; + virtual const TString* GetColumnName() const override; + virtual void AssumeColumn() override; + virtual const TString* GetSourceName() const override; + virtual const TString* 
GetAtomContent() const override; + virtual bool IsOptionalArg() const override; + virtual size_t GetTupleSize() const override; + virtual TPtr GetTupleElement(size_t index) const override; + virtual ITableKeys* GetTableKeys() override; + virtual ISource* GetSource() override; + virtual TVector<INode::TPtr>* ContentListPtr() override; + virtual TAggregationPtr GetAggregation() const override; + virtual void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override; + virtual TPtr WindowSpecFunc(const TPtr& type) const override; + virtual bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override; + virtual bool SetPrimaryView(TContext& ctx, TPosition pos) override; + virtual bool UsedSubquery() const override; + virtual bool IsSelect() const override; + virtual bool HasSelectResult() const override; + virtual const TString* FuncName() const override; + virtual const TString* ModuleName() const override; + virtual bool HasSkip() const override; + + /* Typed-accessor overrides, declared in mutable/const pairs to mirror the INode interface. */virtual TColumnNode* GetColumnNode() override; + virtual const TColumnNode* GetColumnNode() const override; + + virtual TTupleNode* GetTupleNode() override; + virtual const TTupleNode* GetTupleNode() const override; + + virtual TCallNode* GetCallNode() override; + virtual const TCallNode* GetCallNode() const override; + + virtual TStructNode* GetStructNode() override; + virtual const TStructNode* GetStructNode() const override; + + virtual TAccessNode* GetAccessNode() override; + virtual const TAccessNode* GetAccessNode() const override; + + virtual TLambdaNode* GetLambdaNode() override; + virtual const TLambdaNode* GetLambdaNode() const override; + + virtual TUdfNode* GetUdfNode() override; + virtual const TUdfNode* GetUdfNode() const override; + + protected: + /* State, traversal and initialization hooks overridden by the proxy; subclasses in node.cpp call IProxyNode::DoInit and IProxyNode::InitReference explicitly. */virtual void DoUpdateState() const override; + virtual void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const override; + virtual bool InitReference(TContext& ctx) override; + virtual bool 
DoInit(TContext& ctx, ISource* src) override; + + private: + virtual void DoAdd(TPtr node) override; + + protected: + const TNodePtr Inner; + }; + + using TTableHints = TMap<TString, TVector<TNodePtr>>; + void MergeHints(TTableHints& base, const TTableHints& overrides); + + /* Clones node, or yields nullptr when node is null. */template<class T> + inline T SafeClone(const T& node) { + return node ? node->Clone() : nullptr; + } + + /* Clones every element with SafeClone, keeping order and null entries. */template<class T> + inline TVector<T> CloneContainer(const TVector<T>& args) { + TVector<T> cloneArgs; + cloneArgs.reserve(args.size()); + for (const auto& arg: args) { + cloneArgs.emplace_back(SafeClone(arg)); + } + return cloneArgs; + } + + TTableHints CloneContainer(const TTableHints& hints); + + /* Atom node carrying its content string, flags and an optional-argument marker. */class TAstAtomNode: public INode { + public: + TAstAtomNode(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg); + + ~TAstAtomNode() override; + + TAstNode* Translate(TContext& ctx) const override; + const TString& GetContent() const { + return Content; + } + + const TString* GetAtomContent() const override; + bool IsOptionalArg() const override; + + protected: + TString Content; + ui32 Flags; + bool IsOptionalArg_; + + void DoUpdateState() const override; + }; + + class TAstAtomNodeImpl final: public TAstAtomNode { + public: + TAstAtomNodeImpl(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg = false) + : TAstAtomNode(pos, content, flags, isOptionalArg) + {} + + TNodePtr DoClone() const final { + return new TAstAtomNodeImpl(Pos, Content, Flags, IsOptionalArg_); + } + }; + + /* Holds a raw TAstNode pointer; DoClone() passes the same pointer on rather than copying the AST node. */class TAstDirectNode final: public INode { + public: + TAstDirectNode(TAstNode* node); + + TAstNode* Translate(TContext& ctx) const override; + + TPtr DoClone() const final { + return new TAstDirectNode(Node); + } + protected: + TAstNode* Node; + }; + + class TAstListNode: public INode { + public: + TAstListNode(TPosition pos); + virtual ~TAstListNode(); + + TAstNode* Translate(TContext& ctx) const override; + + protected: + explicit TAstListNode(const TAstListNode& node); + explicit 
TAstListNode(TPosition pos, TVector<TNodePtr>&& nodes); + TPtr ShallowCopy() const override; + bool DoInit(TContext& ctx, ISource* src) override; + void DoAdd(TNodePtr node) override; + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const override; + + void DoUpdateState() const override; + + void UpdateStateByListNodes(const TVector<TNodePtr>& Nodes) const; + + protected: + TVector<TNodePtr> Nodes; + mutable TMaybe<bool> CacheGroupKey; + }; + + class TAstListNodeImpl final: public TAstListNode { + public: + TAstListNodeImpl(TPosition pos); + TAstListNodeImpl(TPosition pos, TVector<TNodePtr> nodes); + void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override; + + protected: + TNodePtr DoClone() const final; + }; + + /* List node for a named call with MinArgs/MaxArgs bounds; the three-argument constructor fixes both bounds to args.size(). */class TCallNode: public TAstListNode { + public: + TCallNode(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + TCallNode(TPosition pos, const TString& opName, const TVector<TNodePtr>& args) + : TCallNode(pos, opName, args.size(), args.size(), args) + {} + + TString GetOpName() const override; + const TString* GetSourceName() const override; + + const TVector<TNodePtr>& GetArgs() const; + TCallNode* GetCallNode() override; + const TCallNode* GetCallNode() const override; + + protected: + bool DoInit(TContext& ctx, ISource* src) override; + bool ValidateArguments(TContext& ctx) const; + TString GetCallExplain() const; + void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override; + + protected: + TString OpName; + i32 MinArgs; + i32 MaxArgs; + TVector<TNodePtr> Args; + /* NOTE(review): this redeclares CacheGroupKey and hides the protected member of the same name in TAstListNode - confirm that two separate caches are intended. */mutable TMaybe<bool> CacheGroupKey; + + void DoUpdateState() const override; + }; + + class TCallNodeImpl final: public TCallNode { + TPtr DoClone() const final; + public: + TCallNodeImpl(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + TCallNodeImpl(TPosition pos, const TString& opName, const 
TVector<TNodePtr>& args); + }; + + class TFuncNodeImpl final : public TCallNode { + TPtr DoClone() const final; + public: + TFuncNodeImpl(TPosition pos, const TString& opName); + const TString* FuncName() const override; + }; + + /* Call node that additionally stores a required-argument count (ReqArgsCount) and overrides DoInit. */class TCallNodeDepArgs final : public TCallNode { + TPtr DoClone() const final; + public: + TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, const TVector<TNodePtr>& args); + protected: + bool DoInit(TContext& ctx, ISource* src) override; + + private: + const ui32 ReqArgsCount; + }; + + class TCallDirectRow final : public TCallNode { + TPtr DoClone() const final; + public: + TCallDirectRow(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + TCallDirectRow(TPosition pos, const TString& opName, const TVector<TNodePtr>& args); + protected: + bool DoInit(TContext& ctx, ISource* src) override; + void DoUpdateState() const override; + }; + + /* Common base of the TWin* call nodes declared after it; it overrides WindowSpecFunc and offers CallNodeClone for their DoClone implementations. */class TWinAggrEmulation: public TCallNode { + protected: + void DoUpdateState() const override; + bool DoInit(TContext& ctx, ISource* src) override; + TPtr WindowSpecFunc(const TNodePtr& type) const override; + public: + TWinAggrEmulation(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + protected: + /* Clone helper for subclasses: builds TNodeType from this node's position, OpName, argument bounds and cloned Args. */template<class TNodeType> + TPtr CallNodeClone() const { + return new TNodeType(GetPos(), OpName, MinArgs, MaxArgs, CloneContainer(Args)); + } + TString FuncAlias; + }; + + using TFunctionConfig = TMap<TString, TNodePtr>; + + class TExternalFunctionConfig final: public TAstListNode { + public: + TExternalFunctionConfig(TPosition pos, const TFunctionConfig& config) + : TAstListNode(pos) + , Config(config) + { + } + + bool DoInit(TContext& ctx, ISource* src) override; + TPtr DoClone() const final; + + private: + TFunctionConfig Config; + }; + + class TWinRowNumber 
final: public TWinAggrEmulation { + TPtr DoClone() const final { + return CallNodeClone<TWinRowNumber>(); + } + public: + TWinRowNumber(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + }; + + class TWinCumeDist final: public TWinAggrEmulation { + TPtr DoClone() const final { + return CallNodeClone<TWinCumeDist>(); + } + + bool DoInit(TContext& ctx, ISource* src) override; + public: + TWinCumeDist(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + }; + + /* Unlike its sibling TWin* nodes, TWinNTile also owns a FakeSource member. */class TWinNTile final: public TWinAggrEmulation { + TPtr DoClone() const final { + return CallNodeClone<TWinNTile>(); + } + bool DoInit(TContext& ctx, ISource* src) override; + public: + TWinNTile(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + + private: + TSourcePtr FakeSource; + }; + + class TWinLeadLag final: public TWinAggrEmulation { + TPtr DoClone() const final { + return CallNodeClone<TWinLeadLag>(); + } + bool DoInit(TContext& ctx, ISource* src) override; + public: + TWinLeadLag(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + }; + + class TWinRank final: public TWinAggrEmulation { + TPtr DoClone() const final { + return CallNodeClone<TWinRank>(); + } + bool DoInit(TContext& ctx, ISource* src) override; + public: + TWinRank(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args); + }; + + /* View selector; empty() is true when the value equals a default-constructed TViewDescription. */struct TViewDescription { + TString ViewName = ""; + bool PrimaryFlag = false; + + bool empty() const { return *this == TViewDescription(); } + bool operator == (const TViewDescription&) const = default; + }; + + /* Base class for table key nodes; BuildKeys() produces the key node for the requested EBuildKeysMode. */class ITableKeys: public INode { + public: + enum class EBuildKeysMode { + CREATE, + DROP, + INPUT, + WRITE + }; + + ITableKeys(TPosition pos); + virtual const TString* GetTableName() const; + virtual TNodePtr BuildKeys(TContext& ctx, EBuildKeysMode mode) = 0; + + protected: + 
TNodePtr AddView(TNodePtr key, const TViewDescription& view); + + private: + /// TableKeys are not clonable: DoClone() returns an empty pointer + TPtr DoClone() const final { + return {}; + } + + ITableKeys* GetTableKeys() override; + TAstNode* Translate(TContext& ctx) const override; + }; + + enum class ESampleClause { + TableSample, //from SQL standard, percentage rate (0..100) + Sample //simplified (implied Bernoulli mode), fraction (0..1) + }; + + enum class ESampleMode { + Bernoulli, + System + }; + + /* Atom given either as an explicit string or as a node evaluated later; see the Explicit and Node members. */class TDeferredAtom { + public: + TDeferredAtom(); + TDeferredAtom(TPosition pos, const TString& str); + TDeferredAtom(TNodePtr node, TContext& ctx); + const TString* GetLiteral() const; + bool GetLiteral(TString& value, TContext& ctx) const; + TNodePtr Build() const; + TString GetRepr() const; + bool Empty() const; + bool HasNode() const; + + private: + TMaybe<TString> Explicit; + TNodePtr Node; // atom or evaluation node + TString Repr; + }; + + struct TTopicRef { + TString RefName; + TDeferredAtom Cluster; + TNodePtr Consumers; + TNodePtr Settings; + TNodePtr Keys; + + TTopicRef() = default; + TTopicRef(const TString& refName, const TDeferredAtom& cluster, TNodePtr keys); + TTopicRef(const TTopicRef&) = default; + TTopicRef& operator=(const TTopicRef&) = default; + }; + + struct TIdentifier { + TPosition Pos; + TString Name; + + TIdentifier(TPosition pos, const TString& name) + : Pos(pos) + , Name(name) {} + }; + + struct TColumnConstraints { + TNodePtr DefaultExpr; + bool Nullable = true; + + TColumnConstraints(TNodePtr defaultExpr, bool nullable); + }; + + struct TColumnSchema { + enum class ETypeOfChange { + Nothing, + DropNotNullConstraint, + SetNotNullConstraint, // todo flown4qqqq + SetFamily + }; + + TPosition Pos; + TString Name; + TNodePtr Type; + bool Nullable; + TVector<TIdentifier> Families; + bool Serial; + TNodePtr DefaultExpr; + const ETypeOfChange TypeOfChange; + + TColumnSchema(TPosition pos, const TString& name, const TNodePtr& type, bool nullable, + TVector<TIdentifier> 
families, bool serial, TNodePtr defaultExpr, ETypeOfChange typeOfChange = ETypeOfChange::Nothing); + }; + + /* Ref-counted column set: Real and Artificial name sets, an ordered List, and flags such as All, QualifiedAll, HasUnreliable and HasUnnamed. */struct TColumns: public TSimpleRefCount<TColumns> { + TSet<TString> Real; + TSet<TString> Artificial; + TVector<TString> List; + TVector<bool> NamedColumns; + bool All = false; + bool QualifiedAll = false; + bool HasUnreliable = false; + bool HasUnnamed = false; + + bool Add(const TString* column, bool countHint, bool isArtificial = false, bool isReliable = true); + TString AddUnnamed(); + void Merge(const TColumns& columns); + void SetPrefix(const TString& prefix); + void SetAll(); + bool IsColumnPossible(TContext& ctx, const TString& column) const; + }; + + /* Ref-counted sort item: an order expression plus an ascending flag. */class TSortSpecification: public TSimpleRefCount<TSortSpecification> { + public: + TSortSpecification(const TNodePtr& orderExpr, bool ascending); + const TNodePtr OrderExpr; + const bool Ascending; + TIntrusivePtr<TSortSpecification> Clone() const; + ~TSortSpecification() {} + private: + const TNodePtr CleanOrderExpr; + }; + typedef TIntrusivePtr<TSortSpecification> TSortSpecificationPtr; + + enum EFrameType { + FrameByRows, + FrameByRange, + FrameByGroups, + }; + enum EFrameExclusions { + FrameExclNone, // same as EXCLUDE NO OTHERS + FrameExclCurRow, + FrameExclGroup, + FrameExclTies, + }; + enum EFrameSettings { + // keep order + FrameUndefined, + FramePreceding, + FrameCurrentRow, + FrameFollowing, + }; + + struct TFrameBound: public TSimpleRefCount<TFrameBound> { + TPosition Pos; + TNodePtr Bound; + EFrameSettings Settings = FrameUndefined; + + TIntrusivePtr<TFrameBound> Clone() const; + ~TFrameBound() {} + }; + typedef TIntrusivePtr<TFrameBound> TFrameBoundPtr; + + + /* Window frame: type, begin and end bounds, and exclusion; defaults are FrameByRows and FrameExclNone. */struct TFrameSpecification: public TSimpleRefCount<TFrameSpecification> { + EFrameType FrameType = FrameByRows; + TFrameBoundPtr FrameBegin; + TFrameBoundPtr FrameEnd; + EFrameExclusions FrameExclusion = FrameExclNone; + + TIntrusivePtr<TFrameSpecification> Clone() const; + ~TFrameSpecification() {} + }; + typedef 
TIntrusivePtr<TFrameSpecification> TFrameSpecificationPtr; + + struct TLegacyHoppingWindowSpec: public TSimpleRefCount<TLegacyHoppingWindowSpec> { + TNodePtr TimeExtractor; + TNodePtr Hop; + TNodePtr Interval; + TNodePtr Delay; + bool DataWatermarks; + + TIntrusivePtr<TLegacyHoppingWindowSpec> Clone() const; + ~TLegacyHoppingWindowSpec() {} + }; + typedef TIntrusivePtr<TLegacyHoppingWindowSpec> TLegacyHoppingWindowSpecPtr; + + /* Window definition: optional existing window name, partition expressions, compact flag, ordering, optional session node and frame. */struct TWindowSpecification: public TSimpleRefCount<TWindowSpecification> { + TMaybe<TString> ExistingWindowName; + TVector<TNodePtr> Partitions; + bool IsCompact = false; + TVector<TSortSpecificationPtr> OrderBy; + TNodePtr Session; + TFrameSpecificationPtr Frame; + + TIntrusivePtr<TWindowSpecification> Clone() const; + ~TWindowSpecification() {} + }; + typedef TIntrusivePtr<TWindowSpecification> TWindowSpecificationPtr; + typedef TMap<TString, TWindowSpecificationPtr> TWinSpecs; + + TWinSpecs CloneContainer(const TWinSpecs& specs); + + void WarnIfAliasFromSelectIsUsedInGroupBy(TContext& ctx, const TVector<TNodePtr>& selectTerms, const TVector<TNodePtr>& groupByTerms, + const TVector<TNodePtr>& groupByExprTerms); + bool ValidateAllNodesForAggregation(TContext& ctx, const TVector<TNodePtr>& nodes); + + struct TWriteSettings { + bool Discard = false; + TDeferredAtom Label; + }; + + /* Column reference node, constructed from either a column name or a column expression together with a source name. */class TColumnNode final: public INode { + public: + TColumnNode(TPosition pos, const TString& column, const TString& source, bool maybeType); + TColumnNode(TPosition pos, const TNodePtr& column, const TString& source); + + virtual ~TColumnNode(); + bool IsAsterisk() const override; + virtual bool IsArtificial() const; + const TString* GetColumnName() const override; + const TString* GetSourceName() const override; + TColumnNode* GetColumnNode() override; + const TColumnNode* GetColumnNode() const override; + TAstNode* Translate(TContext& ctx) const override; + void ResetColumn(const TString& column, const TString& source); + void ResetColumn(const TNodePtr& column, 
const TString& source); + + void SetUseSourceAsColumn(); + void SetUseSource(); + void ResetAsReliable(); + void SetAsNotReliable(); + bool IsReliable() const; + bool IsUseSourceAsColumn() const; + bool IsUseSource() const; + bool CanBeType() const; + + private: + bool DoInit(TContext& ctx, ISource* src) override; + TPtr DoClone() const final; + + void DoUpdateState() const override; + + private: + static const TString Empty; + TNodePtr Node; + TString ColumnName; + TNodePtr ColumnExpr; + TString Source; + bool GroupKey = false; + bool Artificial = false; + bool Reliable = true; + bool UseSource = false; + bool UseSourceAsColumn = false; + bool MaybeType = false; + }; + + class TArgPlaceholderNode final: public INode + { + public: + TArgPlaceholderNode(TPosition pos, const TString &name); + + TAstNode* Translate(TContext& ctx) const override; + + TString GetName() const; + TNodePtr DoClone() const final; + + protected: + bool DoInit(TContext& ctx, ISource* src) override; + + private: + TString Name; + }; + + enum class EAggregateMode { + Normal, + Distinct, + OverWindow, + OverWindowDistinct, + }; + + class TTupleNode: public TAstListNode { + public: + TTupleNode(TPosition pos, const TVector<TNodePtr>& exprs); + + bool IsEmpty() const; + const TVector<TNodePtr>& Elements() const; + TTupleNode* GetTupleNode() override; + const TTupleNode* GetTupleNode() const override; + bool DoInit(TContext& ctx, ISource* src) override; + size_t GetTupleSize() const override; + TPtr GetTupleElement(size_t index) const override; + TNodePtr DoClone() const final; + private: + void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override; + const TString* GetSourceName() const override; + + const TVector<TNodePtr> Exprs; + }; + + class TStructNode: public TAstListNode { + public: + TStructNode(TPosition pos, const TVector<TNodePtr>& exprs, const TVector<TNodePtr>& labels, bool ordered); + + bool DoInit(TContext& ctx, ISource* src) override; + 
TNodePtr DoClone() const final; + const TVector<TNodePtr>& GetExprs() { + return Exprs; + } + TStructNode* GetStructNode() override; + const TStructNode* GetStructNode() const override; + + private: + void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override; + const TString* GetSourceName() const override; + + const TVector<TNodePtr> Exprs; + const TVector<TNodePtr> Labels; + const bool Ordered; + }; + + + class TUdfNode: public INode { + public: + TUdfNode(TPosition pos, const TVector<TNodePtr>& args); + bool DoInit(TContext& ctx, ISource* src) override final; + TNodePtr DoClone() const override final; + TAstNode* Translate(TContext& ctx) const override; + const TNodePtr GetExternalTypes() const; + const TString& GetFunction() const; + const TString& GetModule() const; + TNodePtr GetRunConfig() const; + const TDeferredAtom& GetTypeConfig() const; + TUdfNode* GetUdfNode() override; + const TUdfNode* GetUdfNode() const override; + private: + TVector<TNodePtr> Args; + const TString* FunctionName; + const TString* ModuleName; + TNodePtr ExternalTypesTuple = nullptr; + TNodePtr RunConfig; + TDeferredAtom TypeConfig; + }; + + class IAggregation: public INode { + public: + bool IsDistinct() const; + + void DoUpdateState() const override; + + virtual const TString* GetGenericKey() const; + + virtual bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) = 0; + + virtual std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const; + + virtual TNodePtr AggregationTraitsFactory() const = 0; + + virtual std::vector<ui32> GetFactoryColumnIndices() const; + + virtual void AddFactoryArguments(TNodePtr& apply) const; + + virtual TNodePtr WindowTraits(const TNodePtr& type, TContext& ctx) const; + + const TString& GetName() const; + + EAggregateMode GetAggregationMode() const; + void MarkKeyColumnAsGenerated(); + + 
virtual void Join(IAggregation* aggr); + + private: + virtual TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const = 0; + + protected: + IAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode mode); + TAstNode* Translate(TContext& ctx) const override; + TNodePtr WrapIfOverState(const TNodePtr& input, bool overState, bool many, TContext& ctx) const; + virtual TNodePtr GetExtractor(bool many, TContext& ctx) const = 0; + + TString Name; + TString Func; + const EAggregateMode AggMode; + TString DistinctKey; + bool IsGeneratedKeyColumn = false; + }; + + enum class EExprSeat: int { + Open = 0, + FlattenByExpr, + FlattenBy, + GroupBy, + DistinctAggr, + WindowPartitionBy, + Max + }; + + enum class EExprType: int { + WithExpression, + ColumnOnly, + }; + + enum class EOrderKind: int { + None, + Sort, + Assume, + Passthrough + }; + + class TListOfNamedNodes final: public INode { + public: + TListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs); + + TVector<TNodePtr>* ContentListPtr() override; + TAstNode* Translate(TContext& ctx) const override; + TPtr DoClone() const final; + void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final; + private: + TVector<TNodePtr> Exprs; + TString Meaning; + }; + + class TLiteralNode: public TAstListNode { + public: + TLiteralNode(TPosition pos, bool isNull); + TLiteralNode(TPosition pos, const TString& type, const TString& value); + TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags); + TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags, const TString& type); + bool IsNull() const override; + const TString* GetLiteral(const TString& type) const override; + void DoUpdateState() const override; + TPtr DoClone() const override; + bool IsLiteral() const override; + TString GetLiteralType() const override; + TString GetLiteralValue() const override; + protected: + bool Null; + bool Void; + TString Type; + TString Value; + 
}; + + class TAsteriskNode: public INode { + public: + TAsteriskNode(TPosition pos); + bool IsAsterisk() const override; + TPtr DoClone() const override; + TAstNode* Translate(TContext& ctx) const override; + }; + + template<typename T> + class TLiteralNumberNode: public TLiteralNode { + public: + TLiteralNumberNode(TPosition pos, const TString& type, const TString& value, bool implicitType = false); + TPtr DoClone() const override final; + bool DoInit(TContext& ctx, ISource* src) override; + bool IsIntegerLiteral() const override; + TPtr ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const override; + private: + const bool ImplicitType; + }; + + struct TTableArg { + bool HasAt = false; + TNodePtr Expr; + TDeferredAtom Id; + TViewDescription View; + }; + + class TTableRows final : public INode { + public: + TTableRows(TPosition pos, const TVector<TNodePtr>& args); + TTableRows(TPosition pos, ui32 argsCount); + + bool DoInit(TContext& ctx, ISource* src) override; + + void DoUpdateState() const override; + + TNodePtr DoClone() const final; + TAstNode* Translate(TContext& ctx) const override; + + private: + ui32 ArgsCount; + TNodePtr Node; + }; + + struct TStringContent { + TString Content; + NYql::NUdf::EDataSlot Type = NYql::NUdf::EDataSlot::String; + TMaybe<TString> PgType; + ui32 Flags = NYql::TNodeFlags::Default; + }; + + TMaybe<TStringContent> StringContent(TContext& ctx, TPosition pos, const TString& input); + TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, const TString& input); + + struct TTtlSettings { + enum class EUnit { + Seconds /* "seconds" */, + Milliseconds /* "milliseconds" */, + Microseconds /* "microseconds" */, + Nanoseconds /* "nanoseconds" */, + }; + + TIdentifier ColumnName; + TNodePtr Expr; + TMaybe<EUnit> ColumnUnit; + + TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit = {}); + }; + + struct TTableSettings { + TNodePtr CompactionPolicy; + 
TMaybe<TIdentifier> AutoPartitioningBySize; + TNodePtr PartitionSizeMb; + TMaybe<TIdentifier> AutoPartitioningByLoad; + TNodePtr MinPartitions; + TNodePtr MaxPartitions; + TNodePtr PartitionCount; + TNodePtr UniformPartitions; + TVector<TVector<TNodePtr>> PartitionAtKeys; + TMaybe<TIdentifier> KeyBloomFilter; + TNodePtr ReadReplicasSettings; + NYql::TResetableSetting<TTtlSettings, void> TtlSettings; + NYql::TResetableSetting<TNodePtr, void> Tiering; + TMaybe<TIdentifier> StoreType; + TNodePtr PartitionByHashFunction; + TMaybe<TIdentifier> StoreExternalBlobs; + + TNodePtr DataSourcePath; + NYql::TResetableSetting<TNodePtr, void> Location; + TVector<NYql::TResetableSetting<std::pair<TIdentifier, TNodePtr>, TIdentifier>> ExternalSourceParameters; + + bool IsSet() const { + return CompactionPolicy || AutoPartitioningBySize || PartitionSizeMb || AutoPartitioningByLoad + || MinPartitions || MaxPartitions || UniformPartitions || PartitionAtKeys || KeyBloomFilter + || ReadReplicasSettings || TtlSettings || Tiering || StoreType || PartitionByHashFunction + || StoreExternalBlobs || DataSourcePath || Location || ExternalSourceParameters; + } + }; + + struct TFamilyEntry { + TFamilyEntry(const TIdentifier& name) + :Name(name) + {} + + TIdentifier Name; + TNodePtr Data; + TNodePtr Compression; + TNodePtr CompressionLevel; + }; + + struct TVectorIndexSettings { + enum class EDistance { + Cosine /* "cosine" */ + , Manhattan /* "manhattan" */ + , Euclidean /* "euclidean" */ + }; + + enum class ESimilarity { + Cosine /* "cosine" */ + , InnerProduct /* "inner_product" */ + }; + + enum class EVectorType { + Float /* "float" */ + , Uint8 /* "uint8" */ + , Int8 /* "int8" */ + , Bit /* "bit" */ + }; + + std::optional<EDistance> Distance; + std::optional<ESimilarity> Similarity; + std::optional<EVectorType> VectorType; + ui32 VectorDimension = 0; + ui32 Clusters = 0; + ui32 Levels = 0; + + bool Validate(TContext& ctx) const; + }; + + struct TIndexDescription { + enum class EType { + 
GlobalSync, + GlobalAsync, + GlobalSyncUnique, + GlobalVectorKmeansTree, + }; + + TIndexDescription(const TIdentifier& name, EType type = EType::GlobalSync) + : Name(name) + , Type(type) + {} + + TIdentifier Name; + EType Type; + TVector<TIdentifier> IndexColumns; + TVector<TIdentifier> DataColumns; + TTableSettings TableSettings; + + using TIndexSettings = std::variant<std::monostate, TVectorIndexSettings>; + TIndexSettings IndexSettings; + }; + + struct TChangefeedSettings { + struct TLocalSinkSettings { + // no special settings + }; + + TNodePtr Mode; + TNodePtr Format; + TNodePtr InitialScan; + TNodePtr VirtualTimestamps; + TNodePtr ResolvedTimestamps; + TNodePtr RetentionPeriod; + TNodePtr TopicAutoPartitioning; + TNodePtr TopicPartitions; + TNodePtr TopicMaxActivePartitions; + TNodePtr AwsRegion; + std::optional<std::variant<TLocalSinkSettings>> SinkSettings; + }; + + struct TChangefeedDescription { + TChangefeedDescription(const TIdentifier& name) + : Name(name) + , Disable(false) + {} + + TIdentifier Name; + TChangefeedSettings Settings; + bool Disable; + }; + + struct TCreateTableParameters { + TVector<TColumnSchema> Columns; + TVector<TIdentifier> PkColumns; + TVector<TIdentifier> PartitionByColumns; + TVector<std::pair<TIdentifier, bool>> OrderByColumns; + TVector<TIndexDescription> Indexes; + TVector<TFamilyEntry> ColumnFamilies; + TVector<TChangefeedDescription> Changefeeds; + TTableSettings TableSettings; + ETableType TableType = ETableType::Table; + bool Temporary = false; + }; + + struct TTableRef; + struct TAnalyzeParams { + std::shared_ptr<TTableRef> Table; + TVector<TString> Columns; + }; + + struct TAlterTableParameters { + TVector<TColumnSchema> AddColumns; + TVector<TString> DropColumns; + TVector<TColumnSchema> AlterColumns; + TVector<TFamilyEntry> AddColumnFamilies; + TVector<TFamilyEntry> AlterColumnFamilies; + TTableSettings TableSettings; + TVector<TIndexDescription> AddIndexes; + TVector<TIndexDescription> AlterIndexes; + 
TVector<TIdentifier> DropIndexes; + TMaybe<std::pair<TIdentifier, TIdentifier>> RenameIndexTo; + TMaybe<TIdentifier> RenameTo; + TVector<TChangefeedDescription> AddChangefeeds; + TVector<TChangefeedDescription> AlterChangefeeds; + TVector<TIdentifier> DropChangefeeds; + ETableType TableType = ETableType::Table; + + bool IsEmpty() const { + return AddColumns.empty() && DropColumns.empty() && AlterColumns.empty() + && AddColumnFamilies.empty() && AlterColumnFamilies.empty() + && !TableSettings.IsSet() + && AddIndexes.empty() && AlterIndexes.empty() && DropIndexes.empty() && !RenameIndexTo.Defined() + && !RenameTo.Defined() + && AddChangefeeds.empty() && AlterChangefeeds.empty() && DropChangefeeds.empty(); + } + }; + + struct TRoleParameters { + TMaybe<TDeferredAtom> Password; + bool IsPasswordEncrypted = false; + TVector<TDeferredAtom> Roles; + }; + + struct TTopicConsumerSettings { + struct TLocalSinkSettings { + // no special settings + }; + + TNodePtr Important; + NYql::TResetableSetting<TNodePtr, void> ReadFromTs; + NYql::TResetableSetting<TNodePtr, void> SupportedCodecs; + }; + + struct TTopicConsumerDescription { + TTopicConsumerDescription(const TIdentifier& name) + : Name(name) + {} + + TIdentifier Name; + TTopicConsumerSettings Settings; + }; + struct TTopicSettings { + NYql::TResetableSetting<TNodePtr, void> MinPartitions; + NYql::TResetableSetting<TNodePtr, void> MaxPartitions; + NYql::TResetableSetting<TNodePtr, void> RetentionPeriod; + NYql::TResetableSetting<TNodePtr, void> RetentionStorage; + NYql::TResetableSetting<TNodePtr, void> SupportedCodecs; + NYql::TResetableSetting<TNodePtr, void> PartitionWriteSpeed; + NYql::TResetableSetting<TNodePtr, void> PartitionWriteBurstSpeed; + NYql::TResetableSetting<TNodePtr, void> MeteringMode; + NYql::TResetableSetting<TNodePtr, void> AutoPartitioningStabilizationWindow; + NYql::TResetableSetting<TNodePtr, void> AutoPartitioningUpUtilizationPercent; + NYql::TResetableSetting<TNodePtr, void> 
AutoPartitioningDownUtilizationPercent; + NYql::TResetableSetting<TNodePtr, void> AutoPartitioningStrategy; + + bool IsSet() const { + return MinPartitions || + MaxPartitions || + RetentionPeriod || + RetentionStorage || + SupportedCodecs || + PartitionWriteSpeed || + PartitionWriteBurstSpeed || + MeteringMode || + AutoPartitioningStabilizationWindow || + AutoPartitioningUpUtilizationPercent || + AutoPartitioningDownUtilizationPercent || + AutoPartitioningStrategy + ; + } + }; + + + struct TCreateTopicParameters { + TVector<TTopicConsumerDescription> Consumers; + TTopicSettings TopicSettings; + bool ExistingOk; + }; + + struct TAlterTopicParameters { + TVector<TTopicConsumerDescription> AddConsumers; + THashMap<TString, TTopicConsumerDescription> AlterConsumers; + TVector<TIdentifier> DropConsumers; + TTopicSettings TopicSettings; + bool MissingOk; + }; + + struct TDropTopicParameters { + bool MissingOk; + }; + + struct TCreateBackupCollectionParameters { + std::map<TString, TDeferredAtom> Settings; + + bool Database; + TVector<TDeferredAtom> Tables; + + bool ExistingOk; + }; + + struct TAlterBackupCollectionParameters { + enum class EDatabase { + Unchanged, + Add, + Drop, + }; + + std::map<TString, TDeferredAtom> Settings; + std::set<TString> SettingsToReset; + + EDatabase Database = EDatabase::Unchanged; + TVector<TDeferredAtom> TablesToAdd; + TVector<TDeferredAtom> TablesToDrop; + + bool MissingOk; + }; + + struct TDropBackupCollectionParameters { + bool MissingOk; + }; + + struct TBackupParameters { + bool Incremental = false; + }; + + struct TRestoreParameters { + TString At; + }; + + TString IdContent(TContext& ctx, const TString& str); + TString IdContentFromString(TContext& ctx, const TString& str); + TTableHints GetContextHints(TContext& ctx); + + TString TypeByAlias(const TString& alias, bool normalize = true); + + TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags = NYql::TNodeFlags::ArbitraryContent, + bool isOptionalArg = false); + 
TNodePtr BuildQuotedAtom(TPosition pos, const TString& content, ui32 flags = NYql::TNodeFlags::ArbitraryContent); + + TNodePtr BuildLiteralNull(TPosition pos); + TNodePtr BuildLiteralVoid(TPosition pos); + /// String is checked as quotable, support escaping and multiline + TNodePtr BuildLiteralSmartString(TContext& ctx, const TString& value); + + struct TExprOrIdent { + TNodePtr Expr; + TString Ident; + }; + TMaybe<TExprOrIdent> BuildLiteralTypedSmartStringOrId(TContext& ctx, const TString& value); + + TNodePtr BuildLiteralRawString(TPosition pos, const TString& value, bool isUtf8 = false); + TNodePtr BuildLiteralBool(TPosition pos, bool value); + TNodePtr BuildEmptyAction(TPosition pos); + + TNodePtr BuildTuple(TPosition pos, const TVector<TNodePtr>& exprs); + + TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprs); + TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels); + TNodePtr BuildOrderedStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels); + + TNodePtr BuildListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs); + + TNodePtr BuildArgPlaceholder(TPosition pos, const TString& name); + + TNodePtr BuildColumn(TPosition pos, const TString& column = TString(), const TString& source = TString()); + TNodePtr BuildColumn(TPosition pos, const TNodePtr& column, const TString& source = TString()); + TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source = TString()); + TNodePtr BuildColumnOrType(TPosition pos, const TString& column = TString()); + TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup); + TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar); + TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias); + TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& 
resName = TString()); + TNodePtr BuildLambda(TPosition pos, TNodePtr params, const TVector<TNodePtr>& bodies); + TNodePtr BuildDataType(TPosition pos, const TString& typeName); + TMaybe<TString> LookupSimpleType(const TStringBuf& alias, bool flexibleTypes, bool isPgType); + TNodePtr BuildSimpleType(TContext& ctx, TPosition pos, const TString& typeName, bool dataOnly); + TNodePtr BuildIsNullOp(TPosition pos, TNodePtr a); + TNodePtr BuildBinaryOp(TContext& ctx, TPosition pos, const TString& opName, TNodePtr a, TNodePtr b); + TNodePtr BuildBinaryOpRaw(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b); + + TNodePtr BuildCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr call); + TNodePtr BuildYsonOptionsNode(TPosition pos, bool autoConvert, bool strict, bool fastYson); + + TNodePtr BuildDoCall(TPosition pos, const TNodePtr& node); + TNodePtr BuildTupleResult(TNodePtr tuple, size_t ensureTupleSize); + TNodePtr BuildNamedExprReference(TNodePtr parent, const TString& name, TMaybe<size_t> tupleIndex); + TNodePtr BuildNamedExpr(TNodePtr parent); + + // Implemented in aggregation.cpp + TAggregationPtr BuildFactoryAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi = false); + TAggregationPtr BuildKeyPayloadFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildPayloadPredicateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildTwoArgsFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildLinearHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + template <bool HasKey> + TAggregationPtr 
BuildTopFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildTopFreqFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildCountDistinctEstimateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildListFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildCountAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode); + TAggregationPtr BuildUserDefinedFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + TAggregationPtr BuildPGFactoryAggregation(TPosition pos, const TString& name, EAggregateMode aggMode); + TAggregationPtr BuildNthFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); + + + // Implemented in builtin.cpp + TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce = false); + TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args); + TNodePtr BuildBuiltinFunc( + TContext& ctx, + TPosition pos, + TString name, + const TVector<TNodePtr>& args, + const TString& nameSpace = TString(), + EAggregateMode aggMode = EAggregateMode::Normal, + bool* mustUseNamed = nullptr, + bool warnOnYqlNameSpace = true + ); + + // Implemented in query.cpp + TNodePtr BuildCreateUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped); + TNodePtr BuildCreateGroup(TPosition pos, const 
TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped); + TNodePtr BuildAlterUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TRoleParameters& params, TScopedStatePtr scoped); + TNodePtr BuildRenameUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped); + TNodePtr BuildAlterGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TVector<TDeferredAtom>& toChange, bool isDrop, + TScopedStatePtr scoped); + TNodePtr BuildRenameGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped); + TNodePtr BuildDropRoles(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& toDrop, bool isUser, bool missingOk, TScopedStatePtr scoped); + TNodePtr BuildGrantPermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleName, TScopedStatePtr scoped); + TNodePtr BuildRevokePermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleName, TScopedStatePtr scoped); + TNodePtr BuildUpsertObjectOperation(TPosition pos, const TString& objectId, const TString& typeId, + std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context); + TNodePtr BuildCreateObjectOperation(TPosition pos, const TString& objectId, const TString& typeId, + bool existingOk, bool replaceIfExists, std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context); + TNodePtr 
BuildAlterObjectOperation(TPosition pos, const TString& secretId, const TString& typeId, + std::map<TString, TDeferredAtom>&& features, std::set<TString>&& featuresToReset, const TObjectOperatorContext& context); + TNodePtr BuildDropObjectOperation(TPosition pos, const TString& secretId, const TString& typeId, + bool missingOk, std::map<TString, TDeferredAtom>&& options, const TObjectOperatorContext& context); + TNodePtr BuildCreateAsyncReplication(TPosition pos, const TString& id, + std::vector<std::pair<TString, TString>>&& targets, + std::map<TString, TNodePtr>&& settings, + const TObjectOperatorContext& context); + TNodePtr BuildAlterAsyncReplication(TPosition pos, const TString& id, + std::map<TString, TNodePtr>&& settings, + const TObjectOperatorContext& context); + TNodePtr BuildDropAsyncReplication(TPosition pos, const TString& id, bool cascade, const TObjectOperatorContext& context); + TNodePtr BuildWriteResult(TPosition pos, const TString& label, TNodePtr settings); + TNodePtr BuildCommitClusters(TPosition pos); + TNodePtr BuildRollbackClusters(TPosition pos); + TNodePtr BuildQuery(TPosition pos, const TVector<TNodePtr>& blocks, bool topLevel, TScopedStatePtr scoped); + TNodePtr BuildPragma(TPosition pos, const TString& prefix, const TString& name, const TVector<TDeferredAtom>& values, bool valueDefault); + TNodePtr BuildSqlLambda(TPosition pos, TVector<TString>&& args, TVector<TNodePtr>&& exprSeq); + TNodePtr BuildWorldIfNode(TPosition pos, TNodePtr predicate, TNodePtr thenNode, TNodePtr elseNode, bool isEvaluate); + TNodePtr BuildWorldForNode(TPosition pos, TNodePtr list, TNodePtr bodyNode, TNodePtr elseNode, bool isEvaluate, bool isParallel); + + TNodePtr BuildCreateTopic(TPosition pos, const TTopicRef& tr, const TCreateTopicParameters& params, + TScopedStatePtr scoped); + TNodePtr BuildAlterTopic(TPosition pos, const TTopicRef& tr, const TAlterTopicParameters& params, + TScopedStatePtr scoped); + TNodePtr BuildDropTopic(TPosition pos, const TTopicRef& 
topic, const TDropTopicParameters& params, + TScopedStatePtr scoped); + + TNodePtr BuildCreateBackupCollection(TPosition pos, const TString& id, + const TCreateBackupCollectionParameters& params, + const TObjectOperatorContext& context); + TNodePtr BuildAlterBackupCollection(TPosition pos, const TString& id, + const TAlterBackupCollectionParameters& params, + const TObjectOperatorContext& context); + TNodePtr BuildDropBackupCollection(TPosition pos, const TString& id, + const TDropBackupCollectionParameters& params, + const TObjectOperatorContext& context); + + TNodePtr BuildBackup(TPosition pos, const TString& id, + const TBackupParameters& params, + const TObjectOperatorContext& context); + TNodePtr BuildRestore(TPosition pos, const TString& id, + const TRestoreParameters& params, + const TObjectOperatorContext& context); + + template<class TContainer> + TMaybe<TString> FindMistypeIn(const TContainer& container, const TString& name) { + for (auto& item: container) { + if (NLevenshtein::Distance(name, item) < NYql::DefaultMistypeDistance) { + return item; + } + } + return {}; + } + + bool Parseui32(TNodePtr from, ui32& to); + TNodePtr GroundWithExpr(const TNodePtr& ground, const TNodePtr& expr); + const TString* DeriveCommonSourceName(const TVector<TNodePtr> &nodes); +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/object_processing.cpp b/yql/essentials/sql/v1/object_processing.cpp new file mode 100644 index 00000000000..80e3962bf8d --- /dev/null +++ b/yql/essentials/sql/v1/object_processing.cpp @@ -0,0 +1,68 @@ +#include "object_processing.h" + +#include <yql/essentials/core/sql_types/yql_callable_names.h> + +namespace NSQLTranslationV1 { +using namespace NYql; + +INode::TPtr TObjectProcessorImpl::BuildKeys() const { + auto keys = Y("Key"); + keys = L(keys, Q(Y(Q("objectId"), Y("String", BuildQuotedAtom(Pos, ObjectId))))); + keys = L(keys, Q(Y(Q("typeId"), Y("String", BuildQuotedAtom(Pos, TypeId))))); + return keys; +} + 
+TObjectProcessorImpl::TObjectProcessorImpl(TPosition pos, const TString& objectId, const TString& typeId, const TObjectOperatorContext& context) + : TBase(pos) + , TObjectOperatorContext(context) + , ObjectId(objectId) + , TypeId(typeId) +{ + +} + +bool TObjectProcessorImpl::DoInit(TContext& ctx, ISource* src) { + Y_UNUSED(src); + Scoped->UseCluster(ServiceId, Cluster); + auto options = FillFeatures(BuildOptions()); + auto keys = BuildKeys(); + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + return TAstListNode::DoInit(ctx, src); +} + +INode::TPtr TCreateObject::FillFeatures(INode::TPtr options) const { + if (!Features.empty()) { + auto features = Y(); + for (auto&& i : Features) { + if (i.second.HasNode()) { + features->Add(Q(Y(BuildQuotedAtom(Pos, i.first), i.second.Build()))); + } else { + features->Add(Q(Y(BuildQuotedAtom(Pos, i.first)))); + } + } + options->Add(Q(Y(Q("features"), Q(features)))); + } + if (!FeaturesToReset.empty()) { + auto reset = Y(); + for (const auto& featureName : FeaturesToReset) { + reset->Add(BuildQuotedAtom(Pos, featureName)); + } + options->Add(Q(Y(Q("resetFeatures"), Q(reset)))); + } + return options; +} + +TObjectOperatorContext::TObjectOperatorContext(TScopedStatePtr scoped) + : Scoped(scoped) + , ServiceId(Scoped->CurrService) + , Cluster(Scoped->CurrCluster) +{ + +} + +} diff --git a/yql/essentials/sql/v1/object_processing.h b/yql/essentials/sql/v1/object_processing.h new file mode 100644 index 00000000000..4114235ee6d --- /dev/null +++ b/yql/essentials/sql/v1/object_processing.h @@ -0,0 +1,105 @@ +#pragma once +#include "node.h" +#include "context.h" + +namespace NSQLTranslationV1 { + +class TObjectOperatorContext { +protected: + TScopedStatePtr Scoped; +public: 
+ TString ServiceId; + TDeferredAtom Cluster; + TObjectOperatorContext(const TObjectOperatorContext& baseItem) = default; + TObjectOperatorContext(TScopedStatePtr scoped); +}; + +class TObjectProcessorImpl: public TAstListNode, public TObjectOperatorContext { +protected: + using TBase = TAstListNode; + TString ObjectId; + TString TypeId; + + virtual INode::TPtr BuildOptions() const = 0; + virtual INode::TPtr FillFeatures(INode::TPtr options) const = 0; + INode::TPtr BuildKeys() const; +public: + TObjectProcessorImpl(TPosition pos, const TString& objectId, const TString& typeId, const TObjectOperatorContext& context); + + bool DoInit(TContext& ctx, ISource* src) override; + + TPtr DoClone() const final { + return {}; + } +}; + +class TCreateObject: public TObjectProcessorImpl { +private: + using TBase = TObjectProcessorImpl; + std::map<TString, TDeferredAtom> Features; + std::set<TString> FeaturesToReset; +protected: + bool ExistingOk = false; + bool ReplaceIfExists = false; +protected: + virtual INode::TPtr BuildOptions() const override { + TString mode; + if (ExistingOk) { + mode = "createObjectIfNotExists"; + } else if (ReplaceIfExists) { + mode = "createObjectOrReplace"; + } else { + mode = "createObject"; + } + + return Y(Q(Y(Q("mode"), Q(mode)))); + } + virtual INode::TPtr FillFeatures(INode::TPtr options) const override; +public: + TCreateObject(TPosition pos, const TString& objectId, + const TString& typeId, bool existingOk, bool replaceIfExists, std::map<TString, TDeferredAtom>&& features, std::set<TString>&& featuresToReset, const TObjectOperatorContext& context) + : TBase(pos, objectId, typeId, context) + , Features(std::move(features)) + , FeaturesToReset(std::move(featuresToReset)) + , ExistingOk(existingOk) + , ReplaceIfExists(replaceIfExists) { + } +}; + +class TUpsertObject final: public TCreateObject { +private: + using TBase = TCreateObject; +protected: + virtual INode::TPtr BuildOptions() const override { + return Y(Q(Y(Q("mode"), 
Q("upsertObject")))); + } +public: + using TBase::TBase; +}; + +class TAlterObject final: public TCreateObject { +private: + using TBase = TCreateObject; +protected: + virtual INode::TPtr BuildOptions() const override { + return Y(Q(Y(Q("mode"), Q("alterObject")))); + } +public: + using TBase::TBase; +}; + +class TDropObject final: public TCreateObject { +private: + using TBase = TCreateObject; + bool MissingOk() const { + return ExistingOk; // Because we were derived from TCreateObject + } +protected: + virtual INode::TPtr BuildOptions() const override { + return Y(Q(Y(Q("mode"), Q(MissingOk() ? "dropObjectIfExists" : "dropObject")))); + } +public: + using TBase::TBase; +}; + +} diff --git a/yql/essentials/sql/v1/perf/parse.cpp b/yql/essentials/sql/v1/perf/parse.cpp new file mode 100644 index 00000000000..33174bb95c0 --- /dev/null +++ b/yql/essentials/sql/v1/perf/parse.cpp @@ -0,0 +1,70 @@ +#include <yql/essentials/sql/v1/sql.h> +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <util/datetime/cputimer.h> +#include <util/string/builder.h> + +using namespace NSQLTranslationV1; + +enum class EDebugOutput { + None, + ToCerr, +}; + +TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) { + TStringStream s; + res.Issues.PrintTo(s); + + if (debug == EDebugOutput::ToCerr) { + Cerr << s.Str() << Endl; + } + return s.Str(); +} + +NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { + google::protobuf::Arena arena; + const auto service = provider ? 
provider : TString(NYql::YtProviderName); + const TString cluster = "plato"; + NSQLTranslation::TTranslationSettings settings; + settings.ClusterMapping[cluster] = service; + settings.MaxErrors = maxErrors; + settings.Mode = mode; + settings.Arena = &arena; + auto res = SqlToYql(query, settings); + if (debug == EDebugOutput::ToCerr) { + Err2Str(res, debug); + } + return res; +} + +NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug); +} + +int main(int, char**) { + TStringBuilder builder; + builder << "USE plato;\n"; + for (ui32 i = 0; i < 10; ++i) { + builder << "$query = SELECT "; + for (ui32 j = 0; j < 500; ++j) { + if (j > 0) { + builder << ","; + } + + builder << "fld" << j; + }; + + builder << " FROM " << (i == 0? "Input" : "$query") << ";\n"; + } + + builder << "SELECT * FROM $query;\n"; + TString sql = builder; + //Cerr << sql; + TSimpleTimer timer; + for (ui32 i = 0; i < 100; ++i) { + NYql::TAstParseResult res = SqlToYql(sql); + Y_ENSURE(res.Root); + } + + Cerr << "Elapsed: " << timer.Get() << "\n"; + return 0; +} diff --git a/yql/essentials/sql/v1/perf/ya.make b/yql/essentials/sql/v1/perf/ya.make new file mode 100644 index 00000000000..99e9087682c --- /dev/null +++ b/yql/essentials/sql/v1/perf/ya.make @@ -0,0 +1,14 @@ +PROGRAM() + +SRCS( + parse.cpp +) + +PEERDIR( + yql/essentials/public/udf/service/exception_policy + yql/essentials/sql + yql/essentials/sql/v1 + yql/essentials/sql/pg_dummy +) + +END() diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp new file mode 100644 index 00000000000..f977f57411e --- /dev/null +++ b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp @@ -0,0 +1,152 @@ +#include "proto_parser.h" + +#include <yql/essentials/utils/yql_panic.h> + +#include 
<yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h> +#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h> +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Parser.h> +#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Parser.h> +#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h> +#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h> + +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> + +#include <library/cpp/protobuf/util/simple_reflection.h> +#include <util/generic/algorithm.h> + +#if defined(_tsan_enabled_) +#include <util/system/mutex.h> +#endif + +using namespace NYql; + +namespace NSQLTranslationV1 { + + +#if defined(_tsan_enabled_) + TMutex SanitizerSQLTranslationMutex; +#endif + +using namespace NSQLv1Generated; + +void ValidateMessagesImpl(const google::protobuf::Message* msg1, const google::protobuf::Message* msg2, bool hasNonAscii) { + YQL_ENSURE(!msg1 == !msg2); + if (!msg1) { + return; + } + + YQL_ENSURE(msg1->GetDescriptor() == msg2->GetDescriptor()); + const auto descr = msg1->GetDescriptor(); + if (descr == NSQLv1Generated::TToken::GetDescriptor()) { + const auto& token1 = dynamic_cast<const NSQLv1Generated::TToken&>(*msg1); + const auto& token2 = dynamic_cast<const NSQLv1Generated::TToken&>(*msg2); + const bool isEof1 = token1.GetId() == Max<ui32>(); + const bool isEof2 = token2.GetId() == Max<ui32>(); + YQL_ENSURE(isEof1 == isEof2); + YQL_ENSURE(token1.GetValue() == token2.GetValue()); + if (!isEof1) { + YQL_ENSURE(token1.GetLine() == token2.GetLine()); + if 
(!hasNonAscii) { + YQL_ENSURE(token1.GetColumn() == token2.GetColumn()); + } + } + + return; + } + + for (int i = 0; i < descr->field_count(); ++i) { + const NProtoBuf::FieldDescriptor* fd = descr->field(i); + NProtoBuf::TConstField field1(*msg1, fd); + NProtoBuf::TConstField field2(*msg2, fd); + YQL_ENSURE(field1.IsMessage() == field2.IsMessage()); + if (field1.IsMessage()) { + YQL_ENSURE(field1.Size() == field2.Size()); + for (size_t j = 0; j < field1.Size(); ++j) { + ValidateMessagesImpl(field1.template Get<NProtoBuf::Message>(j), field2.template Get<NProtoBuf::Message>(j), hasNonAscii); + } + } + } +} + +void ValidateMessages(const TString& query, const google::protobuf::Message* msg1, const google::protobuf::Message* msg2) { + const bool hasNonAscii = AnyOf(query, [](char c) { return !isascii(c);}); + return ValidateMessagesImpl(msg1, msg2, hasNonAscii); +} + +google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, TIssues& err, + size_t maxErrors, bool ansiLexer, bool anlr4Parser, bool testAntlr4, google::protobuf::Arena* arena) { + YQL_ENSURE(arena); +#if defined(_tsan_enabled_) + TGuard<TMutex> grd(SanitizerSQLTranslationMutex); +#endif + NSQLTranslation::TErrorCollectorOverIssues collector(err, maxErrors, ""); + if (ansiLexer && !anlr4Parser) { + NProtoAST::TProtoASTBuilder3<NALPAnsi::SQLv1Parser, NALPAnsi::SQLv1Lexer> builder(query, queryName, arena); + auto res = builder.BuildAST(collector); + if (testAntlr4) { + NProtoAST::TProtoASTBuilder4<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + auto res2 = builder.BuildAST(collector); + ValidateMessages(query, res, res2); + } + + return res; + } else if (!ansiLexer && !anlr4Parser) { + NProtoAST::TProtoASTBuilder3<NALPDefault::SQLv1Parser, NALPDefault::SQLv1Lexer> builder(query, queryName, arena); + auto res = builder.BuildAST(collector); + if (testAntlr4) { + NProtoAST::TProtoASTBuilder4<NALPDefaultAntlr4::SQLv1Antlr4Parser, 
NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + auto res2 = builder.BuildAST(collector); + ValidateMessages(query, res, res2); + } + + return res; + } else if (ansiLexer && anlr4Parser) { + NProtoAST::TProtoASTBuilder4<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + return builder.BuildAST(collector); + } else { + NProtoAST::TProtoASTBuilder4<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + return builder.BuildAST(collector); + } +} + +google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, + bool ansiLexer, bool anlr4Parser, bool testAntlr4, google::protobuf::Arena* arena) { + YQL_ENSURE(arena); +#if defined(_tsan_enabled_) + TGuard<TMutex> grd(SanitizerSQLTranslationMutex); +#endif + if (ansiLexer && !anlr4Parser) { + NProtoAST::TProtoASTBuilder3<NALPAnsi::SQLv1Parser, NALPAnsi::SQLv1Lexer> builder(query, queryName, arena); + auto res = builder.BuildAST(err); + if (testAntlr4) { + NProtoAST::TProtoASTBuilder4<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + auto res2 = builder.BuildAST(err); + ValidateMessages(query, res, res2); + } + + return res; + } else if (!ansiLexer && !anlr4Parser) { + NProtoAST::TProtoASTBuilder3<NALPDefault::SQLv1Parser, NALPDefault::SQLv1Lexer> builder(query, queryName, arena); + auto res = builder.BuildAST(err); + if (testAntlr4) { + NProtoAST::TProtoASTBuilder4<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + auto res2 = builder.BuildAST(err); + ValidateMessages(query, res, res2); + } + + return res; + } else if (ansiLexer && anlr4Parser) { + NProtoAST::TProtoASTBuilder4<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + return builder.BuildAST(err); + } else { + 
NProtoAST::TProtoASTBuilder4<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena); + return builder.BuildAST(err); + } +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.h b/yql/essentials/sql/v1/proto_parser/proto_parser.h new file mode 100644 index 00000000000..14440953196 --- /dev/null +++ b/yql/essentials/sql/v1/proto_parser/proto_parser.h @@ -0,0 +1,22 @@ +#pragma once + +#include <yql/essentials/ast/yql_ast.h> +#include <yql/essentials/parser/proto_ast/common.h> +#include <yql/essentials/public/issue/yql_warning.h> +#include <yql/essentials/public/issue/yql_issue_manager.h> +#include <yql/essentials/sql/settings/translation_settings.h> + +#include <google/protobuf/message.h> + +namespace NSQLTranslation { + struct TTranslationSettings; +} + +namespace NSQLTranslationV1 { + + google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, + NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena); + google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, + NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/proto_parser/ya.make b/yql/essentials/sql/v1/proto_parser/ya.make new file mode 100644 index 00000000000..edb24868baf --- /dev/null +++ b/yql/essentials/sql/v1/proto_parser/ya.make @@ -0,0 +1,21 @@ +LIBRARY() + +PEERDIR( + yql/essentials/utils + yql/essentials/ast + + yql/essentials/parser/proto_ast/antlr3 + yql/essentials/parser/proto_ast/antlr4 + yql/essentials/parser/proto_ast/collect_issues + yql/essentials/parser/proto_ast/gen/v1 + yql/essentials/parser/proto_ast/gen/v1_ansi + yql/essentials/parser/proto_ast/gen/v1_proto_split + yql/essentials/parser/proto_ast/gen/v1_antlr4 + 
yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4 +) + +SRCS( + proto_parser.cpp +) + +END() diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp new file mode 100644 index 00000000000..8abc2a92bff --- /dev/null +++ b/yql/essentials/sql/v1/query.cpp @@ -0,0 +1,3567 @@ +#include "node.h" +#include "context.h" +#include "object_processing.h" + +#include <yql/essentials/ast/yql_type_string.h> +#include <yql/essentials/core/sql_types/yql_callable_names.h> +#include <yql/essentials/providers/common/provider/yql_provider_names.h> + +#include <library/cpp/charset/ci_string.h> + +#include <util/digest/fnv.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +bool ValidateView(TPosition pos, TContext& ctx, TStringBuf service, TViewDescription& view) { + if (view.PrimaryFlag && !(service == KikimrProviderName || service == YdbProviderName)) { + ctx.Error(pos) << "primary view is not supported for " << service << " tables"; + return false; + } + return true; +} + +class TUniqueTableKey: public ITableKeys { +public: + TUniqueTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster, + const TDeferredAtom& name, const TViewDescription& view) + : ITableKeys(pos) + , Service(service) + , Cluster(cluster) + , Name(name) + , View(view) + , Full(name.GetRepr()) + { + if (!View.ViewName.empty()) { + Full += ":" + View.ViewName; + } + } + + bool SetPrimaryView(TContext& ctx, TPosition pos) override { + Y_UNUSED(ctx); + Y_UNUSED(pos); + View = {"", true}; + return true; + } + + bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { + Y_UNUSED(ctx); + Y_UNUSED(pos); + Full = Name.GetRepr(); + View = {view}; + if (!View.empty()) { + Full = ":" + View.ViewName; + } + + return true; + } + + const TString* GetTableName() const override { + return Name.GetLiteral() ? 
&Full : nullptr; + } + + TNodePtr BuildKeys(TContext& ctx, ITableKeys::EBuildKeysMode mode) override { + if (View == TViewDescription{"@"}) { + auto key = Y("TempTable", Name.Build()); + return key; + } + + bool tableScheme = mode == ITableKeys::EBuildKeysMode::CREATE; + if (tableScheme && !View.empty()) { + ctx.Error(Pos) << "Table view can not be created with CREATE TABLE clause"; + return nullptr; + } + auto path = ctx.GetPrefixedPath(Service, Cluster, Name); + if (!path) { + return nullptr; + } + auto key = Y("Key", Q(Y(Q(tableScheme ? "tablescheme" : "table"), Y("String", path)))); + key = AddView(key, View); + if (!ValidateView(GetPos(), ctx, Service, View)) { + return nullptr; + } + if (mode == ITableKeys::EBuildKeysMode::INPUT && + IsQueryMode(ctx.Settings.Mode) && + Service != KikimrProviderName && + Service != RtmrProviderName && + Service != YdbProviderName) { + + key = Y("MrTableConcat", key); + } + return key; + } + +private: + TString Service; + TDeferredAtom Cluster; + TDeferredAtom Name; + TViewDescription View; + TString Full; +}; + +TNodePtr BuildTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster, + const TDeferredAtom& name, const TViewDescription& view) { + return new TUniqueTableKey(pos, service, cluster, name, view); +} + +class TTopicKey: public ITableKeys { +public: + TTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name) + : ITableKeys(pos) + , Cluster(cluster) + , Name(name) + , Full(name.GetRepr()) + { + } + + const TString* GetTableName() const override { + return Name.GetLiteral() ? 
&Full : nullptr; + } + + TNodePtr BuildKeys(TContext& ctx, ITableKeys::EBuildKeysMode) override { /* the build mode argument is unnamed and ignored; returns nullptr when no prefixed path is produced */ + const auto path = ctx.GetPrefixedPath(Service, Cluster, Name); + if (!path) { + return nullptr; + } + auto key = Y("Key", Q(Y(Q("topic"), Y("String", path)))); + return key; + } + +private: /* NOTE(review): the constructor's initializer list sets only Cluster, Name and Full, so Service and View stay default-constructed; BuildKeys() still passes Service to GetPrefixedPath - confirm an empty service is intended. */ + TString Service; + TDeferredAtom Cluster; + TDeferredAtom Name; + TString View; + TString Full; +}; + /* Allocates a TTopicKey for the given cluster and topic name. */ +TNodePtr BuildTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name) { + return new TTopicKey(pos, cluster, name); +} + /* Maps an index type to its quoted atom name. The switch has no default label and no statement follows it, so a value outside the four listed enumerators would flow off the end of the function without returning. */ +static INode::TPtr CreateIndexType(TIndexDescription::EType type, const INode& node) { + switch (type) { + case TIndexDescription::EType::GlobalSync: + return node.Q("syncGlobal"); + case TIndexDescription::EType::GlobalAsync: + return node.Q("asyncGlobal"); + case TIndexDescription::EType::GlobalSyncUnique: + return node.Q("syncGlobalUnique"); + case TIndexDescription::EType::GlobalVectorKmeansTree: + return node.Q("globalVectorKmeansTree"); + } +} + /* Tells CreateTableSettings whether the settings are being built in create mode or alter mode. */ +enum class ETableSettingsParsingMode { + Create, + Alter +}; + +static INode::TPtr CreateTableSettings(const TTableSettings& tableSettings, ETableSettingsParsingMode parsingMode, const INode& node) { + // short aliases for member function calls + auto Y = [&node](auto&&... args) { return node.Y(std::forward<decltype(args)>(args)...); }; + auto Q = [&node](auto&&... args) { return node.Q(std::forward<decltype(args)>(args)...); }; + auto L = [&node](auto&&... 
args) { return node.L(std::forward<decltype(args)>(args)...); }; + + auto settings = Y(); + + if (tableSettings.DataSourcePath) { + settings = L(settings, Q(Y(Q("data_source_path"), tableSettings.DataSourcePath))); + } + if (tableSettings.Location) { + if (tableSettings.Location.IsSet()) { + settings = L(settings, Q(Y(Q("location"), tableSettings.Location.GetValueSet()))); + } else { + Y_ENSURE(parsingMode != ETableSettingsParsingMode::Create, "Can't reset LOCATION in create mode"); + settings = L(settings, Q(Y(Q("location")))); + } + } + for (const auto& resetableParam : tableSettings.ExternalSourceParameters) { + Y_ENSURE(resetableParam, "Empty parameter"); + if (resetableParam.IsSet()) { + const auto& [id, value] = resetableParam.GetValueSet(); + settings = L(settings, Q(Y(Q(id.Name), value))); + } else { + Y_ENSURE(parsingMode != ETableSettingsParsingMode::Create, + "Can't reset " << resetableParam.GetValueReset().Name << " in create mode" + ); + settings = L(settings, Q(Y(Q(resetableParam.GetValueReset().Name)))); + } + } + if (tableSettings.CompactionPolicy) { + settings = L(settings, Q(Y(Q("compactionPolicy"), tableSettings.CompactionPolicy))); + } + if (tableSettings.AutoPartitioningBySize) { + const auto& ref = tableSettings.AutoPartitioningBySize.GetRef(); + settings = L(settings, Q(Y(Q("autoPartitioningBySize"), BuildQuotedAtom(ref.Pos, ref.Name)))); + } + if (tableSettings.UniformPartitions && parsingMode == ETableSettingsParsingMode::Create) { + settings = L(settings, Q(Y(Q("uniformPartitions"), tableSettings.UniformPartitions))); + } + if (tableSettings.PartitionAtKeys && parsingMode == ETableSettingsParsingMode::Create) { + auto keysDesc = Y(); + for (const auto& key : tableSettings.PartitionAtKeys) { + auto columnsDesc = Y(); + for (auto column : key) { + columnsDesc = L(columnsDesc, column); + } + keysDesc = L(keysDesc, Q(columnsDesc)); + } + settings = L(settings, Q(Y(Q("partitionAtKeys"), Q(keysDesc)))); + } + if (tableSettings.PartitionSizeMb) { 
+ settings = L(settings, Q(Y(Q("partitionSizeMb"), tableSettings.PartitionSizeMb))); + } + if (tableSettings.AutoPartitioningByLoad) { + const auto& ref = tableSettings.AutoPartitioningByLoad.GetRef(); + settings = L(settings, Q(Y(Q("autoPartitioningByLoad"), BuildQuotedAtom(ref.Pos, ref.Name)))); + } + if (tableSettings.MinPartitions) { + settings = L(settings, Q(Y(Q("minPartitions"), tableSettings.MinPartitions))); + } + if (tableSettings.MaxPartitions) { + settings = L(settings, Q(Y(Q("maxPartitions"), tableSettings.MaxPartitions))); + } + if (tableSettings.PartitionCount) { + settings = L(settings, Q(Y(Q("maxPartitions"), tableSettings.PartitionCount))); + settings = L(settings, Q(Y(Q("minPartitions"), tableSettings.PartitionCount))); + } + if (tableSettings.KeyBloomFilter) { + const auto& ref = tableSettings.KeyBloomFilter.GetRef(); + settings = L(settings, Q(Y(Q("keyBloomFilter"), BuildQuotedAtom(ref.Pos, ref.Name)))); + } + if (tableSettings.ReadReplicasSettings) { + settings = L(settings, Q(Y(Q("readReplicasSettings"), tableSettings.ReadReplicasSettings))); + } + if (const auto& ttl = tableSettings.TtlSettings) { + if (ttl.IsSet()) { + const auto& ttlSettings = ttl.GetValueSet(); + auto opts = Y(); + + opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name)))); + opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr))); + + if (ttlSettings.ColumnUnit) { + opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit))))); + } + + settings = L(settings, Q(Y(Q("setTtlSettings"), Q(opts)))); + } else { + YQL_ENSURE(parsingMode != ETableSettingsParsingMode::Create, "Can't reset TTL settings in create mode"); + settings = L(settings, Q(Y(Q("resetTtlSettings"), Q(Y())))); + } + } + if (const auto& tiering = tableSettings.Tiering) { + if (tiering.IsSet()) { + settings = L(settings, Q(Y(Q("setTiering"), tiering.GetValueSet()))); + } else { + YQL_ENSURE(parsingMode != ETableSettingsParsingMode::Create, 
"Can't reset TIERING in create mode"); + settings = L(settings, Q(Y(Q("resetTiering"), Q(Y())))); + } + } + if (tableSettings.StoreExternalBlobs) { + const auto& ref = tableSettings.StoreExternalBlobs.GetRef(); + settings = L(settings, Q(Y(Q("storeExternalBlobs"), BuildQuotedAtom(ref.Pos, ref.Name)))); + } + if (tableSettings.StoreType && parsingMode == ETableSettingsParsingMode::Create) { + const auto& ref = tableSettings.StoreType.GetRef(); + settings = L(settings, Q(Y(Q("storeType"), BuildQuotedAtom(ref.Pos, ref.Name)))); + } + if (tableSettings.PartitionByHashFunction && parsingMode == ETableSettingsParsingMode::Create) { + settings = L(settings, Q(Y(Q("partitionByHashFunction"), tableSettings.PartitionByHashFunction))); + } + + return settings; +} + +static INode::TPtr CreateVectorIndexSettings(const TVectorIndexSettings& vectorIndexSettings, const INode& node) { + // short aliases for member function calls + auto Y = [&node](auto&&... args) { return node.Y(std::forward<decltype(args)>(args)...); }; + auto Q = [&node](auto&&... args) { return node.Q(std::forward<decltype(args)>(args)...); }; + auto L = [&node](auto&&... 
args) { return node.L(std::forward<decltype(args)>(args)...); }; + + auto settings = Y(); + + if (vectorIndexSettings.Distance && vectorIndexSettings.Similarity) { + Y_ENSURE(false, "distance and similarity shouldn't be set at the same time"); + } else if (vectorIndexSettings.Distance) { + settings = L(settings, Q(Y(Q("distance"), Q(ToString(*vectorIndexSettings.Distance))))); + } else if (vectorIndexSettings.Similarity) { + settings = L(settings, Q(Y(Q("similarity"), Q(ToString(*vectorIndexSettings.Similarity))))); + } else { + Y_ENSURE(false, "distance or similarity should be set"); + } + + settings = L(settings, Q(Y(Q("vector_type"), Q(ToString(*vectorIndexSettings.VectorType))))); + settings = L(settings, Q(Y(Q("vector_dimension"), Q(ToString(vectorIndexSettings.VectorDimension))))); + settings = L(settings, Q(Y(Q("clusters"), Q(ToString(vectorIndexSettings.Clusters))))); + settings = L(settings, Q(Y(Q("levels"), Q(ToString(vectorIndexSettings.Levels))))); + + return settings; +} + +static INode::TPtr CreateIndexDesc(const TIndexDescription& index, ETableSettingsParsingMode parsingMode, const INode& node) { + auto indexColumns = node.Y(); + for (const auto& col : index.IndexColumns) { + indexColumns = node.L(indexColumns, BuildQuotedAtom(col.Pos, col.Name)); + } + auto dataColumns = node.Y(); + for (const auto& col : index.DataColumns) { + dataColumns = node.L(dataColumns, BuildQuotedAtom(col.Pos, col.Name)); + } + const auto& indexType = node.Y(node.Q("indexType"), CreateIndexType(index.Type, node)); + const auto& indexName = node.Y(node.Q("indexName"), BuildQuotedAtom(index.Name.Pos, index.Name.Name)); + auto indexNode = node.Y( + node.Q(indexName), + node.Q(indexType), + node.Q(node.Y(node.Q("indexColumns"), node.Q(indexColumns))), + node.Q(node.Y(node.Q("dataColumns"), node.Q(dataColumns))) + ); + if (index.TableSettings.IsSet()) { + const auto& tableSettings = node.Y( + node.Q("tableSettings"), + node.Q(CreateTableSettings(index.TableSettings, parsingMode, 
node)) + ); + indexNode = node.L(indexNode, tableSettings); + } + if (const auto* indexSettingsPtr = std::get_if<TVectorIndexSettings>(&index.IndexSettings)) { + const auto& indexSettings = node.Q(node.Y( + node.Q("indexSettings"), + node.Q(CreateVectorIndexSettings(*indexSettingsPtr, node)))); + indexNode = node.L(indexNode, indexSettings); + } + return indexNode; +} + +static INode::TPtr CreateAlterIndex(const TIndexDescription& index, const INode& node) { + const auto& indexName = node.Y(node.Q("indexName"), BuildQuotedAtom(index.Name.Pos, index.Name.Name)); + const auto& tableSettings = node.Y( + node.Q("tableSettings"), + node.Q(CreateTableSettings(index.TableSettings, ETableSettingsParsingMode::Alter, node)) + ); + return node.Y( + node.Q(indexName), + node.Q(tableSettings) + ); +} + +static INode::TPtr CreateChangefeedDesc(const TChangefeedDescription& desc, const INode& node) { + auto settings = node.Y(); + if (desc.Settings.Mode) { + settings = node.L(settings, node.Q(node.Y(node.Q("mode"), desc.Settings.Mode))); + } + if (desc.Settings.Format) { + settings = node.L(settings, node.Q(node.Y(node.Q("format"), desc.Settings.Format))); + } + if (desc.Settings.InitialScan) { + settings = node.L(settings, node.Q(node.Y(node.Q("initial_scan"), desc.Settings.InitialScan))); + } + if (desc.Settings.VirtualTimestamps) { + settings = node.L(settings, node.Q(node.Y(node.Q("virtual_timestamps"), desc.Settings.VirtualTimestamps))); + } + if (desc.Settings.ResolvedTimestamps) { + settings = node.L(settings, node.Q(node.Y(node.Q("resolved_timestamps"), desc.Settings.ResolvedTimestamps))); + } + if (desc.Settings.RetentionPeriod) { + settings = node.L(settings, node.Q(node.Y(node.Q("retention_period"), desc.Settings.RetentionPeriod))); + } + if (desc.Settings.TopicAutoPartitioning) { + settings = node.L(settings, node.Q(node.Y(node.Q("topic_auto_partitioning"), desc.Settings.TopicAutoPartitioning))); + } + if (desc.Settings.TopicMaxActivePartitions) { + settings = 
node.L(settings, node.Q(node.Y(node.Q("topic_max_active_partitions"), desc.Settings.TopicMaxActivePartitions))); + } + if (desc.Settings.TopicPartitions) { + settings = node.L(settings, node.Q(node.Y(node.Q("topic_min_active_partitions"), desc.Settings.TopicPartitions))); + } + if (desc.Settings.AwsRegion) { + settings = node.L(settings, node.Q(node.Y(node.Q("aws_region"), desc.Settings.AwsRegion))); + } + if (const auto& sink = desc.Settings.SinkSettings) { + switch (sink->index()) { + case 0: // local + settings = node.L(settings, node.Q(node.Y(node.Q("local"), node.Q(node.Y())))); + break; + default: + YQL_ENSURE(false, "Unexpected sink settings"); + } + } + + auto state = node.Y(); + if (desc.Disable) { + state = node.Q("disable"); + } + + return node.Y( + node.Q(node.Y(node.Q("name"), BuildQuotedAtom(desc.Name.Pos, desc.Name.Name))), + node.Q(node.Y(node.Q("settings"), node.Q(settings))), + node.Q(node.Y(node.Q("state"), node.Q(state))) + ); +} + +class TPrepTableKeys: public ITableKeys { +public: + TPrepTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster, + const TString& func, const TVector<TTableArg>& args) + : ITableKeys(pos) + , Service(service) + , Cluster(cluster) + , Func(func) + , Args(args) + { + } + + void ExtractTableName(TContext&ctx, TTableArg& arg) { + MakeTableFromExpression(Pos, ctx, arg.Expr, arg.Id); + } + + TNodePtr BuildKeys(TContext& ctx, ITableKeys::EBuildKeysMode mode) override { + if (mode == ITableKeys::EBuildKeysMode::CREATE) { + // TODO: allow creation of multiple tables + ctx.Error(Pos) << "Mutiple table creation is not implemented yet"; + return nullptr; + } + + TCiString func(Func); + if (func != "object" && func != "walkfolders") { + for (auto& arg: Args) { + if (arg.Expr->GetLabel()) { + ctx.Error(Pos) << "Named arguments are not supported for table function " << to_upper(Func); + return nullptr; + } + } + } + if (func == "concat_strict") { + auto tuple = Y(); + for (auto& arg: Args) { + 
ExtractTableName(ctx, arg); + TNodePtr key; + if (arg.HasAt) { + key = Y("TempTable", arg.Id.Build()); + } else { + auto path = ctx.GetPrefixedPath(Service, Cluster, arg.Id); + if (!path) { + return nullptr; + } + + key = Y("Key", Q(Y(Q("table"), Y("String", path)))); + key = AddView(key, arg.View); + if (!ValidateView(GetPos(), ctx, Service, arg.View)) { + return nullptr; + } + } + + tuple = L(tuple, key); + } + return Q(tuple); + } + else if (func == "concat") { + auto concat = Y("MrTableConcat"); + for (auto& arg : Args) { + ExtractTableName(ctx, arg); + TNodePtr key; + if (arg.HasAt) { + key = Y("TempTable", arg.Id.Build()); + } else { + auto path = ctx.GetPrefixedPath(Service, Cluster, arg.Id); + if (!path) { + return nullptr; + } + + key = Y("Key", Q(Y(Q("table"), Y("String", path)))); + key = AddView(key, arg.View); + if (!ValidateView(GetPos(), ctx, Service, arg.View)) { + return nullptr; + } + } + + concat = L(concat, key); + } + + return concat; + } + + else if (func == "range" || func == "range_strict" || func == "like" || func == "like_strict" || + func == "regexp" || func == "regexp_strict" || func == "filter" || func == "filter_strict") { + bool isRange = func.StartsWith("range"); + bool isFilter = func.StartsWith("filter"); + size_t minArgs = isRange ? 1 : 2; + size_t maxArgs = isRange ? 5 : 4; + if (Args.size() < minArgs || Args.size() > maxArgs) { + ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but got: " << Args.size(); + return nullptr; + } + if (ctx.DiscoveryMode) { + ctx.Error(Pos, TIssuesIds::YQL_NOT_ALLOWED_IN_DISCOVERY) << Func << " is not allowed in Discovery mode"; + return nullptr; + } + + for (ui32 index=0; index < Args.size(); ++index) { + auto& arg = Args[index]; + if (arg.HasAt) { + ctx.Error(Pos) << "Temporary tables are not supported here"; + return nullptr; + } + + if (!arg.View.empty()) { + TStringBuilder sb; + sb << "Use the last argument of " << Func << " to specify a VIEW." 
<< Endl; + if (isRange) { + sb << "Possible arguments are: prefix, from, to, suffix, view." << Endl; + } else if (isFilter) { + sb << "Possible arguments are: prefix, filtering callable, suffix, view." << Endl; + } else { + sb << "Possible arguments are: prefix, pattern, suffix, view." << Endl; + } + sb << "Pass empty string in arguments if you want to skip."; + + ctx.Error(Pos) << sb; + return nullptr; + } + + if (!func.StartsWith("filter") || index != 1) { + ExtractTableName(ctx, arg); + } + } + + auto path = ctx.GetPrefixedPath(Service, Cluster, Args[0].Id); + if (!path) { + return nullptr; + } + auto range = Y(func.EndsWith("_strict") ? "MrTableRangeStrict" : "MrTableRange", path); + TNodePtr predicate; + TDeferredAtom suffix; + if (func.StartsWith("range")) { + TDeferredAtom min; + TDeferredAtom max; + if (Args.size() > 1) { + min = Args[1].Id; + } + + if (Args.size() > 2) { + max = Args[2].Id; + } + + if (Args.size() > 3) { + suffix = Args[3].Id; + } + + if (min.Empty() && max.Empty()) { + predicate = BuildLambda(Pos, Y("item"), Y("Bool", Q("true"))); + } + else { + auto minPred = !min.Empty() ? Y(">=", "item", Y("String", min.Build())) : nullptr; + auto maxPred = !max.Empty() ? Y("<=", "item", Y("String", max.Build())) : nullptr; + if (!minPred) { + predicate = BuildLambda(Pos, Y("item"), maxPred); + } else if (!maxPred) { + predicate = BuildLambda(Pos, Y("item"), minPred); + } else { + predicate = BuildLambda(Pos, Y("item"), Y("And", minPred, maxPred)); + } + } + } else { + if (Args.size() > 2) { + suffix = Args[2].Id; + } + + if (func.StartsWith("regexp")) { + if (!ctx.PragmaRegexUseRe2) { + ctx.Warning(Pos, TIssuesIds::CORE_LEGACY_REGEX_ENGINE) << "Legacy regex engine works incorrectly with unicode. Use PRAGMA RegexUseRe2='true';"; + } + + auto pattern = Args[1].Id; + auto udf = ctx.PragmaRegexUseRe2 ? 
+ Y("Udf", Q("Re2.Grep"), Q(Y(Y("String", pattern.Build()), Y("Null")))): + Y("Udf", Q("Pcre.BacktrackingGrep"), Y("String", pattern.Build())); + predicate = BuildLambda(Pos, Y("item"), Y("Apply", udf, "item")); + } else if (func.StartsWith("like")) { + auto pattern = Args[1].Id; + auto convertedPattern = Y("Apply", Y("Udf", Q("Re2.PatternFromLike")), + Y("String", pattern.Build())); + auto udf = Y("Udf", Q("Re2.Match"), Q(Y(convertedPattern, Y("Null")))); + predicate = BuildLambda(Pos, Y("item"), Y("Apply", udf, "item")); + } else { + predicate = BuildLambda(Pos, Y("item"), Y("Apply", Args[1].Expr, "item")); + } + } + + range = L(range, predicate); + range = L(range, suffix.Build() ? suffix.Build() : BuildQuotedAtom(Pos, "")); + auto key = Y("Key", Q(Y(Q("table"), range))); + if (Args.size() == maxArgs) { + const auto& lastArg = Args.back(); + if (!lastArg.View.empty()) { + ctx.Error(Pos) << Func << " requires that view should be set as last argument"; + return nullptr; + } + + if (!lastArg.Id.Empty()) { + key = L(key, Q(Y(Q("view"), Y("String", lastArg.Id.Build())))); + } + } + + return key; + } else if (func == "each" || func == "each_strict") { + auto each = Y(func == "each" ? 
"MrTableEach" : "MrTableEachStrict"); + for (auto& arg : Args) { + if (arg.HasAt) { + ctx.Error(Pos) << "Temporary tables are not supported here"; + return nullptr; + } + + auto type = Y("ListType", Y("DataType", Q("String"))); + auto key = Y("Key", Q(Y(Q("table"), Y("EvaluateExpr", + Y("EnsureType", Y("Coalesce", arg.Expr, + Y("List", type)), type))))); + + key = AddView(key, arg.View); + if (!ValidateView(GetPos(), ctx, Service, arg.View)) { + return nullptr; + } + each = L(each, key); + } + if (ctx.PragmaUseTablePrefixForEach) { + TStringBuf prefixPath = ctx.GetPrefixPath(Service, Cluster); + if (prefixPath) { + each = L(each, BuildQuotedAtom(Pos, TString(prefixPath))); + } + } + return each; + } + else if (func == "folder") { + size_t minArgs = 1; + size_t maxArgs = 2; + if (Args.size() < minArgs || Args.size() > maxArgs) { + ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but found: " << Args.size(); + return nullptr; + } + + if (ctx.DiscoveryMode) { + ctx.Error(Pos, TIssuesIds::YQL_NOT_ALLOWED_IN_DISCOVERY) << Func << " is not allowed in Discovery mode"; + return nullptr; + } + + for (ui32 index = 0; index < Args.size(); ++index) { + auto& arg = Args[index]; + if (arg.HasAt) { + ctx.Error(Pos) << "Temporary tables are not supported here"; + return nullptr; + } + + if (!arg.View.empty()) { + ctx.Error(Pos) << Func << " doesn't supports views"; + return nullptr; + } + + ExtractTableName(ctx, arg); + } + + auto folder = Y("MrFolder"); + folder = L(folder, Args[0].Id.Build()); + folder = L(folder, Args.size() > 1 ? 
Args[1].Id.Build() : BuildQuotedAtom(Pos, "")); + return folder; + } + else if (func == "walkfolders") { + const size_t minPositionalArgs = 1; + const size_t maxPositionalArgs = 2; + + size_t positionalArgsCnt = 0; + for (const auto& arg : Args) { + if (!arg.Expr->GetLabel()) { + positionalArgsCnt++; + } else { + break; + } + } + if (positionalArgsCnt < minPositionalArgs || positionalArgsCnt > maxPositionalArgs) { + ctx.Error(Pos) << Func << " requires from " << minPositionalArgs + << " to " << maxPositionalArgs + << " positional arguments, but got: " << positionalArgsCnt; + return nullptr; + } + + constexpr auto walkFoldersModuleName = "walk_folders_module"; + ctx.RequiredModules.emplace(walkFoldersModuleName, "/lib/yql/walk_folders.yql"); + + auto& rootFolderArg = Args[0]; + if (rootFolderArg.HasAt) { + ctx.Error(Pos) << "Temporary tables are not supported here"; + return nullptr; + } + if (!rootFolderArg.View.empty()) { + ctx.Error(Pos) << Func << " doesn't supports views"; + return nullptr; + } + ExtractTableName(ctx, rootFolderArg); + + const auto initState = + positionalArgsCnt > 1 + ? 
Args[1].Expr + : Y("List", Y("ListType", Y("DataType", Q("String")))); + + TNodePtr rootAttributes; + TNodePtr preHandler; + TNodePtr resolveHandler; + TNodePtr diveHandler; + TNodePtr postHandler; + for (auto it = Args.begin() + positionalArgsCnt; it != Args.end(); ++it) { + auto& arg = *it; + const auto label = arg.Expr->GetLabel(); + if (label == "RootAttributes") { + ExtractTableName(ctx, arg); + rootAttributes = arg.Id.Build(); + } + else if (label == "PreHandler") { + preHandler = arg.Expr; + } + else if (label == "ResolveHandler") { + resolveHandler = arg.Expr; + } + else if (label == "DiveHandler") { + diveHandler = arg.Expr; + } + else if (label == "PostHandler") { + postHandler = arg.Expr; + } + else { + ctx.Warning(Pos, DEFAULT_ERROR) << "Unsupported named argument: " + << label << " in " << Func; + } + } + if (rootAttributes == nullptr) { + rootAttributes = BuildQuotedAtom(Pos, ""); + } + + if (preHandler != nullptr || postHandler != nullptr) { + const auto makePrePostHandlerType = BuildBind(Pos, walkFoldersModuleName, "MakePrePostHandlersType"); + const auto prePostHandlerType = Y("EvaluateType", Y("TypeHandle", Y("Apply", makePrePostHandlerType, Y("TypeOf", initState)))); + + if (preHandler != nullptr) { + preHandler = Y("Callable", prePostHandlerType, preHandler); + } + if (postHandler != nullptr) { + postHandler = Y("Callable", prePostHandlerType, postHandler); + } + } + if (preHandler == nullptr) { + preHandler = Y("Void"); + } + if (postHandler == nullptr) { + postHandler = Y("Void"); + } + + const auto makeResolveDiveHandlerType = BuildBind(Pos, walkFoldersModuleName, "MakeResolveDiveHandlersType"); + const auto resolveDiveHandlerType = Y("EvaluateType", Y("TypeHandle", Y("Apply", makeResolveDiveHandlerType, Y("TypeOf", initState)))); + if (resolveHandler == nullptr) { + resolveHandler = BuildBind(Pos, walkFoldersModuleName, "AnyNodeDiveHandler"); + } + if (diveHandler == nullptr) { + diveHandler = BuildBind(Pos, walkFoldersModuleName, 
"AnyNodeDiveHandler"); + } + + resolveHandler = Y("Callable", resolveDiveHandlerType, resolveHandler); + diveHandler = Y("Callable", resolveDiveHandlerType, diveHandler); + + const auto initStateType = Y("EvaluateType", Y("TypeHandle", Y("TypeOf", initState))); + const auto pickledInitState = Y("Pickle", initState); + + const auto initPath = rootFolderArg.Id.Build(); + + return Y("MrWalkFolders", initPath, rootAttributes, pickledInitState, initStateType, + preHandler, resolveHandler, diveHandler, postHandler); + } + else if (func == "tables") { + if (!Args.empty()) { + ctx.Error(Pos) << Func << " doesn't accept arguments"; + return nullptr; + } + + return L(Y("DataTables")); + } + else if (func == "object") { + const size_t positionalArgs = 2; + auto result = Y("MrObject"); + auto settings = Y(); + //TVector<TNodePtr> settings; + size_t argc = 0; + for (ui32 index = 0; index < Args.size(); ++index) { + auto& arg = Args[index]; + if (arg.HasAt) { + ctx.Error(arg.Expr->GetPos()) << "Temporary tables are not supported here"; + return nullptr; + } + + if (!arg.View.empty()) { + ctx.Error(Pos) << to_upper(Func) << " doesn't supports views"; + return nullptr; + } + + if (!arg.Expr->GetLabel()) { + ExtractTableName(ctx, arg); + result = L(result, arg.Id.Build()); + ++argc; + } else { + settings = L(settings, Q(Y(BuildQuotedAtom(arg.Expr->GetPos(), arg.Expr->GetLabel()), arg.Expr))); + } + } + + if (argc != positionalArgs) { + ctx.Error(Pos) << to_upper(Func) << " requires exacty " << positionalArgs << " positional args, but got " << argc; + return nullptr; + } + + result = L(result, Q(settings)); + return result; + } + + ctx.Error(Pos) << "Unknown table name preprocessor: " << Func; + return nullptr; + } + +private: + TString Service; + TDeferredAtom Cluster; + TString Func; + TVector<TTableArg> Args; +}; + +TNodePtr BuildTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster, + const TString& func, const TVector<TTableArg>& args) { + return new 
TPrepTableKeys(pos, service, cluster, func, args); +} + +class TInputOptions final: public TAstListNode { +public: + TInputOptions(TPosition pos, const TTableHints& hints) + : TAstListNode(pos) + , Hints(hints) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + for (auto& hint: Hints) { + TString hintName = hint.first; + TMaybe<TIssue> normalizeError = NormalizeName(Pos, hintName); + if (!normalizeError.Empty()) { + ctx.Error() << normalizeError->GetMessage(); + ctx.IncrementMonCounter("sql_errors", "NormalizeHintError"); + return false; + } + TNodePtr option = Y(BuildQuotedAtom(Pos, hintName)); + for (auto& x : hint.second) { + if (!x->Init(ctx, src)) { + return false; + } + + option = L(option, x); + } + + Nodes.push_back(Q(option)); + } + return true; + } + + TPtr DoClone() const final { + return {}; + } + +private: + TTableHints Hints; +}; + +TNodePtr BuildInputOptions(TPosition pos, const TTableHints& hints) { + if (hints.empty()) { + return nullptr; + } + + return new TInputOptions(pos, hints); +} + +class TIntoTableOptions: public TAstListNode { +public: + TIntoTableOptions(TPosition pos, const TVector<TString>& columns, const TTableHints& hints) + : TAstListNode(pos) + , Columns(columns) + , Hints(hints) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(ctx); + Y_UNUSED(src); + + TNodePtr options = Y(); + for (const auto& column: Columns) { + options->Add(Q(column)); + } + if (Columns) { + Add(Q(Y(Q("erase_columns"), Q(options)))); + } + + for (const auto& hint : Hints) { + TString hintName = hint.first; + TMaybe<TIssue> normalizeError = NormalizeName(Pos, hintName); + if (!normalizeError.Empty()) { + ctx.Error() << normalizeError->GetMessage(); + ctx.IncrementMonCounter("sql_errors", "NormalizeHintError"); + return false; + } + TNodePtr option = Y(BuildQuotedAtom(Pos, hintName)); + for (auto& x : hint.second) { + if (!x->Init(ctx, src)) { + return false; + } + option = L(option, x); + } + Add(Q(option)); + } + + 
return true; + } + + TNodePtr DoClone() const final { + return new TIntoTableOptions(GetPos(), Columns, CloneContainer(Hints)); + } + +private: + TVector<TString> Columns; + TTableHints Hints; +}; + +TNodePtr BuildIntoTableOptions(TPosition pos, const TVector<TString>& eraseColumns, const TTableHints& hints) { + return new TIntoTableOptions(pos, eraseColumns, hints); +} + +class TInputTablesNode final: public TAstListNode { +public: + TInputTablesNode(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped) + : TAstListNode(pos) + , Tables(tables) + , InSubquery(inSubquery) + , Scoped(scoped) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + THashSet<TString> processedTables; + for (auto& tr: Tables) { + if (!processedTables.insert(tr.RefName).second) { + continue; + } + + Scoped->UseCluster(tr.Service, tr.Cluster); + auto tableKeys = tr.Keys->GetTableKeys(); + auto keys = tableKeys->BuildKeys(ctx, ITableKeys::EBuildKeysMode::INPUT); + if (!keys || !keys->Init(ctx, src)) { + return false; + } + auto fields = Y("Void"); + auto source = Y("DataSource", BuildQuotedAtom(Pos, tr.Service), Scoped->WrapCluster(tr.Cluster, ctx)); + auto options = tr.Options ? 
Q(tr.Options) : Q(Y()); + Add(Y("let", "x", keys->Y(TString(ReadName), "world", source, keys, fields, options))); + + if (IsIn({KikimrProviderName, YdbProviderName}, tr.Service) && InSubquery) { + ctx.Error() << "Using of system '" << tr.Service << "' is not allowed in SUBQUERY"; + return false; + } + + if (tr.Service != YtProviderName || ctx.Settings.SaveWorldDependencies) { + Add(Y("let", "world", Y(TString(LeftName), "x"))); + } + + Add(Y("let", tr.RefName, Y(TString(RightName), "x"))); + } + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } + +private: + TTableList Tables; + const bool InSubquery; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildInputTables(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped) { + return new TInputTablesNode(pos, tables, inSubquery, scoped); +} + +class TCreateTableNode final: public TAstListNode { +public: + TCreateTableNode(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr values, TScopedStatePtr scoped) + : TAstListNode(pos) + , Table(tr) + , Params(params) + , ExistingOk(existingOk) + , ReplaceIfExists(replaceIfExists) + , Values(std::move(values)) + , Scoped(scoped) + { + scoped->UseCluster(Table.Service, Table.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE); + if (!keys || !keys->Init(ctx, src)) { + return false; + } + + if (!Params.PkColumns.empty() + || !Params.PartitionByColumns.empty() + || !Params.OrderByColumns.empty() + || !Params.Indexes.empty() + || !Params.Changefeeds.empty()) + { + THashSet<TString> columnsSet; + for (auto& col : Params.Columns) { + columnsSet.insert(col.Name); + } + + const bool allowUndefinedColumns = (Values != nullptr) && columnsSet.empty(); + + THashSet<TString> pkColumns; + for (auto& keyColumn : Params.PkColumns) { + if 
(!allowUndefinedColumns && !columnsSet.contains(keyColumn.Name)) { + ctx.Error(keyColumn.Pos) << "Undefined column: " << keyColumn.Name; + return false; + } + if (!pkColumns.insert(keyColumn.Name).second) { + ctx.Error(keyColumn.Pos) << "Duplicated column in PK: " << keyColumn.Name; + return false; + } + } + for (auto& keyColumn : Params.PartitionByColumns) { + if (!allowUndefinedColumns && !columnsSet.contains(keyColumn.Name)) { + ctx.Error(keyColumn.Pos) << "Undefined column: " << keyColumn.Name; + return false; + } + } + for (auto& keyColumn : Params.OrderByColumns) { + if (!allowUndefinedColumns && !columnsSet.contains(keyColumn.first.Name)) { + ctx.Error(keyColumn.first.Pos) << "Undefined column: " << keyColumn.first.Name; + return false; + } + } + + THashSet<TString> indexNames; + for (const auto& index : Params.Indexes) { + if (!indexNames.insert(index.Name.Name).second) { + ctx.Error(index.Name.Pos) << "Index " << index.Name.Name << " must be defined once"; + return false; + } + + for (const auto& indexColumn : index.IndexColumns) { + if (!allowUndefinedColumns && !columnsSet.contains(indexColumn.Name)) { + ctx.Error(indexColumn.Pos) << "Undefined column: " << indexColumn.Name; + return false; + } + } + + for (const auto& dataColumn : index.DataColumns) { + if (!allowUndefinedColumns && !columnsSet.contains(dataColumn.Name)) { + ctx.Error(dataColumn.Pos) << "Undefined column: " << dataColumn.Name; + return false; + } + } + } + + THashSet<TString> cfNames; + for (const auto& cf : Params.Changefeeds) { + if (!cfNames.insert(cf.Name.Name).second) { + ctx.Error(cf.Name.Pos) << "Changefeed " << cf.Name.Name << " must be defined once"; + return false; + } + } + } + + auto opts = Y(); + if (Table.Options) { + if (!Table.Options->Init(ctx, src)) { + return false; + } + opts = Table.Options; + } + + if (ExistingOk) { + opts = L(opts, Q(Y(Q("mode"), Q("create_if_not_exists")))); + } else if (ReplaceIfExists) { + opts = L(opts, Q(Y(Q("mode"), 
Q("create_or_replace")))); + } else { + opts = L(opts, Q(Y(Q("mode"), Q("create")))); + } + + THashSet<TString> columnFamilyNames; + + if (Params.ColumnFamilies) { + auto columnFamilies = Y(); + for (const auto& family : Params.ColumnFamilies) { + if (!columnFamilyNames.insert(family.Name.Name).second) { + ctx.Error(family.Name.Pos) << "Family " << family.Name.Name << " specified more than once"; + return false; + } + auto familyDesc = Y(); + familyDesc = L(familyDesc, Q(Y(Q("name"), BuildQuotedAtom(family.Name.Pos, family.Name.Name)))); + if (family.Data) { + familyDesc = L(familyDesc, Q(Y(Q("data"), family.Data))); + } + if (family.Compression) { + familyDesc = L(familyDesc, Q(Y(Q("compression"), family.Compression))); + } + if (family.CompressionLevel) { + familyDesc = L(familyDesc, Q(Y(Q("compression_level"), family.CompressionLevel))); + } + columnFamilies = L(columnFamilies, Q(familyDesc)); + } + opts = L(opts, Q(Y(Q("columnFamilies"), Q(columnFamilies)))); + } + + auto columns = Y(); + THashSet<TString> columnsWithDefaultValue; + auto columnsDefaultValueSettings = Y(); + + for (auto& col : Params.Columns) { + auto columnDesc = Y(); + columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name)); + auto type = col.Type; + + if (type) { + if (col.Nullable) { + type = Y("AsOptionalType", type); + } + + columnDesc = L(columnDesc, type); + + auto columnConstraints = Y(); + + if (!col.Nullable) { + columnConstraints = L(columnConstraints, Q(Y(Q("not_null")))); + } + + if (col.Serial) { + columnConstraints = L(columnConstraints, Q(Y(Q("serial")))); + } + + if (col.DefaultExpr) { + if (!col.DefaultExpr->Init(ctx, src)) { + return false; + } + + columnConstraints = L(columnConstraints, Q(Y(Q("default"), col.DefaultExpr))); + } + + columnDesc = L(columnDesc, Q(Y(Q("columnConstrains"), Q(columnConstraints)))); + + auto familiesDesc = Y(); + + if (col.Families) { + for (const auto& family : col.Families) { + if (columnFamilyNames.find(family.Name) == 
columnFamilyNames.end()) { + ctx.Error(family.Pos) << "Unknown family " << family.Name; + return false; + } + familiesDesc = L(familiesDesc, BuildQuotedAtom(family.Pos, family.Name)); + } + } + + columnDesc = L(columnDesc, Q(familiesDesc)); + } + + columns = L(columns, Q(columnDesc)); + } + opts = L(opts, Q(Y(Q("columns"), Q(columns)))); + + if (!columnsWithDefaultValue.empty()) { + opts = L(opts, Q(Y(Q("columnsDefaultValues"), Q(columnsDefaultValueSettings)))); + } + + if (Table.Service == RtmrProviderName) { + if (!Params.PkColumns.empty() && !Params.PartitionByColumns.empty()) { + ctx.Error() << "Only one of PRIMARY KEY or PARTITION BY constraints may be specified"; + return false; + } + } else { + if (!Params.OrderByColumns.empty()) { + ctx.Error() << "ORDER BY is supported only for " << RtmrProviderName << " provider"; + return false; + } + } + + if (!Params.PkColumns.empty()) { + auto primaryKey = Y(); + for (auto& col : Params.PkColumns) { + primaryKey = L(primaryKey, BuildQuotedAtom(col.Pos, col.Name)); + } + opts = L(opts, Q(Y(Q("primarykey"), Q(primaryKey)))); + if (!Params.OrderByColumns.empty()) { + ctx.Error() << "PRIMARY KEY cannot be used with ORDER BY, use PARTITION BY instead"; + return false; + } + } + + if (!Params.PartitionByColumns.empty()) { + auto partitionBy = Y(); + for (auto& col : Params.PartitionByColumns) { + partitionBy = L(partitionBy, BuildQuotedAtom(col.Pos, col.Name)); + } + opts = L(opts, Q(Y(Q("partitionby"), Q(partitionBy)))); + } + + if (!Params.OrderByColumns.empty()) { + auto orderBy = Y(); + for (auto& col : Params.OrderByColumns) { + orderBy = L(orderBy, Q(Y(BuildQuotedAtom(col.first.Pos, col.first.Name), col.second ? 
Q("1") : Q("0")))); + } + opts = L(opts, Q(Y(Q("orderby"), Q(orderBy)))); + } + + for (const auto& index : Params.Indexes) { + const auto& desc = CreateIndexDesc(index, ETableSettingsParsingMode::Create, *this); + opts = L(opts, Q(Y(Q("index"), Q(desc)))); + } + + for (const auto& cf : Params.Changefeeds) { + const auto& desc = CreateChangefeedDesc(cf, *this); + opts = L(opts, Q(Y(Q("changefeed"), Q(desc)))); + } + + if (Params.TableSettings.IsSet()) { + opts = L(opts, Q(Y(Q("tableSettings"), Q( + CreateTableSettings(Params.TableSettings, ETableSettingsParsingMode::Create, *this) + )))); + } + + switch (Params.TableType) { + case ETableType::TableStore: + opts = L(opts, Q(Y(Q("tableType"), Q("tableStore")))); + break; + case ETableType::ExternalTable: + opts = L(opts, Q(Y(Q("tableType"), Q("externalTable")))); + break; + case ETableType::Table: + break; + } + + if (Params.Temporary) { + opts = L(opts, Q(Y(Q("temporary")))); + } + + TNodePtr node = nullptr; + if (Values) { + if (!Values->Init(ctx, nullptr)) { + return false; + } + TTableList tableList; + Values->GetInputTables(tableList); + auto valuesSource = Values.Get(); + auto values = Values->Build(ctx); + if (!Values) { + return false; + } + + TNodePtr inputTables(BuildInputTables(Pos, tableList, false, Scoped)); + if (!inputTables->Init(ctx, valuesSource)) { + return false; + } + + node = inputTables; + node = L(node, Y("let", "values", values)); + } else { + node = Y(Y("let", "values", Y("Void"))); + } + + auto write = Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, "values", Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ); + + node = L(node, Y("let", "world", Y("block", Q(write)))); + node = L(node, Y("return", "world")); + + Add("block", Q(node)); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TTableRef Table; + const TCreateTableParameters Params; + const bool ExistingOk; + const bool ReplaceIfExists; + const TSourcePtr Values; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildCreateTable(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr values, TScopedStatePtr scoped) +{ + return new TCreateTableNode(pos, tr, existingOk, replaceIfExists, params, std::move(values), scoped); +} + +class TAlterTableNode final: public TAstListNode { +public: + TAlterTableNode(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Table(tr) + , Params(params) + , Scoped(scoped) + { + scoped->UseCluster(Table.Service, Table.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE); + if (!keys || !keys->Init(ctx, src)) { + return false; + } + + auto actions = Y(); + + if (Params.AddColumns) { + auto columns = Y(); + for (auto& col : Params.AddColumns) { + auto columnDesc = Y(); + columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name)); + auto type = col.Type; + if (col.Nullable) { + type = Y("AsOptionalType", type); + } + + columnDesc = L(columnDesc, type); + auto columnConstraints = Y(); + if (!col.Nullable) { + columnConstraints = L(columnConstraints, Q(Y(Q("not_null")))); + } + + if (col.Serial) { + columnConstraints = L(columnConstraints, Q(Y(Q("serial")))); + } + + if (col.DefaultExpr) { + if (!col.DefaultExpr->Init(ctx, src)) { + return false; + } + + columnConstraints = L(columnConstraints, Q(Y(Q("default"), 
col.DefaultExpr))); + } + + columnDesc = L(columnDesc, Q(Y(Q("columnConstrains"), Q(columnConstraints)))); + + auto familiesDesc = Y(); + for (const auto& family : col.Families) { + familiesDesc = L(familiesDesc, BuildQuotedAtom(family.Pos, family.Name)); + } + columnDesc = L(columnDesc, Q(familiesDesc)); + + columns = L(columns, Q(columnDesc)); + } + actions = L(actions, Q(Y(Q("addColumns"), Q(columns)))); + } + + if (Params.DropColumns) { + auto columns = Y(); + for (auto& colName : Params.DropColumns) { + columns = L(columns, BuildQuotedAtom(Pos, colName)); + } + actions = L(actions, Q(Y(Q("dropColumns"), Q(columns)))); + } + + if (Params.AlterColumns) { + auto columns = Y(); + for (auto& col : Params.AlterColumns) { + if (col.TypeOfChange == TColumnSchema::ETypeOfChange::DropNotNullConstraint) { + auto columnDesc = Y(); + columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name)); + + auto columnConstraints = Y(); + columnConstraints = L(columnConstraints, Q(Y(Q("drop_not_null")))); + columnDesc = L(columnDesc, Q(Y(Q("changeColumnConstraints"), Q(columnConstraints)))); + columns = L(columns, Q(columnDesc)); + } else if (col.TypeOfChange == TColumnSchema::ETypeOfChange::SetNotNullConstraint) { + // todo flown4qqqq + } else if (col.TypeOfChange == TColumnSchema::ETypeOfChange::SetFamily) { + auto columnDesc = Y(); + columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name)); + auto familiesDesc = Y(); + for (const auto& family : col.Families) { + familiesDesc = L(familiesDesc, BuildQuotedAtom(family.Pos, family.Name)); + } + + columnDesc = L(columnDesc, Q(Y(Q("setFamily"), Q(familiesDesc)))); + columns = L(columns, Q(columnDesc)); + } else if (col.TypeOfChange == TColumnSchema::ETypeOfChange::Nothing) { + // do nothing + } else { + ctx.Error(Pos) << " action is not supported"; + } + } + actions = L(actions, Q(Y(Q("alterColumns"), Q(columns)))); + } + + if (Params.AddColumnFamilies) { + auto columnFamilies = Y(); + for (const auto& family : 
Params.AddColumnFamilies) { + auto familyDesc = Y(); + familyDesc = L(familyDesc, Q(Y(Q("name"), BuildQuotedAtom(family.Name.Pos, family.Name.Name)))); + if (family.Data) { + familyDesc = L(familyDesc, Q(Y(Q("data"), family.Data))); + } + if (family.Compression) { + familyDesc = L(familyDesc, Q(Y(Q("compression"), family.Compression))); + } + if (family.CompressionLevel) { + familyDesc = L(familyDesc, Q(Y(Q("compression_level"), family.CompressionLevel))); + } + columnFamilies = L(columnFamilies, Q(familyDesc)); + } + actions = L(actions, Q(Y(Q("addColumnFamilies"), Q(columnFamilies)))); + } + + if (Params.AlterColumnFamilies) { + auto columnFamilies = Y(); + for (const auto& family : Params.AlterColumnFamilies) { + auto familyDesc = Y(); + familyDesc = L(familyDesc, Q(Y(Q("name"), BuildQuotedAtom(family.Name.Pos, family.Name.Name)))); + if (family.Data) { + familyDesc = L(familyDesc, Q(Y(Q("data"), family.Data))); + } + if (family.Compression) { + familyDesc = L(familyDesc, Q(Y(Q("compression"), family.Compression))); + } + if (family.CompressionLevel) { + familyDesc = L(familyDesc, Q(Y(Q("compression_level"), family.CompressionLevel))); + } + columnFamilies = L(columnFamilies, Q(familyDesc)); + } + actions = L(actions, Q(Y(Q("alterColumnFamilies"), Q(columnFamilies)))); + } + + if (Params.TableSettings.IsSet()) { + actions = L(actions, Q(Y(Q("setTableSettings"), Q( + CreateTableSettings(Params.TableSettings, ETableSettingsParsingMode::Alter, *this) + )))); + } + + for (const auto& index : Params.AddIndexes) { + const auto& desc = CreateIndexDesc(index, ETableSettingsParsingMode::Alter, *this); + actions = L(actions, Q(Y(Q("addIndex"), Q(desc)))); + } + + for (const auto& index : Params.AlterIndexes) { + const auto& desc = CreateAlterIndex(index, *this); + actions = L(actions, Q(Y(Q("alterIndex"), Q(desc)))); + } + + for (const auto& id : Params.DropIndexes) { + auto indexName = BuildQuotedAtom(id.Pos, id.Name); + actions = L(actions, Q(Y(Q("dropIndex"), 
indexName))); + } + + if (Params.RenameIndexTo) { + auto src = BuildQuotedAtom(Params.RenameIndexTo->first.Pos, Params.RenameIndexTo->first.Name); + auto dst = BuildQuotedAtom(Params.RenameIndexTo->second.Pos, Params.RenameIndexTo->second.Name); + + auto desc = Y(); + + desc = L(desc, Q(Y(Q("src"), src))); + desc = L(desc, Q(Y(Q("dst"), dst))); + + actions = L(actions, Q(Y(Q("renameIndexTo"), Q(desc)))); + } + + if (Params.RenameTo) { + auto destination = ctx.GetPrefixedPath(Scoped->CurrService, Scoped->CurrCluster, + TDeferredAtom(Params.RenameTo->Pos, Params.RenameTo->Name)); + actions = L(actions, Q(Y(Q("renameTo"), destination))); + } + + for (const auto& cf : Params.AddChangefeeds) { + const auto& desc = CreateChangefeedDesc(cf, *this); + actions = L(actions, Q(Y(Q("addChangefeed"), Q(desc)))); + } + + for (const auto& cf : Params.AlterChangefeeds) { + const auto& desc = CreateChangefeedDesc(cf, *this); + actions = L(actions, Q(Y(Q("alterChangefeed"), Q(desc)))); + } + + for (const auto& id : Params.DropChangefeeds) { + const auto name = BuildQuotedAtom(id.Pos, id.Name); + actions = L(actions, Q(Y(Q("dropChangefeed"), name))); + } + + auto opts = Y(); + + opts = L(opts, Q(Y(Q("mode"), Q("alter")))); + opts = L(opts, Q(Y(Q("actions"), Q(actions)))); + + switch (Params.TableType) { + case ETableType::TableStore: + opts = L(opts, Q(Y(Q("tableType"), Q("tableStore")))); + break; + case ETableType::ExternalTable: + opts = L(opts, Q(Y(Q("tableType"), Q("externalTable")))); + break; + case ETableType::Table: + break; + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + TPtr DoClone() const final { + return {}; + } +private: + TTableRef Table; + const TAlterTableParameters Params; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildAlterTable(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped) +{ + return new TAlterTableNode(pos, tr, params, scoped); +} + +class TDropTableNode final: public TAstListNode { +public: + TDropTableNode(TPosition pos, const TTableRef& tr, bool missingOk, ETableType tableType, TScopedStatePtr scoped) + : TAstListNode(pos) + , Table(tr) + , TableType(tableType) + , Scoped(scoped) + , MissingOk(missingOk) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(Table.Service, Table.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::DROP); + if (!keys || !keys->Init(ctx, FakeSource.Get())) { + return false; + } + + auto opts = Y(); + + opts = L(opts, Q(Y(Q("mode"), Q(MissingOk ? "drop_if_exists" : "drop")))); + + switch (TableType) { + case ETableType::TableStore: + opts = L(opts, Q(Y(Q("tableType"), Q("tableStore")))); + break; + case ETableType::ExternalTable: + opts = L(opts, Q(Y(Q("tableType"), Q("externalTable")))); + break; + case ETableType::Table: + break; + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + TTableRef Table; + ETableType TableType; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; + const bool MissingOk; +}; + +TNodePtr BuildDropTable(TPosition pos, const TTableRef& tr, bool missingOk, ETableType tableType, TScopedStatePtr scoped) { + return new TDropTableNode(pos, tr, missingOk, tableType, scoped); +} + + +static INode::TPtr CreateConsumerDesc(const TTopicConsumerDescription& desc, const INode& node, bool alter) { + auto settings = node.Y(); + if (desc.Settings.Important) { + settings = node.L(settings, node.Q(node.Y(node.Q("important"), desc.Settings.Important))); + } + if (const auto& readFromTs = desc.Settings.ReadFromTs) { + if (readFromTs.IsSet()) { + settings = node.L(settings, node.Q(node.Y(node.Q("setReadFromTs"), readFromTs.GetValueSet()))); + } else if (alter) { + settings = node.L(settings, node.Q(node.Y(node.Q("resetReadFromTs"), node.Q(node.Y())))); + } else { + YQL_ENSURE(false, "Cannot reset on create"); + } + } + if (const auto& readFromTs = desc.Settings.SupportedCodecs) { + if (readFromTs.IsSet()) { + settings = node.L(settings, node.Q(node.Y(node.Q("setSupportedCodecs"), readFromTs.GetValueSet()))); + } else if (alter) { + settings = node.L(settings, node.Q(node.Y(node.Q("resetSupportedCodecs"), node.Q(node.Y())))); + } else { + YQL_ENSURE(false, "Cannot reset on create"); + } + } + return node.Y( + node.Q(node.Y(node.Q("name"), BuildQuotedAtom(desc.Name.Pos, desc.Name.Name))), + node.Q(node.Y(node.Q("settings"), node.Q(settings))) + ); +} + +class TCreateTopicNode final: public TAstListNode { +public: + TCreateTopicNode(TPosition pos, const TTopicRef& tr, const TCreateTopicParameters& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Topic(tr) + , Params(params) + , Scoped(scoped) + { + scoped->UseCluster(TString(KikimrProviderName), 
Topic.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Topic.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE); + if (!keys || !keys->Init(ctx, src)) { + return false; + } + + if (!Params.Consumers.empty()) + { + THashSet<TString> consumerNames; + for (const auto& consumer : Params.Consumers) { + if (!consumerNames.insert(consumer.Name.Name).second) { + ctx.Error(consumer.Name.Pos) << "Consumer " << consumer.Name.Name << " defined more than once"; + return false; + } + } + } + + auto opts = Y(); + TString mode = Params.ExistingOk ? "create_if_not_exists" : "create"; + opts = L(opts, Q(Y(Q("mode"), Q(mode)))); + + for (const auto& consumer : Params.Consumers) { + const auto& desc = CreateConsumerDesc(consumer, *this, false); + opts = L(opts, Q(Y(Q("consumer"), Q(desc)))); + } + + if (Params.TopicSettings.IsSet()) { + auto settings = Y(); + +#define INSERT_TOPIC_SETTING(NAME) \ + if (const auto& NAME##Val = Params.TopicSettings.NAME) { \ + if (NAME##Val.IsSet()) { \ + settings = L(settings, Q(Y(Q(Y_STRINGIZE(set##NAME)), NAME##Val.GetValueSet()))); \ + } else { \ + YQL_ENSURE(false, "Can't reset on create"); \ + } \ + } + + INSERT_TOPIC_SETTING(MaxPartitions) + INSERT_TOPIC_SETTING(MinPartitions) + INSERT_TOPIC_SETTING(RetentionPeriod) + INSERT_TOPIC_SETTING(SupportedCodecs) + INSERT_TOPIC_SETTING(PartitionWriteSpeed) + INSERT_TOPIC_SETTING(PartitionWriteBurstSpeed) + INSERT_TOPIC_SETTING(MeteringMode) + INSERT_TOPIC_SETTING(AutoPartitioningStabilizationWindow) + INSERT_TOPIC_SETTING(AutoPartitioningUpUtilizationPercent) + INSERT_TOPIC_SETTING(AutoPartitioningDownUtilizationPercent) + INSERT_TOPIC_SETTING(AutoPartitioningStrategy) + +#undef INSERT_TOPIC_SETTING + + opts = L(opts, Q(Y(Q("topicSettings"), Q(settings)))); + } + + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)), + Scoped->WrapCluster(Topic.Cluster, ctx))), + Y("let", "world", 
Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TTopicRef Topic; + const TCreateTopicParameters Params; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildCreateTopic( + TPosition pos, const TTopicRef& tr, const TCreateTopicParameters& params, TScopedStatePtr scoped +){ + return new TCreateTopicNode(pos, tr, params, scoped); +} + +class TAlterTopicNode final: public TAstListNode { +public: + TAlterTopicNode(TPosition pos, const TTopicRef& tr, const TAlterTopicParameters& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Topic(tr) + , Params(params) + , Scoped(scoped) + { + scoped->UseCluster(TString(KikimrProviderName), Topic.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Topic.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE); + if (!keys || !keys->Init(ctx, src)) { + return false; + } + + if (!Params.AddConsumers.empty()) + { + THashSet<TString> consumerNames; + for (const auto& consumer : Params.AddConsumers) { + if (!consumerNames.insert(consumer.Name.Name).second) { + ctx.Error(consumer.Name.Pos) << "Consumer " << consumer.Name.Name << " defined more than once"; + return false; + } + } + } + if (!Params.AlterConsumers.empty()) + { + THashSet<TString> consumerNames; + for (const auto& [_, consumer] : Params.AlterConsumers) { + if (!consumerNames.insert(consumer.Name.Name).second) { + ctx.Error(consumer.Name.Pos) << "Consumer " << consumer.Name.Name << " altered more than once"; + return false; + } + } + } + if (!Params.DropConsumers.empty()) + { + THashSet<TString> consumerNames; + for (const auto& consumer : Params.DropConsumers) { + if (!consumerNames.insert(consumer.Name).second) { + ctx.Error(consumer.Pos) << "Consumer " << consumer.Name << " dropped more than 
once"; + return false; + } + } + } + + auto opts = Y(); + TString mode = Params.MissingOk ? "alter_if_exists" : "alter"; + opts = L(opts, Q(Y(Q("mode"), Q(mode)))); + + for (const auto& consumer : Params.AddConsumers) { + const auto& desc = CreateConsumerDesc(consumer, *this, false); + opts = L(opts, Q(Y(Q("addConsumer"), Q(desc)))); + } + + for (const auto& [_, consumer] : Params.AlterConsumers) { + const auto& desc = CreateConsumerDesc(consumer, *this, true); + opts = L(opts, Q(Y(Q("alterConsumer"), Q(desc)))); + } + + for (const auto& consumer : Params.DropConsumers) { + const auto name = BuildQuotedAtom(consumer.Pos, consumer.Name); + opts = L(opts, Q(Y(Q("dropConsumer"), name))); + } + + if (Params.TopicSettings.IsSet()) { + auto settings = Y(); + +#define INSERT_TOPIC_SETTING(NAME) \ + if (const auto& NAME##Val = Params.TopicSettings.NAME) { \ + if (NAME##Val.IsSet()) { \ + settings = L(settings, Q(Y(Q(Y_STRINGIZE(set##NAME)), NAME##Val.GetValueSet()))); \ + } else { \ + settings = L(settings, Q(Y(Q(Y_STRINGIZE(reset##NAME)), Y()))); \ + } \ + } + + INSERT_TOPIC_SETTING(MaxPartitions) + INSERT_TOPIC_SETTING(MinPartitions) + INSERT_TOPIC_SETTING(RetentionPeriod) + INSERT_TOPIC_SETTING(SupportedCodecs) + INSERT_TOPIC_SETTING(PartitionWriteSpeed) + INSERT_TOPIC_SETTING(PartitionWriteBurstSpeed) + INSERT_TOPIC_SETTING(MeteringMode) + INSERT_TOPIC_SETTING(AutoPartitioningStabilizationWindow) + INSERT_TOPIC_SETTING(AutoPartitioningUpUtilizationPercent) + INSERT_TOPIC_SETTING(AutoPartitioningDownUtilizationPercent) + INSERT_TOPIC_SETTING(AutoPartitioningStrategy) + +#undef INSERT_TOPIC_SETTING + + opts = L(opts, Q(Y(Q("topicSettings"), Q(settings)))); + } + + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)), + Scoped->WrapCluster(Topic.Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TTopicRef Topic; + const TAlterTopicParameters Params; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildAlterTopic( + TPosition pos, const TTopicRef& tr, const TAlterTopicParameters& params, TScopedStatePtr scoped +){ + return new TAlterTopicNode(pos, tr, params, scoped); +} + +class TDropTopicNode final: public TAstListNode { +public: + TDropTopicNode(TPosition pos, const TTopicRef& tr, const TDropTopicParameters& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Topic(tr) + , Params(params) + , Scoped(scoped) + { + scoped->UseCluster(TString(KikimrProviderName), Topic.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto keys = Topic.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::DROP); + if (!keys || !keys->Init(ctx, FakeSource.Get())) { + return false; + } + + auto opts = Y(); + + TString mode = Params.MissingOk ? "drop_if_exists" : "drop"; + opts = L(opts, Q(Y(Q("mode"), Q(mode)))); + + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)), + Scoped->WrapCluster(Topic.Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + TTopicRef Topic; + TDropTopicParameters Params; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildDropTopic(TPosition pos, const TTopicRef& tr, const TDropTopicParameters& params, TScopedStatePtr scoped) { + return new TDropTopicNode(pos, tr, params, scoped); +} + +class TCreateRole final: public TAstListNode { +public: + TCreateRole(TPosition pos, bool isUser, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , IsUser(isUser) + , Service(service) + , Cluster(cluster) + , Name(name) + , Params(params) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto name = Name.Build(); + TNodePtr password; + if (Params && Params->Password) { + password = Params->Password->Build(); + } + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + + if (!name->Init(ctx, FakeSource.Get()) || !cluster->Init(ctx, FakeSource.Get())) { + return false; + } + if (password && !password->Init(ctx, FakeSource.Get())) { + return false; + } + + TVector<TNodePtr> roles; + if (Params && !Params->Roles.empty()) { + for (auto& item : Params->Roles) { + roles.push_back(item.Build()); + if (!roles.back()->Init(ctx, FakeSource.Get())) { + return false; + } + } + } + + + auto options = Y(Q(Y(Q("mode"), Q(IsUser ? 
"createUser" : "createGroup")))); + if (Params) { + if (Params->IsPasswordEncrypted) { + options = L(options, Q(Y(Q("passwordEncrypted")))); + } + if (Params->Password) { + options = L(options, Q(Y(Q("password"), password))); + } else { + options = L(options, Q(Y(Q("nullPassword")))); + } + if (!Params->Roles.empty()) { + options = L(options, Q(Y(Q("roles"), Q(new TAstListNodeImpl(Pos, std::move(roles)))))); + } + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), cluster)), + Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", name)))), Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + const bool IsUser; + const TString Service; + TDeferredAtom Cluster; + TDeferredAtom Name; + const TMaybe<TRoleParameters> Params; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildCreateUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped) { + bool isUser = true; + return new TCreateRole(pos, isUser, service, cluster, name, params, scoped); +} + +TNodePtr BuildCreateGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped) { + bool isUser = false; + return new TCreateRole(pos, isUser, service, cluster, name, params, scoped); +} + +class TAlterUser final: public TAstListNode { +public: + TAlterUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TRoleParameters& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Service(service) + , Cluster(cluster) + , Name(name) + , Params(params) + , Scoped(scoped) + { + 
FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto name = Name.Build(); + TNodePtr password; + if (Params.Password) { + password = Params.Password->Build(); + } + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + + if (!name->Init(ctx, FakeSource.Get()) || !cluster->Init(ctx, FakeSource.Get())) { + return false; + } + if (password && !password->Init(ctx, FakeSource.Get())) { + return false; + } + + auto options = Y(Q(Y(Q("mode"), Q("alterUser")))); + if (Params.IsPasswordEncrypted) { + options = L(options, Q(Y(Q("passwordEncrypted")))); + } + if (Params.Password) { + options = L(options, Q(Y(Q("password"), password))); + } else { + options = L(options, Q(Y(Q("nullPassword")))); + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), cluster)), + Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", name)))), Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TString Service; + TDeferredAtom Cluster; + TDeferredAtom Name; + const TRoleParameters Params; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildAlterUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TRoleParameters& params, TScopedStatePtr scoped) { + return new TAlterUser(pos, service, cluster, name, params, scoped); +} + +class TRenameRole final: public TAstListNode { +public: + TRenameRole(TPosition pos, bool isUser, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped) + : TAstListNode(pos) + , IsUser(isUser) + , Service(service) + , Cluster(cluster) + , Name(name) + , NewName(newName) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto name = Name.Build(); + auto newName = NewName.Build(); + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + + if (!name->Init(ctx, FakeSource.Get()) || + !newName->Init(ctx, FakeSource.Get()) || + !cluster->Init(ctx, FakeSource.Get())) + { + return false; + } + + auto options = Y(Q(Y(Q("mode"), Q(IsUser ? "renameUser" : "renameGroup")))); + options = L(options, Q(Y(Q("newName"), newName))); + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), cluster)), + Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", name)))), Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + const bool IsUser; + const TString Service; + TDeferredAtom Cluster; + TDeferredAtom Name; + TDeferredAtom NewName; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildRenameUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped) { + const bool isUser = true; + return new TRenameRole(pos, isUser, service, cluster, name, newName, scoped); +} + +TNodePtr BuildRenameGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped) { + const bool isUser = false; + return new TRenameRole(pos, isUser, service, cluster, name, newName, scoped); +} + +class TAlterGroup final: public TAstListNode { +public: + TAlterGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TVector<TDeferredAtom>& toChange, bool isDrop, TScopedStatePtr scoped) + : TAstListNode(pos) + , Service(service) + , Cluster(cluster) + , Name(name) + , ToChange(toChange) + , IsDrop(isDrop) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto name = Name.Build(); + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + + if (!name->Init(ctx, FakeSource.Get()) || !cluster->Init(ctx, FakeSource.Get())) { + return false; + } + + TVector<TNodePtr> toChange; + for (auto& item : ToChange) { + toChange.push_back(item.Build()); + if (!toChange.back()->Init(ctx, FakeSource.Get())) { + return false; + } + } + + auto options = Y(Q(Y(Q("mode"), Q(IsDrop ? 
"dropUsersFromGroup" : "addUsersToGroup")))); + options = L(options, Q(Y(Q("roles"), Q(new TAstListNodeImpl(Pos, std::move(toChange)))))); + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), cluster)), + Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", name)))), Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TString Service; + TDeferredAtom Cluster; + TDeferredAtom Name; + TVector<TDeferredAtom> ToChange; + const bool IsDrop; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildAlterGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TVector<TDeferredAtom>& toChange, bool isDrop, + TScopedStatePtr scoped) +{ + return new TAlterGroup(pos, service, cluster, name, toChange, isDrop, scoped); +} + +class TDropRoles final: public TAstListNode { +public: + TDropRoles(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& toDrop, bool isUser, bool missingOk, TScopedStatePtr scoped) + : TAstListNode(pos) + , Service(service) + , Cluster(cluster) + , ToDrop(toDrop) + , IsUser(isUser) + , MissingOk(missingOk) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + + if (!cluster->Init(ctx, FakeSource.Get())) { + return false; + } + + const char* mode = IsUser ? + (MissingOk ? "dropUserIfExists" : "dropUser") : + (MissingOk ? 
"dropGroupIfExists" : "dropGroup"); + + auto options = Y(Q(Y(Q("mode"), Q(mode)))); + + auto block = Y(Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), cluster))); + for (auto& item : ToDrop) { + auto name = item.Build(); + if (!name->Init(ctx, FakeSource.Get())) { + return false; + } + + block = L(block, Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", name)))), Y("Void"), Q(options)))); + } + block = L(block, Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))); + Add("block", Q(block)); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TString Service; + TDeferredAtom Cluster; + TVector<TDeferredAtom> ToDrop; + const bool IsUser; + const bool MissingOk; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildUpsertObjectOperation(TPosition pos, const TString& objectId, const TString& typeId, + std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context) { + return new TUpsertObject(pos, objectId, typeId, false, false, std::move(features), std::set<TString>(), context); +} +TNodePtr BuildCreateObjectOperation(TPosition pos, const TString& objectId, const TString& typeId, + bool existingOk, bool replaceIfExists, std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context) { + return new TCreateObject(pos, objectId, typeId, existingOk, replaceIfExists, std::move(features), std::set<TString>(), context); +} +TNodePtr BuildAlterObjectOperation(TPosition pos, const TString& secretId, const TString& typeId, + std::map<TString, TDeferredAtom>&& features, std::set<TString>&& featuresToReset, const TObjectOperatorContext& context) +{ + return new TAlterObject(pos, secretId, typeId, false, false, std::move(features), std::move(featuresToReset), context); +} +TNodePtr BuildDropObjectOperation(TPosition pos, const TString& secretId, const 
TString& typeId, + bool missingOk, std::map<TString, TDeferredAtom>&& options, const TObjectOperatorContext& context) +{ + return new TDropObject(pos, secretId, typeId, missingOk, false, std::move(options), std::set<TString>(), context); +} + +TNodePtr BuildDropRoles(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& toDrop, bool isUser, bool missingOk, TScopedStatePtr scoped) { + return new TDropRoles(pos, service, cluster, toDrop, isUser, missingOk, scoped); +} + +class TPermissionsAction final : public TAstListNode { +public: + struct TPermissionParameters { + TString PermissionAction; + TVector<TDeferredAtom> Permissions; + TVector<TDeferredAtom> SchemaPaths; + TVector<TDeferredAtom> RoleNames; + }; + + TPermissionsAction(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TPermissionParameters& parameters, TScopedStatePtr scoped) + : TAstListNode(pos) + , Service(service) + , Cluster(cluster) + , Parameters(parameters) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + TNodePtr permissionAction = TDeferredAtom(Pos, Parameters.PermissionAction).Build(); + + if (!permissionAction->Init(ctx, FakeSource.Get()) || + !cluster->Init(ctx, FakeSource.Get())) { + return false; + } + + TVector<TNodePtr> paths; + paths.reserve(Parameters.SchemaPaths.size()); + for (auto& item : Parameters.SchemaPaths) { + paths.push_back(item.Build()); + if (!paths.back()->Init(ctx, FakeSource.Get())) { + return false; + } + } + auto options = Y(Q(Y(Q("paths"), Q(new TAstListNodeImpl(Pos, std::move(paths)))))); + + TVector<TNodePtr> permissions; + permissions.reserve(Parameters.Permissions.size()); + for (auto& item : Parameters.Permissions) { + permissions.push_back(item.Build()); + if (!permissions.back()->Init(ctx, FakeSource.Get())) 
{ + return false; + } + } + options = L(options, Q(Y(Q("permissions"), Q(new TAstListNodeImpl(Pos, std::move(permissions)))))); + + TVector<TNodePtr> roles; + roles.reserve(Parameters.RoleNames.size()); + for (auto& item : Parameters.RoleNames) { + roles.push_back(item.Build()); + if (!roles.back()->Init(ctx, FakeSource.Get())) { + return false; + } + } + options = L(options, Q(Y(Q("roles"), Q(new TAstListNodeImpl(Pos, std::move(roles)))))); + + auto block = Y(Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), cluster))); + block = L(block, Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("permission"), Y("String", permissionAction)))), Y("Void"), Q(options)))); + block = L(block, Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))); + Add("block", Q(block)); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } + +private: + const TString Service; + TDeferredAtom Cluster; + TPermissionParameters Parameters; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildGrantPermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleNames, TScopedStatePtr scoped) { + return new TPermissionsAction(pos, + service, + cluster, + {.PermissionAction = "grant", + .Permissions = permissions, + .SchemaPaths = schemaPaths, + .RoleNames = roleNames}, + scoped); +} + +TNodePtr BuildRevokePermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleNames, TScopedStatePtr scoped) { + return new TPermissionsAction(pos, + service, + cluster, + {.PermissionAction = "revoke", + .Permissions = permissions, + .SchemaPaths = schemaPaths, + .RoleNames = roleNames}, + 
scoped); +} + +class TAsyncReplication + : public TAstListNode + , protected TObjectOperatorContext +{ +protected: + virtual INode::TPtr FillOptions(INode::TPtr options) const = 0; + +public: + explicit TAsyncReplication(TPosition pos, const TString& id, const TString& mode, const TObjectOperatorContext& context) + : TAstListNode(pos) + , TObjectOperatorContext(context) + , Id(id) + , Mode(mode) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + Scoped->UseCluster(ServiceId, Cluster); + + auto keys = Y("Key", Q(Y(Q("replication"), Y("String", BuildQuotedAtom(Pos, Id))))); + auto options = FillOptions(Y(Q(Y(Q("mode"), Q(Mode))))); + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } + +private: + const TString Id; + const TString Mode; + +}; // TAsyncReplication + +class TCreateAsyncReplication final: public TAsyncReplication { +public: + explicit TCreateAsyncReplication(TPosition pos, const TString& id, + std::vector<std::pair<TString, TString>>&& targets, + std::map<TString, TNodePtr>&& settings, + const TObjectOperatorContext& context) + : TAsyncReplication(pos, id, "create", context) + , Targets(std::move(targets)) + , Settings(std::move(settings)) + { + } + +protected: + INode::TPtr FillOptions(INode::TPtr options) const override { + if (!Targets.empty()) { + auto targets = Y(); + for (auto&& [remote, local] : Targets) { + auto target = Y(); + target = L(target, Q(Y(Q("remote"), Q(remote)))); + target = L(target, Q(Y(Q("local"), Q(local)))); + targets = L(targets, Q(target)); + } + options = L(options, Q(Y(Q("targets"), Q(targets)))); + } + + if (!Settings.empty()) { + auto settings = Y(); 
+ for (auto&& [k, v] : Settings) { + if (v) { + settings = L(settings, Q(Y(BuildQuotedAtom(Pos, k), v))); + } else { + settings = L(settings, Q(Y(BuildQuotedAtom(Pos, k)))); + } + } + options = L(options, Q(Y(Q("settings"), Q(settings)))); + } + + return options; + } + +private: + std::vector<std::pair<TString, TString>> Targets; // (remote, local) + std::map<TString, TNodePtr> Settings; + +}; // TCreateAsyncReplication + +TNodePtr BuildCreateAsyncReplication(TPosition pos, const TString& id, + std::vector<std::pair<TString, TString>>&& targets, + std::map<TString, TNodePtr>&& settings, + const TObjectOperatorContext& context) +{ + return new TCreateAsyncReplication(pos, id, std::move(targets), std::move(settings), context); +} + +class TDropAsyncReplication final: public TAsyncReplication { +public: + explicit TDropAsyncReplication(TPosition pos, const TString& id, bool cascade, const TObjectOperatorContext& context) + : TAsyncReplication(pos, id, cascade ? "dropCascade" : "drop", context) + { + } + +protected: + INode::TPtr FillOptions(INode::TPtr options) const override { + return options; + } + +}; // TDropAsyncReplication + +TNodePtr BuildDropAsyncReplication(TPosition pos, const TString& id, bool cascade, const TObjectOperatorContext& context) { + return new TDropAsyncReplication(pos, id, cascade, context); +} + +class TAlterAsyncReplication final: public TAsyncReplication { +public: + explicit TAlterAsyncReplication(TPosition pos, const TString& id, + std::map<TString, TNodePtr>&& settings, + const TObjectOperatorContext& context) + : TAsyncReplication(pos, id, "alter", context) + , Settings(std::move(settings)) + { + } + +protected: + INode::TPtr FillOptions(INode::TPtr options) const override { + if (!Settings.empty()) { + auto settings = Y(); + for (auto&& [k, v] : Settings) { + if (v) { + settings = L(settings, Q(Y(BuildQuotedAtom(Pos, k), v))); + } else { + settings = L(settings, Q(Y(BuildQuotedAtom(Pos, k)))); + } + } + options = L(options, 
Q(Y(Q("settings"), Q(settings)))); + } + + return options; + } + +private: + std::map<TString, TNodePtr> Settings; + +}; // TAlterAsyncReplication + +TNodePtr BuildAlterAsyncReplication(TPosition pos, const TString& id, + std::map<TString, TNodePtr>&& settings, + const TObjectOperatorContext& context) +{ + return new TAlterAsyncReplication(pos, id, std::move(settings), context); +} + +static const TMap<EWriteColumnMode, TString> columnModeToStrMapMR { + {EWriteColumnMode::Default, ""}, + {EWriteColumnMode::Insert, "append"}, + {EWriteColumnMode::Renew, "renew"} +}; + +static const TMap<EWriteColumnMode, TString> columnModeToStrMapStat { + {EWriteColumnMode::Upsert, "upsert"} +}; + +static const TMap<EWriteColumnMode, TString> columnModeToStrMapKikimr { + {EWriteColumnMode::Default, ""}, + {EWriteColumnMode::Insert, "insert_abort"}, + {EWriteColumnMode::InsertOrAbort, "insert_abort"}, + {EWriteColumnMode::InsertOrIgnore, "insert_ignore"}, + {EWriteColumnMode::InsertOrRevert, "insert_revert"}, + {EWriteColumnMode::Upsert, "upsert"}, + {EWriteColumnMode::Replace, "replace"}, + {EWriteColumnMode::Update, "update"}, + {EWriteColumnMode::UpdateOn, "update_on"}, + {EWriteColumnMode::Delete, "delete"}, + {EWriteColumnMode::DeleteOn, "delete_on"}, +}; + +class TWriteTableNode final: public TAstListNode { +public: + TWriteTableNode(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, + TNodePtr options, TScopedStatePtr scoped) + : TAstListNode(pos) + , Label(label) + , Table(table) + , Mode(mode) + , Options(options) + , Scoped(scoped) + { + scoped->UseCluster(Table.Service, Table.Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::WRITE); + if (!keys || !keys->Init(ctx, src)) { + return false; + } + + auto getModesMap = [] (const TString& serviceName) -> const TMap<EWriteColumnMode, TString>& { + if (serviceName == KikimrProviderName || 
serviceName == YdbProviderName) { + return columnModeToStrMapKikimr; + } else if (serviceName == StatProviderName) { + return columnModeToStrMapStat; + } else { + return columnModeToStrMapMR; + } + }; + + auto options = Y(); + if (Options) { + if (!Options->Init(ctx, src)) { + return false; + } + + options = L(Options); + } + + if (Mode != EWriteColumnMode::Default) { + auto modeStr = getModesMap(Table.Service).FindPtr(Mode); + + options->Add(Q(Y(Q("mode"), Q(modeStr ? *modeStr : "unsupported")))); + } + + Add("block", Q((Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Label, Q(options))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + )))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +private: + TString Label; + TTableRef Table; + EWriteColumnMode Mode; + TNodePtr Options; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options, + TScopedStatePtr scoped) +{ + return new TWriteTableNode(pos, label, table, mode, std::move(options), scoped); +} + +class TClustersSinkOperationBase: public TAstListNode { +protected: + TClustersSinkOperationBase(TPosition pos) + : TAstListNode(pos) + {} + + virtual TPtr ProduceOperation() = 0; + + bool DoInit(TContext& ctx, ISource* src) override { + auto block(Y()); + + auto op = ProduceOperation(); + if (!op) { + return false; + } + + block = L(block, op); + block = L(block, Y("return", "world")); + Add("block", Q(block)); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +}; + +class TCommitClustersNode: public TClustersSinkOperationBase { +public: + TCommitClustersNode(TPosition pos) + : TClustersSinkOperationBase(pos) + { + } + + TPtr 
ProduceOperation() override { + return Y("let", "world", Y("CommitAll!", "world")); + } +}; + +TNodePtr BuildCommitClusters(TPosition pos) { + return new TCommitClustersNode(pos); +} + +class TRollbackClustersNode: public TClustersSinkOperationBase { +public: + TRollbackClustersNode(TPosition pos) + : TClustersSinkOperationBase(pos) + { + } + + TPtr ProduceOperation() override { + return Y("let", "world", Y("CommitAll!", "world", Q(Y(Q(Y(Q("mode"), Q("rollback"))))))); + } +}; + +TNodePtr BuildRollbackClusters(TPosition pos) { + return new TRollbackClustersNode(pos); +} + +class TWriteResultNode final: public TAstListNode { +public: + TWriteResultNode(TPosition pos, const TString& label, TNodePtr settings) + : TAstListNode(pos) + , Label(label) + , Settings(settings) + , CommitClusters(BuildCommitClusters(Pos)) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + auto block(Y( + Y("let", "result_sink", Y("DataSink", Q(TString(ResultProviderName)))), + Y("let", "world", Y(TString(WriteName), "world", "result_sink", Y("Key"), Label, Q(Settings))) + )); + if (ctx.PragmaAutoCommit) { + block = L(block, Y("let", "world", CommitClusters)); + } + + block = L(block, Y("return", Y(TString(CommitName), "world", "result_sink"))); + Add("block", Q(block)); + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +private: + TString Label; + TNodePtr Settings; + TNodePtr CommitClusters; +}; + +TNodePtr BuildWriteResult(TPosition pos, const TString& label, TNodePtr settings) { + return new TWriteResultNode(pos, label, settings); +} + +class TYqlProgramNode: public TAstListNode { +public: + TYqlProgramNode(TPosition pos, const TVector<TNodePtr>& blocks, bool topLevel, TScopedStatePtr scoped) + : TAstListNode(pos) + , Blocks(blocks) + , TopLevel(topLevel) + , Scoped(scoped) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + bool hasError = false; + if (TopLevel) { + for (auto& var: ctx.Variables) { + if 
(!var.second.second->Init(ctx, src)) { + hasError = true; + continue; + } + Add(Y( + "declare", + new TAstAtomNodeImpl(var.second.first, var.first, TNodeFlags::ArbitraryContent), + var.second.second)); + } + + for (const auto& overrideLibrary: ctx.OverrideLibraries) { + auto node = Y( + "override_library", + new TAstAtomNodeImpl( + std::get<TPosition>(overrideLibrary.second), + overrideLibrary.first, TNodeFlags::ArbitraryContent + )); + + Add(node); + } + + for (const auto& package: ctx.Packages) { + const auto& [url, urlPosition] = std::get<1U>(package.second); + + auto node = Y( + "package", + new TAstAtomNodeImpl( + std::get<TPosition>(package.second), package.first, + TNodeFlags::ArbitraryContent + ), + new TAstAtomNodeImpl(urlPosition, url, TNodeFlags::ArbitraryContent)); + + if (const auto& tokenWithPosition = std::get<2U>(package.second)) { + const auto& [token, tokenPosition] = *tokenWithPosition; + + node = L(node, new TAstAtomNodeImpl(tokenPosition, token, TNodeFlags::ArbitraryContent)); + } + + Add(node); + } + + for (const auto& lib : ctx.Libraries) { + auto node = Y("library", new TAstAtomNodeImpl(std::get<TPosition>(lib.second), lib.first, TNodeFlags::ArbitraryContent)); + if (const auto& first = std::get<1U>(lib.second)) { + node = L(node, new TAstAtomNodeImpl(first->second, first->first, TNodeFlags::ArbitraryContent)); + if (const auto& second = std::get<2U>(lib.second)) { + node = L(node, new TAstAtomNodeImpl(second->second, second->first, TNodeFlags::ArbitraryContent)); + } + } + + Add(node); + } + + for (const auto& p : ctx.PackageVersions) { + Add(Y("set_package_version", BuildQuotedAtom(Pos, p.first), BuildQuotedAtom(Pos, ToString(p.second)))); + } + + Add(Y("import", "aggregate_module", BuildQuotedAtom(Pos, "/lib/yql/aggregate.yql"))); + Add(Y("import", "window_module", BuildQuotedAtom(Pos, "/lib/yql/window.yql"))); + for (const auto& module : ctx.Settings.ModuleMapping) { + TString moduleName(module.first + "_module"); + 
moduleName.to_lower(); + Add(Y("import", moduleName, BuildQuotedAtom(Pos, module.second))); + } + for (const auto& moduleAlias : ctx.ImportModuleAliases) { + Add(Y("import", moduleAlias.second, BuildQuotedAtom(Pos, moduleAlias.first))); + } + + for (const auto& x : ctx.SimpleUdfs) { + Add(Y("let", x.second, Y("Udf", BuildQuotedAtom(Pos, x.first)))); + } + + if (!ctx.CompactNamedExprs) { + for (auto& nodes: Scoped->NamedNodes) { + if (src || ctx.Exports.contains(nodes.first)) { + auto& item = nodes.second.front(); + if (!item->Node->Init(ctx, src)) { + hasError = true; + continue; + } + + // Some constants may be used directly by YQL code and need to be translated without reference from SQL AST + if (item->Node->IsConstant() || ctx.Exports.contains(nodes.first)) { + Add(Y("let", BuildAtom(item->Node->GetPos(), nodes.first), item->Node)); + } + } + } + } + + if (ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIBRARY) { + auto configSource = Y("DataSource", BuildQuotedAtom(Pos, TString(ConfigProviderName))); + auto resultSink = Y("DataSink", BuildQuotedAtom(Pos, TString(ResultProviderName))); + + for (const auto& warningPragma : ctx.WarningPolicy.GetRules()) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "Warning"), BuildQuotedAtom(Pos, warningPragma.GetPattern()), + BuildQuotedAtom(Pos, to_lower(ToString(warningPragma.GetAction())))))); + } + + if (ctx.ResultSizeLimit > 0) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", resultSink, + BuildQuotedAtom(Pos, "SizeLimit"), BuildQuotedAtom(Pos, ToString(ctx.ResultSizeLimit))))); + } + + if (!ctx.PragmaPullUpFlatMapOverJoin) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "DisablePullUpFlatMapOverJoin")))); + } + + if (ctx.FilterPushdownOverJoinOptionalSide) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "FilterPushdownOverJoinOptionalSide")))); + } + + if 
(!ctx.RotateJoinTree) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "RotateJoinTree"), BuildQuotedAtom(Pos, "false")))); + } + + if (ctx.DiscoveryMode) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "DiscoveryMode")))); + } + + if (ctx.DqEngineEnable) { + TString mode = "auto"; + if (ctx.PqReadByRtmrCluster && ctx.PqReadByRtmrCluster != "dq") { + mode = "disable"; + } else if (ctx.DqEngineForce) { + mode = "force"; + } + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "DqEngine"), BuildQuotedAtom(Pos, mode)))); + } + + if (ctx.CostBasedOptimizer) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "CostBasedOptimizer"), BuildQuotedAtom(Pos, ctx.CostBasedOptimizer)))); + } + + if (ctx.JsonQueryReturnsJsonDocument.Defined()) { + TString pragmaName = "DisableJsonQueryReturnsJsonDocument"; + if (*ctx.JsonQueryReturnsJsonDocument) { + pragmaName = "JsonQueryReturnsJsonDocument"; + } + + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, pragmaName)))); + } + + if (ctx.OrderedColumns) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "OrderedColumns")))); + } + + if (ctx.PqReadByRtmrCluster) { + auto pqSourceAll = Y("DataSource", BuildQuotedAtom(Pos, TString(PqProviderName)), BuildQuotedAtom(Pos, "$all")); + Add(Y("let", "world", Y(TString(ConfigureName), "world", pqSourceAll, + BuildQuotedAtom(Pos, "Attr"), BuildQuotedAtom(Pos, "PqReadByRtmrCluster_"), BuildQuotedAtom(Pos, ctx.PqReadByRtmrCluster)))); + + auto rtmrSourceAll = Y("DataSource", BuildQuotedAtom(Pos, TString(RtmrProviderName)), BuildQuotedAtom(Pos, "$all")); + Add(Y("let", "world", Y(TString(ConfigureName), "world", rtmrSourceAll, + BuildQuotedAtom(Pos, "Attr"), BuildQuotedAtom(Pos, "PqReadByRtmrCluster_"), BuildQuotedAtom(Pos, 
ctx.PqReadByRtmrCluster)))); + + if (ctx.PqReadByRtmrCluster != "dq") { + // set any dynamic settings for particular RTMR cluster for CommitAll! + auto rtmrSource = Y("DataSource", BuildQuotedAtom(Pos, TString(RtmrProviderName)), BuildQuotedAtom(Pos, ctx.PqReadByRtmrCluster)); + Add(Y("let", "world", Y(TString(ConfigureName), "world", rtmrSource, + BuildQuotedAtom(Pos, "Attr"), BuildQuotedAtom(Pos, "Dummy_"), BuildQuotedAtom(Pos, "1")))); + } + } + + if (ctx.YsonCastToString.Defined()) { + const TString pragmaName = *ctx.YsonCastToString ? "YsonCastToString" : "DisableYsonCastToString"; + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, pragmaName)))); + } + + if (ctx.UseBlocks) { + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, "UseBlocks")))); + } + + if (ctx.BlockEngineEnable) { + TString mode = ctx.BlockEngineForce ? "force" : "auto"; + Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, + BuildQuotedAtom(Pos, "BlockEngine"), BuildQuotedAtom(Pos, mode)))); + } + } + } + + for (auto& block: Blocks) { + if (block->SubqueryAlias()) { + continue; + } + if (!block->Init(ctx, nullptr)) { + hasError = true; + continue; + } + } + + for (const auto& x : Scoped->Local.ExprClusters) { + auto& data = Scoped->Local.ExprClustersMap[x.Get()]; + auto& node = data.second; + + if (!node->Init(ctx, nullptr)) { + hasError = true; + continue; + } + + Add(Y("let", data.first, node)); + } + + for (auto& block: Blocks) { + const auto subqueryAliasPtr = block->SubqueryAlias(); + if (subqueryAliasPtr) { + if (block->UsedSubquery()) { + const auto& ref = block->GetLabel(); + YQL_ENSURE(!ref.empty()); + Add(block); + Add(Y("let", "world", Y("Nth", *subqueryAliasPtr, Q("0")))); + Add(Y("let", ref, Y("Nth", *subqueryAliasPtr, Q("1")))); + } + } else { + const auto& ref = block->GetLabel(); + Add(Y("let", ref ? 
ref : "world", block)); + } + } + + if (TopLevel) { + if (ctx.UniversalAliases) { + decltype(Nodes) preparedNodes; + preparedNodes.swap(Nodes); + for (const auto& [name, node] : ctx.UniversalAliases) { + Add(Y("let", name, node)); + } + Nodes.insert(Nodes.end(), preparedNodes.begin(), preparedNodes.end()); + } + + decltype(Nodes) imports; + for (const auto& [alias, path]: ctx.RequiredModules) { + imports.push_back(Y("import", alias, BuildQuotedAtom(Pos, path))); + } + Nodes.insert(Nodes.begin(), std::make_move_iterator(imports.begin()), std::make_move_iterator(imports.end())); + + for (const auto& symbol: ctx.Exports) { + if (ctx.CompactNamedExprs) { + auto node = Scoped->LookupNode(symbol); + YQL_ENSURE(node); + if (!node->Init(ctx, src)) { + hasError = true; + continue; + } + Add(Y("let", BuildAtom(node->GetPos(), symbol), node)); + } + Add(Y("export", symbol)); + } + } + + if (!TopLevel || ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIBRARY) { + Add(Y("return", "world")); + } + + return !hasError; + } + + TPtr DoClone() const final { + return {}; + } +private: + TVector<TNodePtr> Blocks; + const bool TopLevel; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildQuery(TPosition pos, const TVector<TNodePtr>& blocks, bool topLevel, TScopedStatePtr scoped) { + return new TYqlProgramNode(pos, blocks, topLevel, scoped); +} + +class TPragmaNode final: public INode { +public: + TPragmaNode(TPosition pos, const TString& prefix, const TString& name, const TVector<TDeferredAtom>& values, bool valueDefault) + : INode(pos) + , Prefix(prefix) + , Name(name) + , Values(values) + , ValueDefault(valueDefault) + { + FakeSource = BuildFakeSource(pos); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + TString serviceName; + TString cluster; + if (std::find(Providers.cbegin(), Providers.cend(), Prefix) != Providers.cend()) { + cluster = "$all"; + serviceName = Prefix; + } else { + serviceName = *ctx.GetClusterProvider(Prefix, cluster); + } + + auto 
datasource = Y("DataSource", BuildQuotedAtom(Pos, serviceName)); + if (Prefix != ConfigProviderName) { + datasource = L(datasource, BuildQuotedAtom(Pos, cluster)); + } + + Node = Y(); + Node = L(Node, AstNode(TString(ConfigureName))); + Node = L(Node, AstNode(TString(TStringBuf("world")))); + Node = L(Node, datasource); + + if (Name == TStringBuf("flags")) { + for (ui32 i = 0; i < Values.size(); ++i) { + Node = L(Node, Values[i].Build()); + } + } + else if (Name == TStringBuf("AddFileByUrl") || Name == TStringBuf("SetFileOption") || Name == TStringBuf("AddFolderByUrl") || Name == TStringBuf("ImportUdfs") || Name == TStringBuf("SetPackageVersion")) { + Node = L(Node, BuildQuotedAtom(Pos, Name)); + for (ui32 i = 0; i < Values.size(); ++i) { + Node = L(Node, Values[i].Build()); + } + } + else if (Name == TStringBuf("auth")) { + Node = L(Node, BuildQuotedAtom(Pos, "Auth")); + Node = L(Node, Values.empty() ? BuildQuotedAtom(Pos, TString()) : Values.front().Build()); + } + else { + Node = L(Node, BuildQuotedAtom(Pos, "Attr")); + Node = L(Node, BuildQuotedAtom(Pos, Name)); + if (!ValueDefault) { + Node = L(Node, Values.empty() ? 
BuildQuotedAtom(Pos, TString()) : Values.front().Build()); + } + } + + if (!Node->Init(ctx, FakeSource.Get())) { + return false; + } + + return true; + } + + TAstNode* Translate(TContext& ctx) const final { + return Node->Translate(ctx); + } + + TPtr DoClone() const final { + return {}; + } + +private: + TString Prefix; + TString Name; + TVector<TDeferredAtom> Values; + bool ValueDefault; + TNodePtr Node; + TSourcePtr FakeSource; +}; + +TNodePtr BuildPragma(TPosition pos, const TString& prefix, const TString& name, const TVector<TDeferredAtom>& values, bool valueDefault) { + return new TPragmaNode(pos, prefix, name, values, valueDefault); +} + +class TSqlLambda final: public TAstListNode { +public: + TSqlLambda(TPosition pos, TVector<TString>&& args, TVector<TNodePtr>&& exprSeq) + : TAstListNode(pos) + , Args(args) + , ExprSeq(exprSeq) + { + FakeSource = BuildFakeSource(pos); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + for (auto& exprPtr: ExprSeq) { + if (!exprPtr->Init(ctx, FakeSource.Get())) { + return {}; + } + } + YQL_ENSURE(!ExprSeq.empty()); + auto body = Y(); + auto end = ExprSeq.end() - 1; + for (auto iter = ExprSeq.begin(); iter != end; ++iter) { + auto exprPtr = *iter; + const auto& label = exprPtr->GetLabel(); + YQL_ENSURE(label); + body = L(body, Y("let", label, exprPtr)); + } + body = Y("block", Q(L(body, Y("return", *end)))); + auto args = Y(); + for (const auto& arg: Args) { + args = L(args, BuildAtom(GetPos(), arg)); + } + Add("lambda", Q(args), body); + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return new TSqlLambda(Pos, TVector<TString>(Args), CloneContainer(ExprSeq)); + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const); + } + +private: + TVector<TString> Args; + TVector<TNodePtr> ExprSeq; + TSourcePtr FakeSource; +}; + +TNodePtr BuildSqlLambda(TPosition pos, TVector<TString>&& args, TVector<TNodePtr>&& exprSeq) { + return new TSqlLambda(pos, 
std::move(args), std::move(exprSeq)); +} + +class TWorldIf final : public TAstListNode { +public: + TWorldIf(TPosition pos, TNodePtr predicate, TNodePtr thenNode, TNodePtr elseNode, bool isEvaluate) + : TAstListNode(pos) + , Predicate(predicate) + , ThenNode(thenNode) + , ElseNode(elseNode) + , IsEvaluate(isEvaluate) + { + FakeSource = BuildFakeSource(pos); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!Predicate->Init(ctx, FakeSource.Get())) { + return{}; + } + Add(IsEvaluate ? "EvaluateIf!" : "If!"); + Add("world"); + auto coalesced = Y("Coalesce", Predicate, Y("Bool", Q("false"))); + Add(IsEvaluate ? Y("EvaluateExpr", Y("EnsureType", coalesced, Y("DataType", Q("Bool")))) : coalesced); + + if (!ThenNode->Init(ctx, FakeSource.Get())) { + return{}; + } + + Add(ThenNode); + if (ElseNode) { + if (!ElseNode->Init(ctx, FakeSource.Get())) { + return{}; + } + + Add(ElseNode); + } + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return new TWorldIf(GetPos(), SafeClone(Predicate), SafeClone(ThenNode), SafeClone(ElseNode), IsEvaluate); + } + +private: + TNodePtr Predicate; + TNodePtr ThenNode; + TNodePtr ElseNode; + bool IsEvaluate; + TSourcePtr FakeSource; +}; + +TNodePtr BuildWorldIfNode(TPosition pos, TNodePtr predicate, TNodePtr thenNode, TNodePtr elseNode, bool isEvaluate) { + return new TWorldIf(pos, predicate, thenNode, elseNode, isEvaluate); +} + +class TWorldFor final : public TAstListNode { +public: + TWorldFor(TPosition pos, TNodePtr list, TNodePtr bodyNode, TNodePtr elseNode, bool isEvaluate, bool isParallel) + : TAstListNode(pos) + , List(list) + , BodyNode(bodyNode) + , ElseNode(elseNode) + , IsEvaluate(isEvaluate) + , IsParallel(isParallel) + { + FakeSource = BuildFakeSource(pos); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!List->Init(ctx, FakeSource.Get())) { + return{}; + } + Add(TStringBuilder() << (IsEvaluate ? "Evaluate": "") << (IsParallel ? 
"Parallel" : "") << "For!"); + Add("world"); + Add(IsEvaluate ? Y("EvaluateExpr", List) : List); + + if (!BodyNode->Init(ctx, FakeSource.Get())) { + return{}; + } + Add(BodyNode); + + if (ElseNode) { + if (!ElseNode->Init(ctx, FakeSource.Get())) { + return{}; + } + Add(ElseNode); + } + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return new TWorldFor(GetPos(), SafeClone(List), SafeClone(BodyNode), SafeClone(ElseNode), IsEvaluate, IsParallel); + } + +private: + TNodePtr List; + TNodePtr BodyNode; + TNodePtr ElseNode; + bool IsEvaluate; + bool IsParallel; + TSourcePtr FakeSource; +}; + +TNodePtr BuildWorldForNode(TPosition pos, TNodePtr list, TNodePtr bodyNode, TNodePtr elseNode, bool isEvaluate, bool isParallel) { + return new TWorldFor(pos, list, bodyNode, elseNode, isEvaluate, isParallel); +} + +class TAnalyzeNode final: public TAstListNode { +public: + TAnalyzeNode(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Service(service) + , Cluster(cluster) + , Params(params) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(Service, Cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + auto keys = Params.Table->Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::DROP); + if (!keys || !keys->Init(ctx, FakeSource.Get())) { + return false; + } + + auto opts = Y(); + + auto columns = Y(); + for (const auto& column: Params.Columns) { + columns->Add(Q(column)); + } + opts->Add(Q(Y(Q("columns"), Q(columns)))); + + opts->Add(Q(Y(Q("mode"), Q("analyze")))); + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, FakeSource.Get()); + } + + TPtr DoClone() const final { + return {}; + } +private: + TString Service; + TDeferredAtom Cluster; + TAnalyzeParams Params; + + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped) { + return new TAnalyzeNode(pos, service, cluster, params, scoped); +} + +class TBaseBackupCollectionNode + : public TAstListNode + , public TObjectOperatorContext +{ + using TBase = TAstListNode; +public: + TBaseBackupCollectionNode( + TPosition pos, + const TString& objectId, + const TObjectOperatorContext& context) + : TBase(pos) + , TObjectOperatorContext(context) + , Id(objectId) + {} + + bool DoInit(TContext& ctx, ISource* src) final { + auto keys = Y("Key"); + keys = L(keys, Q(Y(Q("backupCollection"), Y("String", BuildQuotedAtom(Pos, Id))))); + auto options = this->FillOptions(ctx, Y()); + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + virtual INode::TPtr FillOptions(TContext& ctx, INode::TPtr options) const = 0; + +protected: + TString Id; +}; + +class TCreateBackupCollectionNode + : public TBaseBackupCollectionNode +{ + using TBase = TBaseBackupCollectionNode; +public: + TCreateBackupCollectionNode( + TPosition pos, + const TString& objectId, + const TCreateBackupCollectionParameters& params, + const TObjectOperatorContext& context) + : TBase(pos, objectId, context) + , Params(params) + {} + + virtual INode::TPtr FillOptions(TContext& ctx, INode::TPtr options) const final { + options->Add(Q(Y(Q("mode"), Q("create")))); + + auto settings = Y(); + for (auto& [key, value] : Params.Settings) { + settings->Add(Q(Y(BuildQuotedAtom(Pos, key), value.Build()))); + } + options->Add(Q(Y(Q("settings"), Q(settings)))); + + auto entries = Y(); + if (Params.Database) { + entries->Add(Q(Y(Q(Y(Q("type"), Q("database")))))); + } + for (auto& table : Params.Tables) { + auto path = ctx.GetPrefixedPath(ServiceId, Cluster, table); + entries->Add(Q(Y(Q(Y(Q("type"), Q("table"))), Q(Y(Q("path"), path))))); + } + options->Add(Q(Y(Q("entries"), Q(entries)))); + + return options; + } + + TPtr DoClone() const final { + return new TCreateBackupCollectionNode(GetPos(), Id, Params, *this); + } + +private: + TCreateBackupCollectionParameters Params; +}; + +class TAlterBackupCollectionNode + : public TBaseBackupCollectionNode +{ + using TBase = TBaseBackupCollectionNode; +public: + TAlterBackupCollectionNode( + TPosition pos, + const TString& objectId, + const TAlterBackupCollectionParameters& params, + const TObjectOperatorContext& context) + : TBase(pos, objectId, context) + , Params(params) + {} + + virtual INode::TPtr FillOptions(TContext& ctx, INode::TPtr options) const final { + options->Add(Q(Y(Q("mode"), Q("alter")))); + + auto settings = Y(); + for (auto& [key, value] : Params.Settings) { + 
settings->Add(Q(Y(BuildQuotedAtom(Pos, key), value.Build()))); + } + options->Add(Q(Y(Q("settings"), Q(settings)))); + + auto resetSettings = Y(); + for (auto& key : Params.SettingsToReset) { + resetSettings->Add(BuildQuotedAtom(Pos, key)); + } + options->Add(Q(Y(Q("resetSettings"), Q(resetSettings)))); + + auto entries = Y(); + if (Params.Database != TAlterBackupCollectionParameters::EDatabase::Unchanged) { + entries->Add(Q(Y(Q(Y(Q("type"), Q("database"))), Q(Y(Q("action"), Q(Params.Database == TAlterBackupCollectionParameters::EDatabase::Add ? "add" : "drop")))))); + } + for (auto& table : Params.TablesToAdd) { + auto path = ctx.GetPrefixedPath(ServiceId, Cluster, table); + entries->Add(Q(Y(Q(Y(Q("type"), Q("table"))), Q(Y(Q("path"), path)), Q(Y(Q("action"), Q("add")))))); + } + for (auto& table : Params.TablesToDrop) { + auto path = ctx.GetPrefixedPath(ServiceId, Cluster, table); + entries->Add(Q(Y(Q(Y(Q("type"), Q("table"))), Q(Y(Q("path"), path)), Q(Y(Q("action"), Q("drop")))))); + } + options->Add(Q(Y(Q("alterEntries"), Q(entries)))); + + return options; + } + + TPtr DoClone() const final { + return new TAlterBackupCollectionNode(GetPos(), Id, Params, *this); + } + +private: + TAlterBackupCollectionParameters Params; +}; + +class TDropBackupCollectionNode + : public TBaseBackupCollectionNode +{ + using TBase = TBaseBackupCollectionNode; +public: + TDropBackupCollectionNode( + TPosition pos, + const TString& objectId, + const TDropBackupCollectionParameters&, + const TObjectOperatorContext& context) + : TBase(pos, objectId, context) + {} + + virtual INode::TPtr FillOptions(TContext&, INode::TPtr options) const final { + options->Add(Q(Y(Q("mode"), Q("drop")))); + + return options; + } + + TPtr DoClone() const final { + TDropBackupCollectionParameters params; + return new TDropBackupCollectionNode(GetPos(), Id, params, *this); + } +}; + +TNodePtr BuildCreateBackupCollection(TPosition pos, const TString& id, + const TCreateBackupCollectionParameters& params, + 
const TObjectOperatorContext& context) +{ + return new TCreateBackupCollectionNode(pos, id, params, context); +} + +TNodePtr BuildAlterBackupCollection(TPosition pos, const TString& id, + const TAlterBackupCollectionParameters& params, + const TObjectOperatorContext& context) +{ + return new TAlterBackupCollectionNode(pos, id, params, context); +} + +TNodePtr BuildDropBackupCollection(TPosition pos, const TString& id, + const TDropBackupCollectionParameters& params, + const TObjectOperatorContext& context) +{ + return new TDropBackupCollectionNode(pos, id, params, context); +} + +class TBackupNode final + : public TAstListNode + , public TObjectOperatorContext +{ + using TBase = TAstListNode; +public: + TBackupNode( + TPosition pos, + const TString& id, + const TBackupParameters& params, + const TObjectOperatorContext& context) + : TBase(pos) + , TObjectOperatorContext(context) + , Id(id) + , Params(params) + { + Y_UNUSED(Params); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Y("Key"); + keys = L(keys, Q(Y(Q("backup"), Y("String", BuildQuotedAtom(Pos, Id))))); + + auto opts = Y(); + opts->Add(Q(Y(Q("mode"), Q("backup")))); + + if (Params.Incremental) { + opts->Add(Q(Y(Q("incremental")))); + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return new TBackupNode(GetPos(), Id, Params, *this); + } +private: + TString Id; + TBackupParameters Params; +}; + +TNodePtr BuildBackup(TPosition pos, const TString& id, + const TBackupParameters& params, + const TObjectOperatorContext& context) +{ + return new TBackupNode(pos, id, params, context); +} + +class TRestoreNode final + : public TAstListNode + , public TObjectOperatorContext +{ + using TBase = TAstListNode; +public: + TRestoreNode( + TPosition pos, + const TString& id, + const TRestoreParameters& params, + const TObjectOperatorContext& context) + : TBase(pos) + , TObjectOperatorContext(context) + , Id(id) + , Params(params) + { + Y_UNUSED(Params); + } + + bool DoInit(TContext& ctx, ISource* src) override { + auto keys = Y("Key"); + keys = L(keys, Q(Y(Q("restore"), Y("String", BuildQuotedAtom(Pos, Id))))); + + auto opts = Y(); + opts->Add(Q(Y(Q("mode"), Q("restore")))); + + if (Params.At) { + opts->Add(Q(Y(Q("at"), BuildQuotedAtom(Pos, Params.At)))); + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))), + Y("return", ctx.PragmaAutoCommit ? 
Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return new TRestoreNode(GetPos(), Id, Params, *this); + } +private: + TString Id; + TRestoreParameters Params; +}; + +TNodePtr BuildRestore(TPosition pos, const TString& id, + const TRestoreParameters& params, + const TObjectOperatorContext& context) +{ + return new TRestoreNode(pos, id, params, context); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/select.cpp b/yql/essentials/sql/v1/select.cpp new file mode 100644 index 00000000000..4eee1efb266 --- /dev/null +++ b/yql/essentials/sql/v1/select.cpp @@ -0,0 +1,3195 @@ +#include "sql.h" +#include "source.h" + +#include "context.h" +#include "match_recognize.h" + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/utils/yql_panic.h> + +#include <library/cpp/charset/ci_string.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +class TSubqueryNode: public INode { +public: + TSubqueryNode(TSourcePtr&& source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped) + : INode(source->GetPos()) + , Source(std::move(source)) + , Alias(alias) + , InSubquery(inSubquery) + , EnsureTupleSize(ensureTupleSize) + , Scoped(scoped) + { + YQL_ENSURE(!Alias.empty()); + } + + ISource* GetSource() override { + return Source.Get(); + } + + bool DoInit(TContext& ctx, ISource* src) override { + YQL_ENSURE(!src, "Source not expected for subquery node"); + Source->UseAsInner(); + if (!Source->Init(ctx, nullptr)) { + return false; + } + + TTableList tableList; + Source->GetInputTables(tableList); + + auto tables = BuildInputTables(Pos, tableList, InSubquery, Scoped); + if (!tables->Init(ctx, Source.Get())) { + return false; + } + + auto source = Source->Build(ctx); + if (!source) { + return false; + } + if (EnsureTupleSize != -1) { + source = Y("EnsureTupleSize", source, 
Q(ToString(EnsureTupleSize))); + } + + Node = Y("let", Alias, Y("block", Q(L(tables, Y("return", Q(Y("world", source))))))); + IsUsed = true; + return true; + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, true); + } + + bool UsedSubquery() const override { + return IsUsed; + } + + TAstNode* Translate(TContext& ctx) const override { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + const TString* SubqueryAlias() const override { + return &Alias; + } + + TPtr DoClone() const final { + return new TSubqueryNode(Source->CloneSource(), Alias, InSubquery, EnsureTupleSize, Scoped); + } + +protected: + TSourcePtr Source; + TNodePtr Node; + const TString Alias; + const bool InSubquery; + const int EnsureTupleSize; + bool IsUsed = false; + TScopedStatePtr Scoped; +}; + +TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped) { + return new TSubqueryNode(std::move(source), alias, inSubquery, ensureTupleSize, scoped); +} + +class TSourceNode: public INode { +public: + TSourceNode(TPosition pos, TSourcePtr&& source, bool checkExist) + : INode(pos) + , Source(std::move(source)) + , CheckExist(checkExist) + {} + + ISource* GetSource() override { + return Source.Get(); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (AsInner) { + Source->UseAsInner(); + } + if (!Source->Init(ctx, src)) { + return false; + } + Node = Source->Build(ctx); + if (!Node) { + return false; + } + if (src) { + if (IsSubquery()) { + /// should be not used? 
+ auto columnsPtr = Source->GetColumns(); + if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) { + Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0")))); + } else { + ctx.Error(Pos) << "Source used in expression should contain one concrete column"; + return false; + } + } + src->AddDependentSource(Source.Get()); + } + return true; + } + + bool IsSubquery() const { + return !AsInner && Source->IsSelect() && !CheckExist; + } + + void DoUpdateState() const override { + State.Set(ENodeState::Const, IsSubquery()); + } + + TAstNode* Translate(TContext& ctx) const override { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + TPtr DoClone() const final { + return new TSourceNode(Pos, Source->CloneSource(), CheckExist); + } +protected: + TSourcePtr Source; + TNodePtr Node; + bool CheckExist; +}; + +TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist) { + return new TSourceNode(pos, std::move(source), checkExist); +} + +class TFakeSource: public ISource { +public: + TFakeSource(TPosition pos, bool missingFrom, bool inSubquery) + : ISource(pos) + , MissingFrom(missingFrom) + , InSubquery(inSubquery) + {} + + bool IsFake() const override { + return true; + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + // TODO: fix column reference scope - with proper scopes error below should happen earlier + if (column.CanBeType()) { + return true; + } + ctx.Error(Pos) << (MissingFrom ? "Column references are not allowed without FROM" : "Source does not allow column references"); + ctx.Error(column.GetPos()) << "Column reference " + << (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)"); + return {}; + } + + bool AddFilter(TContext& ctx, TNodePtr filter) override { + Y_UNUSED(filter); + auto pos = filter ? filter->GetPos() : Pos; + ctx.Error(pos) << (MissingFrom ? 
"Filtering is not allowed without FROM" : "Source does not allow filtering"); + return false; + } + + TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + auto ret = Y("AsList", Y("AsStruct")); + if (InSubquery) { + return Y("WithWorld", ret, "world"); + } else { + return ret; + } + } + + bool AddGroupKey(TContext& ctx, const TString& column) override { + Y_UNUSED(column); + ctx.Error(Pos) << "Grouping is not allowed " << (MissingFrom ? "without FROM" : "in this context"); + return false; + } + + bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override { + YQL_ENSURE(aggr); + ctx.Error(aggr->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context"); + return false; + } + + bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) override { + Y_UNUSED(windowName); + YQL_ENSURE(func); + ctx.Error(func->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context"); + return false; + } + + bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) override { + Y_UNUSED(windowName); + YQL_ENSURE(func); + ctx.Error(func->GetPos()) << "Window functions are not allowed " << (MissingFrom ? "without FROM" : "in this context"); + return false; + } + + TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override { + Y_UNUSED(windowName); + ctx.Error(Pos) << "Window and aggregation functions are not allowed " << (MissingFrom ? 
"without FROM" : "in this context"); + return {}; + } + + bool IsGroupByColumn(const TString& column) const override { + Y_UNUSED(column); + return false; + } + + TNodePtr BuildFilter(TContext& ctx, const TString& label) override { + Y_UNUSED(ctx); + Y_UNUSED(label); + return nullptr; + } + + std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx) override { + Y_UNUSED(label); + Y_UNUSED(ctx); + return { nullptr, true }; + } + + TPtr DoClone() const final { + return new TFakeSource(Pos, MissingFrom, InSubquery); + } +private: + const bool MissingFrom; + const bool InSubquery; +}; + +TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom, bool inSubquery) { + return new TFakeSource(pos, missingFrom, inSubquery); +} + +class TNodeSource: public ISource { +public: + TNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList) + : ISource(pos) + , Node(node) + , WrapToList(wrapToList) + { + YQL_ENSURE(Node); + FakeSource = BuildFakeSource(pos); + } + + bool ShouldUseSourceAsColumn(const TString& source) const final { + return source && source != GetLabel(); + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final { + Y_UNUSED(ctx); + Y_UNUSED(column); + return true; + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (!Node->Init(ctx, FakeSource.Get())) { + return false; + } + return ISource::DoInit(ctx, src); + } + + TNodePtr Build(TContext& /*ctx*/) final { + auto nodeAst = AstNode(Node); + if (WrapToList) { + nodeAst = Y("ToList", nodeAst); + } + return nodeAst; + } + + TPtr DoClone() const final { + return new TNodeSource(Pos, SafeClone(Node), WrapToList); + } + +private: + TNodePtr Node; + bool WrapToList; + TSourcePtr FakeSource; +}; + +TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList) { + return new TNodeSource(pos, node, wrapToList); +} + +class IProxySource: public ISource { +protected: + IProxySource(TPosition pos, ISource* src) + : ISource(pos) + , Source(src) + {} + + 
void AllColumns() override { + Y_DEBUG_ABORT_UNLESS(Source); + return Source->AllColumns(); + } + + const TColumns* GetColumns() const override { + Y_DEBUG_ABORT_UNLESS(Source); + return Source->GetColumns(); + } + + void GetInputTables(TTableList& tableList) const override { + Source->GetInputTables(tableList); + ISource::GetInputTables(tableList); + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + Y_DEBUG_ABORT_UNLESS(Source); + const TString label(Source->GetLabel()); + Source->SetLabel(Label); + const auto ret = Source->AddColumn(ctx, column); + Source->SetLabel(label); + return ret; + } + + bool ShouldUseSourceAsColumn(const TString& source) const override { + return Source->ShouldUseSourceAsColumn(source); + } + + bool IsStream() const override { + Y_DEBUG_ABORT_UNLESS(Source); + return Source->IsStream(); + } + + EOrderKind GetOrderKind() const override { + Y_DEBUG_ABORT_UNLESS(Source); + return Source->GetOrderKind(); + } + + TWriteSettings GetWriteSettings() const override { + Y_DEBUG_ABORT_UNLESS(Source); + return Source->GetWriteSettings(); + } + +protected: + void SetSource(ISource* source) { + Source = source; + } + + ISource* Source; +}; + +class IRealSource: public ISource { +protected: + IRealSource(TPosition pos) + : ISource(pos) + { + } + + void AllColumns() override { + Columns.SetAll(); + } + + const TColumns* GetColumns() const override { + return &Columns; + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + const auto& label = *column.GetSourceName(); + const auto& source = GetLabel(); + if (!label.empty() && label != source && !(source.StartsWith(label) && source[label.size()] == ':')) { + if (column.IsReliable()) { + ctx.Error(column.GetPos()) << "Unknown correlation name: " << label; + } + return {}; + } + if (column.IsAsterisk()) { + return true; + } + const auto* name = column.GetColumnName(); + if (name && !column.CanBeType() && !Columns.IsColumnPossible(ctx, *name) && 
!IsAlias(EExprSeat::GroupBy, *name) && !IsAlias(EExprSeat::DistinctAggr, *name)) { + if (column.IsReliable()) { + TStringBuilder sb; + sb << "Column " << *name << " is not in source column set"; + if (const auto mistype = FindColumnMistype(*name)) { + sb << ". Did you mean " << mistype.GetRef() << "?"; + } + ctx.Error(column.GetPos()) << sb; + } + return {}; + } + return true; + } + + TMaybe<TString> FindColumnMistype(const TString& name) const override { + auto result = FindMistypeIn(Columns.Real, name); + if (!result) { + auto result = FindMistypeIn(Columns.Artificial, name); + } + return result ? result : ISource::FindColumnMistype(name); + } + +protected: + TColumns Columns; +}; + +class IComposableSource : private TNonCopyable { +public: + virtual ~IComposableSource() = default; + virtual void BuildProjectWindowDistinct(TNodePtr& blocks, TContext& ctx, bool ordered) = 0; +}; + +using TComposableSourcePtr = TIntrusivePtr<IComposableSource>; + +class TMuxSource: public ISource { +public: + TMuxSource(TPosition pos, TVector<TSourcePtr>&& sources) + : ISource(pos) + , Sources(std::move(sources)) + { + YQL_ENSURE(Sources.size() > 1); + } + + void AllColumns() final { + for (auto& source: Sources) { + source->AllColumns(); + } + } + + const TColumns* GetColumns() const final { + // Columns are equal in all sources. 
Return from the first one + return Sources.front()->GetColumns(); + } + + void GetInputTables(TTableList& tableList) const final { + for (auto& source: Sources) { + source->GetInputTables(tableList); + } + ISource::GetInputTables(tableList); + } + + bool IsStream() const final { + return AnyOf(Sources, [] (const TSourcePtr& s) { return s->IsStream(); }); + } + + bool DoInit(TContext& ctx, ISource* src) final { + for (auto& source: Sources) { + if (AsInner) { + source->UseAsInner(); + } + + if (src) { + src->AddDependentSource(source.Get()); + } + if (!source->Init(ctx, src)) { + return false; + } + if (!source->InitFilters(ctx)) { + return false; + } + } + return true; + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final { + for (auto& source: Sources) { + if (!source->AddColumn(ctx, column)) { + return {}; + } + } + return true; + } + + TNodePtr Build(TContext& ctx) final { + TNodePtr block; + auto muxArgs = Y(); + for (size_t i = 0; i < Sources.size(); ++i) { + auto& source = Sources[i]; + auto input = source->Build(ctx); + auto ref = ctx.MakeName("src"); + muxArgs->Add(ref); + if (block) { + block = L(block, Y("let", ref, input)); + } else { + block = Y(Y("let", ref, input)); + } + auto filter = source->BuildFilter(ctx, ref); + if (filter) { + block = L(block, Y("let", ref, filter)); + } + if (ctx.EnableSystemColumns) { + block = L(block, Y("let", ref, Y("RemoveSystemMembers", ref))); + } + } + return GroundWithExpr(block, Y("Mux", Q(muxArgs))); + } + + bool AddFilter(TContext& ctx, TNodePtr filter) final { + Y_UNUSED(filter); + ctx.Error() << "Filter is not allowed for multiple sources"; + return false; + } + + TPtr DoClone() const final { + return new TMuxSource(Pos, CloneContainer(Sources)); + } + +protected: + TVector<TSourcePtr> Sources; +}; + +TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources) { + return new TMuxSource(pos, std::move(sources)); +} + +class TSubqueryRefNode: public IRealSource { +public: + 
TSubqueryRefNode(const TNodePtr& subquery, const TString& alias, int tupleIndex) + : IRealSource(subquery->GetPos()) + , Subquery(subquery) + , Alias(alias) + , TupleIndex(tupleIndex) + { + YQL_ENSURE(subquery->GetSource()); + } + + ISource* GetSource() override { + return this; + } + + bool DoInit(TContext& ctx, ISource* src) override { + // independent subquery should not connect source + Subquery->UseAsInner(); + if (!Subquery->Init(ctx, nullptr)) { + return false; + } + Columns = *Subquery->GetSource()->GetColumns(); + Node = BuildAtom(Pos, Alias, TNodeFlags::Default); + if (TupleIndex != -1) { + Node = Y("Nth", Node, Q(ToString(TupleIndex))); + } + if (!Node->Init(ctx, src)) { + return false; + } + if (src && Subquery->GetSource()->IsSelect()) { + auto columnsPtr = &Columns; + if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) { + Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0")))); + } else { + ctx.Error(Pos) << "Source used in expression should contain one concrete column"; + return false; + } + } + TNodePtr sample; + if (!BuildSamplingLambda(sample)) { + return false; + } else if (sample) { + Node = Y("block", Q(Y(Y("let", Node, Y("OrderedFlatMap", Node, sample)), Y("return", Node)))); + } + return true; + } + + TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + return Node; + } + + bool SetSamplingOptions( + TContext& ctx, + TPosition pos, + ESampleClause sampleClause, + ESampleMode mode, + TNodePtr samplingRate, + TNodePtr samplingSeed) override { + if (mode == ESampleMode::System) { + ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries"; + return false; + } + if (samplingSeed) { + ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries"; + return false; + } + return SetSamplingRate(ctx, sampleClause, samplingRate); + } + + bool IsStream() const override { + return Subquery->GetSource()->IsStream(); + } 
+ + void DoUpdateState() const override { + State.Set(ENodeState::Const, true); + } + + TAstNode* Translate(TContext& ctx) const override { + Y_DEBUG_ABORT_UNLESS(Node); + return Node->Translate(ctx); + } + + TPtr DoClone() const final { + return new TSubqueryRefNode(Subquery, Alias, TupleIndex); + } + +protected: + TNodePtr Subquery; + const TString Alias; + const int TupleIndex; + TNodePtr Node; +}; + +TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex) { + return new TSubqueryRefNode(std::move(subquery), alias, tupleIndex); +} + +class TInvalidSubqueryRefNode: public ISource { +public: + TInvalidSubqueryRefNode(TPosition pos) + : ISource(pos) + , Pos(pos) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + ctx.Error(Pos) << "Named subquery can not be used as a top level statement in libraries"; + return false; + } + + TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + return {}; + } + + TPtr DoClone() const final { + return new TInvalidSubqueryRefNode(Pos); + } + +protected: + const TPosition Pos; +}; + +TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos) { + return new TInvalidSubqueryRefNode(subqueryPos); +} + +class TTableSource: public IRealSource { +public: + TTableSource(TPosition pos, const TTableRef& table, const TString& label) + : IRealSource(pos) + , Table(table) + , FakeSource(BuildFakeSource(pos)) + { + SetLabel(label.empty() ? 
Table.ShortName() : label); + } + + void GetInputTables(TTableList& tableList) const override { + tableList.push_back(Table); + ISource::GetInputTables(tableList); + } + + bool ShouldUseSourceAsColumn(const TString& source) const override { + const auto& label = GetLabel(); + return source && source != label && !(label.StartsWith(source) && label[source.size()] == ':'); + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + Columns.Add(column.GetColumnName(), column.GetCountHint(), column.IsArtificial(), column.IsReliable()); + if (!IRealSource::AddColumn(ctx, column)) { + return {}; + } + return false; + } + + bool SetSamplingOptions( + TContext& ctx, + TPosition pos, + ESampleClause sampleClause, + ESampleMode mode, + TNodePtr samplingRate, + TNodePtr samplingSeed) override + { + Y_UNUSED(pos); + TString modeName; + if (!samplingSeed) { + samplingSeed = Y("Int32", Q("0")); + } + if (ESampleClause::Sample == sampleClause) { + YQL_ENSURE(ESampleMode::Bernoulli == mode, "Internal logic error"); + } + switch (mode) { + case ESampleMode::Bernoulli: + modeName = "bernoulli"; + break; + case ESampleMode::System: + modeName = "system"; + break; + } + + if (!samplingRate->Init(ctx, FakeSource.Get())) { + return false; + } + + samplingRate = PrepareSamplingRate(pos, sampleClause, samplingRate); + + auto sampleSettings = Q(Y(Q(modeName), Y("EvaluateAtom", Y("ToString", samplingRate)), Y("EvaluateAtom", Y("ToString", samplingSeed)))); + auto sampleOption = Q(Y(Q("sample"), sampleSettings)); + if (Table.Options) { + if (!Table.Options->Init(ctx, this)) { + return false; + } + Table.Options = L(Table.Options, sampleOption); + } else { + Table.Options = Y(sampleOption); + } + return true; + } + + bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override { + Y_UNUSED(ctx); + TTableHints merged = contextHints; + MergeHints(merged, hints); + Table.Options = BuildInputOptions(pos, merged); + return 
true; + } + + bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { + return Table.Keys->SetViewName(ctx, pos, view); + } + + TNodePtr Build(TContext& ctx) override { + if (!Table.Keys->Init(ctx, nullptr)) { + return nullptr; + } + return AstNode(Table.RefName); + } + + bool IsStream() const override { + return IsStreamingService(Table.Service); + } + + TPtr DoClone() const final { + return new TTableSource(Pos, Table, GetLabel()); + } + + bool IsTableSource() const override { + return true; + } +protected: + TTableRef Table; +private: + const TSourcePtr FakeSource; +}; + +TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label) { + return new TTableSource(pos, table, label); +} + +class TInnerSource: public IProxySource { +public: + TInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label) + : IProxySource(pos, nullptr) + , Node(node) + , Service(service) + , Cluster(cluster) + { + SetLabel(label); + } + + bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause sampleClause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed) override { + Y_UNUSED(ctx); + SamplingPos = pos; + SamplingClause = sampleClause; + SamplingMode = mode; + SamplingRate = samplingRate; + SamplingSeed = samplingSeed; + return true; + } + + bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override { + Y_UNUSED(ctx); + HintsPos = pos; + Hints = hints; + ContextHints = contextHints; + return true; + } + + bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override { + Y_UNUSED(ctx); + ViewPos = pos; + View = view; + return true; + } + + bool ShouldUseSourceAsColumn(const TString& source) const override { + return source && source != GetLabel(); + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + if (const TString* columnName = column.GetColumnName()) { 
+ if (columnName && IsExprAlias(*columnName)) { + return true; + } + } + return IProxySource::AddColumn(ctx, column); + } + + bool DoInit(TContext& ctx, ISource* initSrc) override { + Y_UNUSED(initSrc); + auto source = Node->GetSource(); + if (!source) { + NewSource = TryMakeSourceFromExpression(Pos, ctx, Service, Cluster, Node); + source = NewSource.Get(); + } + + if (!source) { + ctx.Error(Pos) << "Invalid inner source node"; + return false; + } + + if (SamplingPos) { + if (!source->SetSamplingOptions(ctx, *SamplingPos, SamplingClause, SamplingMode, SamplingRate, SamplingSeed)) { + return false; + } + } + + if (ViewPos) { + if (!source->SetViewName(ctx, *ViewPos, View)) { + return false; + } + } + + if (HintsPos) { + if (!source->SetTableHints(ctx, *HintsPos, Hints, ContextHints)) { + return false; + } + } + + source->SetLabel(Label); + if (!NewSource) { + Node->UseAsInner(); + if (!Node->Init(ctx, nullptr)) { + return false; + } + } + + SetSource(source); + if (NewSource && !NewSource->Init(ctx, nullptr)) { + return false; + } + + return ISource::DoInit(ctx, source); + } + + TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + return NewSource ? 
NewSource->Build(ctx) : Node; + } + + bool IsStream() const override { + auto source = Node->GetSource(); + if (source) { + return source->IsStream(); + } + // NewSource will be built later in DoInit->TryMakeSourceFromExpression + // where Service will be used in all situations + // let's detect IsStream by Service value + return IsStreamingService(Service); + } + + TPtr DoClone() const final { + return new TInnerSource(Pos, SafeClone(Node), Service, Cluster, GetLabel()); + } +protected: + TNodePtr Node; + TString Service; + TDeferredAtom Cluster; + TSourcePtr NewSource; + +private: + TMaybe<TPosition> SamplingPos; + ESampleClause SamplingClause; + ESampleMode SamplingMode; + TNodePtr SamplingRate; + TNodePtr SamplingSeed; + + TMaybe<TPosition> ViewPos; + TString View; + + TMaybe<TPosition> HintsPos; + TTableHints Hints; + TTableHints ContextHints; +}; + +TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label) { + return new TInnerSource(pos, node, service, cluster, label); +} + +static bool IsComparableExpression(TContext& ctx, const TNodePtr& expr, bool assume, const char* sqlConstruction) { + if (assume && !expr->GetColumnName()) { + ctx.Error(expr->GetPos()) << "Only column names can be used in " << sqlConstruction; + return false; + } + + if (expr->IsConstant()) { + ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " constant expression"; + return false; + } + if (expr->IsAggregated() && !expr->HasState(ENodeState::AggregationKey)) { + ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " aggregated values"; + return false; + } + if (expr->GetColumnName()) { + return true; + } + if (expr->GetOpName().empty()) { + ctx.Error(expr->GetPos()) << "You should use in " << sqlConstruction << " column name, qualified field, callable function or expression"; + return false; + } + return true; +} + +/// \todo move to reduce.cpp? or mapreduce.cpp? 
+class TReduceSource: public IRealSource { +public: + TReduceSource(TPosition pos, + ReduceMode mode, + TSourcePtr source, + TVector<TSortSpecificationPtr>&& orderBy, + TVector<TNodePtr>&& keys, + TVector<TNodePtr>&& args, + TNodePtr udf, + TNodePtr having, + const TWriteSettings& settings, + const TVector<TSortSpecificationPtr>& assumeOrderBy, + bool listCall) + : IRealSource(pos) + , Mode(mode) + , Source(std::move(source)) + , OrderBy(std::move(orderBy)) + , Keys(std::move(keys)) + , Args(std::move(args)) + , Udf(udf) + , Having(having) + , Settings(settings) + , AssumeOrderBy(assumeOrderBy) + , ListCall(listCall) + { + YQL_ENSURE(!Keys.empty()); + YQL_ENSURE(Source); + } + + void GetInputTables(TTableList& tableList) const override { + Source->GetInputTables(tableList); + ISource::GetInputTables(tableList); + } + + bool DoInit(TContext& ctx, ISource* src) final { + if (AsInner) { + Source->UseAsInner(); + } + + YQL_ENSURE(!src); + if (!Source->Init(ctx, src)) { + return false; + } + if (!Source->InitFilters(ctx)) { + return false; + } + src = Source.Get(); + for (auto& key: Keys) { + if (!key->Init(ctx, src)) { + return false; + } + auto keyNamePtr = key->GetColumnName(); + YQL_ENSURE(keyNamePtr); + if (!src->AddGroupKey(ctx, *keyNamePtr)) { + return false; + } + } + if (Having && !Having->Init(ctx, nullptr)) { + return false; + } + + /// SIN: verify reduce one argument + if (Args.size() != 1) { + ctx.Error(Pos) << "REDUCE requires exactly one UDF argument"; + return false; + } + if (!Args[0]->Init(ctx, src)) { + return false; + } + + for (auto orderSpec: OrderBy) { + if (!orderSpec->OrderExpr->Init(ctx, src)) { + return false; + } + } + + if (!Udf->Init(ctx, src)) { + return false; + } + + if (Udf->GetLabel().empty()) { + Columns.SetAll(); + } else { + Columns.Add(&Udf->GetLabel(), false); + } + + const auto label = GetLabel(); + for (const auto& sortSpec: AssumeOrderBy) { + auto& expr = sortSpec->OrderExpr; + SetLabel(Source->GetLabel()); + if 
(!expr->Init(ctx, this)) { + return false; + } + if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) { + return false; + } + } + SetLabel(label); + + return true; + } + + TNodePtr Build(TContext& ctx) final { + auto input = Source->Build(ctx); + if (!input) { + return nullptr; + } + + auto keysTuple = Y(); + if (Keys.size() == 1) { + keysTuple = Y("Member", "row", BuildQuotedAtom(Pos, *Keys.back()->GetColumnName())); + } + else { + for (const auto& key: Keys) { + keysTuple = L(keysTuple, Y("Member", "row", BuildQuotedAtom(Pos, *key->GetColumnName()))); + } + keysTuple = Q(keysTuple); + } + auto extractKey = Y("SqlExtractKey", "row", BuildLambda(Pos, Y("row"), keysTuple)); + auto extractKeyLambda = BuildLambda(Pos, Y("row"), extractKey); + + TNodePtr processPartitions; + if (ListCall) { + if (Mode != ReduceMode::ByAll) { + ctx.Error(Pos) << "TableRows() must be used only with USING ALL"; + return nullptr; + } + + TNodePtr expr = BuildAtom(Pos, "partitionStream"); + processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAllList", TNodeFlags::Default), Udf, expr); + } else { + switch (Mode) { + case ReduceMode::ByAll: { + auto columnPtr = Args[0]->GetColumnName(); + TNodePtr expr = BuildAtom(Pos, "partitionStream"); + if (!columnPtr || *columnPtr != "*") { + expr = Y("Map", "partitionStream", BuildLambda(Pos, Y("keyPair"), Q(L(Y(),\ + Y("Nth", "keyPair", Q(ToString("0"))),\ + Y("Map", Y("Nth", "keyPair", Q(ToString("1"))), BuildLambda(Pos, Y("row"), Args[0])))))); + } + processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAll", TNodeFlags::Default), Udf, expr); + break; + } + case ReduceMode::ByPartition: { + processPartitions = Y("SqlReduce", "partitionStream", extractKeyLambda, Udf, + BuildLambda(Pos, Y("row"), Args[0])); + break; + } + default: + YQL_ENSURE(false, "Unexpected REDUCE mode"); + } + } + + TNodePtr sortDirection; + TNodePtr sortKeySelector; + FillSortParts(OrderBy, sortDirection, 
sortKeySelector); + if (!OrderBy.empty()) { + sortKeySelector = BuildLambda(Pos, Y("row"), Y("SqlExtractKey", "row", sortKeySelector)); + } + + auto partitionByKey = Y(!ListCall && Mode == ReduceMode::ByAll ? "PartitionByKey" : "PartitionsByKeys", "core", extractKeyLambda, + sortDirection, sortKeySelector, BuildLambda(Pos, Y("partitionStream"), processPartitions)); + + auto inputLabel = ListCall ? "inputRowsList" : "core"; + auto block(Y(Y("let", inputLabel, input))); + auto filter = Source->BuildFilter(ctx, inputLabel); + if (filter) { + block = L(block, Y("let", inputLabel, filter)); + } + if (ListCall) { + block = L(block, Y("let", "core", "inputRowsList")); + } + + if (ctx.EnableSystemColumns) { + block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core"))); + } + block = L(block, Y("let", "core", Y("AutoDemux", partitionByKey))); + if (Having) { + block = L(block, Y("let", "core", + Y("Filter", "core", BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))) + )); + } + return Y("block", Q(L(block, Y("return", "core")))); + } + + TNodePtr BuildSort(TContext& ctx, const TString& label) override { + Y_UNUSED(ctx); + if (AssumeOrderBy.empty()) { + return nullptr; + } + + return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true)); + } + + EOrderKind GetOrderKind() const override { + return AssumeOrderBy.empty() ? 
EOrderKind::None : EOrderKind::Assume; + } + + TWriteSettings GetWriteSettings() const final { + return Settings; + } + + bool HasSelectResult() const final { + return !Settings.Discard; + } + + TPtr DoClone() const final { + return new TReduceSource(Pos, Mode, Source->CloneSource(), CloneContainer(OrderBy), + CloneContainer(Keys), CloneContainer(Args), SafeClone(Udf), SafeClone(Having), Settings, + CloneContainer(AssumeOrderBy), ListCall); + } +private: + ReduceMode Mode; + TSourcePtr Source; + TVector<TSortSpecificationPtr> OrderBy; + TVector<TNodePtr> Keys; + TVector<TNodePtr> Args; + TNodePtr Udf; + TNodePtr Having; + const TWriteSettings Settings; + TVector<TSortSpecificationPtr> AssumeOrderBy; + const bool ListCall; +}; + +TSourcePtr BuildReduce(TPosition pos, + ReduceMode mode, + TSourcePtr source, + TVector<TSortSpecificationPtr>&& orderBy, + TVector<TNodePtr>&& keys, + TVector<TNodePtr>&& args, + TNodePtr udf, + TNodePtr having, + const TWriteSettings& settings, + const TVector<TSortSpecificationPtr>& assumeOrderBy, + bool listCall) { + return new TReduceSource(pos, mode, std::move(source), std::move(orderBy), std::move(keys), + std::move(args), udf, having, settings, assumeOrderBy, listCall); +} + +namespace { + +bool InitAndGetGroupKey(TContext& ctx, const TNodePtr& expr, ISource* src, TStringBuf where, TString& keyColumn) { + keyColumn.clear(); + + YQL_ENSURE(src); + const bool isJoin = src->GetJoin(); + + if (!expr->Init(ctx, src)) { + return false; + } + + auto keyNamePtr = expr->GetColumnName(); + if (keyNamePtr && expr->GetLabel().empty()) { + keyColumn = *keyNamePtr; + auto sourceNamePtr = expr->GetSourceName(); + auto columnNode = expr->GetColumnNode(); + if (isJoin && (!columnNode || !columnNode->IsArtificial())) { + if (!sourceNamePtr || sourceNamePtr->empty()) { + if (!src->IsAlias(EExprSeat::GroupBy, keyColumn)) { + ctx.Error(expr->GetPos()) << "Columns in " << where << " should have correlation name, error in key: " << keyColumn; + return 
false; + } + } else { + keyColumn = DotJoin(*sourceNamePtr, keyColumn); + } + } + } + + return true; +} + +} + +class TCompositeSelect: public IRealSource { +public: + TCompositeSelect(TPosition pos, TSourcePtr source, TSourcePtr originalSource, const TWriteSettings& settings) + : IRealSource(pos) + , Source(std::move(source)) + , OriginalSource(std::move(originalSource)) + , Settings(settings) + { + YQL_ENSURE(Source); + } + + void SetSubselects(TVector<TSourcePtr>&& subselects, TVector<TNodePtr>&& grouping, TVector<TNodePtr>&& groupByExpr) { + Subselects = std::move(subselects); + Grouping = std::move(grouping); + GroupByExpr = std::move(groupByExpr); + Y_DEBUG_ABORT_UNLESS(Subselects.size() > 1); + } + + void GetInputTables(TTableList& tableList) const override { + for (const auto& select: Subselects) { + select->GetInputTables(tableList); + } + ISource::GetInputTables(tableList); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (AsInner) { + Source->UseAsInner(); + } + + if (src) { + src->AddDependentSource(Source.Get()); + } + if (!Source->Init(ctx, src)) { + return false; + } + if (!Source->InitFilters(ctx)) { + return false; + } + + if (!CalculateGroupingCols(ctx, src)) { + return false; + } + + auto origSrc = OriginalSource.Get(); + if (!origSrc->Init(ctx, src)) { + return false; + } + + if (origSrc->IsFlattenByColumns() || origSrc->IsFlattenColumns()) { + Flatten = origSrc->IsFlattenByColumns() ? 
+ origSrc->BuildFlattenByColumns("row") : + origSrc->BuildFlattenColumns("row"); + if (!Flatten || !Flatten->Init(ctx, src)) { + return false; + } + } + + if (origSrc->IsFlattenByExprs()) { + for (auto& expr : static_cast<ISource const*>(origSrc)->Expressions(EExprSeat::FlattenByExpr)) { + if (!expr->Init(ctx, origSrc)) { + return false; + } + } + PreFlattenMap = origSrc->BuildPreFlattenMap(ctx); + if (!PreFlattenMap) { + return false; + } + } + + for (const auto& select: Subselects) { + select->SetLabel(Label); + if (AsInner) { + select->UseAsInner(); + } + + if (!select->Init(ctx, Source.Get())) { + return false; + } + } + + TMaybe<size_t> groupingColumnsCount; + size_t idx = 0; + for (const auto& select : Subselects) { + size_t count = select->GetGroupingColumnsCount(); + if (!groupingColumnsCount.Defined()) { + groupingColumnsCount = count; + } else if (*groupingColumnsCount != count) { + ctx.Error(select->GetPos()) << TStringBuilder() << "Mismatch GROUPING() column count in composite select input #" + << idx << ": expected " << *groupingColumnsCount << ", got: " << count << ". Please submit bug report"; + return false; + } + ++idx; + } + return true; + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + for (const auto& select: Subselects) { + if (!select->AddColumn(ctx, column)) { + return {}; + } + } + return true; + } + + TNodePtr Build(TContext& ctx) override { + auto input = Source->Build(ctx); + auto block(Y(Y("let", "composite", input))); + + bool ordered = ctx.UseUnordered(*this); + if (PreFlattenMap) { + block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), PreFlattenMap)))); + } + if (Flatten) { + block = L(block, Y("let", "composite", Y(ordered ? 
"OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), Flatten, "res")))); + } + auto filter = Source->BuildFilter(ctx, "composite"); + if (filter) { + block = L(block, Y("let", "composite", filter)); + } + + TNodePtr compositeNode = Y("UnionAll"); + for (const auto& select: Subselects) { + YQL_ENSURE(dynamic_cast<IComposableSource*>(select.Get())); + auto addNode = select->Build(ctx); + if (!addNode) { + return nullptr; + } + compositeNode->Add(addNode); + } + + block = L(block, Y("let", "core", compositeNode)); + YQL_ENSURE(!Subselects.empty()); + dynamic_cast<IComposableSource*>(Subselects.front().Get())->BuildProjectWindowDistinct(block, ctx, false); + return Y("block", Q(L(block, Y("return", "core")))); + } + + bool IsGroupByColumn(const TString& column) const override { + YQL_ENSURE(!GroupingCols.empty()); + return GroupingCols.contains(column); + } + + const TSet<TString>& GetGroupingCols() const { + return GroupingCols; + } + + TNodePtr BuildSort(TContext& ctx, const TString& label) override { + return Subselects.front()->BuildSort(ctx, label); + } + + EOrderKind GetOrderKind() const override { + return Subselects.front()->GetOrderKind(); + } + + const TColumns* GetColumns() const override{ + return Subselects.front()->GetColumns(); + } + + ISource* RealSource() const { + return Source.Get(); + } + + TWriteSettings GetWriteSettings() const override { + return Settings; + } + + bool HasSelectResult() const override { + return !Settings.Discard; + } + + TNodePtr DoClone() const final { + auto newSource = MakeIntrusive<TCompositeSelect>(Pos, Source->CloneSource(), OriginalSource->CloneSource(), Settings); + newSource->SetSubselects(CloneContainer(Subselects), CloneContainer(Grouping), CloneContainer(GroupByExpr)); + return newSource; + } +private: + bool CalculateGroupingCols(TContext& ctx, ISource* initSrc) { + auto origSrc = OriginalSource->CloneSource(); + if (!origSrc->Init(ctx, initSrc)) { + return false; + } + + bool hasError = false; + 
for (auto& expr: GroupByExpr) { + if (!expr->Init(ctx, origSrc.Get()) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) { + hasError = true; + } + } + if (!origSrc->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) { + hasError = true; + } + + YQL_ENSURE(!Grouping.empty()); + for (auto& grouping : Grouping) { + TString keyColumn; + if (!InitAndGetGroupKey(ctx, grouping, origSrc.Get(), "grouping sets", keyColumn)) { + hasError = true; + } else if (!keyColumn.empty()) { + GroupingCols.insert(keyColumn); + } + } + + return !hasError; + } + + TSourcePtr Source; + TSourcePtr OriginalSource; + TNodePtr Flatten; + TNodePtr PreFlattenMap; + const TWriteSettings Settings; + TVector<TSourcePtr> Subselects; + TVector<TNodePtr> Grouping; + TVector<TNodePtr> GroupByExpr; + TSet<TString> GroupingCols; +}; + +namespace { + TString FullColumnName(const TColumnNode& column) { + YQL_ENSURE(column.GetColumnName()); + TString columnName = *column.GetColumnName(); + if (column.IsUseSource()) { + columnName = DotJoin(*column.GetSourceName(), columnName); + } + return columnName; + } +} + +/// \todo simplify class +class TSelectCore: public IRealSource, public IComposableSource { +public: + TSelectCore( + TPosition pos, + TSourcePtr source, + const TVector<TNodePtr>& groupByExpr, + const TVector<TNodePtr>& groupBy, + bool compactGroupBy, + const TString& groupBySuffix, + bool assumeSorted, + const TVector<TSortSpecificationPtr>& orderBy, + TNodePtr having, + const TWinSpecs& winSpecs, + TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec, + const TVector<TNodePtr>& terms, + bool distinct, + const TVector<TNodePtr>& without, + bool selectStream, + const TWriteSettings& settings, + TColumnsSets&& uniqueSets, + TColumnsSets&& distinctSets + ) + : IRealSource(pos) + , Source(std::move(source)) + , GroupByExpr(groupByExpr) + , GroupBy(groupBy) + , AssumeSorted(assumeSorted) + , CompactGroupBy(compactGroupBy) + , GroupBySuffix(groupBySuffix) + , OrderBy(orderBy) + , Having(having) 
+ , WinSpecs(winSpecs) + , Terms(terms) + , Without(without) + , Distinct(distinct) + , LegacyHoppingWindowSpec(legacyHoppingWindowSpec) + , SelectStream(selectStream) + , Settings(settings) + , UniqueSets(std::move(uniqueSets)) + , DistinctSets(std::move(distinctSets)) + { + } + + void AllColumns() override { + if (!OrderByInit) { + Columns.SetAll(); + } + } + + void GetInputTables(TTableList& tableList) const override { + Source->GetInputTables(tableList); + ISource::GetInputTables(tableList); + } + + size_t GetGroupingColumnsCount() const override { + return Source->GetGroupingColumnsCount(); + } + + bool DoInit(TContext& ctx, ISource* initSrc) override { + if (AsInner) { + Source->UseAsInner(); + } + + if (!Source->Init(ctx, initSrc)) { + return false; + } + if (SelectStream && !Source->IsStream()) { + ctx.Error(Pos) << "SELECT STREAM is unsupported for non-streaming sources"; + return false; + } + + auto src = Source.Get(); + bool hasError = false; + + if (src->IsFlattenByExprs()) { + for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) { + if (!expr->Init(ctx, src)) { + hasError = true; + continue; + } + } + } + + if (hasError) { + return false; + } + + src->SetCompactGroupBy(CompactGroupBy); + src->SetGroupBySuffix(GroupBySuffix); + + for (auto& term: Terms) { + term->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr); + } + + if (Having) { + Having->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr); + } + + for (auto& expr: GroupByExpr) { + if (auto sessionWindow = dynamic_cast<TSessionWindow*>(expr.Get())) { + if (Source->IsStream()) { + ctx.Error(Pos) << "SessionWindow is unsupported for streaming sources"; + return false; + } + sessionWindow->MarkValid(); + } + + if (auto hoppingWindow = dynamic_cast<THoppingWindow*>(expr.Get())) { + hoppingWindow->MarkValid(); + } + + // need to collect and Init() preaggregated exprs before calling Init() on GROUP BY expression + TVector<TNodePtr> distinctAggrsInGroupBy; + 
expr->CollectPreaggregateExprs(ctx, *src, distinctAggrsInGroupBy); + for (auto& distinct : distinctAggrsInGroupBy) { + if (!distinct->Init(ctx, src)) { + return false; + } + } + DistinctAggrExpr.insert(DistinctAggrExpr.end(), distinctAggrsInGroupBy.begin(), distinctAggrsInGroupBy.end()); + + if (!expr->Init(ctx, src) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) { + hasError = true; + } + } + if (hasError || !src->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) { + return false; + } + + for (auto& expr: DistinctAggrExpr) { + if (!expr->Init(ctx, src)) { + hasError = true; + } + } + if (hasError || !src->AddExpressions(ctx, DistinctAggrExpr, EExprSeat::DistinctAggr)) { + return false; + } + + /// grouped expressions are available in filters + if (!Source->InitFilters(ctx)) { + return false; + } + + for (auto& expr: GroupBy) { + TString usedColumn; + if (!InitAndGetGroupKey(ctx, expr, src, "GROUP BY", usedColumn)) { + hasError = true; + } else if (usedColumn) { + if (!src->AddGroupKey(ctx, usedColumn)) { + hasError = true; + } + } + } + + if (hasError) { + return false; + } + + if (Having && !Having->Init(ctx, src)) { + return false; + } + src->AddWindowSpecs(WinSpecs); + + const bool isJoin = Source->GetJoin(); + if (!InitSelect(ctx, src, isJoin, hasError)) { + return false; + } + + src->FinishColumns(); + auto aggRes = src->BuildAggregation("core", ctx); + if (!aggRes.second) { + return false; + } + + Aggregate = aggRes.first; + if (src->IsFlattenByColumns() || src->IsFlattenColumns()) { + Flatten = src->IsFlattenByColumns() ? 
+ src->BuildFlattenByColumns("row") : + src->BuildFlattenColumns("row"); + if (!Flatten || !Flatten->Init(ctx, src)) { + return false; + } + } + + if (src->IsFlattenByExprs()) { + PreFlattenMap = src->BuildPreFlattenMap(ctx); + if (!PreFlattenMap) { + return false; + } + } + + if (GroupByExpr || DistinctAggrExpr) { + PreaggregatedMap = src->BuildPreaggregatedMap(ctx); + if (!PreaggregatedMap) { + return false; + } + } + if (Aggregate) { + if (!Aggregate->Init(ctx, src)) { + return false; + } + if (Having) { + Aggregate = Y( + "Filter", + Aggregate, + BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false")))) + ); + } + } else if (Having) { + if (Distinct) { + Aggregate = Y( + "Filter", + "core", + BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false")))) + ); + ctx.Warning(Having->GetPos(), TIssuesIds::YQL_HAVING_WITHOUT_AGGREGATION_IN_SELECT_DISTINCT) + << "The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. 
Please use WHERE instead."; + } else { + ctx.Error(Having->GetPos()) << "HAVING with meaning GROUP BY () should be with aggregation function."; + return false; + } + } else if (!Distinct && !GroupBy.empty()) { + ctx.Error(Pos) << "No aggregations were specified"; + return false; + } + if (hasError) { + return false; + } + + if (src->IsCalcOverWindow()) { + if (src->IsExprSeat(EExprSeat::WindowPartitionBy, EExprType::WithExpression)) { + PrewindowMap = src->BuildPrewindowMap(ctx); + if (!PrewindowMap) { + return false; + } + } + CalcOverWindow = src->BuildCalcOverWindow(ctx, "core"); + if (!CalcOverWindow || !CalcOverWindow->Init(ctx, src)) { + return false; + } + } + return true; + } + + TNodePtr Build(TContext& ctx) override { + auto input = Source->Build(ctx); + if (!input) { + return nullptr; + } + + auto block(Y(Y("let", "core", input))); + + if (Source->HasMatchRecognize()) { + if (auto matchRecognize = Source->BuildMatchRecognize(ctx, "core")) { + //use unique name match_recognize to find this block easily in unit tests + block = L(block, Y("let", "match_recognize", matchRecognize)); + //then bind to the conventional name + block = L(block, Y("let", "core", "match_recognize")); + } else { + return nullptr; + } + } + + bool ordered = ctx.UseUnordered(*this); + if (PreFlattenMap) { + block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), PreFlattenMap)))); + } + if (Flatten) { + block = L(block, Y("let", "core", Y(ordered ? 
"OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Flatten, "res")))); + } + if (PreaggregatedMap) { + block = L(block, Y("let", "core", PreaggregatedMap)); + if (Source->IsCompositeSource() && !Columns.QualifiedAll) { + block = L(block, Y("let", "preaggregated", "core")); + } + } else if (Source->IsCompositeSource() && !Columns.QualifiedAll) { + block = L(block, Y("let", "origcore", "core")); + } + auto filter = Source->BuildFilter(ctx, "core"); + if (filter) { + block = L(block, Y("let", "core", filter)); + } + if (Aggregate) { + block = L(block, Y("let", "core", Aggregate)); + ordered = false; + } + + const bool haveCompositeTerms = Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty(); + if (haveCompositeTerms) { + // column order does not matter here - it will be set in projection + YQL_ENSURE(Aggregate); + block = L(block, Y("let", "core", Y("Map", "core", BuildLambda(Pos, Y("row"), CompositeTerms, "row")))); + } + + if (auto grouping = Source->BuildGroupingColumns("core")) { + block = L(block, Y("let", "core", grouping)); + } + + if (!Source->GetCompositeSource()) { + BuildProjectWindowDistinct(block, ctx, ordered); + } + + return Y("block", Q(L(block, Y("return", "core")))); + } + + void BuildProjectWindowDistinct(TNodePtr& block, TContext& ctx, bool ordered) override { + if (PrewindowMap) { + block = L(block, Y("let", "core", PrewindowMap)); + } + if (CalcOverWindow) { + block = L(block, Y("let", "core", CalcOverWindow)); + } + + block = L(block, Y("let", "core", Y("PersistableRepr", BuildSqlProject(ctx, ordered)))); + + if (Distinct) { + block = L(block, Y("let", "core", Y("PersistableRepr", Y("SqlAggregateAll", Y("RemoveSystemMembers", "core"))))); + } + } + + TNodePtr BuildSort(TContext& ctx, const TString& label) override { + Y_UNUSED(ctx); + if (OrderBy.empty() || DisableSort_) { + return nullptr; + } + + auto sorted = BuildSortSpec(OrderBy, label, false, AssumeSorted); + if 
(ExtraSortColumns.empty()) { + return Y("let", label, sorted); + } + auto body = Y(); + for (const auto& [column, _] : ExtraSortColumns) { + body = L(body, Y("let", "row", Y("RemoveMember", "row", Q(column)))); + } + body = L(body, Y("let", "res", "row")); + return Y("let", label, Y("OrderedMap", sorted, BuildLambda(Pos, Y("row"), body, "res"))); + } + + TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label) override { + TNodePtr cleanup; + if (ctx.EnableSystemColumns && ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW) { + if (Columns.All) { + cleanup = Y("let", label, Y("RemoveSystemMembers", label)); + } else if (!Columns.List.empty()) { + const bool isJoin = Source->GetJoin(); + if (!isJoin && Columns.QualifiedAll) { + if (ctx.SimpleColumns) { + cleanup = Y("let", label, Y("RemoveSystemMembers", label)); + } else { + TNodePtr members; + for (auto& term: Terms) { + if (term->IsAsterisk()) { + auto sourceName = term->GetSourceName(); + YQL_ENSURE(*sourceName && !sourceName->empty()); + auto prefix = *sourceName + "._yql_"; + members = members ? L(members, Q(prefix)) : Y(Q(prefix)); + } + } + if (members) { + cleanup = Y("let", label, Y("RemovePrefixMembers", label, Q(members))); + } + } + } + } + } + return cleanup; + } + + bool IsSelect() const override { + return true; + } + + bool HasSelectResult() const override { + return !Settings.Discard; + } + + bool IsStream() const override { + return Source->IsStream(); + } + + EOrderKind GetOrderKind() const override { + if (OrderBy.empty()) { + return EOrderKind::None; + } + return AssumeSorted ? 
EOrderKind::Assume : EOrderKind::Sort; + } + + TWriteSettings GetWriteSettings() const override { + return Settings; + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + const bool aggregated = Source->HasAggregations() || Distinct; + if (OrderByInit && (Source->GetJoin() || !aggregated)) { + // ORDER BY will try to find column not only in projection items, but also in Source. + // ```SELECT a, b FROM T ORDER BY c``` should work if c is present in T + const bool reliable = column.IsReliable(); + column.SetAsNotReliable(); + auto maybeExist = IRealSource::AddColumn(ctx, column); + if (reliable && !Source->GetJoin()) { + column.ResetAsReliable(); + } + if (!maybeExist || !maybeExist.GetRef()) { + maybeExist = Source->AddColumn(ctx, column); + } + if (!maybeExist.Defined()) { + return maybeExist; + } + if (!DisableSort_ && !aggregated && column.GetColumnName() && IsMissingInProjection(ctx, column)) { + ExtraSortColumns[FullColumnName(column)] = &column; + } + return maybeExist; + } + + return IRealSource::AddColumn(ctx, column); + } + + bool IsMissingInProjection(TContext& ctx, const TColumnNode& column) const { + TString columnName = FullColumnName(column); + if (Columns.Real.contains(columnName) || Columns.Artificial.contains(columnName)) { + return false; + } + + if (!Columns.IsColumnPossible(ctx, columnName)) { + return true; + } + + for (auto without: Without) { + auto name = *without->GetColumnName(); + if (Source && Source->GetJoin()) { + name = DotJoin(*without->GetSourceName(), name); + } + if (name == columnName) { + return true; + } + } + + return false; + } + + TNodePtr PrepareWithout(const TNodePtr& base) { + auto terms = base; + if (Without) { + for (auto without: Without) { + auto name = *without->GetColumnName(); + if (Source && Source->GetJoin()) { + name = DotJoin(*without->GetSourceName(), name); + } + terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(name)))); + } + } + + if (Source) { + for (auto column : 
Source->GetTmpWindowColumns()) { + terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(column)))); + } + } + + return terms; + } + + TNodePtr DoClone() const final { + return new TSelectCore(Pos, Source->CloneSource(), CloneContainer(GroupByExpr), + CloneContainer(GroupBy), CompactGroupBy, GroupBySuffix, AssumeSorted, CloneContainer(OrderBy), + SafeClone(Having), CloneContainer(WinSpecs), SafeClone(LegacyHoppingWindowSpec), + CloneContainer(Terms), Distinct, Without, SelectStream, Settings, TColumnsSets(UniqueSets), TColumnsSets(DistinctSets)); + } + +private: + bool InitSelect(TContext& ctx, ISource* src, bool isJoin, bool& hasError) { + for (auto& [name, winSpec] : WinSpecs) { + for (size_t i = 0; i < winSpec->Partitions.size(); ++i) { + auto partitionNode = winSpec->Partitions[i]; + if (auto sessionWindow = dynamic_cast<TSessionWindow*>(partitionNode.Get())) { + if (winSpec->Session) { + ctx.Error(partitionNode->GetPos()) << "Duplicate session window specification:"; + ctx.Error(winSpec->Session->GetPos()) << "Previous session window is declared here"; + hasError = true; + continue; + } + sessionWindow->MarkValid(); + winSpec->Session = partitionNode; + } + + if (!partitionNode->Init(ctx, src)) { + hasError = true; + continue; + } + if (!partitionNode->GetLabel() && !partitionNode->GetColumnName()) { + TString label = TStringBuilder() << "group_" << name << "_" << i; + partitionNode->SetLabel(label); + src->AddTmpWindowColumn(label); + } + } + if (!src->AddExpressions(ctx, winSpec->Partitions, EExprSeat::WindowPartitionBy)) { + hasError = true; + } + } + + if (LegacyHoppingWindowSpec) { + if (!LegacyHoppingWindowSpec->TimeExtractor->Init(ctx, src)) { + hasError = true; + } + src->SetLegacyHoppingWindowSpec(LegacyHoppingWindowSpec); + } + + for (auto& term: Terms) { + if (!term->Init(ctx, src)) { + hasError = true; + continue; + } + auto column = term->GetColumnName(); + TString label(term->GetLabel()); + bool hasName = true; + if (label.empty()) { + auto 
source = term->GetSourceName(); + if (term->IsAsterisk() && !source->empty()) { + Columns.QualifiedAll = true; + label = DotJoin(*source, "*"); + } else if (column) { + label = isJoin && source && *source ? DotJoin(*source, *column) : *column; + } else { + label = Columns.AddUnnamed(); + hasName = false; + if (ctx.WarnUnnamedColumns) { + ctx.Warning(term->GetPos(), TIssuesIds::YQL_UNNAMED_COLUMN) + << "Autogenerated column name " << label << " will be used for expression"; + } + } + } + if (hasName && !Columns.Add(&label, false, false, true)) { + ctx.Error(Pos) << "Duplicate column: " << label; + hasError = true; + } + } + + CompositeTerms = Y(); + if (!hasError && Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty()) { + auto compositeSrcPtr = static_cast<TCompositeSelect*>(Source->GetCompositeSource()); + if (compositeSrcPtr) { + const auto& groupings = compositeSrcPtr->GetGroupingCols(); + for (const auto& column: groupings) { + if (Source->IsGroupByColumn(column)) { + continue; + } + const TString tableName = (GroupByExpr || DistinctAggrExpr) ? 
"preaggregated" : "origcore"; + CompositeTerms = L(CompositeTerms, Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(Pos, column), Y("Nothing", Y("MatchType", + Y("StructMemberType", Y("ListItemType", Y("TypeOf", tableName)), Q(column)), + Q("Optional"), Y("lambda", Q(Y("item")), "item"), Y("lambda", Q(Y("item")), Y("OptionalType", "item"))))))); + } + } + } + + for (auto iter: WinSpecs) { + auto winSpec = *iter.second; + for (auto orderSpec: winSpec.OrderBy) { + if (!orderSpec->OrderExpr->Init(ctx, src)) { + hasError = true; + } + } + } + + if (Columns.All || Columns.QualifiedAll) { + Source->AllColumns(); + } + for (const auto& without: Without) { + auto namePtr = without->GetColumnName(); + auto sourcePtr = without->GetSourceName(); + YQL_ENSURE(namePtr && *namePtr); + if (isJoin && !(sourcePtr && *sourcePtr)) { + ctx.Error(without->GetPos()) << "Expected correlation name for WITHOUT in JOIN"; + hasError = true; + continue; + } + } + if (Having && !Having->Init(ctx, src)) { + hasError = true; + } + if (!src->IsCompositeSource() && !Columns.All && src->HasAggregations()) { + WarnIfAliasFromSelectIsUsedInGroupBy(ctx, Terms, GroupBy, GroupByExpr); + + /// verify select aggregation compatibility + TVector<TNodePtr> exprs(Terms); + if (Having) { + exprs.push_back(Having); + } + for (const auto& iter: WinSpecs) { + for (const auto& sortSpec: iter.second->OrderBy) { + exprs.push_back(sortSpec->OrderExpr); + } + } + if (!ValidateAllNodesForAggregation(ctx, exprs)) { + hasError = true; + } + } + const auto label = GetLabel(); + for (const auto& sortSpec: OrderBy) { + auto& expr = sortSpec->OrderExpr; + SetLabel(Source->GetLabel()); + OrderByInit = true; + if (!expr->Init(ctx, this)) { + hasError = true; + continue; + } + OrderByInit = false; + if (!IsComparableExpression(ctx, expr, AssumeSorted, AssumeSorted ? 
"ASSUME ORDER BY" : "ORDER BY")) { + hasError = true; + continue; + } + } + SetLabel(label); + + return !hasError; + } + + TNodePtr PrepareJoinCoalesce(TContext& ctx, const TNodePtr& base, bool multipleQualifiedAll, const TVector<TString>& coalesceLabels) { + const bool isJoin = Source->GetJoin(); + const bool needCoalesce = isJoin && ctx.SimpleColumns && + (Columns.All || multipleQualifiedAll || ctx.CoalesceJoinKeysOnQualifiedAll); + + if (!needCoalesce) { + return base; + } + + auto terms = base; + const auto& sameKeyMap = Source->GetJoin()->GetSameKeysMap(); + if (sameKeyMap) { + terms = L(terms, Y("let", "flatSameKeys", "row")); + for (const auto& [key, sources]: sameKeyMap) { + auto coalesceKeys = Y(); + for (const auto& label : coalesceLabels) { + if (sources.contains(label)) { + coalesceKeys = L(coalesceKeys, Q(DotJoin(label, key))); + } + } + terms = L(terms, Y("let", "flatSameKeys", Y("CoalesceMembers", "flatSameKeys", Q(coalesceKeys)))); + } + terms = L(terms, Y("let", "row", "flatSameKeys")); + } + + return terms; + } + + TNodePtr BuildSqlProject(TContext& ctx, bool ordered) { + auto sqlProjectArgs = Y(); + const bool isJoin = Source->GetJoin(); + + if (Columns.All) { + YQL_ENSURE(Columns.List.empty()); + auto terms = PrepareWithout(Y()); + auto options = Y(); + if (isJoin && ctx.SimpleColumns) { + terms = PrepareJoinCoalesce(ctx, terms, false, Source->GetJoin()->GetJoinLabels()); + + auto members = Y(); + for (auto& source : Source->GetJoin()->GetJoinLabels()) { + YQL_ENSURE(!source.empty()); + members = L(members, BuildQuotedAtom(Pos, source + ".")); + } + if (GroupByExpr.empty() || ctx.BogousStarInGroupByOverJoin) { + terms = L(terms, Y("let", "res", Y("DivePrefixMembers", "row", Q(members)))); + } else { + auto groupExprStruct = Y("AsStruct"); + for (auto node : GroupByExpr) { + auto label = node->GetLabel(); + YQL_ENSURE(label); + if (Source->IsGroupByColumn(label)) { + auto name = BuildQuotedAtom(Pos, label); + groupExprStruct = L(groupExprStruct, 
Q(Y(name, Y("Member", "row", name)))); + } + } + auto groupColumnsStruct = Y("DivePrefixMembers", "row", Q(members)); + + terms = L(terms, Y("let", "res", Y("FlattenMembers", Q(Y(BuildQuotedAtom(Pos, ""), groupExprStruct)), + Q(Y(BuildQuotedAtom(Pos, ""), groupColumnsStruct))))); + } + options = L(options, Q(Y(Q("divePrefix"), Q(members)))); + } else { + terms = L(terms, Y("let", "res", "row")); + } + sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, ""), BuildLambda(Pos, Y("row"), terms, "res"), Q(options))); + } else { + YQL_ENSURE(!Columns.List.empty()); + YQL_ENSURE(Columns.List.size() == Terms.size()); + + TVector<TString> coalesceLabels; + bool multipleQualifiedAll = false; + + if (isJoin && ctx.SimpleColumns) { + THashSet<TString> starTerms; + for (auto& term: Terms) { + if (term->IsAsterisk()) { + auto sourceName = term->GetSourceName(); + YQL_ENSURE(*sourceName && !sourceName->empty()); + YQL_ENSURE(Columns.QualifiedAll); + starTerms.insert(*sourceName); + } + } + + TVector<TString> matched; + TVector<TString> unmatched; + for (auto& label : Source->GetJoin()->GetJoinLabels()) { + if (starTerms.contains(label)) { + matched.push_back(label); + } else { + unmatched.push_back(label); + } + } + + coalesceLabels.insert(coalesceLabels.end(), matched.begin(), matched.end()); + coalesceLabels.insert(coalesceLabels.end(), unmatched.begin(), unmatched.end()); + + multipleQualifiedAll = starTerms.size() > 1; + } + + auto column = Columns.List.begin(); + auto isNamedColumn = Columns.NamedColumns.begin(); + for (auto& term: Terms) { + auto sourceName = term->GetSourceName(); + if (!term->IsAsterisk()) { + auto body = Y(); + body = L(body, Y("let", "res", term)); + TPosition lambdaPos = Pos; + TPosition aliasPos = Pos; + if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) { + // TODO: recanonize for positions below + lambdaPos = term->GetPos(); + aliasPos = term->GetLabelPos() ? 
*term->GetLabelPos() : lambdaPos; + } + auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *isNamedColumn ? *column : ""), BuildLambda(lambdaPos, Y("row"), body, "res")); + if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) { + projectItem = L(projectItem, Q(Y(Q(Y(Q("warnShadow")))))); + } + if (!*isNamedColumn) { + projectItem = L(projectItem, Q(Y(Q(Y(Q("autoName")))))); + } + sqlProjectArgs = L(sqlProjectArgs, projectItem); + } else { + auto terms = PrepareWithout(Y()); + auto options = Y(); + if (ctx.SimpleColumns && !isJoin) { + terms = L(terms, Y("let", "res", "row")); + } else { + terms = PrepareJoinCoalesce(ctx, terms, multipleQualifiedAll, coalesceLabels); + + auto members = isJoin ? Y() : Y("FlattenMembers"); + if (isJoin) { + members = L(members, BuildQuotedAtom(Pos, *sourceName + ".")); + if (ctx.SimpleColumns) { + options = L(options, Q(Y(Q("divePrefix"), Q(members)))); + } + members = Y(ctx.SimpleColumns ? "DivePrefixMembers" : "SelectMembers", "row", Q(members)); + } else { + auto prefix = BuildQuotedAtom(Pos, ctx.SimpleColumns ? "" : *sourceName + "."); + members = L(members, Q(Y(prefix, "row"))); + if (!ctx.SimpleColumns) { + options = L(options, Q(Y(Q("addPrefix"), prefix))); + } + } + + terms = L(terms, Y("let", "res", members)); + } + sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, *sourceName), BuildLambda(Pos, Y("row"), terms, "res"), Q(options))); + } + ++column; + ++isNamedColumn; + } + } + + for (const auto& [columnName, column]: ExtraSortColumns) { + auto body = Y(); + body = L(body, Y("let", "res", column)); + TPosition pos = column->GetPos(); + auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res")); + sqlProjectArgs = L(sqlProjectArgs, projectItem); + } + + auto block(Y(Y("let", "projectCoreType", Y("TypeOf", "core")))); + block = L(block, Y("let", "core", Y(ordered ? 
"OrderedSqlProject" : "SqlProject", "core", Q(sqlProjectArgs)))); + if (!(UniqueSets.empty() && DistinctSets.empty())) { + block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core"))); + const auto MakeUniqueHint = [this](INode::TPtr& block, const TColumnsSets& sets, bool distinct) { + if (!sets.empty()) { + auto assume = Y(distinct ? "AssumeDistinctHint" : "AssumeUniqueHint", "core"); + if (!sets.front().empty()) { + for (const auto& columns : sets) { + auto set = Y(); + for (const auto& column : columns) { + set = L(set, Q(column)); + } + + assume = L(assume, Q(set)); + } + } + block = L(block, Y("let", "core", assume)); + } + }; + + MakeUniqueHint(block, DistinctSets, true); + MakeUniqueHint(block, UniqueSets, false); + } + + return Y("block", Q(L(block, Y("return", "core")))); + } + +private: + TSourcePtr Source; + TVector<TNodePtr> GroupByExpr; + TVector<TNodePtr> DistinctAggrExpr; + TVector<TNodePtr> GroupBy; + bool AssumeSorted = false; + bool CompactGroupBy = false; + TString GroupBySuffix; + TVector<TSortSpecificationPtr> OrderBy; + TNodePtr Having; + TWinSpecs WinSpecs; + TNodePtr Flatten; + TNodePtr PreFlattenMap; + TNodePtr PreaggregatedMap; + TNodePtr PrewindowMap; + TNodePtr Aggregate; + TNodePtr CalcOverWindow; + TNodePtr CompositeTerms; + TVector<TNodePtr> Terms; + TVector<TNodePtr> Without; + const bool Distinct; + bool OrderByInit = false; + TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec; + const bool SelectStream; + const TWriteSettings Settings; + const TColumnsSets UniqueSets, DistinctSets; + TMap<TString, TNodePtr> ExtraSortColumns; +}; + +class TProcessSource: public IRealSource { +public: + TProcessSource( + TPosition pos, + TSourcePtr source, + TNodePtr with, + bool withExtFunction, + TVector<TNodePtr>&& terms, + bool listCall, + bool processStream, + const TWriteSettings& settings, + const TVector<TSortSpecificationPtr>& assumeOrderBy + ) + : IRealSource(pos) + , Source(std::move(source)) + , With(with) + , 
WithExtFunction(withExtFunction) + , Terms(std::move(terms)) + , ListCall(listCall) + , ProcessStream(processStream) + , Settings(settings) + , AssumeOrderBy(assumeOrderBy) + { + } + + void GetInputTables(TTableList& tableList) const override { + Source->GetInputTables(tableList); + ISource::GetInputTables(tableList); + } + + bool DoInit(TContext& ctx, ISource* initSrc) override { + if (AsInner) { + Source->UseAsInner(); + } + + if (!Source->Init(ctx, initSrc)) { + return false; + } + + if (ProcessStream && !Source->IsStream()) { + ctx.Error(Pos) << "PROCESS STREAM is unsupported for non-streaming sources"; + return false; + } + + auto src = Source.Get(); + if (!With) { + src->AllColumns(); + Columns.SetAll(); + src->FinishColumns(); + return true; + } + + /// grouped expressions are available in filters + if (!Source->InitFilters(ctx)) { + return false; + } + + TSourcePtr fakeSource = nullptr; + if (ListCall && !WithExtFunction) { + fakeSource = BuildFakeSource(src->GetPos()); + src->AllColumns(); + } + + auto processSource = fakeSource != nullptr ? fakeSource.Get() : src; + Y_DEBUG_ABORT_UNLESS(processSource != nullptr); + if (!With->Init(ctx, processSource)) { + return false; + } + if (With->GetLabel().empty()) { + Columns.SetAll(); + } else { + if (ListCall) { + ctx.Error(With->GetPos()) << "Label is not allowed to use with TableRows()"; + return false; + } + Columns.Add(&With->GetLabel(), false); + } + + bool hasError = false; + + TNodePtr produce; + if (WithExtFunction) { + produce = Y(); + } else { + TString processCall = (ListCall ? 
"SqlProcess" : "Apply"); + produce = Y(processCall, With); + } + TMaybe<ui32> listPosIndex; + ui32 termIndex = 0; + for (auto& term: Terms) { + if (!term->GetLabel().empty()) { + ctx.Error(term->GetPos()) << "Labels are not allowed for PROCESS terms"; + hasError = true; + continue; + } + + if (!term->Init(ctx, processSource)) { + hasError = true; + continue; + } + + if (ListCall) { + if (auto atom = dynamic_cast<TTableRows*>(term.Get())) { + listPosIndex = termIndex; + } + } + ++termIndex; + + produce = L(produce, term); + } + + if (hasError) { + return false; + } + + if (ListCall && !WithExtFunction) { + YQL_ENSURE(listPosIndex.Defined()); + produce = L(produce, Q(ToString(*listPosIndex))); + } + + if (!produce->Init(ctx, src)) { + hasError = true; + } + + if (!(WithExtFunction && Terms.empty())) { + TVector<TNodePtr>(1, produce).swap(Terms); + } + + src->FinishColumns(); + + const auto label = GetLabel(); + for (const auto& sortSpec: AssumeOrderBy) { + auto& expr = sortSpec->OrderExpr; + SetLabel(Source->GetLabel()); + if (!expr->Init(ctx, this)) { + hasError = true; + continue; + } + if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) { + hasError = true; + continue; + } + } + SetLabel(label); + + return !hasError; + } + + TNodePtr Build(TContext& ctx) override { + auto input = Source->Build(ctx); + if (!input) { + return nullptr; + } + + if (!With) { + auto res = input; + if (ctx.EnableSystemColumns) { + res = Y("RemoveSystemMembers", res); + } + + return res; + } + + TString inputLabel = ListCall ? 
"inputRowsList" : "core"; + + auto block(Y(Y("let", inputLabel, input))); + + auto filter = Source->BuildFilter(ctx, inputLabel); + if (filter) { + block = L(block, Y("let", inputLabel, filter)); + } + + if (WithExtFunction) { + auto preTransform = Y("RemoveSystemMembers", inputLabel); + if (Terms.size() > 0) { + preTransform = Y("Map", preTransform, BuildLambda(Pos, Y("row"), Q(Terms[0]))); + } + block = L(block, Y("let", inputLabel, preTransform)); + block = L(block, Y("let", "transform", With)); + block = L(block, Y("let", "core", Y("Apply", "transform", inputLabel))); + } else if (ListCall) { + block = L(block, Y("let", "core", Terms[0])); + } else { + auto terms = BuildColumnsTerms(ctx); + block = L(block, Y("let", "core", Y(ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), terms, "res")))); + } + block = L(block, Y("let", "core", Y("AutoDemux", Y("PersistableRepr", "core")))); + return Y("block", Q(L(block, Y("return", "core")))); + } + + TNodePtr BuildSort(TContext& ctx, const TString& label) override { + Y_UNUSED(ctx); + if (AssumeOrderBy.empty()) { + return nullptr; + } + + return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true)); + } + + EOrderKind GetOrderKind() const override { + if (!With) { + return EOrderKind::Passthrough; + } + return AssumeOrderBy.empty() ? 
EOrderKind::None : EOrderKind::Assume; + } + + bool IsSelect() const override { + return false; + } + + bool HasSelectResult() const override { + return !Settings.Discard; + } + + bool IsStream() const override { + return Source->IsStream(); + } + + TWriteSettings GetWriteSettings() const override { + return Settings; + } + + TNodePtr DoClone() const final { + return new TProcessSource(Pos, Source->CloneSource(), SafeClone(With), WithExtFunction, + CloneContainer(Terms), ListCall, ProcessStream, Settings, CloneContainer(AssumeOrderBy)); + } + +private: + TNodePtr BuildColumnsTerms(TContext& ctx) { + Y_UNUSED(ctx); + TNodePtr terms; + Y_DEBUG_ABORT_UNLESS(Terms.size() == 1); + if (Columns.All) { + terms = Y(Y("let", "res", Y("ToSequence", Terms.front()))); + } else { + Y_DEBUG_ABORT_UNLESS(Columns.List.size() == Terms.size()); + terms = L(Y(), Y("let", "res", + L(Y("AsStructUnordered"), Q(Y(BuildQuotedAtom(Pos, Columns.List.front()), Terms.front()))))); + terms = L(terms, Y("let", "res", Y("Just", "res"))); + } + return terms; + } + +private: + TSourcePtr Source; + TNodePtr With; + const bool WithExtFunction; + TVector<TNodePtr> Terms; + const bool ListCall; + const bool ProcessStream; + const TWriteSettings Settings; + TVector<TSortSpecificationPtr> AssumeOrderBy; +}; + +TSourcePtr BuildProcess( + TPosition pos, + TSourcePtr source, + TNodePtr with, + bool withExtFunction, + TVector<TNodePtr>&& terms, + bool listCall, + bool processStream, + const TWriteSettings& settings, + const TVector<TSortSpecificationPtr>& assumeOrderBy +) { + return new TProcessSource(pos, std::move(source), with, withExtFunction, std::move(terms), listCall, processStream, settings, assumeOrderBy); +} + +class TNestedProxySource: public IProxySource { +public: + TNestedProxySource(TPosition pos, const TVector<TNodePtr>& groupBy, TSourcePtr source) + : IProxySource(pos, source.Get()) + , CompositeSelect(nullptr) + , Holder(std::move(source)) + , GroupBy(groupBy) + {} + + 
TNestedProxySource(TCompositeSelect* compositeSelect, const TVector<TNodePtr>& groupBy) + : IProxySource(compositeSelect->GetPos(), compositeSelect->RealSource()) + , CompositeSelect(compositeSelect) + , GroupBy(groupBy) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + return Source->Init(ctx, src); + } + + TNodePtr Build(TContext& ctx) override { + return CompositeSelect ? BuildAtom(Pos, "composite", TNodeFlags::Default) : Source->Build(ctx); + } + + bool InitFilters(TContext& ctx) override { + return CompositeSelect ? true : Source->InitFilters(ctx); + } + + TNodePtr BuildFilter(TContext& ctx, const TString& label) override { + return CompositeSelect ? nullptr : Source->BuildFilter(ctx, label); + } + + IJoin* GetJoin() override { + return Source->GetJoin(); + } + + bool IsCompositeSource() const override { + return true; + } + + ISource* GetCompositeSource() override { + return CompositeSelect; + } + + bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& hintColumn) override { + Y_UNUSED(ctx); + hintColumn = TStringBuilder() << "GroupingHint" << Hints.size(); + ui64 hint = 0; + if (GroupByColumns.empty()) { + const bool isJoin = GetJoin(); + for (const auto& groupByNode: GroupBy) { + auto namePtr = groupByNode->GetColumnName(); + YQL_ENSURE(namePtr); + TString column = *namePtr; + if (isJoin) { + auto sourceNamePtr = groupByNode->GetSourceName(); + if (sourceNamePtr && !sourceNamePtr->empty()) { + column = DotJoin(*sourceNamePtr, column); + } + } + GroupByColumns.insert(column); + } + } + for (const auto& column: columns) { + hint <<= 1; + if (!GroupByColumns.contains(column)) { + hint += 1; + } + } + Hints.push_back(hint); + return true; + } + + size_t GetGroupingColumnsCount() const override { + return Hints.size(); + } + + TNodePtr BuildGroupingColumns(const TString& label) override { + if (Hints.empty()) { + return nullptr; + } + + auto body = Y(); + for (size_t i = 0; i < Hints.size(); ++i) { + TString hintColumn = 
TStringBuilder() << "GroupingHint" << i; + TString hintValue = ToString(Hints[i]); + body = L(body, Y("let", "row", Y("AddMember", "row", Q(hintColumn), Y("Uint64", Q(hintValue))))); + } + return Y("Map", label, BuildLambda(Pos, Y("row"), body, "row")); + } + + + void FinishColumns() override { + Source->FinishColumns(); + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + if (const TString* columnName = column.GetColumnName()) { + if (columnName && IsExprAlias(*columnName)) { + return true; + } + } + return Source->AddColumn(ctx, column); + } + + TPtr DoClone() const final { + YQL_ENSURE(Hints.empty()); + return Holder.Get() ? new TNestedProxySource(Pos, CloneContainer(GroupBy), Holder->CloneSource()) : + new TNestedProxySource(CompositeSelect, CloneContainer(GroupBy)); + } + +private: + TCompositeSelect* CompositeSelect; + TSourcePtr Holder; + TVector<TNodePtr> GroupBy; + mutable TSet<TString> GroupByColumns; + mutable TVector<ui64> Hints; +}; + + +namespace { +TSourcePtr DoBuildSelectCore( + TContext& ctx, + TPosition pos, + TSourcePtr originalSource, + TSourcePtr source, + const TVector<TNodePtr>& groupByExpr, + const TVector<TNodePtr>& groupBy, + bool compactGroupBy, + const TString& groupBySuffix, + bool assumeSorted, + const TVector<TSortSpecificationPtr>& orderBy, + TNodePtr having, + TWinSpecs&& winSpecs, + TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec, + TVector<TNodePtr>&& terms, + bool distinct, + TVector<TNodePtr>&& without, + bool selectStream, + const TWriteSettings& settings, + TColumnsSets&& uniqueSets, + TColumnsSets&& distinctSets +) { + if (groupBy.empty() || !groupBy.front()->ContentListPtr()) { + return new TSelectCore(pos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, + orderBy, having, winSpecs, legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, std::move(uniqueSets), std::move(distinctSets)); + } + if (groupBy.size() == 1) { + /// actualy no big 
idea to use grouping function in this case (result allways 0) + auto contentPtr = groupBy.front()->ContentListPtr(); + source = new TNestedProxySource(pos, *contentPtr, source); + return DoBuildSelectCore(ctx, pos, originalSource, source, groupByExpr, *contentPtr, compactGroupBy, groupBySuffix, + assumeSorted, orderBy, having, std::move(winSpecs), + legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets)); + } + /// \todo some smart merge logic, generalize common part of grouping (expr, flatten, etc)? + TIntrusivePtr<TCompositeSelect> compositeSelect = new TCompositeSelect(pos, std::move(source), originalSource->CloneSource(), settings); + size_t totalGroups = 0; + TVector<TSourcePtr> subselects; + TVector<TNodePtr> groupingCols; + for (auto& grouping: groupBy) { + auto contentPtr = grouping->ContentListPtr(); + TVector<TNodePtr> cache(1, nullptr); + if (!contentPtr) { + cache[0] = grouping; + contentPtr = &cache; + } + groupingCols.insert(groupingCols.end(), contentPtr->cbegin(), contentPtr->cend()); + TSourcePtr proxySource = new TNestedProxySource(compositeSelect.Get(), CloneContainer(*contentPtr)); + if (!subselects.empty()) { + /// clone terms for others usage + TVector<TNodePtr> termsCopy; + for (const auto& term: terms) { + termsCopy.emplace_back(term->Clone()); + } + std::swap(terms, termsCopy); + } + totalGroups += contentPtr->size(); + TSelectCore* selectCore = new TSelectCore(pos, std::move(proxySource), CloneContainer(groupByExpr), + CloneContainer(*contentPtr), compactGroupBy, groupBySuffix, assumeSorted, orderBy, SafeClone(having), CloneContainer(winSpecs), + legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, TColumnsSets(uniqueSets), TColumnsSets(distinctSets)); + subselects.emplace_back(selectCore); + } + if (totalGroups > ctx.PragmaGroupByLimit) { + ctx.Error(pos) << "Unable to GROUP BY more than " << ctx.PragmaGroupByLimit << " groups, 
you try use " << totalGroups << " groups"; + return nullptr; + } + compositeSelect->SetSubselects(std::move(subselects), std::move(groupingCols), CloneContainer(groupByExpr)); + return compositeSelect; +} + +} + +TSourcePtr BuildSelectCore( + TContext& ctx, + TPosition pos, + TSourcePtr source, + const TVector<TNodePtr>& groupByExpr, + const TVector<TNodePtr>& groupBy, + bool compactGroupBy, + const TString& groupBySuffix, + bool assumeSorted, + const TVector<TSortSpecificationPtr>& orderBy, + TNodePtr having, + TWinSpecs&& winSpecs, + TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec, + TVector<TNodePtr>&& terms, + bool distinct, + TVector<TNodePtr>&& without, + bool selectStream, + const TWriteSettings& settings, + TColumnsSets&& uniqueSets, + TColumnsSets&& distinctSets +) +{ + return DoBuildSelectCore(ctx, pos, source, source, groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy, + having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets)); +} + +class TUnion: public IRealSource { +public: + TUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings) + : IRealSource(pos) + , Sources(std::move(sources)) + , QuantifierAll(quantifierAll) + , Settings(settings) + { + } + + const TColumns* GetColumns() const override { + return IRealSource::GetColumns(); + } + + void GetInputTables(TTableList& tableList) const override { + for (auto& x : Sources) { + x->GetInputTables(tableList); + } + + ISource::GetInputTables(tableList); + } + + bool DoInit(TContext& ctx, ISource* src) override { + bool first = true; + for (auto& s: Sources) { + s->UseAsInner(); + if (!s->Init(ctx, src)) { + return false; + } + if (!ctx.PositionalUnionAll || first) { + auto c = s->GetColumns(); + Y_DEBUG_ABORT_UNLESS(c); + Columns.Merge(*c); + first = false; + } + } + return true; + } + + TNodePtr Build(TContext& 
ctx) override { + TPtr res; + if (QuantifierAll) { + res = ctx.PositionalUnionAll ? Y("UnionAllPositional") : Y("UnionAll"); + } else { + res = ctx.PositionalUnionAll ? Y("UnionPositional") : Y("Union"); + } + + for (auto& s: Sources) { + auto input = s->Build(ctx); + if (!input) { + return nullptr; + } + res->Add(input); + } + return res; + } + + + bool IsStream() const override { + for (auto& s: Sources) { + if (!s->IsStream()) { + return false; + } + } + return true; + } + + TNodePtr DoClone() const final { + return MakeIntrusive<TUnion>(Pos, CloneContainer(Sources), QuantifierAll, Settings); + } + + bool IsSelect() const override { + return true; + } + + bool HasSelectResult() const override { + return !Settings.Discard; + } + + TWriteSettings GetWriteSettings() const override { + return Settings; + } + +private: + TVector<TSourcePtr> Sources; + bool QuantifierAll; + const TWriteSettings Settings; +}; + +TSourcePtr BuildUnion( + TPosition pos, + TVector<TSourcePtr>&& sources, + bool quantifierAll, + const TWriteSettings& settings +) { + return new TUnion(pos, std::move(sources), quantifierAll, settings); +} + +class TOverWindowSource: public IProxySource { +public: + TOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource) + : IProxySource(pos, origSource) + , WindowName(windowName) + { + Source->SetLabel(origSource->GetLabel()); + } + + TString MakeLocalName(const TString& name) override { + return Source->MakeLocalName(name); + } + + void AddTmpWindowColumn(const TString& column) override { + return Source->AddTmpWindowColumn(column); + } + + bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override { + if (aggr->IsOverWindow() || aggr->IsOverWindowDistinct()) { + return Source->AddAggregationOverWindow(ctx, WindowName, aggr); + } + return Source->AddAggregation(ctx, aggr); + } + + bool AddFuncOverWindow(TContext& ctx, TNodePtr expr) override { + return Source->AddFuncOverWindow(ctx, WindowName, expr); + } + + bool 
IsOverWindowSource() const override { + return true; + } + + TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { + return Source->AddColumn(ctx, column); + } + + TNodePtr Build(TContext& ctx) override { + Y_UNUSED(ctx); + Y_ABORT("Unexpected call"); + } + + const TString* GetWindowName() const override { + return &WindowName; + } + + TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override { + return Source->FindWindowSpecification(ctx, windowName); + } + + TNodePtr GetSessionWindowSpec() const override { + return Source->GetSessionWindowSpec(); + } + + TNodePtr DoClone() const final { + return {}; + } + +private: + const TString WindowName; +}; + +TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource) { + return new TOverWindowSource(pos, windowName, origSource); +} + +class TSkipTakeNode final: public TAstListNode { +public: + TSkipTakeNode(TPosition pos, const TNodePtr& skip, const TNodePtr& take) + : TAstListNode(pos), IsSkipProvided_(!!skip) + { + TNodePtr select(AstNode("select")); + if (skip) { + select = Y("Skip", select, Y("Coalesce", skip, Y("Uint64", Q("0")))); + } + static const TString uiMax = ::ToString(std::numeric_limits<ui64>::max()); + Add("let", "select", Y("Take", select, Y("Coalesce", take, Y("Uint64", Q(uiMax))))); + } + + TPtr DoClone() const final { + return {}; + } + + bool HasSkip() const { + return IsSkipProvided_; + } +private: + const bool IsSkipProvided_; +}; + +TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take) { + return new TSkipTakeNode(pos, skip, take); +} + +class TSelect: public IProxySource { +public: + TSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake) + : IProxySource(pos, source.Get()) + , Source(std::move(source)) + , SkipTake(skipTake) + {} + + bool DoInit(TContext& ctx, ISource* src) override { + Source->SetLabel(Label); + if (AsInner) { + Source->UseAsInner(); + } + + if 
(IgnoreSort()) { + Source->DisableSort(); + ctx.Warning(Source->GetPos(), TIssuesIds::YQL_ORDER_BY_WITHOUT_LIMIT_IN_SUBQUERY) << "ORDER BY without LIMIT in subquery will be ignored"; + } + + if (!Source->Init(ctx, src)) { + return false; + } + src = Source.Get(); + if (SkipTake) { + FakeSource = BuildFakeSource(SkipTake->GetPos()); + if (!SkipTake->Init(ctx, FakeSource.Get())) { + return false; + } + if (SkipTake->HasSkip() && EOrderKind::Sort != Source->GetOrderKind()) { + ctx.Warning(Source->GetPos(), TIssuesIds::YQL_OFFSET_WITHOUT_SORT) << "LIMIT with OFFSET without ORDER BY may provide different results from run to run"; + } + } + + return true; + } + + TNodePtr Build(TContext& ctx) override { + auto input = Source->Build(ctx); + if (!input) { + return nullptr; + } + const auto label = "select"; + auto block(Y(Y("let", label, input))); + + auto sortNode = Source->BuildSort(ctx, label); + if (sortNode && !IgnoreSort()) { + block = L(block, sortNode); + } + + if (SkipTake) { + block = L(block, SkipTake); + } + + TNodePtr sample; + if (!BuildSamplingLambda(sample)) { + return nullptr; + } else if (sample) { + block = L(block, Y("let", "select", Y("OrderedFlatMap", "select", sample))); + } + + if (auto removeNode = Source->BuildCleanupColumns(ctx, label)) { + block = L(block, removeNode); + } + + block = L(block, Y("return", label)); + return Y("block", Q(block)); + } + + bool SetSamplingOptions( + TContext& ctx, + TPosition pos, + ESampleClause sampleClause, + ESampleMode mode, + TNodePtr samplingRate, + TNodePtr samplingSeed) override { + if (mode == ESampleMode::System) { + ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries"; + return false; + } + if (samplingSeed) { + ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries"; + return false; + } + return SetSamplingRate(ctx, sampleClause, samplingRate); + } + + bool IsSelect() const override { + return Source->IsSelect(); + } + + bool HasSelectResult() const override { + 
return Source->HasSelectResult(); + } + + TPtr DoClone() const final { + return MakeIntrusive<TSelect>(Pos, Source->CloneSource(), SafeClone(SkipTake)); + } +protected: + bool IgnoreSort() const { + return AsInner && !SkipTake && EOrderKind::Sort == Source->GetOrderKind(); + } + + TSourcePtr Source; + TNodePtr SkipTake; + TSourcePtr FakeSource; +}; + +TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake) { + return new TSelect(pos, std::move(source), skipTake); +} + +class TSelectResultNode final: public TAstListNode { +public: + TSelectResultNode(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery, + TScopedStatePtr scoped) + : TAstListNode(pos) + , Source(std::move(source)) + , WriteResult(writeResult) + , InSubquery(inSubquery) + , Scoped(scoped) + { + YQL_ENSURE(Source, "Invalid source node"); + FakeSource = BuildFakeSource(pos); + } + + bool IsSelect() const override { + return true; + } + + bool HasSelectResult() const override { + return Source->HasSelectResult(); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!Source->Init(ctx, src)) { + return false; + } + + src = Source.Get(); + TTableList tableList; + Source->GetInputTables(tableList); + + TNodePtr node(BuildInputTables(Pos, tableList, InSubquery, Scoped)); + if (!node->Init(ctx, src)) { + return false; + } + + auto writeSettings = src->GetWriteSettings(); + bool asRef = ctx.PragmaRefSelect; + bool asAutoRef = true; + if (ctx.PragmaSampleSelect) { + asRef = false; + asAutoRef = false; + } + + auto settings = Y(Q(Y(Q("type")))); + if (writeSettings.Discard) { + settings = L(settings, Q(Y(Q("discard")))); + } + + if (!writeSettings.Label.Empty()) { + auto labelNode = writeSettings.Label.Build(); + if (!writeSettings.Label.GetLiteral()) { + labelNode = Y("EvaluateAtom", labelNode); + } + + if (!labelNode->Init(ctx, FakeSource.Get())) { + return false; + } + + settings = L(settings, Q(Y(Q("label"), labelNode))); + } + + if (asRef) { + settings = 
L(settings, Q(Y(Q("ref")))); + } else if (asAutoRef) { + settings = L(settings, Q(Y(Q("autoref")))); + } + + auto columns = Source->GetColumns(); + if (columns && !columns->All && !(columns->QualifiedAll && ctx.SimpleColumns)) { + auto list = Y(); + YQL_ENSURE(columns->List.size() == columns->NamedColumns.size()); + for (size_t i = 0; i < columns->List.size(); ++i) { + auto& c = columns->List[i]; + if (c.EndsWith('*')) { + list = L(list, Q(Y(Q("prefix"), BuildQuotedAtom(Pos, c.substr(0, c.size() - 1))))); + } else if (columns->NamedColumns[i]) { + list = L(list, BuildQuotedAtom(Pos, c)); + } else { + list = L(list, Q(Y(Q("auto")))); + } + } + settings = L(settings, Q(Y(Q("columns"), Q(list)))); + } + + if (ctx.ResultRowsLimit > 0) { + settings = L(settings, Q(Y(Q("take"), Q(ToString(ctx.ResultRowsLimit))))); + } + + auto output = Source->Build(ctx); + if (!output) { + return false; + } + node = L(node, Y("let", "output", output)); + if (WriteResult || writeSettings.Discard) { + if (EOrderKind::None == Source->GetOrderKind() && ctx.UseUnordered(*Source)) { + node = L(node, Y("let", "output", Y("Unordered", "output"))); + if (ctx.UnorderedResult) { + settings = L(settings, Q(Y(Q("unordered")))); + } + } + auto writeResult(BuildWriteResult(Pos, "output", settings)); + if (!writeResult->Init(ctx, src)) { + return false; + } + node = L(node, Y("let", "world", writeResult)); + node = L(node, Y("return", "world")); + } else { + node = L(node, Y("return", "output")); + } + + Add("block", Q(node)); + return true; + } + + TPtr DoClone() const final { + return {}; + } +protected: + TSourcePtr Source; + + const bool WriteResult; + const bool InSubquery; + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery, + TScopedStatePtr scoped) { + return new TSelectResultNode(pos, std::move(source), writeResult, inSubquery, scoped); +} + +} // namespace NSQLTranslationV1 diff --git 
a/yql/essentials/sql/v1/source.cpp b/yql/essentials/sql/v1/source.cpp new file mode 100644 index 00000000000..4231a9d370a --- /dev/null +++ b/yql/essentials/sql/v1/source.cpp @@ -0,0 +1,992 @@ +#include "source.h" +#include "context.h" + +#include <yql/essentials/ast/yql_ast_escaping.h> +#include <yql/essentials/ast/yql_expr.h> +#include <yql/essentials/core/sql_types/simple_types.h> +#include <yql/essentials/minikql/mkql_type_ops.h> +#include <yql/essentials/parser/pg_catalog/catalog.h> +#include <yql/essentials/utils/yql_panic.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include <library/cpp/charset/ci_string.h> +#include <util/generic/hash_set.h> +#include <util/stream/str.h> +#include <util/string/cast.h> +#include <util/string/escape.h> +#include <util/string/subst.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + + +TTableRef::TTableRef(const TString& refName, const TString& service, const TDeferredAtom& cluster, TNodePtr keys) + : RefName(refName) + , Service(to_lower(service)) + , Cluster(cluster) + , Keys(keys) +{ +} + +TString TTableRef::ShortName() const { + Y_DEBUG_ABORT_UNLESS(Keys); + if (Keys->GetTableKeys()->GetTableName()) { + return *Keys->GetTableKeys()->GetTableName(); + } + return TString(); +} + +ISource::ISource(TPosition pos) + : INode(pos) +{ +} + +ISource::~ISource() +{ +} + +TSourcePtr ISource::CloneSource() const { + Y_DEBUG_ABORT_UNLESS(dynamic_cast<ISource*>(Clone().Get()), "Cloned node is no source"); + TSourcePtr result = static_cast<ISource*>(Clone().Get()); + for (auto curFilter: Filters) { + result->Filters.emplace_back(curFilter->Clone()); + } + for (int i = 0; i < static_cast<int>(EExprSeat::Max); ++i) { + result->NamedExprs[i] = CloneContainer(NamedExprs[i]); + } + result->FlattenColumns = FlattenColumns; + result->FlattenMode = FlattenMode; + return result; +} + +bool ISource::IsFake() const { + return false; +} + +void ISource::AllColumns() { + return; +} + +const TColumns* 
ISource::GetColumns() const { + return nullptr; +} + +void ISource::GetInputTables(TTableList& tableList) const { + for (auto srcPtr: UsedSources) { + srcPtr->GetInputTables(tableList); + } + return; +} + +TMaybe<bool> ISource::AddColumn(TContext& ctx, TColumnNode& column) { + if (column.IsReliable()) { + ctx.Error(Pos) << "Source does not allow column references"; + ctx.Error(column.GetPos()) << "Column reference " << + (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)"); + } + return {}; +} + +void ISource::FinishColumns() { +} + + +bool ISource::AddFilter(TContext& ctx, TNodePtr filter) { + Y_UNUSED(ctx); + Filters.push_back(filter); + return true; +} + +bool ISource::AddGroupKey(TContext& ctx, const TString& column) { + if (!GroupKeys.insert(column).second) { + ctx.Error() << "Duplicate grouping column: " << column; + return false; + } + OrderedGroupKeys.push_back(column); + return true; +} + +void ISource::SetCompactGroupBy(bool compactGroupBy) { + CompactGroupBy = compactGroupBy; +} + +void ISource::SetGroupBySuffix(const TString& suffix) { + GroupBySuffix = suffix; +} + +bool ISource::AddExpressions(TContext& ctx, const TVector<TNodePtr>& expressions, EExprSeat exprSeat) { + YQL_ENSURE(exprSeat < EExprSeat::Max); + THashSet<TString> names; + THashSet<TString> aliasSet; + // TODO: merge FlattenBy with FlattenByExpr + const bool isFlatten = (exprSeat == EExprSeat::FlattenBy || exprSeat == EExprSeat::FlattenByExpr); + THashSet<TString>& aliases = isFlatten ? 
FlattenByAliases : aliasSet; + for (const auto& expr: expressions) { + const auto& alias = expr->GetLabel(); + const auto& columnNamePtr = expr->GetColumnName(); + if (alias) { + ExprAliases.insert(alias); + if (!aliases.emplace(alias).second) { + ctx.Error(expr->GetPos()) << "Duplicate alias found: " << alias << " in " << exprSeat << " section"; + return false; + } + if (names.contains(alias)) { + ctx.Error(expr->GetPos()) << "Collision between alias and column name: " << alias << " in " << exprSeat << " section"; + return false; + } + } + if (columnNamePtr) { + const auto& sourceName = *expr->GetSourceName(); + auto columnName = *columnNamePtr; + if (sourceName) { + columnName = DotJoin(sourceName, columnName); + } + if (!names.emplace(columnName).second) { + ctx.Error(expr->GetPos()) << "Duplicate column name found: " << columnName << " in " << exprSeat << " section"; + return false; + } + if (!alias && aliases.contains(columnName)) { + ctx.Error(expr->GetPos()) << "Collision between alias and column name: " << columnName << " in " << exprSeat << " section"; + return false; + } + if (alias && exprSeat == EExprSeat::GroupBy) { + auto columnAlias = GroupByColumnAliases.emplace(columnName, alias); + auto oldAlias = columnAlias.first->second; + if (columnAlias.second && oldAlias != alias) { + ctx.Error(expr->GetPos()) << "Alias for column not same, column: " << columnName << + ", exist alias: " << oldAlias << ", another alias: " << alias; + return false; + } + } + } + + if (exprSeat == EExprSeat::GroupBy) { + if (auto sessionWindow = dynamic_cast<TSessionWindow*>(expr.Get())) { + if (SessionWindow) { + ctx.Error(expr->GetPos()) << "Duplicate session window specification:"; + ctx.Error(SessionWindow->GetPos()) << "Previous session window is declared here"; + return false; + } + SessionWindow = expr; + } + if (auto hoppingWindow = dynamic_cast<THoppingWindow*>(expr.Get())) { + if (HoppingWindow) { + ctx.Error(expr->GetPos()) << "Duplicate hopping window 
specification:"; + ctx.Error(HoppingWindow->GetPos()) << "Previous hopping window is declared here"; + return false; + } + HoppingWindow = expr; + } + } + Expressions(exprSeat).emplace_back(expr); + } + return true; +} + +void ISource::SetFlattenByMode(const TString& mode) { + FlattenMode = mode; +} + +void ISource::MarkFlattenColumns() { + FlattenColumns = true; +} + +bool ISource::IsFlattenColumns() const { + return FlattenColumns; +} + +TString ISource::MakeLocalName(const TString& name) { + auto iter = GenIndexes.find(name); + if (iter == GenIndexes.end()) { + iter = GenIndexes.emplace(name, 0).first; + } + TStringBuilder str; + str << name << iter->second; + ++iter->second; + return std::move(str); +} + +bool ISource::AddAggregation(TContext& ctx, TAggregationPtr aggr) { + Y_UNUSED(ctx); + YQL_ENSURE(aggr); + Aggregations.push_back(aggr); + return true; +} + +bool ISource::HasAggregations() const { + return !Aggregations.empty() || !GroupKeys.empty(); +} + +void ISource::AddWindowSpecs(TWinSpecs winSpecs) { + WinSpecs = winSpecs; +} + +bool ISource::AddFuncOverWindow(TContext& ctx, TNodePtr expr) { + Y_UNUSED(ctx); + Y_UNUSED(expr); + return false; +} + +void ISource::AddTmpWindowColumn(const TString& column) { + TmpWindowColumns.push_back(column); +} + +const TVector<TString>& ISource::GetTmpWindowColumns() const { + return TmpWindowColumns; +} + +void ISource::SetLegacyHoppingWindowSpec(TLegacyHoppingWindowSpecPtr spec) { + LegacyHoppingWindowSpec = spec; +} + +TLegacyHoppingWindowSpecPtr ISource::GetLegacyHoppingWindowSpec() const { + return LegacyHoppingWindowSpec; +} + +TNodePtr ISource::GetSessionWindowSpec() const { + return SessionWindow; +} + +TNodePtr ISource::GetHoppingWindowSpec() const { + return HoppingWindow; +} + +TWindowSpecificationPtr ISource::FindWindowSpecification(TContext& ctx, const TString& windowName) const { + auto winIter = WinSpecs.find(windowName); + if (winIter == WinSpecs.end()) { + ctx.Error(Pos) << "Unable to find window 
specification for window '" << windowName << "'"; + return {}; + } + YQL_ENSURE(winIter->second); + return winIter->second; +} + +inline TVector<TNodePtr>& ISource::Expressions(EExprSeat exprSeat) { + return NamedExprs[static_cast<size_t>(exprSeat)]; +} + +const TVector<TNodePtr>& ISource::Expressions(EExprSeat exprSeat) const { + return NamedExprs[static_cast<size_t>(exprSeat)]; +} + +inline TNodePtr ISource::AliasOrColumn(const TNodePtr& node, bool withSource) { + auto result = node->GetLabel(); + if (!result) { + const auto columnNamePtr = node->GetColumnName(); + YQL_ENSURE(columnNamePtr); + result = *columnNamePtr; + if (withSource) { + const auto sourceNamePtr = node->GetSourceName(); + if (sourceNamePtr) { + result = DotJoin(*sourceNamePtr, result); + } + } + } + return BuildQuotedAtom(node->GetPos(), result); +} + +bool ISource::AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) { + if (ctx.DistinctOverWindow) { + YQL_ENSURE(func->IsOverWindow() || func->IsOverWindowDistinct()); + } else { + YQL_ENSURE(func->IsOverWindow()); + if (func->IsDistinct()) { + ctx.Error(func->GetPos()) << "Aggregation with distinct is not allowed over window: " << windowName; + return false; + } + } + + if (!FindWindowSpecification(ctx, windowName)) { + return false; + } + AggregationOverWindow[windowName].emplace_back(std::move(func)); + return true; +} + +bool ISource::AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) { + if (!FindWindowSpecification(ctx, windowName)) { + return false; + } + FuncOverWindow[windowName].emplace_back(std::move(func)); + return true; +} + +void ISource::SetMatchRecognize(TMatchRecognizeBuilderPtr matchRecognize) { + MatchRecognizeBuilder = matchRecognize; +} + +bool ISource::IsCompositeSource() const { + return false; +} + +bool ISource::IsGroupByColumn(const TString& column) const { + return GroupKeys.contains(column); +} + +bool ISource::IsFlattenByColumns() const { + return 
!Expressions(EExprSeat::FlattenBy).empty();
+}
+/// True when at least one expression is stored under EExprSeat::FlattenByExpr.
+bool ISource::IsFlattenByExprs() const {
+    return !Expressions(EExprSeat::FlattenByExpr).empty();
+}
+/// True when some expression stored under exprSeat has a non-empty label equal to column.
+bool ISource::IsAlias(EExprSeat exprSeat, const TString& column) const {
+    for (const auto& exprNode: Expressions(exprSeat)) {
+        const auto& labelName = exprNode->GetLabel();
+        if (labelName && labelName == column) {
+            return true;
+        }
+    }
+    return false;
+}
+/// True when column is an alias in any of the five seats listed below.
+bool ISource::IsExprAlias(const TString& column) const {
+    std::array<EExprSeat, 5> exprSeats = {{EExprSeat::FlattenBy, EExprSeat::FlattenByExpr, EExprSeat::GroupBy,
+        EExprSeat::WindowPartitionBy, EExprSeat::DistinctAggr}};
+    for (auto seat: exprSeats) {
+        if (IsAlias(seat, column)) {
+            return true;
+        }
+    }
+    return false;
+}
+/// Classifies a seat: false when it is empty; otherwise WithExpression if any entry is labelled, ColumnOnly if none is.
+bool ISource::IsExprSeat(EExprSeat exprSeat, EExprType type) const {
+    const auto& expressions = Expressions(exprSeat); // bind by reference: the list is only read, no need to copy it
+    if (!expressions) {
+        return false;
+    }
+    for (const auto& exprNode: expressions) {
+        if (exprNode->GetLabel()) { // first labelled entry decides the answer
+            return type == EExprType::WithExpression;
+        }
+    }
+    return type == EExprType::ColumnOnly;
+}
+/// Returns the alias recorded for column in GroupByColumnAliases, or an empty string when none is recorded.
+TString ISource::GetGroupByColumnAlias(const TString& column) const {
+    auto iter = GroupByColumnAliases.find(column);
+    if (iter == GroupByColumnAliases.end()) {
+        return {};
+    }
+    return iter->second;
+}
+/// Base implementation: no window name (null pointer).
+const TString* ISource::GetWindowName() const {
+    return {};
+}
+/// True when any window aggregation or window function is registered, or any window specification has Session set.
+bool ISource::IsCalcOverWindow() const {
+    return !AggregationOverWindow.empty() || !FuncOverWindow.empty() ||
+        AnyOf(WinSpecs, [](const auto& item) { return item.second->Session; });
+}
+/// True when at least one window specification is stored.
+bool ISource::IsOverWindowSource() const {
+    return !WinSpecs.empty();
+}
+/// Base implementation: not a stream.
+bool ISource::IsStream() const {
+    return false;
+}
+/// Base implementation: no ordering.
+EOrderKind ISource::GetOrderKind() const {
+    return EOrderKind::None;
+}
+/// Base implementation: default-constructed write settings.
+TWriteSettings ISource::GetWriteSettings() const {
+    return {};
+}
+/// Wraps samplingRate in range checks; for ESampleClause::Sample the rate is first multiplied by 100.
+TNodePtr ISource::PrepareSamplingRate(TPosition pos, ESampleClause clause, TNodePtr samplingRate) {
+    if (ESampleClause::Sample == clause) {
+        samplingRate = Y("*", samplingRate, 
Y("Double", Q("100"))); + } + auto ensureLow = Y("Ensure", "samplingRate", Y(">=", "samplingRate", Y("Double", Q("0"))), Y("String", BuildQuotedAtom(pos, "Expected sampling rate to be nonnegative"))); + auto ensureHigh = Y("Ensure", "samplingRate", Y("<=", "samplingRate", Y("Double", Q("100"))), Y("String", BuildQuotedAtom(pos, "Sampling rate is over 100%"))); + + auto block(Y(Y("let", "samplingRate", samplingRate))); + block = L(block, Y("let", "samplingRate", ensureLow)); + block = L(block, Y("let", "samplingRate", ensureHigh)); + samplingRate = Y("block", Q(L(block, Y("return", "samplingRate")))); + return samplingRate; +} + + +bool ISource::SetSamplingOptions(TContext& ctx, + TPosition pos, + ESampleClause sampleClause, + ESampleMode mode, + TNodePtr samplingRate, + TNodePtr samplingSeed) { + Y_UNUSED(pos); + Y_UNUSED(sampleClause); + Y_UNUSED(mode); + Y_UNUSED(samplingRate); + Y_UNUSED(samplingSeed); + ctx.Error() << "Sampling is only supported for table sources"; + return false; +} + +bool ISource::SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) { + Y_UNUSED(pos); + Y_UNUSED(contextHints); + if (hints) { + ctx.Error() << "Explicit hints are only supported for table sources"; + return false; + } + return true; +} + +bool ISource::AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& grouingColumn) { + Y_UNUSED(columns); + Y_UNUSED(grouingColumn); + ctx.Error() << "Source not support grouping hint"; + return false; +} + +size_t ISource::GetGroupingColumnsCount() const { + return 0; +} + +TNodePtr ISource::BuildFilter(TContext& ctx, const TString& label) { + return Filters.empty() ? nullptr : Y(ctx.UseUnordered(*this) ? 
"OrderedFilter" : "Filter", label, BuildFilterLambda()); +} + +TNodePtr ISource::BuildFilterLambda() { + if (Filters.empty()) { + return BuildLambda(Pos, Y("row"), Y("Bool", Q("true"))); + } + YQL_ENSURE(Filters[0]->HasState(ENodeState::Initialized)); + TNodePtr filter(Filters[0]); + for (ui32 i = 1; i < Filters.size(); ++i) { + YQL_ENSURE(Filters[i]->HasState(ENodeState::Initialized)); + filter = Y("And", filter, Filters[i]); + } + filter = Y("Coalesce", filter, Y("Bool", Q("false"))); + return BuildLambda(Pos, Y("row"), filter); +} + +TNodePtr ISource::BuildFlattenByColumns(const TString& label) { + auto columnsList = Y("FlattenByColumns", Q(FlattenMode), label); + for (const auto& column: Expressions(EExprSeat::FlattenBy)) { + const auto columnNamePtr = column->GetColumnName(); + YQL_ENSURE(columnNamePtr); + if (column->GetLabel().empty()) { + columnsList = L(columnsList, Q(*columnNamePtr)); + } else { + columnsList = L(columnsList, Q(Y(Q(*columnNamePtr), Q(column->GetLabel())))); + } + } + return Y(Y("let", "res", columnsList)); +} + +TNodePtr ISource::BuildFlattenColumns(const TString& label) { + return Y(Y("let", "res", Y("Just", Y("FlattenStructs", label)))); +} + +namespace { + +TNodePtr BuildLambdaBodyForExprAliases(TPosition pos, const TVector<TNodePtr>& exprs) { + auto structObj = BuildAtom(pos, "row", TNodeFlags::Default); + for (const auto& exprNode: exprs) { + const auto name = exprNode->GetLabel(); + YQL_ENSURE(name); + structObj = structObj->Y("ForceRemoveMember", structObj, structObj->Q(name)); + if (dynamic_cast<const TSessionWindow*>(exprNode.Get())) { + continue; + } + if (dynamic_cast<const THoppingWindow*>(exprNode.Get())) { + continue; + } + structObj = structObj->Y("AddMember", structObj, structObj->Q(name), exprNode); + } + return structObj->Y("AsList", structObj); +} + +} + +TNodePtr ISource::BuildPreaggregatedMap(TContext& ctx) { + Y_UNUSED(ctx); + const auto& groupByExprs = Expressions(EExprSeat::GroupBy); + const auto& distinctAggrExprs 
= Expressions(EExprSeat::DistinctAggr); + YQL_ENSURE(groupByExprs || distinctAggrExprs); + + TNodePtr res; + if (groupByExprs) { + auto body = BuildLambdaBodyForExprAliases(Pos, groupByExprs); + res = Y("FlatMap", "core", BuildLambda(Pos, Y("row"), body)); + } + + if (distinctAggrExprs) { + auto body = BuildLambdaBodyForExprAliases(Pos, distinctAggrExprs); + auto lambda = BuildLambda(Pos, Y("row"), body); + res = res ? Y("FlatMap", res, lambda) : Y("FlatMap", "core", lambda); + } + return res; +} + +TNodePtr ISource::BuildPreFlattenMap(TContext& ctx) { + Y_UNUSED(ctx); + YQL_ENSURE(IsFlattenByExprs()); + return BuildLambdaBodyForExprAliases(Pos, Expressions(EExprSeat::FlattenByExpr)); +} + +TNodePtr ISource::BuildPrewindowMap(TContext& ctx) { + auto feed = BuildAtom(Pos, "row", TNodeFlags::Default); + for (const auto& exprNode: Expressions(EExprSeat::WindowPartitionBy)) { + const auto name = exprNode->GetLabel(); + if (name && !dynamic_cast<const TSessionWindow*>(exprNode.Get())) { + feed = Y("AddMember", feed, Q(name), exprNode); + } + } + return Y(ctx.UseUnordered(*this) ? 
"OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Y("AsList", feed))); +} + +bool ISource::BuildSamplingLambda(TNodePtr& node) { + if (!SamplingRate) { + return true; + } + auto res = Y("Coalesce", Y("SafeCast", SamplingRate, Y("DataType", Q("Double"))), Y("Double", Q("0"))); + res = Y("/", res, Y("Double", Q("100"))); + res = Y(Y("let", "res", Y("OptionalIf", Y("<", Y("Random", Y("DependsOn", "row")), res), "row"))); + node = BuildLambda(GetPos(), Y("row"), res, "res"); + return !!node; +} + +bool ISource::SetSamplingRate(TContext& ctx, ESampleClause clause, TNodePtr samplingRate) { + if (samplingRate) { + if (!samplingRate->Init(ctx, this)) { + return false; + } + SamplingRate = PrepareSamplingRate(Pos, clause, samplingRate); + } + return true; +} + +std::pair<TNodePtr, bool> ISource::BuildAggregation(const TString& label, TContext& ctx) { + if (GroupKeys.empty() && Aggregations.empty() && !IsCompositeSource() && !LegacyHoppingWindowSpec) { + return { nullptr, true }; + } + + auto keysTuple = Y(); + YQL_ENSURE(GroupKeys.size() == OrderedGroupKeys.size()); + for (const auto& key: OrderedGroupKeys) { + YQL_ENSURE(GroupKeys.contains(key)); + keysTuple = L(keysTuple, BuildQuotedAtom(Pos, key)); + } + + std::map<std::pair<bool, TString>, std::vector<IAggregation*>> genericAggrs; + for (const auto& aggr: Aggregations) { + if (const auto key = aggr->GetGenericKey()) { + genericAggrs[{aggr->IsDistinct(), *key}].emplace_back(aggr.Get()); + } + } + + for (const auto& aggr : genericAggrs) { + for (size_t i = 1U; i < aggr.second.size(); ++i) { + aggr.second.front()->Join(aggr.second[i]); + } + } + + const auto listType = Y("TypeOf", label); + auto aggrArgs = Y(); + const bool overState = GroupBySuffix == "CombineState" || GroupBySuffix == "MergeState" || + GroupBySuffix == "MergeFinalize" || GroupBySuffix == "MergeManyFinalize"; + const bool allowAggApply = !LegacyHoppingWindowSpec && !SessionWindow && !HoppingWindow; + for (const auto& aggr: Aggregations) { + 
auto res = aggr->AggregationTraits(listType, overState, GroupBySuffix == "MergeManyFinalize", allowAggApply, ctx); + if (!res.second) { + return { nullptr, false }; + } + + if (res.first) { + aggrArgs = L(aggrArgs, res.first); + } + } + + auto options = Y(); + if (CompactGroupBy || GroupBySuffix == "Finalize") { + options = L(options, Q(Y(Q("compact")))); + } + + if (LegacyHoppingWindowSpec) { + auto hoppingTraits = Y( + "HoppingTraits", + Y("ListItemType", listType), + BuildLambda(Pos, Y("row"), LegacyHoppingWindowSpec->TimeExtractor), + LegacyHoppingWindowSpec->Hop, + LegacyHoppingWindowSpec->Interval, + LegacyHoppingWindowSpec->Delay, + LegacyHoppingWindowSpec->DataWatermarks ? Q("true") : Q("false"), + Q("v1")); + + options = L(options, Q(Y(Q("hopping"), hoppingTraits))); + } + + if (SessionWindow) { + YQL_ENSURE(SessionWindow->GetLabel()); + auto sessionWindow = dynamic_cast<TSessionWindow*>(SessionWindow.Get()); + YQL_ENSURE(sessionWindow); + options = L(options, Q(Y(Q("session"), + Q(Y(BuildQuotedAtom(Pos, SessionWindow->GetLabel()), sessionWindow->BuildTraits(label)))))); + } + + if (HoppingWindow) { + YQL_ENSURE(HoppingWindow->GetLabel()); + auto hoppingWindow = dynamic_cast<THoppingWindow*>(HoppingWindow.Get()); + YQL_ENSURE(hoppingWindow); + options = L(options, Q(Y(Q("hopping"), + Q(Y(BuildQuotedAtom(Pos, HoppingWindow->GetLabel()), hoppingWindow->BuildTraits(label)))))); + } + + return { Y("AssumeColumnOrderPartial", Y("Aggregate" + GroupBySuffix, label, Q(keysTuple), Q(aggrArgs), Q(options)), Q(keysTuple)), true }; +} + +TMaybe<TString> ISource::FindColumnMistype(const TString& name) const { + auto result = FindMistypeIn(GroupKeys, name); + return result ? 
result : FindMistypeIn(ExprAliases, name); +} + +void ISource::AddDependentSource(ISource* usedSource) { + UsedSources.push_back(usedSource); +} + +class TYqlFrameBound final: public TCallNode { +public: + TYqlFrameBound(TPosition pos, TNodePtr bound) + : TCallNode(pos, "EvaluateExpr", 1, 1, { bound }) + , FakeSource(BuildFakeSource(pos)) + { + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args[0]->Init(ctx, FakeSource.Get())) { + return false; + } + + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TYqlFrameBound(Pos, Args[0]->Clone()); + } +private: + TSourcePtr FakeSource; +}; + +TNodePtr BuildFrameNode(const TFrameBound& frame, EFrameType frameType) { + TString settingStr; + switch (frame.Settings) { + case FramePreceding: settingStr = "preceding"; break; + case FrameCurrentRow: settingStr = "currentRow"; break; + case FrameFollowing: settingStr = "following"; break; + default: YQL_ENSURE(false, "Unexpected frame setting"); + } + + TNodePtr node = frame.Bound; + TPosition pos = frame.Pos; + if (frameType != EFrameType::FrameByRows) { + TVector<TNodePtr> settings; + settings.push_back(BuildQuotedAtom(pos, settingStr, TNodeFlags::Default)); + if (frame.Settings != FrameCurrentRow) { + if (!node) { + node = BuildQuotedAtom(pos, "unbounded", TNodeFlags::Default); + } else if (!node->IsLiteral()) { + node = new TYqlFrameBound(pos, node); + } + settings.push_back(std::move(node)); + } + return BuildTuple(pos, std::move(settings)); + } + + // TODO: switch FrameByRows to common format above + YQL_ENSURE(frame.Settings != FrameCurrentRow, "Should be already replaced by 0 preceding/following"); + if (!node) { + node = BuildLiteralVoid(pos); + } else if (node->IsLiteral()) { + YQL_ENSURE(node->GetLiteralType() == "Int32"); + i32 value = FromString<i32>(node->GetLiteralValue()); + YQL_ENSURE(value >= 0); + if (frame.Settings == FramePreceding) { + value = 
-value; + } + node = new TCallNodeImpl(pos, "Int32", { BuildQuotedAtom(pos, ToString(value), TNodeFlags::Default) }); + } else { + if (frame.Settings == FramePreceding) { + node = new TCallNodeImpl(pos, "Minus", { node->Clone() }); + } + node = new TYqlFrameBound(pos, node); + } + return node; +} + +TNodePtr ISource::BuildWindowFrame(const TFrameSpecification& spec, bool isCompact) { + YQL_ENSURE(spec.FrameExclusion == FrameExclNone); + YQL_ENSURE(spec.FrameBegin); + YQL_ENSURE(spec.FrameEnd); + + auto frameBeginNode = BuildFrameNode(*spec.FrameBegin, spec.FrameType); + auto frameEndNode = BuildFrameNode(*spec.FrameEnd, spec.FrameType); + + auto begin = Q(Y(Q("begin"), frameBeginNode)); + auto end = Q(Y(Q("end"), frameEndNode)); + + return isCompact ? Q(Y(begin, end, Q(Y(Q("compact"))))) : Q(Y(begin, end)); +} + +class TSessionWindowTraits final: public TCallNode { +public: + TSessionWindowTraits(TPosition pos, const TVector<TNodePtr>& args) + : TCallNode(pos, "SessionWindowTraits", args) + , FakeSource(BuildFakeSource(pos)) + { + YQL_ENSURE(args.size() == 4); + } + + bool DoInit(TContext& ctx, ISource* src) override { + if (!ValidateArguments(ctx)) { + return false; + } + + if (!Args.back()->Init(ctx, FakeSource.Get())) { + return false; + } + + return TCallNode::DoInit(ctx, src); + } + + TNodePtr DoClone() const final { + return new TSessionWindowTraits(Pos, CloneContainer(Args)); + } +private: + TSourcePtr FakeSource; +}; + +TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label) { + YQL_ENSURE(IsCalcOverWindow()); + + TSet<TString> usedWindows; + for (auto& it : AggregationOverWindow) { + usedWindows.insert(it.first); + } + for (auto& it : FuncOverWindow) { + usedWindows.insert(it.first); + } + for (auto& it : WinSpecs) { + if (it.second->Session) { + usedWindows.insert(it.first); + } + } + + YQL_ENSURE(!usedWindows.empty()); + + const bool onePartition = usedWindows.size() == 1; + const auto useLabel = onePartition ? 
label : "partitioning"; + const auto listType = Y("TypeOf", useLabel); + auto framesProcess = Y(); + auto resultNode = onePartition ? Y() : Y(Y("let", "partitioning", label)); + + for (const auto& name : usedWindows) { + auto spec = FindWindowSpecification(ctx, name); + YQL_ENSURE(spec); + + auto aggsIter = AggregationOverWindow.find(name); + auto funcsIter = FuncOverWindow.find(name); + + const auto& aggs = (aggsIter == AggregationOverWindow.end()) ? TVector<TAggregationPtr>() : aggsIter->second; + const auto& funcs = (funcsIter == FuncOverWindow.end()) ? TVector<TNodePtr>() : funcsIter->second; + + auto frames = Y(); + TString frameType; + switch (spec->Frame->FrameType) { + case EFrameType::FrameByRows: frameType = "WinOnRows"; break; + case EFrameType::FrameByRange: frameType = "WinOnRange"; break; + case EFrameType::FrameByGroups: frameType = "WinOnGroups"; break; + } + YQL_ENSURE(frameType); + auto callOnFrame = Y(frameType, BuildWindowFrame(*spec->Frame, spec->IsCompact)); + for (auto& agg : aggs) { + auto winTraits = agg->WindowTraits(listType, ctx); + callOnFrame = L(callOnFrame, winTraits); + } + for (auto& func : funcs) { + auto winSpec = func->WindowSpecFunc(listType); + callOnFrame = L(callOnFrame, winSpec); + } + frames = L(frames, callOnFrame); + + auto keysTuple = Y(); + for (const auto& key: spec->Partitions) { + if (!dynamic_cast<TSessionWindow*>(key.Get())) { + keysTuple = L(keysTuple, AliasOrColumn(key, GetJoin())); + } + } + + auto sortSpec = spec->OrderBy.empty() ? 
Y("Void") : BuildSortSpec(spec->OrderBy, useLabel, true, false); + if (spec->Session) { + TString label = spec->Session->GetLabel(); + YQL_ENSURE(label); + auto sessionWindow = dynamic_cast<TSessionWindow*>(spec->Session.Get()); + YQL_ENSURE(sessionWindow); + auto labelNode = BuildQuotedAtom(sessionWindow->GetPos(), label); + + auto sessionTraits = sessionWindow->BuildTraits(useLabel); + framesProcess = Y("CalcOverSessionWindow", useLabel, Q(keysTuple), sortSpec, Q(frames), sessionTraits, Q(Y(labelNode))); + } else { + YQL_ENSURE(aggs || funcs); + framesProcess = Y("CalcOverWindow", useLabel, Q(keysTuple), sortSpec, Q(frames)); + } + + if (!onePartition) { + resultNode = L(resultNode, Y("let", "partitioning", framesProcess)); + } + } + if (onePartition) { + return framesProcess; + } else { + return Y("block", Q(L(resultNode, Y("return", "partitioning")))); + } +} + +TNodePtr ISource::BuildSort(TContext& ctx, const TString& label) { + Y_UNUSED(ctx); + Y_UNUSED(label); + return nullptr; +} + +TNodePtr ISource::BuildCleanupColumns(TContext& ctx, const TString& label) { + Y_UNUSED(ctx); + Y_UNUSED(label); + return nullptr; +} + +TNodePtr ISource::BuildGroupingColumns(const TString& label) { + Y_UNUSED(label); + return nullptr; +} + +IJoin* ISource::GetJoin() { + return nullptr; +} + +ISource* ISource::GetCompositeSource() { + return nullptr; +} + +bool ISource::IsSelect() const { + return true; +} + +bool ISource::IsTableSource() const { + return false; +} + +bool ISource::ShouldUseSourceAsColumn(const TString& source) const { + Y_UNUSED(source); + return false; +} + +bool ISource::IsJoinKeysInitializing() const { + return false; +} + +bool ISource::DoInit(TContext& ctx, ISource* src) { + for (auto& column: Expressions(EExprSeat::FlattenBy)) { + if (!column->Init(ctx, this)) { + return false; + } + } + + if (IsFlattenColumns() && src) { + src->AllColumns(); + } + + return true; +} + +bool ISource::InitFilters(TContext& ctx) { + for (auto& filter: Filters) { + if 
(!filter->Init(ctx, this)) { + return false; + } + if (filter->IsAggregated() && !filter->IsConstant() && !filter->HasState(ENodeState::AggregationKey)) { + ctx.Error(filter->GetPos()) << "Can not use aggregated values in filtering"; + return false; + } + } + return true; +} + +TAstNode* ISource::Translate(TContext& ctx) const { + Y_DEBUG_ABORT_UNLESS(false); + Y_UNUSED(ctx); + return nullptr; +} + +void ISource::FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortDirection, TNodePtr& sortKeySelector) { + TNodePtr expr; + if (orderBy.empty()) { + YQL_ENSURE(!sortKeySelector); + sortDirection = sortKeySelector = Y("Void"); + return; + } else if (orderBy.size() == 1) { + auto& sortSpec = orderBy.front(); + expr = Y("PersistableRepr", sortSpec->OrderExpr); + sortDirection = Y("Bool", Q(sortSpec->Ascending ? "true" : "false")); + } else { + auto exprList = Y(); + sortDirection = Y(); + for (const auto& sortSpec: orderBy) { + const auto asc = sortSpec->Ascending; + sortDirection = L(sortDirection, Y("Bool", Q(asc ? 
"true" : "false"))); + exprList = L(exprList, Y("PersistableRepr", sortSpec->OrderExpr)); + } + sortDirection = Q(sortDirection); + expr = Q(exprList); + } + sortKeySelector = BuildLambda(Pos, Y("row"), expr); +} + +TNodePtr ISource::BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, bool traits, bool assume) { + YQL_ENSURE(!orderBy.empty()); + TNodePtr dirsNode; + TNodePtr keySelectorNode; + FillSortParts(orderBy, dirsNode, keySelectorNode); + if (traits) { + return Y("SortTraits", Y("TypeOf", label), dirsNode, keySelectorNode); + } else if (assume) { + return Y("AssumeSorted", label, dirsNode, keySelectorNode); + } else { + return Y("Sort", label, dirsNode, keySelectorNode); + } +} + +bool ISource::HasMatchRecognize() const { + return static_cast<bool>(MatchRecognizeBuilder); +} + +TNodePtr ISource::BuildMatchRecognize(TContext& ctx, TString&& inputTable){ + YQL_ENSURE(HasMatchRecognize()); + return MatchRecognizeBuilder->Build(ctx, std::move(inputTable), this); +}; + +IJoin::IJoin(TPosition pos) + : ISource(pos) +{ +} + +IJoin::~IJoin() +{ +} + +IJoin* IJoin::GetJoin() { + return this; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/source.h b/yql/essentials/sql/v1/source.h new file mode 100644 index 00000000000..35129fffbb8 --- /dev/null +++ b/yql/essentials/sql/v1/source.h @@ -0,0 +1,320 @@ +#pragma once +#include "node.h" +#include "match_recognize.h" +#include <library/cpp/containers/sorted_vector/sorted_vector.h> + +namespace NSQLTranslationV1 { + using TColumnsSets = NSorted::TSimpleSet<NSorted::TSimpleSet<TString>>; + + class ISource; + typedef TIntrusivePtr<ISource> TSourcePtr; + + struct TTableRef { + TString RefName; + TString Service; + TDeferredAtom Cluster; + TNodePtr Keys; + TNodePtr Options; + TSourcePtr Source; + + TTableRef() = default; + TTableRef(const TString& refName, const TString& service, const TDeferredAtom& cluster, TNodePtr keys); + TTableRef(const TTableRef&) = default; + 
TTableRef& operator=(const TTableRef&) = default; + + TString ShortName() const; + }; + + typedef TVector<TTableRef> TTableList; + + + class IJoin; + class ISource: public INode { + public: + virtual ~ISource(); + + virtual bool IsFake() const; + virtual void AllColumns(); + virtual const TColumns* GetColumns() const; + virtual void GetInputTables(TTableList& tableList) const; + /// in case of error unfilled, flag show if ensure column name + virtual TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column); + virtual void FinishColumns(); + virtual bool AddExpressions(TContext& ctx, const TVector<TNodePtr>& columns, EExprSeat exprSeat); + virtual void SetFlattenByMode(const TString& mode); + virtual void MarkFlattenColumns(); + virtual bool IsFlattenColumns() const; + virtual bool AddFilter(TContext& ctx, TNodePtr filter); + virtual bool AddGroupKey(TContext& ctx, const TString& column); + virtual void SetCompactGroupBy(bool compactGroupBy); + virtual void SetGroupBySuffix(const TString& suffix); + virtual TString MakeLocalName(const TString& name); + virtual bool AddAggregation(TContext& ctx, TAggregationPtr aggr); + virtual bool AddFuncOverWindow(TContext& ctx, TNodePtr expr); + virtual void AddTmpWindowColumn(const TString& column); + virtual void SetMatchRecognize(TMatchRecognizeBuilderPtr matchRecognize); + virtual const TVector<TString>& GetTmpWindowColumns() const; + virtual bool HasAggregations() const; + virtual void AddWindowSpecs(TWinSpecs winSpecs); + virtual bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func); + virtual bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func); + virtual void SetLegacyHoppingWindowSpec(TLegacyHoppingWindowSpecPtr spec); + virtual TLegacyHoppingWindowSpecPtr GetLegacyHoppingWindowSpec() const; + virtual TNodePtr GetSessionWindowSpec() const; + virtual TNodePtr GetHoppingWindowSpec() const; + virtual bool IsCompositeSource() const; + virtual bool 
IsGroupByColumn(const TString& column) const; + virtual bool IsFlattenByColumns() const; + virtual bool IsFlattenByExprs() const; + virtual bool IsCalcOverWindow() const; + virtual bool IsOverWindowSource() const; + virtual bool IsStream() const; + virtual EOrderKind GetOrderKind() const; + virtual TWriteSettings GetWriteSettings() const; + TNodePtr PrepareSamplingRate(TPosition pos, ESampleClause clause, TNodePtr samplingRate); + virtual bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause clause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed); + virtual bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints); + virtual bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& groupingColumn); + virtual size_t GetGroupingColumnsCount() const; + virtual TNodePtr BuildFilter(TContext& ctx, const TString& label); + virtual TNodePtr BuildFilterLambda(); + virtual TNodePtr BuildFlattenByColumns(const TString& label); + virtual TNodePtr BuildFlattenColumns(const TString& label); + virtual TNodePtr BuildPreaggregatedMap(TContext& ctx); + virtual TNodePtr BuildPreFlattenMap(TContext& ctx); + virtual TNodePtr BuildPrewindowMap(TContext& ctx); + virtual std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx); + virtual TNodePtr BuildCalcOverWindow(TContext& ctx, const TString& label); + virtual TNodePtr BuildSort(TContext& ctx, const TString& label); + virtual TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label); + virtual TNodePtr BuildGroupingColumns(const TString& label); + virtual bool BuildSamplingLambda(TNodePtr& node); + virtual bool SetSamplingRate(TContext& ctx, ESampleClause clause, TNodePtr samplingRate); + virtual IJoin* GetJoin(); + virtual ISource* GetCompositeSource(); + virtual bool IsSelect() const; + virtual bool IsTableSource() const; + virtual bool ShouldUseSourceAsColumn(const TString& source) const; + virtual bool 
IsJoinKeysInitializing() const; + virtual const TString* GetWindowName() const; + virtual bool HasMatchRecognize() const; + virtual TNodePtr BuildMatchRecognize(TContext& ctx, TString&& inputTable); + virtual bool DoInit(TContext& ctx, ISource* src); + virtual TNodePtr Build(TContext& ctx) = 0; + + virtual TMaybe<TString> FindColumnMistype(const TString& name) const; + + virtual bool InitFilters(TContext& ctx); + void AddDependentSource(ISource* usedSource); + bool IsAlias(EExprSeat exprSeat, const TString& label) const; + bool IsExprAlias(const TString& label) const; + bool IsExprSeat(EExprSeat exprSeat, EExprType type = EExprType::WithExpression) const; + TString GetGroupByColumnAlias(const TString& column) const; + const TVector<TNodePtr>& Expressions(EExprSeat exprSeat) const; + + virtual TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const; + + TIntrusivePtr<ISource> CloneSource() const; + TNodePtr BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, bool traits, bool assume); + + protected: + ISource(TPosition pos); + virtual TAstNode* Translate(TContext& ctx) const; + + void FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortKeySelector, TNodePtr& sortDirection); + + TVector<TNodePtr>& Expressions(EExprSeat exprSeat); + TNodePtr AliasOrColumn(const TNodePtr& node, bool withSource); + + TNodePtr BuildWindowFrame(const TFrameSpecification& spec, bool isCompact); + + THashSet<TString> ExprAliases; + THashSet<TString> FlattenByAliases; + THashMap<TString, TString> GroupByColumnAliases; + TVector<TNodePtr> Filters; + bool CompactGroupBy = false; + TString GroupBySuffix; + TSet<TString> GroupKeys; + TVector<TString> OrderedGroupKeys; + std::array<TVector<TNodePtr>, static_cast<unsigned>(EExprSeat::Max)> NamedExprs; + TVector<TAggregationPtr> Aggregations; + TMap<TString, TVector<TAggregationPtr>> AggregationOverWindow; + TMap<TString, TVector<TNodePtr>> FuncOverWindow; 
+ TWinSpecs WinSpecs; + TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec; + TNodePtr SessionWindow; + TNodePtr HoppingWindow; + TVector<ISource*> UsedSources; + TString FlattenMode; + bool FlattenColumns = false; + THashMap<TString, ui32> GenIndexes; + TVector<TString> TmpWindowColumns; + TNodePtr SamplingRate; + TMatchRecognizeBuilderPtr MatchRecognizeBuilder; + }; + + template<> + inline TVector<TSourcePtr> CloneContainer<TSourcePtr>(const TVector<TSourcePtr>& args) { + TVector<TSourcePtr> cloneArgs; + cloneArgs.reserve(args.size()); + for (const auto& arg: args) { + cloneArgs.emplace_back(arg ? arg->CloneSource() : nullptr); + } + return cloneArgs; + } + + struct TJoinLinkSettings { + enum class EStrategy { + Default, + SortedMerge, + StreamLookup, + ForceMap, + ForceGrace + }; + EStrategy Strategy = EStrategy::Default; + bool Compact = false; + }; + + class IJoin: public ISource { + public: + virtual ~IJoin(); + + virtual IJoin* GetJoin(); + virtual TNodePtr BuildJoinKeys(TContext& ctx, const TVector<TDeferredAtom>& names) = 0; + virtual void SetupJoin(const TString& joinOp, TNodePtr joinExpr, const TJoinLinkSettings& linkSettings) = 0; + virtual const THashMap<TString, THashSet<TString>>& GetSameKeysMap() const = 0; + virtual TVector<TString> GetJoinLabels() const = 0; + + protected: + IJoin(TPosition pos); + }; + + class TSessionWindow final : public INode { + public: + TSessionWindow(TPosition pos, const TVector<TNodePtr>& args); + void MarkValid(); + TNodePtr BuildTraits(const TString& label) const; + private: + bool DoInit(TContext& ctx, ISource* src) override; + TAstNode* Translate(TContext&) const override; + void DoUpdateState() const override; + TNodePtr DoClone() const override; + TString GetOpName() const override; + + TVector<TNodePtr> Args; + TSourcePtr FakeSource; + TNodePtr Node; + bool Valid; + }; + + class THoppingWindow final : public INode { + public: + THoppingWindow(TPosition pos, const TVector<TNodePtr>& args); + void MarkValid(); + 
TNodePtr BuildTraits(const TString& label) const; + public: + TNodePtr Hop; + TNodePtr Interval; + private: + bool DoInit(TContext& ctx, ISource* src) override; + TAstNode* Translate(TContext&) const override; + void DoUpdateState() const override; + TNodePtr DoClone() const override; + TString GetOpName() const override; + TNodePtr ProcessIntervalParam(const TNodePtr& val) const; + + TVector<TNodePtr> Args; + TSourcePtr FakeSource; + TNodePtr Node; + bool Valid; + }; + + + // Implemented in join.cpp + TString NormalizeJoinOp(const TString& joinOp); + TSourcePtr BuildEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes); + + // Implemented in select.cpp + TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped); + TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex = -1); + TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos); + TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist = false); + TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources); + TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom = false, bool inSubquery = false); + TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList = false); + TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label = TString()); + TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label = TString()); + TSourcePtr BuildRefColumnSource(TPosition pos, const TString& partExpression); + TSourcePtr BuildUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings); + TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource); + + TNodePtr BuildOrderBy(TPosition pos, const TVector<TNodePtr>& keys, const TVector<bool>& order); 
+ TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take); + + + TSourcePtr BuildSelectCore( + TContext& ctx, + TPosition pos, + TSourcePtr source, + const TVector<TNodePtr>& groupByExpr, + const TVector<TNodePtr>& groupBy, + bool compactGroupBy, + const TString& groupBySuffix, + bool assumeSorted, + const TVector<TSortSpecificationPtr>& orderBy, + TNodePtr having, + TWinSpecs&& windowSpec, + TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec, + TVector<TNodePtr>&& terms, + bool distinct, + TVector<TNodePtr>&& without, + bool selectStream, + const TWriteSettings& settings, + TColumnsSets&& uniqueSets, + TColumnsSets&& distinctSets + ); + TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake); + + + enum class ReduceMode { + ByPartition, + ByAll, + }; + TSourcePtr BuildReduce(TPosition pos, ReduceMode mode, TSourcePtr source, TVector<TSortSpecificationPtr>&& orderBy, + TVector<TNodePtr>&& keys, TVector<TNodePtr>&& args, TNodePtr udf, TNodePtr having, const TWriteSettings& settings, + const TVector<TSortSpecificationPtr>& assumeOrderBy, bool listCall); + TSourcePtr BuildProcess(TPosition pos, TSourcePtr source, TNodePtr with, bool withExtFunction, TVector<TNodePtr>&& terms, bool listCall, + bool prcessStream, const TWriteSettings& settings, const TVector<TSortSpecificationPtr>& assumeOrderBy); + + TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery, TScopedStatePtr scoped); + + // Implemented in insert.cpp + TSourcePtr BuildWriteValues(TPosition pos, const TString& opertationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values); + TSourcePtr BuildWriteValues(TPosition pos, const TString& opertationHumanName, const TVector<TString>& columnsHint, TSourcePtr source); + TSourcePtr BuildUpdateValues(TPosition pos, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values); + + EWriteColumnMode ToWriteColumnsMode(ESQLWriteColumnMode 
sqlWriteColumnMode); + TNodePtr BuildEraseColumns(TPosition pos, const TVector<TString>& columns); + TNodePtr BuildIntoTableOptions(TPosition pos, const TVector<TString>& eraseColumns, const TTableHints& hints); + TNodePtr BuildWriteColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, EWriteColumnMode mode, TSourcePtr values, TNodePtr options = nullptr); + TNodePtr BuildUpdateColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr values, TSourcePtr source, TNodePtr options = nullptr); + TNodePtr BuildDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options = nullptr); + + // Implemented in query.cpp + TNodePtr BuildTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TViewDescription& view); + TNodePtr BuildTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& func, const TVector<TTableArg>& args); + TNodePtr BuildTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name); + TNodePtr BuildInputOptions(TPosition pos, const TTableHints& hints); + TNodePtr BuildInputTables(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped); + TNodePtr BuildCreateTable(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr source, TScopedStatePtr scoped); + TNodePtr BuildAlterTable(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped); + TNodePtr BuildDropTable(TPosition pos, const TTableRef& table, bool missingOk, ETableType tableType, TScopedStatePtr scoped); + TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options, + TScopedStatePtr scoped); + TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, 
TScopedStatePtr scoped); + TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster, + TNodePtr node, const TString& view = {}); + void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix = {}); + TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node, const TString& prefix = {}); + TString NormalizeTypeString(const TString& str); +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp new file mode 100644 index 00000000000..506b3950d54 --- /dev/null +++ b/yql/essentials/sql/v1/sql.cpp @@ -0,0 +1,247 @@ +#include "sql.h" +#include "sql_query.h" +#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h> +#include <yql/essentials/sql/v1/lexer/lexer.h> +#include <yql/essentials/sql/v1/proto_parser/proto_parser.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx) { + const google::protobuf::Descriptor* d = protoAst.GetDescriptor(); + if (d && d->name() != "TSQLv1ParserAST") { + ctx.Error() << "Invalid AST structure: " << d->name() << ", expected TSQLv1ParserAST"; + return nullptr; + } + TSqlQuery query(ctx, ctx.Settings.Mode, true); + TNodePtr node(query.Build(static_cast<const TSQLv1ParserAST&>(protoAst))); + try { + if (node && node->Init(ctx, nullptr)) { + return node->Translate(ctx); + } + } catch (const NProtoAST::TTooManyErrors&) { + // do not add error issue, no room for it + } + + return nullptr; +} + +TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& ast, TContext& ctx) { + TSqlQuery query(ctx, ctx.Settings.Mode, true); + TNodePtr node(query.Build(ast)); + try { + if (node && node->Init(ctx, nullptr)) { + return node->Translate(ctx); + } + } catch (const NProtoAST::TTooManyErrors&) { + // do not add 
error issue, no room for it + } + + return nullptr; +} + +void SqlASTToYqlImpl(NYql::TAstParseResult& res, const google::protobuf::Message& protoAst, + TContext& ctx) { + YQL_ENSURE(!ctx.Issues.Size()); + res.Root = SqlASTToYql(protoAst, ctx); + res.Pool = std::move(ctx.Pool); + if (!res.Root) { + if (ctx.Issues.Size()) { + ctx.IncrementMonCounter("sql_errors", "AstToYqlError"); + } else { + ctx.IncrementMonCounter("sql_errors", "AstToYqlSilentError"); + ctx.Error() << "Error occurred on parse SQL query, but no error is collected" << + ", please send this request over bug report into YQL interface or write on yql@ maillist"; + } + } else { + ctx.WarnUnusedHints(); + } +} + +void SqlASTsToYqlsImpl(NYql::TAstParseResult& res, const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& ast, TContext& ctx) { + res.Root = SqlASTsToYqls(ast, ctx); + res.Pool = std::move(ctx.Pool); + if (!res.Root) { + if (ctx.Issues.Size()) { + ctx.IncrementMonCounter("sql_errors", "AstToYqlError"); + } else { + ctx.IncrementMonCounter("sql_errors", "AstToYqlSilentError"); + ctx.Error() << "Error occurred on parse SQL query, but no error is collected" << + ", please send this request over bug report into YQL interface or write on yql@ maillist"; + } + } else { + ctx.WarnUnusedHints(); + } +} + +NYql::TAstParseResult SqlASTToYql(const google::protobuf::Message& protoAst, + const NSQLTranslation::TSQLHints& hints, + const NSQLTranslation::TTranslationSettings& settings) +{ + YQL_ENSURE(IsQueryMode(settings.Mode)); + TAstParseResult res; + TContext ctx(settings, hints, res.Issues); + SqlASTToYqlImpl(res, protoAst, ctx); + res.ActualSyntaxType = NYql::ESyntaxType::YQLv1; + return res; +} + +NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules) +{ + TAstParseResult res; + const TString queryName = "query"; + + NSQLTranslation::TSQLHints hints; + auto lexer = MakeLexer(settings.AnsiLexer, 
settings.Antlr4Parser); + YQL_ENSURE(lexer); + if (!CollectSqlHints(*lexer, query, queryName, settings.File, hints, res.Issues, settings.MaxErrors, settings.Antlr4Parser)) { + return res; + } + + TContext ctx(settings, hints, res.Issues); + NSQLTranslation::TErrorCollectorOverIssues collector(res.Issues, settings.MaxErrors, settings.File); + + google::protobuf::Message* ast(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena)); + if (ast) { + SqlASTToYqlImpl(res, *ast, ctx); + } else { + ctx.IncrementMonCounter("sql_errors", "AstError"); + } + if (warningRules) { + *warningRules = ctx.WarningPolicy.GetRules(); + ctx.WarningPolicy.Clear(); + } + res.ActualSyntaxType = NYql::ESyntaxType::YQLv1; + return res; +} + +bool NeedUseForAllStatements(const TRule_sql_stmt_core::AltCase& subquery) { + switch (subquery) { + case TRule_sql_stmt_core::kAltSqlStmtCore1: // pragma + case TRule_sql_stmt_core::kAltSqlStmtCore3: // named nodes + case TRule_sql_stmt_core::kAltSqlStmtCore6: // use + case TRule_sql_stmt_core::kAltSqlStmtCore12: // declare + case TRule_sql_stmt_core::kAltSqlStmtCore13: // import + case TRule_sql_stmt_core::kAltSqlStmtCore14: // export + case TRule_sql_stmt_core::kAltSqlStmtCore18: // define action or subquery + return true; + case TRule_sql_stmt_core::ALT_NOT_SET: + case TRule_sql_stmt_core::kAltSqlStmtCore2: // select + case TRule_sql_stmt_core::kAltSqlStmtCore4: // create table + case TRule_sql_stmt_core::kAltSqlStmtCore5: // drop table + case TRule_sql_stmt_core::kAltSqlStmtCore7: // into table + case TRule_sql_stmt_core::kAltSqlStmtCore8: // commit + case TRule_sql_stmt_core::kAltSqlStmtCore9: // update + case TRule_sql_stmt_core::kAltSqlStmtCore10: // delete + case TRule_sql_stmt_core::kAltSqlStmtCore11: // rollback + case TRule_sql_stmt_core::kAltSqlStmtCore15: // alter table + case TRule_sql_stmt_core::kAltSqlStmtCore16: // alter external table + case 
TRule_sql_stmt_core::kAltSqlStmtCore17: // do + case TRule_sql_stmt_core::kAltSqlStmtCore19: // if + case TRule_sql_stmt_core::kAltSqlStmtCore20: // for + case TRule_sql_stmt_core::kAltSqlStmtCore21: // values + case TRule_sql_stmt_core::kAltSqlStmtCore22: // create user + case TRule_sql_stmt_core::kAltSqlStmtCore23: // alter user + case TRule_sql_stmt_core::kAltSqlStmtCore24: // create group + case TRule_sql_stmt_core::kAltSqlStmtCore25: // alter group + case TRule_sql_stmt_core::kAltSqlStmtCore26: // drop role + case TRule_sql_stmt_core::kAltSqlStmtCore27: // create object + case TRule_sql_stmt_core::kAltSqlStmtCore28: // alter object + case TRule_sql_stmt_core::kAltSqlStmtCore29: // drop object + case TRule_sql_stmt_core::kAltSqlStmtCore30: // create external data source + case TRule_sql_stmt_core::kAltSqlStmtCore31: // alter external data source + case TRule_sql_stmt_core::kAltSqlStmtCore32: // drop external data source + case TRule_sql_stmt_core::kAltSqlStmtCore33: // create replication + case TRule_sql_stmt_core::kAltSqlStmtCore34: // drop replication + case TRule_sql_stmt_core::kAltSqlStmtCore35: // create topic + case TRule_sql_stmt_core::kAltSqlStmtCore36: // alter topic + case TRule_sql_stmt_core::kAltSqlStmtCore37: // drop topic + case TRule_sql_stmt_core::kAltSqlStmtCore38: // grant permissions + case TRule_sql_stmt_core::kAltSqlStmtCore39: // revoke permissions + case TRule_sql_stmt_core::kAltSqlStmtCore40: // alter table store + case TRule_sql_stmt_core::kAltSqlStmtCore41: // upsert object + case TRule_sql_stmt_core::kAltSqlStmtCore42: // create view + case TRule_sql_stmt_core::kAltSqlStmtCore43: // drop view + case TRule_sql_stmt_core::kAltSqlStmtCore44: // alter replication + case TRule_sql_stmt_core::kAltSqlStmtCore45: // create resource pool + case TRule_sql_stmt_core::kAltSqlStmtCore46: // alter resource pool + case TRule_sql_stmt_core::kAltSqlStmtCore47: // drop resource pool + case TRule_sql_stmt_core::kAltSqlStmtCore48: // create backup 
collection + case TRule_sql_stmt_core::kAltSqlStmtCore49: // alter backup collection + case TRule_sql_stmt_core::kAltSqlStmtCore50: // drop backup collection + case TRule_sql_stmt_core::kAltSqlStmtCore51: // analyze + case TRule_sql_stmt_core::kAltSqlStmtCore52: // create resource pool classifier + case TRule_sql_stmt_core::kAltSqlStmtCore53: // alter resource pool classifier + case TRule_sql_stmt_core::kAltSqlStmtCore54: // drop resource pool classifier + case TRule_sql_stmt_core::kAltSqlStmtCore55: // backup + case TRule_sql_stmt_core::kAltSqlStmtCore56: // restore + return false; + } +} + +TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, + TVector<NYql::TStmtParseInfo>* stmtParseInfo) +{ + TVector<TAstParseResult> result; + const TString queryName = "query"; + TIssues issues; + + NSQLTranslation::TSQLHints hints; + auto lexer = MakeLexer(settings.AnsiLexer, settings.Antlr4Parser); + YQL_ENSURE(lexer); + if (!CollectSqlHints(*lexer, query, queryName, settings.File, hints, issues, settings.MaxErrors, settings.Antlr4Parser)) { + return result; + } + + TContext ctx(settings, hints, issues); + NSQLTranslation::TErrorCollectorOverIssues collector(issues, settings.MaxErrors, settings.File); + + google::protobuf::Message* astProto(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena)); + if (astProto) { + auto ast = static_cast<const TSQLv1ParserAST&>(*astProto); + const auto& query = ast.GetRule_sql_query(); + if (query.Alt_case() == NSQLv1Generated::TRule_sql_query::kAltSqlQuery1) { + std::vector<::NSQLv1Generated::TRule_sql_stmt_core> commonStates; + std::vector<::NSQLv1Generated::TRule_sql_stmt_core> statementResult; + const auto& statements = query.GetAlt_sql_query1().GetRule_sql_stmt_list1(); + if (NeedUseForAllStatements(statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2().Alt_case())) { 
+ commonStates.push_back(statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2()); + } else { + TContext ctx(settings, hints, issues); + result.emplace_back(); + if (stmtParseInfo) { + stmtParseInfo->push_back({}); + } + SqlASTsToYqlsImpl(result.back(), {statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2()}, ctx); + result.back().Issues = std::move(issues); + issues.Clear(); + } + for (auto block: statements.GetBlock3()) { + if (NeedUseForAllStatements(block.GetRule_sql_stmt2().GetRule_sql_stmt_core2().Alt_case())) { + commonStates.push_back(block.GetRule_sql_stmt2().GetRule_sql_stmt_core2()); + continue; + } + TContext ctx(settings, hints, issues); + result.emplace_back(); + if (stmtParseInfo) { + stmtParseInfo->push_back({}); + } + statementResult = commonStates; + statementResult.push_back(block.GetRule_sql_stmt2().GetRule_sql_stmt_core2()); + SqlASTsToYqlsImpl(result.back(), statementResult, ctx); + result.back().Issues = std::move(issues); + issues.Clear(); + } + } + } else { + ctx.IncrementMonCounter("sql_errors", "AstError"); + } + if (warningRules) { + *warningRules = ctx.WarningPolicy.GetRules(); + ctx.WarningPolicy.Clear(); + } + return result; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql.h b/yql/essentials/sql/v1/sql.h new file mode 100644 index 00000000000..0a12c45d308 --- /dev/null +++ b/yql/essentials/sql/v1/sql.h @@ -0,0 +1,22 @@ +#pragma once + +#include <yql/essentials/ast/yql_ast.h> +#include <yql/essentials/parser/lexer_common/hints.h> +#include <yql/essentials/parser/proto_ast/common.h> +#include <yql/essentials/public/issue/yql_warning.h> +#include <yql/essentials/public/issue/yql_issue_manager.h> +#include <yql/essentials/sql/settings/translation_settings.h> + +#include <google/protobuf/message.h> + +namespace NSQLTranslation { + struct TTranslationSettings; +} + +namespace NSQLTranslationV1 { + + NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, 
NYql::TWarningRules* warningRules = nullptr); + NYql::TAstParseResult SqlASTToYql(const google::protobuf::Message& protoAst, const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings); + TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo = nullptr); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_call_expr.cpp b/yql/essentials/sql/v1/sql_call_expr.cpp new file mode 100644 index 00000000000..1871c1bbc0b --- /dev/null +++ b/yql/essentials/sql/v1/sql_call_expr.cpp @@ -0,0 +1,444 @@ +#include "sql_call_expr.h" +#include "sql_expression.h" + +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> + +#include <yql/essentials/minikql/mkql_program_builder.h> + +namespace NSQLTranslationV1 { + +TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig); + +using namespace NSQLv1Generated; + +static bool ValidateForCounters(const TString& input) { + for (auto c : input) { + if (!(IsAlnum(c) || c == '_')) { + return false; + } + } + return true; +} + +TNodePtr TSqlCallExpr::BuildUdf(bool forReduce) { + auto result = Node ? Node : BuildCallable(Pos, Module, Func, Args, forReduce); + if (to_lower(Module) == "tensorflow" && Func == "RunBatch") { + if (Args.size() > 2) { + Args.erase(Args.begin() + 2); + } else { + Ctx.Error(Pos) << "Excepted >= 3 arguments, but got: " << Args.size(); + return nullptr; + } + } + return result; +} + +TNodePtr TSqlCallExpr::BuildCall() { + TVector<TNodePtr> args; + bool warnOnYqlNameSpace = true; + + TUdfNode* udf_node = Node ? 
Node->GetUdfNode() : nullptr; + if (udf_node) { + if (!udf_node->DoInit(Ctx, nullptr)) { + return nullptr; + } + TNodePtr positional_args = BuildTuple(Pos, PositionalArgs); + TNodePtr positional = positional_args->Y("TypeOf", positional_args); + TNodePtr named_args = BuildStructure(Pos, NamedArgs); + TNodePtr named = named_args->Y("TypeOf", named_args); + + TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()}); + + return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(), + args, positional_args, named_args, custom_user_type, + udf_node->GetTypeConfig(), udf_node->GetRunConfig()); + } + + if (Node && !Node->FuncName()) { + Module = "YQL"; + Func = NamedArgs.empty() ? "Apply" : "NamedApply"; + warnOnYqlNameSpace = false; + args.push_back(Node); + } + + if (Node && Node->FuncName()) { + Module = Node->ModuleName() ? *Node->ModuleName() : "YQL"; + Func = *Node->FuncName(); + } + bool mustUseNamed = !NamedArgs.empty(); + if (mustUseNamed) { + if (Node && !Node->FuncName()) { + mustUseNamed = false; + } + args.emplace_back(BuildTuple(Pos, PositionalArgs)); + args.emplace_back(BuildStructure(Pos, NamedArgs)); + } else if (IsExternalCall) { + Func = "SqlExternalFunction"; + if (Args.size() < 2 || Args.size() > 3) { + Ctx.Error(Pos) << "EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: " << Args.size(); + return nullptr; + } + + if (Args.size() == 3) { + args.insert(args.end(), Args.begin(), Args.end() - 1); + Args.erase(Args.begin(), Args.end() - 1); + } else { + args.insert(args.end(), Args.begin(), Args.end()); + Args.erase(Args.begin(), Args.end()); + } + auto configNode = new TExternalFunctionConfig(Pos, CallConfig); + auto configList = new TAstListNodeImpl(Pos, { new TAstAtomNodeImpl(Pos, "quote", 0), configNode }); + args.push_back(configList); + } else { + args.insert(args.end(), Args.begin(), Args.end()); + } + + auto result = BuildBuiltinFunc(Ctx, Pos, Func, args, Module, 
AggMode, &mustUseNamed, warnOnYqlNameSpace); + if (mustUseNamed) { + Error() << "Named args are used for call, but unsupported by function: " << Func; + return nullptr; + } + + if (WindowName) { + result = BuildCalcOverWindow(Pos, WindowName, result); + } + + return result; +} + +bool TSqlCallExpr::Init(const TRule_value_constructor& node) { + switch (node.Alt_case()) { + case TRule_value_constructor::kAltValueConstructor1: { + auto& ctor = node.GetAlt_value_constructor1(); + Func = "Variant"; + TSqlExpression expr(Ctx, Mode); + if (!Expr(expr, Args, ctor.GetRule_expr3())) { + return false; + } + if (!Expr(expr, Args, ctor.GetRule_expr5())) { + return false; + } + if (!Expr(expr, Args, ctor.GetRule_expr7())) { + return false; + } + break; + } + case TRule_value_constructor::kAltValueConstructor2: { + auto& ctor = node.GetAlt_value_constructor2(); + Func = "Enum"; + TSqlExpression expr(Ctx, Mode); + if (!Expr(expr, Args, ctor.GetRule_expr3())) { + return false; + } + if (!Expr(expr, Args, ctor.GetRule_expr5())) { + return false; + } + break; + } + case TRule_value_constructor::kAltValueConstructor3: { + auto& ctor = node.GetAlt_value_constructor3(); + Func = "Callable"; + TSqlExpression expr(Ctx, Mode); + if (!Expr(expr, Args, ctor.GetRule_expr3())) { + return false; + } + if (!Expr(expr, Args, ctor.GetRule_expr5())) { + return false; + } + break; + } + case TRule_value_constructor::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + PositionalArgs = Args; + return true; +} + +bool TSqlCallExpr::ExtractCallParam(const TRule_external_call_param& node) { + TString paramName = Id(node.GetRule_an_id1(), *this); + paramName = to_lower(paramName); + + if (CallConfig.contains(paramName)) { + Ctx.Error() << "WITH " << to_upper(paramName).Quote() + << " clause should be specified only once"; + return false; + } + + const bool optimizeForParam = paramName == "optimize_for"; + const auto columnRefState = optimizeForParam ? 
EColumnRefState::AsStringLiteral : EColumnRefState::Deny; + + TColumnRefScope scope(Ctx, columnRefState); + if (optimizeForParam) { + scope.SetNoColumnErrContext("in external call params"); + } + + TSqlExpression expression(Ctx, Mode); + auto value = expression.Build(node.GetRule_expr3()); + if (value && optimizeForParam) { + TDeferredAtom atom; + MakeTableFromExpression(Ctx.Pos(), Ctx, value, atom); + value = new TCallNodeImpl(Ctx.Pos(), "String", { atom.Build() }); + } + + if (!value) { + return false; + } + + CallConfig[paramName] = value; + return true; +} + +bool TSqlCallExpr::ConfigureExternalCall(const TRule_external_call_settings& node) { + bool success = ExtractCallParam(node.GetRule_external_call_param1()); + for (auto& block: node.GetBlock2()) { + success = ExtractCallParam(block.GetRule_external_call_param2()) && success; + } + + return success; +} + +bool TSqlCallExpr::Init(const TRule_using_call_expr& node) { + // using_call_expr: ((an_id_or_type NAMESPACE an_id_or_type) | an_id_expr | bind_parameter | (EXTERNAL FUNCTION)) invoke_expr; + const auto& block = node.GetBlock1(); + switch (block.Alt_case()) { + case TRule_using_call_expr::TBlock1::kAlt1: { + auto& subblock = block.GetAlt1(); + Module = Id(subblock.GetRule_an_id_or_type1(), *this); + Func = Id(subblock.GetRule_an_id_or_type3(), *this); + break; + } + case TRule_using_call_expr::TBlock1::kAlt2: { + Func = Id(block.GetAlt2().GetRule_an_id_expr1(), *this); + break; + } + case TRule_using_call_expr::TBlock1::kAlt3: { + TString bindName; + if (!NamedNodeImpl(block.GetAlt3().GetRule_bind_parameter1(), bindName, *this)) { + return false; + } + Node = GetNamedNode(bindName); + if (!Node) { + return false; + } + break; + } + case TRule_using_call_expr::TBlock1::kAlt4: { + IsExternalCall = true; + break; + } + case TRule_using_call_expr::TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + YQL_ENSURE(!DistinctAllowed); + UsingCallExpr = true; + 
TColumnRefScope scope(Ctx, EColumnRefState::Allow); + return Init(node.GetRule_invoke_expr2()); +} + +void TSqlCallExpr::InitName(const TString& name) { + Module = ""; + Func = name; +} + +void TSqlCallExpr::InitExpr(const TNodePtr& expr) { + Node = expr; +} + +bool TSqlCallExpr::FillArg(const TString& module, const TString& func, size_t& idx, const TRule_named_expr& node) { + const bool isNamed = node.HasBlock2(); + + TMaybe<EColumnRefState> status; + // TODO: support named args + if (!isNamed) { + status = GetFunctionArgColumnStatus(Ctx, module, func, idx); + } + + TNodePtr expr; + if (status) { + TColumnRefScope scope(Ctx, *status, /* isTopLevel = */ false); + expr = NamedExpr(node); + } else { + expr = NamedExpr(node); + } + + if (!expr) { + return false; + } + + Args.emplace_back(std::move(expr)); + if (!isNamed) { + ++idx; + } + return true; +} + +bool TSqlCallExpr::FillArgs(const TRule_named_expr_list& node) { + TString module = Module; + TString func = Func; + if (Node && Node->FuncName()) { + module = Node->ModuleName() ? *Node->ModuleName() : "YQL"; + func = *Node->FuncName(); + } + + size_t idx = 0; + if (!FillArg(module, func, idx, node.GetRule_named_expr1())) { + return false; + } + + for (auto& b: node.GetBlock2()) { + if (!FillArg(module, func, idx, b.GetRule_named_expr2())) { + return false; + } + } + + return true; +} + +bool TSqlCallExpr::Init(const TRule_invoke_expr& node) { + // invoke_expr: LPAREN (opt_set_quantifier named_expr_list COMMA? | ASTERISK)? RPAREN invoke_expr_tail; + // invoke_expr_tail: + // (null_treatment | filter_clause)? (OVER window_name_or_specification)? 
+ // ; + Pos = Ctx.Pos(); + if (node.HasBlock2()) { + switch (node.GetBlock2().Alt_case()) { + case TRule_invoke_expr::TBlock2::kAlt1: { + const auto& alt = node.GetBlock2().GetAlt1(); + TPosition distinctPos; + if (IsDistinctOptSet(alt.GetRule_opt_set_quantifier1(), distinctPos)) { + if (!DistinctAllowed) { + if (UsingCallExpr) { + Ctx.Error(distinctPos) << "DISTINCT can not be used in PROCESS/REDUCE"; + } else { + Ctx.Error(distinctPos) << "DISTINCT can only be used in aggregation functions"; + } + return false; + } + YQL_ENSURE(AggMode == EAggregateMode::Normal); + AggMode = EAggregateMode::Distinct; + Ctx.IncrementMonCounter("sql_features", "DistinctInCallExpr"); + } + if (!FillArgs(alt.GetRule_named_expr_list2())) { + return false; + } + for (const auto& arg : Args) { + if (arg->GetLabel()) { + NamedArgs.push_back(arg); + } + else { + PositionalArgs.push_back(arg); + if (!NamedArgs.empty()) { + Ctx.Error(arg->GetPos()) << "Unnamed arguments can not follow after named one"; + return false; + } + } + } + break; + } + case TRule_invoke_expr::TBlock2::kAlt2: + if (IsExternalCall) { + Ctx.Error() << "You should set EXTERNAL FUNCTION type. Example: EXTERNAL FUNCTION('YANDEX-CLOUD', ...)"; + } else { + Args.push_back(new TAsteriskNode(Pos)); + } + break; + case TRule_invoke_expr::TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + } + + const auto& tail = node.GetRule_invoke_expr_tail4(); + + if (tail.HasBlock1()) { + if (IsExternalCall) { + Ctx.Error() << "Additional clause after EXTERNAL FUNCTION(...) 
is not supported"; + return false; + } + + switch (tail.GetBlock1().Alt_case()) { + case TRule_invoke_expr_tail::TBlock1::kAlt1: { + if (!tail.HasBlock2()) { + Ctx.Error() << "RESPECT/IGNORE NULLS can only be used with window functions"; + return false; + } + const auto& alt = tail.GetBlock1().GetAlt1(); + if (alt.GetRule_null_treatment1().Alt_case() == TRule_null_treatment::kAltNullTreatment2) { + SetIgnoreNulls(); + } + break; + } + case TRule_invoke_expr_tail::TBlock1::kAlt2: { + Ctx.Error() << "FILTER clause is not supported yet"; + return false; + } + case TRule_invoke_expr_tail::TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + } + + if (tail.HasBlock2()) { + if (Ctx.DistinctOverWindow) { + AggMode == EAggregateMode::Distinct ? SetOverWindowDistinct() : SetOverWindow(); + } else { + if (AggMode == EAggregateMode::Distinct) { + Ctx.Error() << "DISTINCT is not yet supported in window functions"; + return false; + } + SetOverWindow(); + } + auto winRule = tail.GetBlock2().GetRule_window_name_or_specification2(); + switch (winRule.Alt_case()) { + case TRule_window_name_or_specification::kAltWindowNameOrSpecification1: { + WindowName = Id(winRule.GetAlt_window_name_or_specification1().GetRule_window_name1().GetRule_an_id_window1(), *this); + break; + } + case TRule_window_name_or_specification::kAltWindowNameOrSpecification2: { + if (!Ctx.WinSpecsScopes) { + auto pos = Ctx.TokenPosition(tail.GetBlock2().GetToken1()); + Ctx.Error(pos) << "Window and aggregation functions are not allowed in this context"; + return false; + } + + TWindowSpecificationPtr spec = WindowSpecification( + winRule.GetAlt_window_name_or_specification2().GetRule_window_specification1().GetRule_window_specification_details2()); + if (!spec) { + return false; + } + + WindowName = Ctx.MakeName("_yql_anonymous_window"); + TWinSpecs& specs = Ctx.WinSpecsScopes.back(); + YQL_ENSURE(!specs.contains(WindowName)); + specs[WindowName] = spec; + 
break; + } + case TRule_window_name_or_specification::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + Ctx.IncrementMonCounter("sql_features", "WindowFunctionOver"); + } + + return true; +} + +void TSqlCallExpr::IncCounters() { + if (Node) { + Ctx.IncrementMonCounter("sql_features", "NamedNodeUseApply"); + } else if (!Module.empty()) { + if (ValidateForCounters(Module)) { + Ctx.IncrementMonCounter("udf_modules", Module); + Ctx.IncrementMonCounter("sql_features", "CallUdf"); + if (ValidateForCounters(Func)) { + auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(Module); + if (scriptType == NKikimr::NMiniKQL::EScriptType::Unknown) { + Ctx.IncrementMonCounter("udf_functions", Module + "." + Func); + } + } + } + } else if (ValidateForCounters(Func)) { + Ctx.IncrementMonCounter("sql_builtins", Func); + Ctx.IncrementMonCounter("sql_features", "CallBuiltin"); + } +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_call_expr.h b/yql/essentials/sql/v1/sql_call_expr.h new file mode 100644 index 00000000000..9b9d39b81a8 --- /dev/null +++ b/yql/essentials/sql/v1/sql_call_expr.h @@ -0,0 +1,98 @@ +#pragma once + +#include "sql_translation.h" + +namespace NSQLTranslationV1 { + +TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig); + +using namespace NSQLv1Generated; + +class TSqlCallExpr: public TSqlTranslation { +public: + TSqlCallExpr(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlTranslation(ctx, mode) + { + } + + TSqlCallExpr(const TSqlCallExpr& call, const TVector<TNodePtr>& args) + : TSqlTranslation(call.Ctx, call.Mode) + , Pos(call.Pos) + , Func(call.Func) + , Module(call.Module) + , Node(call.Node) + , Args(args) + , AggMode(call.AggMode) + , DistinctAllowed(call.DistinctAllowed) + , 
UsingCallExpr(call.UsingCallExpr) + , IsExternalCall(call.IsExternalCall) + , CallConfig(call.CallConfig) + { + } + + void AllowDistinct() { + DistinctAllowed = true; + } + + void InitName(const TString& name); + void InitExpr(const TNodePtr& expr); + + bool Init(const TRule_using_call_expr& node); + bool Init(const TRule_value_constructor& node); + bool Init(const TRule_invoke_expr& node); + bool ConfigureExternalCall(const TRule_external_call_settings& node); + void IncCounters(); + + TNodePtr BuildUdf(bool forReduce); + + TNodePtr BuildCall(); + + TPosition GetPos() const { + return Pos; + } + + const TVector<TNodePtr>& GetArgs() const { + return Args; + } + + void SetOverWindow() { + YQL_ENSURE(AggMode == EAggregateMode::Normal); + AggMode = EAggregateMode::OverWindow; + } + + void SetOverWindowDistinct() { + YQL_ENSURE(AggMode == EAggregateMode::Distinct); + AggMode = EAggregateMode::OverWindowDistinct; + } + + void SetIgnoreNulls() { + Func += "_IgnoreNulls"; + } + + bool IsExternal() { + return IsExternalCall; + } + +private: + bool ExtractCallParam(const TRule_external_call_param& node); + bool FillArg(const TString& module, const TString& func, size_t& idx, const TRule_named_expr& node); + bool FillArgs(const TRule_named_expr_list& node); + +private: + TPosition Pos; + TString Func; + TString Module; + TNodePtr Node; + TVector<TNodePtr> Args; + TVector<TNodePtr> PositionalArgs; + TVector<TNodePtr> NamedArgs; + EAggregateMode AggMode = EAggregateMode::Normal; + TString WindowName; + bool DistinctAllowed = false; + bool UsingCallExpr = false; + bool IsExternalCall = false; + TFunctionConfig CallConfig; +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_call_param.h b/yql/essentials/sql/v1/sql_call_param.h new file mode 100644 index 00000000000..57495afd88f --- /dev/null +++ b/yql/essentials/sql/v1/sql_call_param.h @@ -0,0 +1,20 @@ +#pragma once + +#include <util/system/types.h> + +namespace NSQLTranslationV1 { + 
// Identifiers of named call parameters; each enumerator carries its upper-case
// textual name in a block comment and a note on how its value is treated
// ("as is" or "evaluate atom").
// NOTE(review): presumably these are the WITH parameters of an EXTERNAL FUNCTION
// call, and the quoted names look like serialization names that an enum code
// generator may read from the comments - confirm before editing them.
enum class ESqlCallParam: ui32 {
    InputType /* "INPUT_TYPE" */, // as is
    OutputType /* "OUTPUT_TYPE" */, // as is
    Concurrency /* "CONCURRENCY" */, // as is
    BatchSize /* "BATCH_SIZE" */, // as is
    OptimizeFor /* "OPTIMIZE_FOR" */, // evaluate atom
    Connection /* "CONNECTION" */, // evaluate atom
    Init /* "INIT" */, // as is
};
} + } + +TNodePtr TSqlExpression::SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail) { + // mul_subexpr: con_subexpr (DOUBLE_PIPE con_subexpr)*; + auto getNode = [](const TRule_mul_subexpr::TBlock2& b) -> const TRule_con_subexpr& { return b.GetRule_con_subexpr2(); }; + return BinOper("Concat", node.GetRule_con_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); +} + +TNodePtr TSqlExpression::SubExpr(const TRule_add_subexpr& node, const TTrailingQuestions& tail) { + // add_subexpr: mul_subexpr ((ASTERISK | SLASH | PERCENT) mul_subexpr)*; + auto getNode = [](const TRule_add_subexpr::TBlock2& b) -> const TRule_mul_subexpr& { return b.GetRule_mul_subexpr2(); }; + return BinOpList(node.GetRule_mul_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); +} + +TNodePtr TSqlExpression::SubExpr(const TRule_bit_subexpr& node, const TTrailingQuestions& tail) { + // bit_subexpr: add_subexpr ((PLUS | MINUS) add_subexpr)*; + auto getNode = [](const TRule_bit_subexpr::TBlock2& b) -> const TRule_add_subexpr& { return b.GetRule_add_subexpr2(); }; + return BinOpList(node.GetRule_add_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); + } + +TNodePtr TSqlExpression::SubExpr(const TRule_neq_subexpr& node, const TTrailingQuestions& tailExternal) { + //neq_subexpr: bit_subexpr ((SHIFT_LEFT | shift_right | ROT_LEFT | rot_right | AMPERSAND | PIPE | CARET) bit_subexpr)* + // // trailing QUESTIONS are used in optional simple types (String?) and optional lambda args: ($x, $y?) 
-> ($x) + // ((double_question neq_subexpr) => double_question neq_subexpr | QUESTION+)?; + YQL_ENSURE(tailExternal.Count == 0); + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock3(); + TTrailingQuestions tail; + if (node.HasBlock3() && node.GetBlock3().Alt_case() == TRule_neq_subexpr::TBlock3::kAlt2) { + auto& questions = node.GetBlock3().GetAlt2(); + tail.Count = questions.GetBlock1().size(); + tail.Pos = Ctx.TokenPosition(questions.GetBlock1().begin()->GetToken1()); + YQL_ENSURE(tail.Count > 0); + } + + auto getNode = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); }; + auto result = BinOpList(node.GetRule_bit_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); + if (!result) { + return {}; + } + if (node.HasBlock3()) { + auto& block = node.GetBlock3(); + if (block.Alt_case() == TRule_neq_subexpr::TBlock3::kAlt1) { + TSqlExpression altExpr(Ctx, Mode); + auto altResult = SubExpr(block.GetAlt1().GetRule_neq_subexpr2(), {}); + if (!altResult) { + return {}; + } + const TVector<TNodePtr> args({result, altResult}); + Token(block.GetAlt1().GetRule_double_question1().GetToken1()); + result = BuildBuiltinFunc(Ctx, Ctx.Pos(), "Coalesce", args); + } + } + return result; + } + + TNodePtr TSqlExpression::SubExpr(const TRule_eq_subexpr& node, const TTrailingQuestions& tail) { + // eq_subexpr: neq_subexpr ((LESS | LESS_OR_EQ | GREATER | GREATER_OR_EQ) neq_subexpr)*; + auto getNode = [](const TRule_eq_subexpr::TBlock2& b) -> const TRule_neq_subexpr& { return b.GetRule_neq_subexpr2(); }; + return BinOpList(node.GetRule_neq_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); + } + + TNodePtr TSqlExpression::SubExpr(const TRule_or_subexpr& node, const TTrailingQuestions& tail) { + // or_subexpr: and_subexpr (AND and_subexpr)*; + auto getNode = [](const TRule_or_subexpr::TBlock2& b) -> const TRule_and_subexpr& { return b.GetRule_and_subexpr2(); }; + 
return BinOper("And", node.GetRule_and_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); +} + +TNodePtr TSqlExpression::SubExpr(const TRule_and_subexpr& node, const TTrailingQuestions& tail) { + // and_subexpr: xor_subexpr (XOR xor_subexpr)*; + auto getNode = [](const TRule_and_subexpr::TBlock2& b) -> const TRule_xor_subexpr& { return b.GetRule_xor_subexpr2(); }; + return BinOper("Xor", node.GetRule_xor_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail); +} + + +bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) { + const auto id = IdEx(node.GetRule_an_id1(), ctx); + if (alter) { + // currently we don't support alter settings + ctx.Error() << to_upper(id.Name) << " alter is not supported"; + return false; + } + + const auto& setting = node.GetRule_changefeed_setting_value3(); + auto exprNode = ctx.Build(setting.GetRule_expr1()); + + if (!exprNode) { + ctx.Context().Error(id.Pos) << "Invalid changefeed setting: " << id.Name; + return false; + } + + if (to_lower(id.Name) == "sink_type") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") { + ctx.Context().Error() << "Literal of String type is expected for " << id.Name; + return false; + } + + const auto value = exprNode->GetLiteralValue(); + if (to_lower(value) == "local") { + settings.SinkSettings = TChangefeedSettings::TLocalSinkSettings(); + } else { + ctx.Context().Error() << "Unknown changefeed sink type: " << value; + return false; + } + } else if (to_lower(id.Name) == "mode") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") { + ctx.Context().Error() << "Literal of String type is expected for " << id.Name; + return false; + } + settings.Mode = exprNode; + } else if (to_lower(id.Name) == "format") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") { + ctx.Context().Error() << "Literal of String type is 
expected for " << id.Name; + return false; + } + settings.Format = exprNode; + } else if (to_lower(id.Name) == "initial_scan") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") { + ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name; + return false; + } + settings.InitialScan = exprNode; + } else if (to_lower(id.Name) == "virtual_timestamps") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") { + ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name; + return false; + } + settings.VirtualTimestamps = exprNode; + } else if (to_lower(id.Name) == "resolved_timestamps") { + if (exprNode->GetOpName() != "Interval") { + ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name; + return false; + } + settings.ResolvedTimestamps = exprNode; + } else if (to_lower(id.Name) == "retention_period") { + if (exprNode->GetOpName() != "Interval") { + ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name; + return false; + } + settings.RetentionPeriod = exprNode; + } else if (to_lower(id.Name) == "topic_auto_partitioning") { + auto v = to_lower(exprNode->GetLiteralValue()); + if (v != "enabled" && v != "disabled") { + ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name; + } + settings.TopicAutoPartitioning = exprNode; + } else if (to_lower(id.Name) == "topic_max_active_partitions") { + if (!exprNode->IsIntegerLiteral()) { + ctx.Context().Error() << "Literal of integer type is expected for " << id.Name; + return false; + } + settings.TopicMaxActivePartitions = exprNode; + } else if (to_lower(id.Name) == "topic_min_active_partitions") { + if (!exprNode->IsIntegerLiteral()) { + ctx.Context().Error() << "Literal of integer type is expected for " << id.Name; + return false; + } + settings.TopicPartitions = exprNode; + } else if (to_lower(id.Name) == "aws_region") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != 
namespace {
    // Returns true when `literal` contains neither an alphabetic character nor
    // any byte with the high bit set (i.e. a byte outside the 7-bit ASCII range).
    // An empty literal trivially satisfies the condition.
    bool WithoutAlpha(const std::string_view &literal) {
        return std::none_of(literal.cbegin(), literal.cend(), [](char c) {
            // The high-bit test goes first and std::isalpha gets an unsigned char value:
            // calling it with a negative char is undefined behaviour.
            return (c & '\x80') || std::isalpha(static_cast<unsigned char>(c));
        });
    }
}
= 10; + if (strLen > 2 && str[0] == '0') { + const auto formatChar = str[1]; + if (formatChar == 'x') { + base = 16; + } else if (formatChar == 'o') { + base = 8; + } else if (formatChar == 'b') { + base = 2; + } + } + if (strLen > 1) { + auto iter = str.cend() - 1; + if (*iter == 'l' || *iter == 's' || *iter == 't' || *iter == 's' || *iter == 'i' || *iter == 'b' || *iter == 'n') { + --iter; + } + if (*iter == 'u' || *iter == 'p') { + --iter; + } + suffix = TString(++iter, str.cend()); + } + value = 0; + const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size()); + for (const char& cur: digString) { + const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)]; + if (curDigit >= base) { + ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur << + "' is out of base: " << base; + return false; + } + + ui64 curValue = value; + value *= base; + bool overflow = ((value / base) != curValue); + if (!overflow) { + curValue = value; + value += curDigit; + overflow = value < curValue; + } + + if (overflow) { + ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", number limit overflow"; + return false; + } + } + return true; +} + +TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node) { + const TString intergerString = ctx.Token(node.GetToken1()); + if (to_lower(intergerString).EndsWith("pn")) { + // TODO: add validation + return new TLiteralNode(ctx.Pos(), "PgNumeric", intergerString.substr(0, intergerString.size() - 2)); + } + + ui64 value; + TString suffix; + if (!ParseNumbers(ctx, intergerString, value, suffix)) { + return {}; + } + + const bool noSpaceForInt32 = value >> 31; + const bool noSpaceForInt64 = value >> 63; + if (suffix == "") { + bool implicitType = true; + if (noSpaceForInt64) { + return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value), implicitType); + } else if (noSpaceForInt32) { + return new TLiteralNumberNode<i64>(ctx.Pos(), 
"Int64", ToString(value), implicitType); + } + return new TLiteralNumberNode<i32>(ctx.Pos(), "Int32", ToString(value), implicitType); + } else if (suffix == "p") { + bool implicitType = true; + if (noSpaceForInt64) { + ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", 64 bit signed integer overflow"; + return {}; + } else if (noSpaceForInt32) { + return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", ToString(value), implicitType); + } + return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", ToString(value), implicitType); + } else if (suffix == "u") { + return new TLiteralNumberNode<ui32>(ctx.Pos(), "Uint32", ToString(value)); + } else if (suffix == "ul") { + return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value)); + } else if (suffix == "ut") { + return new TLiteralNumberNode<ui8>(ctx.Pos(), "Uint8", ToString(value)); + } else if (suffix == "t") { + return new TLiteralNumberNode<i8>(ctx.Pos(), "Int8", ToString(value)); + } else if (suffix == "l") { + return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value)); + } else if (suffix == "us") { + return new TLiteralNumberNode<ui16>(ctx.Pos(), "Uint16", ToString(value)); + } else if (suffix == "s") { + return new TLiteralNumberNode<i16>(ctx.Pos(), "Int16", ToString(value)); + } else if (suffix == "ps") { + return new TLiteralNumberNode<i16>(ctx.Pos(), "PgInt2", ToString(value)); + } else if (suffix == "pi") { + return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", ToString(value)); + } else if (suffix == "pb") { + return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", ToString(value)); + } else { + ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", invalid suffix: " << suffix; + return {}; + } +} + +TNodePtr LiteralReal(TContext& ctx, const TRule_real& node) { + const TString value(ctx.Token(node.GetToken1())); + YQL_ENSURE(!value.empty()); + auto lower = to_lower(value); + if (lower.EndsWith("f")) { + return 
new TLiteralNumberNode<float>(ctx.Pos(), "Float", value.substr(0, value.size()-1)); + } else if (lower.EndsWith("p")) { + return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-1)); + } else if (lower.EndsWith("pf4")) { + return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat4", value.substr(0, value.size()-3)); + } else if (lower.EndsWith("pf8")) { + return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-3)); + } else if (lower.EndsWith("pn")) { + return new TLiteralNode(ctx.Pos(), "PgNumeric", value.substr(0, value.size()-2)); + } else { + return new TLiteralNumberNode<double>(ctx.Pos(), "Double", value); + } +} + +TMaybe<TExprOrIdent> TSqlExpression::LiteralExpr(const TRule_literal_value& node) { + TExprOrIdent result; + switch (node.Alt_case()) { + case TRule_literal_value::kAltLiteralValue1: { + result.Expr = LiteralNumber(Ctx, node.GetAlt_literal_value1().GetRule_integer1()); + break; + } + case TRule_literal_value::kAltLiteralValue2: { + result.Expr = LiteralReal(Ctx, node.GetAlt_literal_value2().GetRule_real1()); + break; + } + case TRule_literal_value::kAltLiteralValue3: { + const TString value(Token(node.GetAlt_literal_value3().GetToken1())); + return BuildLiteralTypedSmartStringOrId(Ctx, value); + } + case TRule_literal_value::kAltLiteralValue5: { + Token(node.GetAlt_literal_value5().GetToken1()); + result.Expr = BuildLiteralNull(Ctx.Pos()); + break; + } + case TRule_literal_value::kAltLiteralValue9: { + const TString value(to_lower(Token(node.GetAlt_literal_value9().GetRule_bool_value1().GetToken1()))); + result.Expr = BuildLiteralBool(Ctx.Pos(), FromString<bool>(value)); + break; + } + case TRule_literal_value::kAltLiteralValue10: { + result.Expr = BuildEmptyAction(Ctx.Pos()); + break; + } + case TRule_literal_value::kAltLiteralValue4: + case TRule_literal_value::kAltLiteralValue6: + case TRule_literal_value::kAltLiteralValue7: + case TRule_literal_value::kAltLiteralValue8: + case 
TRule_literal_value::ALT_NOT_SET: + AltNotImplemented("literal_value", node); + } + if (!result.Expr) { + return {}; + } + return result; +} + +template<typename TUnarySubExprType> +TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node, const TTrailingQuestions& tail) { + if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) { + if (node.Alt_case() == TRule_unary_subexpr::kAltUnarySubexpr1) { + return UnaryCasualExpr(node.GetAlt_unary_subexpr1().GetRule_unary_casual_subexpr1(), tail); + } else if (tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } else { + MaybeUnnamedSmartParenOnTop = false; + return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1()); + } + } else { + MaybeUnnamedSmartParenOnTop = false; + if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) { + return UnaryCasualExpr(node.GetAlt_in_unary_subexpr1().GetRule_in_unary_casual_subexpr1(), tail); + } else if (tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } else { + return JsonApiExpr(node.GetAlt_in_unary_subexpr2().GetRule_json_api_expr1()); + } + } +} + +TNodePtr TSqlExpression::JsonPathSpecification(const TRule_jsonpath_spec& node) { + /* + jsonpath_spec: STRING_VALUE; + */ + TString value = Token(node.GetToken1()); + TPosition pos = Ctx.Pos(); + + auto parsed = StringContent(Ctx, pos, value); + if (!parsed) { + return nullptr; + } + return new TCallNodeImpl(pos, "Utf8", {BuildQuotedAtom(pos, parsed->Content, parsed->Flags)}); +} + +TNodePtr TSqlExpression::JsonReturningTypeRule(const TRule_type_name_simple& node) { + /* + (RETURNING type_name_simple)? 
+ */ + return TypeSimple(node, /* onlyDataAllowed */ true); +} + +TNodePtr TSqlExpression::JsonInputArg(const TRule_json_common_args& node) { + /* + json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; + */ + TNodePtr jsonExpr = Build(node.GetRule_expr1()); + if (!jsonExpr || jsonExpr->IsNull()) { + jsonExpr = new TCallNodeImpl(Ctx.Pos(), "Nothing", { + new TCallNodeImpl(Ctx.Pos(), "OptionalType", {BuildDataType(Ctx.Pos(), "Json")}) + }); + } + + return jsonExpr; +} + +void TSqlExpression::AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children) { + /* + json_variable: expr AS json_variable_name; + */ + TNodePtr expr; + TString rawName; + TPosition namePos = Ctx.Pos(); + ui32 nameFlags = 0; + + expr = Build(node.GetRule_expr1()); + const auto& nameRule = node.GetRule_json_variable_name3(); + switch (nameRule.GetAltCase()) { + case TRule_json_variable_name::kAltJsonVariableName1: + rawName = Id(nameRule.GetAlt_json_variable_name1().GetRule_id_expr1(), *this); + nameFlags = TNodeFlags::ArbitraryContent; + break; + case TRule_json_variable_name::kAltJsonVariableName2: { + const auto& token = nameRule.GetAlt_json_variable_name2().GetToken1(); + namePos = GetPos(token); + auto parsed = StringContentOrIdContent(Ctx, namePos, token.GetValue()); + if (!parsed) { + return; + } + rawName = parsed->Content; + nameFlags = parsed->Flags; + break; + } + case TRule_json_variable_name::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + TNodePtr nameExpr = BuildQuotedAtom(namePos, rawName, nameFlags); + children.push_back(BuildTuple(namePos, {nameExpr, expr})); +} + +void TSqlExpression::AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children) { + /* + json_variables: json_variable (COMMA json_variable)*; + */ + AddJsonVariable(node.GetRule_json_variable1(), children); + for (size_t i = 0; i < node.Block2Size(); i++) { + 
AddJsonVariable(node.GetBlock2(i).GetRule_json_variable2(), children); + } +} + +TNodePtr TSqlExpression::JsonVariables(const TRule_json_common_args& node) { + /* + json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; + */ + TVector<TNodePtr> variables; + TPosition pos = Ctx.Pos(); + if (node.HasBlock4()) { + const auto& block = node.GetBlock4(); + pos = GetPos(block.GetToken1()); + AddJsonVariables(block.GetRule_json_variables2(), variables); + } + return new TCallNodeImpl(pos, "JsonVariables", variables); +} + +void TSqlExpression::AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children) { + /* + json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?; + */ + TNodePtr jsonExpr = JsonInputArg(node); + TNodePtr jsonPath = JsonPathSpecification(node.GetRule_jsonpath_spec3()); + TNodePtr variables = JsonVariables(node); + + children.push_back(jsonExpr); + children.push_back(jsonPath); + children.push_back(variables); +} + +TNodePtr TSqlExpression::JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode) { + /* + json_case_handler: ERROR | NULL | (DEFAULT expr); + */ + + switch (node.GetAltCase()) { + case TRule_json_case_handler::kAltJsonCaseHandler1: { + const auto pos = GetPos(node.GetAlt_json_case_handler1().GetToken1()); + mode = EJsonValueHandlerMode::Error; + return new TCallNodeImpl(pos, "Null", {}); + } + case TRule_json_case_handler::kAltJsonCaseHandler2: { + const auto pos = GetPos(node.GetAlt_json_case_handler2().GetToken1()); + mode = EJsonValueHandlerMode::DefaultValue; + return new TCallNodeImpl(pos, "Null", {}); + } + case TRule_json_case_handler::kAltJsonCaseHandler3: + mode = EJsonValueHandlerMode::DefaultValue; + return Build(node.GetAlt_json_case_handler3().GetRule_expr2()); + case TRule_json_case_handler::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +void TSqlExpression::AddJsonValueCaseHandlers(const 
TRule_json_value& node, TVector<TNodePtr>& children) { + /* + json_case_handler* + */ + if (node.Block5Size() > 2) { + Ctx.Error() << "Only 1 ON EMPTY and/or 1 ON ERROR clause is expected"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueTooManyHandleClauses"); + return; + } + + TNodePtr onEmpty; + EJsonValueHandlerMode onEmptyMode = EJsonValueHandlerMode::DefaultValue; + TNodePtr onError; + EJsonValueHandlerMode onErrorMode = EJsonValueHandlerMode::DefaultValue; + for (size_t i = 0; i < node.Block5Size(); i++) { + const auto block = node.GetBlock5(i); + const bool isEmptyClause = to_lower(block.GetToken3().GetValue()) == "empty"; + + if (isEmptyClause && onEmpty != nullptr) { + Ctx.Error() << "Only 1 ON EMPTY clause is expected"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnEmptyClauses"); + return; + } + + if (!isEmptyClause && onError != nullptr) { + Ctx.Error() << "Only 1 ON ERROR clause is expected"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnErrorClauses"); + return; + } + + if (isEmptyClause && onError != nullptr) { + Ctx.Error() << "ON EMPTY clause must be before ON ERROR clause"; + Ctx.IncrementMonCounter("sql_errors", "JsonValueOnEmptyAfterOnError"); + return; + } + + EJsonValueHandlerMode currentMode; + TNodePtr currentHandler = JsonValueCaseHandler(block.GetRule_json_case_handler1(), currentMode); + + if (isEmptyClause) { + onEmpty = currentHandler; + onEmptyMode = currentMode; + } else { + onError = currentHandler; + onErrorMode = currentMode; + } + } + + if (onEmpty == nullptr) { + onEmpty = new TCallNodeImpl(Ctx.Pos(), "Null", {}); + } + + if (onError == nullptr) { + onError = new TCallNodeImpl(Ctx.Pos(), "Null", {}); + } + + children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onEmptyMode), TNodeFlags::Default)); + children.push_back(onEmpty); + children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onErrorMode), TNodeFlags::Default)); + children.push_back(onError); +} + +TNodePtr 
TSqlExpression::JsonValueExpr(const TRule_json_value& node) { + /* + json_value: JSON_VALUE LPAREN + json_common_args + (RETURNING type_name_simple)? + (json_case_handler ON (EMPTY | ERROR))* + RPAREN; + */ + TVector<TNodePtr> children; + AddJsonCommonArgs(node.GetRule_json_common_args3(), children); + AddJsonValueCaseHandlers(node, children); + + if (node.HasBlock4()) { + auto returningType = JsonReturningTypeRule(node.GetBlock4().GetRule_type_name_simple2()); + if (!returningType) { + return {}; + } + children.push_back(returningType); + } + + return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonValue", children); +} + +void TSqlExpression::AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children) { + /* + json_exists: JSON_EXISTS LPAREN + json_common_args + json_exists_handler? + RPAREN; + */ + auto buildJustBool = [&](const TPosition& pos, bool value) { + return new TCallNodeImpl(pos, "Just", {BuildLiteralBool(pos, value)}); + }; + + if (!node.HasBlock4()) { + children.push_back(buildJustBool(Ctx.Pos(), false)); + return; + } + + const auto& handlerRule = node.GetBlock4().GetRule_json_exists_handler1(); + const auto& token = handlerRule.GetToken1(); + const auto pos = GetPos(token); + const auto mode = to_lower(token.GetValue()); + if (mode == "unknown") { + const auto nothingNode = new TCallNodeImpl(pos, "Nothing", { + new TCallNodeImpl(pos, "OptionalType", {BuildDataType(pos, "Bool")}) + }); + children.push_back(nothingNode); + } else if (mode != "error") { + children.push_back(buildJustBool(pos, FromString<bool>(mode))); + } +} + +TNodePtr TSqlExpression::JsonExistsExpr(const TRule_json_exists& node) { + /* + json_exists: JSON_EXISTS LPAREN + json_common_args + json_exists_handler? 
+ RPAREN; + */ + TVector<TNodePtr> children; + AddJsonCommonArgs(node.GetRule_json_common_args3(), children); + + AddJsonExistsHandler(node, children); + + return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonExists", children); +} + +EJsonQueryWrap TSqlExpression::JsonQueryWrapper(const TRule_json_query& node) { + /* + json_query: JSON_QUERY LPAREN + json_common_args + (json_query_wrapper WRAPPER)? + (json_query_handler ON EMPTY)? + (json_query_handler ON ERROR)? + RPAREN; + */ + // default behaviour - no wrapping + if (!node.HasBlock4()) { + return EJsonQueryWrap::NoWrap; + } + + // WITHOUT ARRAY? - no wrapping + const auto& wrapperRule = node.GetBlock4().GetRule_json_query_wrapper1(); + if (wrapperRule.GetAltCase() == TRule_json_query_wrapper::kAltJsonQueryWrapper1) { + return EJsonQueryWrap::NoWrap; + } + + // WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY? - wrapping depends on 2nd token. Default is UNCONDITIONAL + const auto& withWrapperRule = wrapperRule.GetAlt_json_query_wrapper2(); + if (!withWrapperRule.HasBlock2()) { + return EJsonQueryWrap::Wrap; + } + + const auto& token = withWrapperRule.GetBlock2().GetToken1(); + if (to_lower(token.GetValue()) == "conditional") { + return EJsonQueryWrap::ConditionalWrap; + } else { + return EJsonQueryWrap::Wrap; + } +} + +EJsonQueryHandler TSqlExpression::JsonQueryHandler(const TRule_json_query_handler& node) { + /* + json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT); + */ + switch (node.GetAltCase()) { + case TRule_json_query_handler::kAltJsonQueryHandler1: + return EJsonQueryHandler::Error; + case TRule_json_query_handler::kAltJsonQueryHandler2: + return EJsonQueryHandler::Null; + case TRule_json_query_handler::kAltJsonQueryHandler3: + return EJsonQueryHandler::EmptyArray; + case TRule_json_query_handler::kAltJsonQueryHandler4: + return EJsonQueryHandler::EmptyObject; + case TRule_json_query_handler::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + 
+TNodePtr TSqlExpression::JsonQueryExpr(const TRule_json_query& node) { + /* + json_query: JSON_QUERY LPAREN + json_common_args + (json_query_wrapper WRAPPER)? + (json_query_handler ON EMPTY)? + (json_query_handler ON ERROR)? + RPAREN; + */ + + TVector<TNodePtr> children; + AddJsonCommonArgs(node.GetRule_json_common_args3(), children); + + auto addChild = [&](TPosition pos, const TString& content) { + children.push_back(BuildQuotedAtom(pos, content, TNodeFlags::Default)); + }; + + const auto wrapMode = JsonQueryWrapper(node); + addChild(Ctx.Pos(), ToString(wrapMode)); + + auto onEmpty = EJsonQueryHandler::Null; + if (node.HasBlock5()) { + if (wrapMode != EJsonQueryWrap::NoWrap) { + Ctx.Error() << "ON EMPTY is prohibited because WRAPPER clause is specified"; + Ctx.IncrementMonCounter("sql_errors", "JsonQueryOnEmptyWithWrapper"); + return nullptr; + } + onEmpty = JsonQueryHandler(node.GetBlock5().GetRule_json_query_handler1()); + } + addChild(Ctx.Pos(), ToString(onEmpty)); + + auto onError = EJsonQueryHandler::Null; + if (node.HasBlock6()) { + onError = JsonQueryHandler(node.GetBlock6().GetRule_json_query_handler1()); + } + addChild(Ctx.Pos(), ToString(onError)); + + return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonQuery", children); +} + +TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) { + /* + json_api_expr: json_value | json_exists | json_query; + */ + TPosition pos = Ctx.Pos(); + TNodePtr result = nullptr; + switch (node.GetAltCase()) { + case TRule_json_api_expr::kAltJsonApiExpr1: { + const auto& jsonValue = node.GetAlt_json_api_expr1().GetRule_json_value1(); + pos = GetPos(jsonValue.GetToken1()); + result = JsonValueExpr(jsonValue); + break; + } + case TRule_json_api_expr::kAltJsonApiExpr2: { + const auto& jsonExists = node.GetAlt_json_api_expr2().GetRule_json_exists1(); + pos = GetPos(jsonExists.GetToken1()); + result = JsonExistsExpr(jsonExists); + break; + } + case TRule_json_api_expr::kAltJsonApiExpr3: { + const auto& jsonQuery 
= node.GetAlt_json_api_expr3().GetRule_json_query1(); + pos = GetPos(jsonQuery.GetToken1()); + result = JsonQueryExpr(jsonQuery); + break; + } + case TRule_json_api_expr::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + return result; +} + +TNodePtr MatchRecognizeVarAccess(TTranslation& ctx, const TString& var, const TRule_an_id_or_type& suffix, bool theSameVar) { + switch (suffix.GetAltCase()) { + case TRule_an_id_or_type::kAltAnIdOrType1: + break; + case TRule_an_id_or_type::kAltAnIdOrType2: + break; + case TRule_an_id_or_type::ALT_NOT_SET: + break; + } + const auto& column = Id( + suffix.GetAlt_an_id_or_type1() + .GetRule_id_or_type1().GetAlt_id_or_type1().GetRule_id1(), + ctx + ); + return BuildMatchRecognizeVarAccess(TPosition{}, var, column, theSameVar); +} + +TNodePtr TSqlExpression::RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block) { + switch (block.GetAltCase()) { + case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1: + break; + case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2: + break; + case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3: + switch (block.GetAlt3().GetRule_an_id_or_type1().GetAltCase()) { + case TRule_an_id_or_type::kAltAnIdOrType1: { + const auto &idOrType = block.GetAlt3().GetRule_an_id_or_type1().GetAlt_an_id_or_type1().GetRule_id_or_type1(); + switch(idOrType.GetAltCase()) { + case TRule_id_or_type::kAltIdOrType1: + return BuildMatchRecognizeVarAccess( + Ctx.Pos(), + alias, + Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this), + Ctx.GetMatchRecognizeDefineVar() == alias + ); + case TRule_id_or_type::kAltIdOrType2: + break; + case TRule_id_or_type::ALT_NOT_SET: + break; + } + break; + } + case TRule_an_id_or_type::kAltAnIdOrType2: + break; + case TRule_an_id_or_type::ALT_NOT_SET: + break; + } + break; + case 
TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return TNodePtr{}; +} + +template<typename TUnaryCasualExprRule> +TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail) { + // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix; + // OR + // in_unary_casual_subexpr: (id_expr_in | in_atom_expr) unary_subexpr_suffix; + // where + // unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | id)))* (COLLATE id)?; + + const auto& suffix = node.GetRule_unary_subexpr_suffix2(); + const bool suffixIsEmpty = suffix.GetBlock1().empty() && !suffix.HasBlock2(); + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && suffixIsEmpty; + TString name; + TNodePtr expr; + bool typePossible = false; + auto& block = node.GetBlock1(); + switch (block.Alt_case()) { + case TUnaryCasualExprRule::TBlock1::kAlt1: { + MaybeUnnamedSmartParenOnTop = false; + auto& alt = block.GetAlt1(); + if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { + name = Id(alt.GetRule_id_expr1(), *this); + typePossible = !IsQuotedId(alt.GetRule_id_expr1(), *this); + } else { + // type was never possible here + name = Id(alt.GetRule_id_expr_in1(), *this); + } + break; + } + case TUnaryCasualExprRule::TBlock1::kAlt2: { + auto& alt = block.GetAlt2(); + TMaybe<TExprOrIdent> exprOrId; + if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { + exprOrId = AtomExpr(alt.GetRule_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{}); + } else { + MaybeUnnamedSmartParenOnTop = false; + exprOrId = InAtomExpr(alt.GetRule_in_atom_expr1(), suffixIsEmpty ? 
tail : TTrailingQuestions{}); + } + + if (!exprOrId) { + Ctx.IncrementMonCounter("sql_errors", "BadAtomExpr"); + return nullptr; + } + if (!exprOrId->Expr) { + name = exprOrId->Ident; + } else { + expr = exprOrId->Expr; + } + break; + } + case TUnaryCasualExprRule::TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + // bool onlyDots = true; + bool isColumnRef = !expr; + bool isFirstElem = true; + + for (auto& _b : suffix.GetBlock1()) { + auto& b = _b.GetBlock1(); + switch (b.Alt_case()) { + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: { + // key_expr + // onlyDots = false; + break; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: { + // invoke_expr - cannot be a column, function name + if (isFirstElem) { + isColumnRef = false; + } + + // onlyDots = false; + break; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: { + // In case of MATCH_RECOGNIZE lambdas + // X.Y is treated as Var.Column access + if (isColumnRef && EColumnRefState::MatchRecognize == Ctx.GetColumnReferenceState()) { + if (auto rowPatternVarAccess = RowPatternVarAccess( + name, + b.GetAlt3().GetBlock2()) + ) { + return rowPatternVarAccess; + } + } + break; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET: + AltNotImplemented("unary_subexpr_suffix", b); + return nullptr; + } + + isFirstElem = false; + } + + isFirstElem = true; + TVector<INode::TIdPart> ids; + INode::TPtr lastExpr; + if (!isColumnRef) { + lastExpr = expr; + } else { + const bool flexibleTypes = Ctx.FlexibleTypes; + bool columnOrType = false; + auto columnRefsState = Ctx.GetColumnReferenceState(); + bool explicitPgType = columnRefsState == EColumnRefState::AsPgType; + if (explicitPgType && typePossible && suffixIsEmpty) { + auto pgType = BuildSimpleType(Ctx, Ctx.Pos(), name, false); + if (pgType && tail.Count) { + Ctx.Error() << "Optional types are not supported in this context"; + return {}; + } + return pgType; + } + if 
(auto simpleType = LookupSimpleType(name, flexibleTypes, false); simpleType && typePossible && suffixIsEmpty) { + if (tail.Count > 0 || columnRefsState == EColumnRefState::Deny || !flexibleTypes) { + // a type + return AddOptionals(BuildSimpleType(Ctx, Ctx.Pos(), name, false), tail.Count); + } + // type or column: ambiguity will be resolved on type annotation stage + columnOrType = columnRefsState == EColumnRefState::Allow; + } + if (tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } + if (!Ctx.CheckColumnReference(Ctx.Pos(), name)) { + return nullptr; + } + + ids.push_back(columnOrType ? BuildColumnOrType(Ctx.Pos()) : BuildColumn(Ctx.Pos())); + ids.push_back(name); + } + + TPosition pos(Ctx.Pos()); + for (auto& _b : suffix.GetBlock1()) { + auto& b = _b.GetBlock1(); + switch (b.Alt_case()) { + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: { + // key_expr + auto keyExpr = KeyExpr(b.GetAlt1().GetRule_key_expr1()); + if (!keyExpr) { + Ctx.IncrementMonCounter("sql_errors", "BadKeyExpr"); + return nullptr; + } + + if (!lastExpr) { + lastExpr = BuildAccess(pos, ids, false); + ids.clear(); + } + + ids.push_back(lastExpr); + ids.push_back(keyExpr); + lastExpr = BuildAccess(pos, ids, true); + ids.clear(); + break; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: { + // invoke_expr - cannot be a column, function name + TSqlCallExpr call(Ctx, Mode); + if (isFirstElem && !name.empty()) { + call.AllowDistinct(); + call.InitName(name); + } else { + call.InitExpr(lastExpr); + } + + bool initRet = call.Init(b.GetAlt2().GetRule_invoke_expr1()); + if (initRet) { + call.IncCounters(); + } + + if (!initRet) { + return nullptr; + } + + lastExpr = call.BuildCall(); + if (!lastExpr) { + return nullptr; + } + + break; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: { + // dot + if (lastExpr) { + ids.push_back(lastExpr); + } + + auto bb = b.GetAlt3().GetBlock2(); + switch (bb.Alt_case()) { + case 
TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1: { + TString named; + if (!NamedNodeImpl(bb.GetAlt1().GetRule_bind_parameter1(), named, *this)) { + return nullptr; + } + auto namedNode = GetNamedNode(named); + if (!namedNode) { + return nullptr; + } + + ids.push_back(named); + ids.back().Expr = namedNode; + break; + } + case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2: { + const TString str(Token(bb.GetAlt2().GetToken1())); + ids.push_back(str); + break; + } + case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3: { + ids.push_back(Id(bb.GetAlt3().GetRule_an_id_or_type1(), *this)); + break; + } + case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + if (lastExpr) { + lastExpr = BuildAccess(pos, ids, false); + ids.clear(); + } + + break; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET: + AltNotImplemented("unary_subexpr_suffix", b); + return nullptr; + } + + isFirstElem = false; + } + + if (!lastExpr) { + lastExpr = BuildAccess(pos, ids, false); + ids.clear(); + } + + if (suffix.HasBlock2()) { + Ctx.IncrementMonCounter("sql_errors", "CollateUnarySubexpr"); + Error() << "unary_subexpr: COLLATE is not implemented yet"; + } + + return lastExpr; +} + +TNodePtr TSqlExpression::BindParameterRule(const TRule_bind_parameter& rule, const TTrailingQuestions& tail) { + TString namedArg; + if (!NamedNodeImpl(rule, namedArg, *this)) { + return {}; + } + if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) { + Ctx.IncrementMonCounter("sql_features", "LambdaArgument"); + if (tail.Count > 1) { + Ctx.Error(tail.Pos) << "Expecting at most one '?' 
token here (for optional lambda parameters), but got " << tail.Count; + return {}; + } + return BuildAtom(Ctx.Pos(), namedArg, NYql::TNodeFlags::ArbitraryContent, tail.Count != 0); + } + if (tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } + Ctx.IncrementMonCounter("sql_features", "NamedNodeUseAtom"); + return GetNamedNode(namedArg); +} + +TNodePtr TSqlExpression::LambdaRule(const TRule_lambda& rule) { + const auto& alt = rule; + const bool isSqlLambda = alt.HasBlock2(); + if (!isSqlLambda) { + return SmartParenthesis(alt.GetRule_smart_parenthesis1()); + } + + MaybeUnnamedSmartParenOnTop = false; + TNodePtr parenthesis; + { + // we allow column reference here to postpone error and report it with better description in SqlLambdaParams + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + expr.SetSmartParenthesisMode(ESmartParenthesis::SqlLambdaParams); + parenthesis = expr.SmartParenthesis(alt.GetRule_smart_parenthesis1()); + } + if (!parenthesis) { + return {}; + } + + ui32 optionalArgumentsCount = 0; + TVector<TSymbolNameWithPos> args; + if (!SqlLambdaParams(parenthesis, args, optionalArgumentsCount)) { + return {}; + } + auto bodyBlock = alt.GetBlock2(); + Token(bodyBlock.GetToken1()); + TPosition pos(Ctx.Pos()); + TVector<TNodePtr> exprSeq; + for (auto& arg: args) { + arg.Name = PushNamedAtom(arg.Pos, arg.Name); + } + bool ret = false; + TColumnRefScope scope(Ctx, EColumnRefState::Deny); + scope.SetNoColumnErrContext("in lambda function"); + if (bodyBlock.GetBlock2().HasAlt1()) { + ret = SqlLambdaExprBody(Ctx, bodyBlock.GetBlock2().GetAlt1().GetRule_expr2(), exprSeq); + } else { + ret = SqlLambdaExprBody(Ctx, bodyBlock.GetBlock2().GetAlt2().GetRule_lambda_body2(), exprSeq); + } + + TVector<TString> argNames; + for (const auto& arg : args) { + argNames.push_back(arg.Name); + PopNamedNode(arg.Name); + } + if (!ret) { + return {}; + } + + auto lambdaNode = BuildSqlLambda(pos, std::move(argNames), 
std::move(exprSeq)); + if (optionalArgumentsCount > 0) { + lambdaNode = new TCallNodeImpl(pos, "WithOptionalArgs", { + lambdaNode, + BuildQuotedAtom(pos, ToString(optionalArgumentsCount), TNodeFlags::Default) + }); + } + + return lambdaNode; +} + +/* Builds SafeCast(expr, type) for a cast_expr: the operand comes from rule expr3 and the target type from TypeNodeOrBind. Returns an empty pointer when either part fails to build. */ +TNodePtr TSqlExpression::CastRule(const TRule_cast_expr& rule) { + Ctx.IncrementMonCounter("sql_features", "Cast"); + const auto& alt = rule; + Token(alt.GetToken1()); + TPosition pos(Ctx.Pos()); + TSqlExpression expr(Ctx, Mode); + auto exprNode = expr.Build(rule.GetRule_expr3()); + if (!exprNode) { + return {}; + } + auto type = TypeNodeOrBind(rule.GetRule_type_name_or_bind5()); + if (!type) { + return {}; + } + return new TCallNodeImpl(pos, "SafeCast", {exprNode, type}); +} + +/* Builds BitCast(expr, type) for a bitcast_expr: the operand comes from rule expr3 and the target type from TypeSimple. Returns an empty pointer when either part fails to build. */ +TNodePtr TSqlExpression::BitCastRule(const TRule_bitcast_expr& rule) { + Ctx.IncrementMonCounter("sql_features", "BitCast"); + const auto& alt = rule; + Token(alt.GetToken1()); + TPosition pos(Ctx.Pos()); + TSqlExpression expr(Ctx, Mode); + auto exprNode = expr.Build(rule.GetRule_expr3()); + if (!exprNode) { + return {}; + } + auto type = TypeSimple(rule.GetRule_type_name_simple5(), true); + if (!type) { + return {}; + } + return new TCallNodeImpl(pos, "BitCast", {exprNode, type}); +} + +/* Builds ListHasItems over the source node of an exists_expr. The source is produced by TSqlSelect for alternative 1 and by TSqlValues for alternative 2; a missing source is counted as BadSource and yields nullptr. */ +TNodePtr TSqlExpression::ExistsRule(const TRule_exists_expr& rule) { + Ctx.IncrementMonCounter("sql_features", "Exists"); + + TPosition pos; + TSourcePtr source; + Token(rule.GetToken2()); + switch (rule.GetBlock3().Alt_case()) { + case TRule_exists_expr::TBlock3::kAlt1: { + const auto& alt = rule.GetBlock3().GetAlt1().GetRule_select_stmt1(); + TSqlSelect select(Ctx, Mode); + source = select.Build(alt, pos); + break; + } + case TRule_exists_expr::TBlock3::kAlt2: { + const auto& alt = rule.GetBlock3().GetAlt2().GetRule_values_stmt1(); + TSqlValues values(Ctx, Mode); + source = values.Build(alt, pos); + break; + } + case TRule_exists_expr::TBlock3::ALT_NOT_SET: + AltNotImplemented("exists_expr", rule.GetBlock3()); + } + + if (!source) { + 
Ctx.IncrementMonCounter("sql_errors", "BadSource"); + return nullptr; + } + const bool checkExist = true; + return BuildBuiltinFunc(Ctx, Ctx.Pos(), "ListHasItems", {BuildSourceNode(pos, std::move(source), checkExist)}); +} + +TNodePtr TSqlExpression::CaseRule(const TRule_case_expr& rule) { + // case_expr: CASE expr? when_expr+ (ELSE expr)? END; + // when_expr: WHEN expr THEN expr; + Ctx.IncrementMonCounter("sql_features", "Case"); + const auto& alt = rule; + Token(alt.GetToken1()); + TNodePtr elseExpr; + if (alt.HasBlock4()) { + Token(alt.GetBlock4().GetToken1()); + TSqlExpression expr(Ctx, Mode); + elseExpr = expr.Build(alt.GetBlock4().GetRule_expr2()); + } else { + Ctx.IncrementMonCounter("sql_errors", "ElseIsRequired"); + Error() << "ELSE is required"; + return {}; + } + + TNodePtr caseExpr; + if (alt.HasBlock2()) { + TSqlExpression expr(Ctx, Mode); + caseExpr = expr.Build(alt.GetBlock2().GetRule_expr1()); + if (!caseExpr) { + return {}; + } + } + + TVector<TCaseBranch> branches; + for (size_t i = 0; i < alt.Block3Size(); ++i) { + branches.emplace_back(); + const auto& block = alt.GetBlock3(i).GetRule_when_expr1(); + Token(block.GetToken1()); + TSqlExpression condExpr(Ctx, Mode); + branches.back().Pred = condExpr.Build(block.GetRule_expr2()); + if (caseExpr) { + branches.back().Pred = BuildBinaryOp(Ctx, Ctx.Pos(), "==", caseExpr->Clone(), branches.back().Pred); + } + if (!branches.back().Pred) { + return {}; + } + Token(block.GetToken3()); + TSqlExpression thenExpr(Ctx, Mode); + branches.back().Value = thenExpr.Build(block.GetRule_expr4()); + if (!branches.back().Value) { + return {}; + } + } + auto final = ReduceCaseBranches(branches.begin(), branches.end()); + return BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { final.Pred, final.Value, elseExpr }); +} + +TMaybe<TExprOrIdent> TSqlExpression::AtomExpr(const TRule_atom_expr& node, const TTrailingQuestions& tail) { + // atom_expr: + // literal_value + // | bind_parameter + // | lambda + // | cast_expr + // | 
exists_expr + // | case_expr + // | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE) + // | value_constructor + // | bitcast_expr + // | list_literal + // | dict_literal + // | struct_literal + // ; + if (node.Alt_case() != TRule_atom_expr::kAltAtomExpr2 && tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (node.Alt_case() == TRule_atom_expr::kAltAtomExpr3); + TExprOrIdent result; + switch (node.Alt_case()) { + case TRule_atom_expr::kAltAtomExpr1: + Ctx.IncrementMonCounter("sql_features", "LiteralExpr"); + return LiteralExpr(node.GetAlt_atom_expr1().GetRule_literal_value1()); + case TRule_atom_expr::kAltAtomExpr2: + result.Expr = BindParameterRule(node.GetAlt_atom_expr2().GetRule_bind_parameter1(), tail); + break; + case TRule_atom_expr::kAltAtomExpr3: + result.Expr = LambdaRule(node.GetAlt_atom_expr3().GetRule_lambda1()); + break; + case TRule_atom_expr::kAltAtomExpr4: + result.Expr = CastRule(node.GetAlt_atom_expr4().GetRule_cast_expr1()); + break; + case TRule_atom_expr::kAltAtomExpr5: + result.Expr = ExistsRule(node.GetAlt_atom_expr5().GetRule_exists_expr1()); + break; + case TRule_atom_expr::kAltAtomExpr6: + result.Expr = CaseRule(node.GetAlt_atom_expr6().GetRule_case_expr1()); + break; + case TRule_atom_expr::kAltAtomExpr7: { + const auto& alt = node.GetAlt_atom_expr7(); + TString module(Id(alt.GetRule_an_id_or_type1(), *this)); + TPosition pos(Ctx.Pos()); + TString name; + switch (alt.GetBlock3().Alt_case()) { + case TRule_atom_expr::TAlt7::TBlock3::kAlt1: + name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this); + break; + case TRule_atom_expr::TAlt7::TBlock3::kAlt2: { + name = Token(alt.GetBlock3().GetAlt2().GetToken1()); + if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) { + // same as previous case + name = IdContentFromString(Ctx, name); + } else { + module = "@" + module; + } + break; + } + case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET: + 
Y_ABORT("Unsigned number: you should change implementation according to grammar changes"); + } + result.Expr = BuildCallable(pos, module, name, {}); + break; + } + case TRule_atom_expr::kAltAtomExpr8: { + result.Expr = ValueConstructor(node.GetAlt_atom_expr8().GetRule_value_constructor1()); + break; + } + case TRule_atom_expr::kAltAtomExpr9: + result.Expr = BitCastRule(node.GetAlt_atom_expr9().GetRule_bitcast_expr1()); + break; + case TRule_atom_expr::kAltAtomExpr10: + result.Expr = ListLiteral(node.GetAlt_atom_expr10().GetRule_list_literal1()); + break; + case TRule_atom_expr::kAltAtomExpr11: + result.Expr = DictLiteral(node.GetAlt_atom_expr11().GetRule_dict_literal1()); + break; + case TRule_atom_expr::kAltAtomExpr12: + result.Expr = StructLiteral(node.GetAlt_atom_expr12().GetRule_struct_literal1()); + break; + case TRule_atom_expr::ALT_NOT_SET: + AltNotImplemented("atom_expr", node); + } + if (!result.Expr) { + return {}; + } + return result; +} + +/* Same dispatch as AtomExpr for the in_atom_expr rule, which has no exists_expr alternative but accepts a parenthesised select_stmt: that select is registered as a labelled subquery block and referenced through BuildSubqueryRef. Returns an empty TMaybe on failure. */ +TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail) { + // in_atom_expr: + // literal_value + // | bind_parameter + // | lambda + // | cast_expr + // | case_expr + // | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE) + // | LPAREN select_stmt RPAREN + // | value_constructor + // | bitcast_expr + // | list_literal + // | dict_literal + // | struct_literal + // ; + if (node.Alt_case() != TRule_in_atom_expr::kAltInAtomExpr2 && tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } + TExprOrIdent result; + switch (node.Alt_case()) { + case TRule_in_atom_expr::kAltInAtomExpr1: + Ctx.IncrementMonCounter("sql_features", "LiteralExpr"); + return LiteralExpr(node.GetAlt_in_atom_expr1().GetRule_literal_value1()); + case TRule_in_atom_expr::kAltInAtomExpr2: + result.Expr = BindParameterRule(node.GetAlt_in_atom_expr2().GetRule_bind_parameter1(), tail); + break; + case TRule_in_atom_expr::kAltInAtomExpr3: + result.Expr = 
LambdaRule(node.GetAlt_in_atom_expr3().GetRule_lambda1()); + break; + case TRule_in_atom_expr::kAltInAtomExpr4: + result.Expr = CastRule(node.GetAlt_in_atom_expr4().GetRule_cast_expr1()); + break; + case TRule_in_atom_expr::kAltInAtomExpr5: + result.Expr = CaseRule(node.GetAlt_in_atom_expr5().GetRule_case_expr1()); + break; + case TRule_in_atom_expr::kAltInAtomExpr6: { + const auto& alt = node.GetAlt_in_atom_expr6(); + TString module(Id(alt.GetRule_an_id_or_type1(), *this)); + TPosition pos(Ctx.Pos()); + TString name; + switch (alt.GetBlock3().Alt_case()) { + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: + name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this); + break; + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { + name = Token(alt.GetBlock3().GetAlt2().GetToken1()); + if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) { + // same as previous case + name = IdContentFromString(Ctx, name); + } else { + module = "@" + module; + } + break; + } + case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + result.Expr = BuildCallable(pos, module, name, {}); + break; + } + case TRule_in_atom_expr::kAltInAtomExpr7: { + Token(node.GetAlt_in_atom_expr7().GetToken1()); + // reset column reference scope (select will reenable it where needed) + TColumnRefScope scope(Ctx, EColumnRefState::Deny); + TSqlSelect select(Ctx, Mode); + TPosition pos; + auto source = select.Build(node.GetAlt_in_atom_expr7().GetRule_select_stmt2(), pos); + if (!source) { + Ctx.IncrementMonCounter("sql_errors", "BadSource"); + return {}; + } + Ctx.IncrementMonCounter("sql_features", "InSubquery"); + const auto alias = Ctx.MakeName("subquerynode"); + const auto ref = Ctx.MakeName("subquery"); + auto& blocks = Ctx.GetCurrentBlocks(); + blocks.push_back(BuildSubquery(std::move(source), alias, Mode == NSQLTranslation::ESqlMode::SUBQUERY, -1, Ctx.Scoped)); + blocks.back()->SetLabel(ref); + result.Expr = 
BuildSubqueryRef(blocks.back(), ref, -1); + break; + } + case TRule_in_atom_expr::kAltInAtomExpr8: { + result.Expr = ValueConstructor(node.GetAlt_in_atom_expr8().GetRule_value_constructor1()); + break; + } + case TRule_in_atom_expr::kAltInAtomExpr9: + result.Expr = BitCastRule(node.GetAlt_in_atom_expr9().GetRule_bitcast_expr1()); + break; + case TRule_in_atom_expr::kAltInAtomExpr10: + result.Expr = ListLiteral(node.GetAlt_in_atom_expr10().GetRule_list_literal1()); + break; + case TRule_in_atom_expr::kAltInAtomExpr11: + result.Expr = DictLiteral(node.GetAlt_in_atom_expr11().GetRule_dict_literal1()); + break; + case TRule_in_atom_expr::kAltInAtomExpr12: + result.Expr = StructLiteral(node.GetAlt_in_atom_expr12().GetRule_struct_literal1()); + break; + case TRule_in_atom_expr::ALT_NOT_SET: + AltNotImplemented("in_atom_expr", node); + } + if (!result.Expr) { + return {}; + } + return result; +} + +/* Validates the tuple of lambda parameters in node and fills args with their names and positions. Every element must be an atom whose content starts with $, a non-optional argument may not follow an optional one, and non-anonymous names must be unique. optionalArgumentsCount receives the number of optional arguments. Returns false after reporting an error. */ +bool TSqlExpression::SqlLambdaParams(const TNodePtr& node, TVector<TSymbolNameWithPos>& args, ui32& optionalArgumentsCount) { + args.clear(); + optionalArgumentsCount = 0; + auto errMsg = TStringBuf("Invalid lambda arguments syntax. 
Lambda arguments should start with '$' as named value."); + auto tupleNodePtr = node->GetTupleNode();; + if (!tupleNodePtr) { + Ctx.Error(node->GetPos()) << errMsg; + return false; + } + THashSet<TString> dupArgsChecker; + for (const auto& argPtr: tupleNodePtr->Elements()) { + auto contentPtr = argPtr->GetAtomContent(); + if (!contentPtr || !contentPtr->StartsWith("$")) { + Ctx.Error(argPtr->GetPos()) << errMsg; + return false; + } + if (argPtr->IsOptionalArg()) { + ++optionalArgumentsCount; + } else if (optionalArgumentsCount > 0) { + Ctx.Error(argPtr->GetPos()) << "Non-optional argument can not follow optional one"; + return false; + } + + if (!IsAnonymousName(*contentPtr) && !dupArgsChecker.insert(*contentPtr).second) { + Ctx.Error(argPtr->GetPos()) << "Duplicate lambda argument parametr: '" << *contentPtr << "'."; + return false; + } + args.push_back(TSymbolNameWithPos{*contentPtr, argPtr->GetPos()}); + } + return true; +} + +/* Lambda body that is a single expression: builds it with ctx.Settings.Mode and appends it to exprSeq. Returns false when the expression fails to build. */ +bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_expr& node, TVector<TNodePtr>& exprSeq) { + TSqlExpression expr(ctx, ctx.Settings.Mode); + TNodePtr nodeExpr = expr.Build(node); + if (!nodeExpr) { + return false; + } + exprSeq.push_back(nodeExpr); + return true; +} + +/* Lambda body made of statements followed by a final expression. Named-node statements are appended to exprSeq under their pushed names (a multi-name statement goes through EnsureTupleSize and Nth), import statements are handled by ImportStatement, and all local names are popped before returning. Returns false when a statement or the final expression fails. */ +bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq) { + TSqlExpression expr(ctx, ctx.Settings.Mode); + TVector<TString> localNames; + bool hasError = false; + for (auto& block: node.GetBlock2()) { + const auto& rule = block.GetRule_lambda_stmt1(); + switch (rule.Alt_case()) { + case TRule_lambda_stmt::kAltLambdaStmt1: { + TVector<TSymbolNameWithPos> names; + auto nodeExpr = NamedNode(rule.GetAlt_lambda_stmt1().GetRule_named_nodes_stmt1(), names); + if (!nodeExpr) { + hasError = true; + continue; + } else if (nodeExpr->GetSource()) { + ctx.Error() << "SELECT is not supported inside lambda body"; + hasError = true; + continue; + } + if (names.size() > 1) { + auto ref = ctx.MakeName("tie"); + 
exprSeq.push_back(nodeExpr->Y("EnsureTupleSize", nodeExpr, nodeExpr->Q(ToString(names.size())))); + exprSeq.back()->SetLabel(ref); + for (size_t i = 0; i < names.size(); ++i) { + TNodePtr nthExpr = nodeExpr->Y("Nth", ref, nodeExpr->Q(ToString(i))); + names[i].Name = PushNamedAtom(names[i].Pos, names[i].Name); + nthExpr->SetLabel(names[i].Name); + localNames.push_back(names[i].Name); + exprSeq.push_back(nthExpr); + } + } else { + auto& symbol = names.front(); + symbol.Name = PushNamedAtom(symbol.Pos, symbol.Name); + nodeExpr->SetLabel(symbol.Name); + localNames.push_back(symbol.Name); + exprSeq.push_back(nodeExpr); + } + break; + } + case TRule_lambda_stmt::kAltLambdaStmt2: { + if (!ImportStatement(rule.GetAlt_lambda_stmt2().GetRule_import_stmt1(), &localNames)) { + hasError = true; + } + break; + } + case TRule_lambda_stmt::ALT_NOT_SET: + Y_ABORT("SampleClause: does not correspond to grammar changes"); + } + } + + TNodePtr nodeExpr; + if (!hasError) { + nodeExpr = expr.Build(node.GetRule_expr4()); + } + + for (const auto& name : localNames) { + PopNamedNode(name); + } + + if (!nodeExpr) { + return false; + } + exprSeq.push_back(nodeExpr); + return true; +} + +/* con_subexpr: a plain unary_subexpr, or a unary operator applied to one. NOT, PLUS, MINUS and TILDA map to Not, Plus, Minus (CheckedMinus under PragmaCheckedOps) and BitNot; any other token is reported as unsupported and yields nullptr. */ +TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQuestions& tail) { + // con_subexpr: unary_subexpr | unary_op unary_subexpr; + switch (node.Alt_case()) { + case TRule_con_subexpr::kAltConSubexpr1: + return UnaryExpr(node.GetAlt_con_subexpr1().GetRule_unary_subexpr1(), tail); + case TRule_con_subexpr::kAltConSubexpr2: { + MaybeUnnamedSmartParenOnTop = false; + Ctx.IncrementMonCounter("sql_features", "UnaryOperation"); + TString opName; + auto token = node.GetAlt_con_subexpr2().GetRule_unary_op1().GetToken1(); + Token(token); + TPosition pos(Ctx.Pos()); + auto tokenId = token.GetId(); + if (IS_TOKEN(tokenId, NOT)) { + opName = "Not"; + } else if (IS_TOKEN(tokenId, PLUS)) { + opName = "Plus"; + } else if (IS_TOKEN(tokenId, MINUS)) { + opName = Ctx.Scoped->PragmaCheckedOps ? 
"CheckedMinus" : "Minus"; + } else if (IS_TOKEN(tokenId, TILDA)) { + opName = "BitNot"; + } else { + Ctx.IncrementMonCounter("sql_errors", "UnsupportedUnaryOperation"); + Error() << "Unsupported unary operation: " << token.GetValue(); + return nullptr; + } + Ctx.IncrementMonCounter("sql_unary_operations", opName); + auto expr = UnaryExpr(node.GetAlt_con_subexpr2().GetRule_unary_subexpr2(), tail); + return expr ? expr->ApplyUnaryOp(Ctx, pos, opName) : expr; + } + case TRule_con_subexpr::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return nullptr; +} + +/* xor_subexpr: an eq_subexpr optionally followed by a cond_expr. The cond_expr alternatives handled below are the match operators (like, ilike, regexp, rlike, match), IN, the NULL checks, BETWEEN, and a list of binary operations delegated to BinOpList. The left operand receives tail only when no cond_expr follows. */ +TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail) { + // xor_subexpr: eq_subexpr cond_expr?; + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock2(); + TNodePtr res(SubExpr(node.GetRule_eq_subexpr1(), node.HasBlock2() ? TTrailingQuestions{} : tail)); + if (!res) { + return {}; + } + TPosition pos(Ctx.Pos()); + if (node.HasBlock2()) { + auto cond = node.GetBlock2().GetRule_cond_expr1(); + switch (cond.Alt_case()) { + case TRule_cond_expr::kAltCondExpr1: { + const auto& matchOp = cond.GetAlt_cond_expr1(); + const bool notMatch = matchOp.HasBlock1(); + const TCiString& opName = Token(matchOp.GetRule_match_op2().GetToken1()); + const auto& pattern = SubExpr(cond.GetAlt_cond_expr1().GetRule_eq_subexpr3(), matchOp.HasBlock4() ? 
TTrailingQuestions{} : tail); + if (!pattern) { + return {}; + } + TNodePtr isMatch; + if (opName == "like" || opName == "ilike") { + const TString* escapeLiteral = nullptr; + TNodePtr escapeNode; + const auto& escaper = BuildUdf(Ctx, pos, "Re2", "PatternFromLike", {}); + TVector<TNodePtr> escaperArgs({ escaper, pattern }); + + if (matchOp.HasBlock4()) { + const auto& escapeBlock = matchOp.GetBlock4(); + TNodePtr escapeExpr = SubExpr(escapeBlock.GetRule_eq_subexpr2(), tail); + if (!escapeExpr) { + return {}; + } + escapeLiteral = escapeExpr->GetLiteral("String"); + escapeNode = escapeExpr; + if (escapeLiteral) { + Ctx.IncrementMonCounter("sql_features", "LikeEscape"); + if (escapeLiteral->size() != 1) { + Ctx.IncrementMonCounter("sql_errors", "LikeMultiCharEscape"); + Error() << "ESCAPE clause requires single character argument"; + return nullptr; + } + if (escapeLiteral[0] == "%" || escapeLiteral[0] == "_" || escapeLiteral[0] == "\\") { + Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar"); + Error() << "'%', '_' and '\\' are currently not supported in ESCAPE clause, "; + Error() << "please choose any other character"; + return nullptr; + } + if (!IsAscii(escapeLiteral->front())) { + Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar"); + Error() << "Non-ASCII symbols are not supported in ESCAPE clause, "; + Error() << "please choose ASCII character"; + return nullptr; + } + escaperArgs.push_back(BuildLiteralRawString(pos, *escapeLiteral)); + } else { + Ctx.IncrementMonCounter("sql_errors", "LikeNotLiteralEscape"); + Error() << "ESCAPE clause requires String literal argument"; + return nullptr; + } + } + + auto re2options = BuildUdf(Ctx, pos, "Re2", "Options", {}); + if (opName == "ilike") { + Ctx.IncrementMonCounter("sql_features", "CaseInsensitiveLike"); + } + auto csModeLiteral = BuildLiteralBool(pos, opName != "ilike"); + csModeLiteral->SetLabel("CaseSensitive"); + auto csOption = BuildStructure(pos, { csModeLiteral }); + auto 
optionsApply = new TCallNodeImpl(pos, "NamedApply", { re2options, BuildTuple(pos, {}), csOption }); + + const TNodePtr escapedPattern = new TCallNodeImpl(pos, "Apply", { escaperArgs }); + auto list = new TAstListNodeImpl(pos, { escapedPattern, optionsApply }); + auto runConfig = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), list }); + + const TNodePtr matcher = new TCallNodeImpl(pos, "AssumeStrict", { BuildUdf(Ctx, pos, "Re2", "Match", { runConfig }) }); + isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res }); + + bool isUtf8 = false; + const TString* literalPattern = pattern->GetLiteral("String"); + if (!literalPattern) { + literalPattern = pattern->GetLiteral("Utf8"); + isUtf8 = literalPattern != nullptr; + } + + if (literalPattern) { + bool inEscape = false; + TMaybe<char> escape; + if (escapeLiteral) { + escape = escapeLiteral->front(); + } + + bool mayIgnoreCase; + TVector<TPatternComponent<char>> components; + if (isUtf8) { + auto splitResult = SplitPattern(UTF8ToUTF32<false>(*literalPattern), escape, inEscape); + for (const auto& component : splitResult) { + TPatternComponent<char> converted; + converted.IsSimple = component.IsSimple; + converted.Prefix = WideToUTF8(component.Prefix); + converted.Suffix = WideToUTF8(component.Suffix); + components.push_back(std::move(converted)); + } + mayIgnoreCase = ToLowerUTF8(*literalPattern) == ToUpperUTF8(*literalPattern); + } else { + components = SplitPattern(*literalPattern, escape, inEscape); + mayIgnoreCase = WithoutAlpha(*literalPattern); + } + + if (inEscape) { + Ctx.IncrementMonCounter("sql_errors", "LikeEscapeSymbolEnd"); + Error() << "LIKE pattern should not end with escape symbol"; + return nullptr; + } + + if (opName == "like" || mayIgnoreCase) { + // TODO: expand LIKE in optimizers - we can analyze argument types there + YQL_ENSURE(!components.empty()); + const auto& first = components.front(); + if (components.size() == 1 && first.IsSimple) { + // no '%'s and '_'s in pattern + 
YQL_ENSURE(first.Prefix == first.Suffix); + isMatch = BuildBinaryOp(Ctx, pos, "==", res, BuildLiteralRawString(pos, first.Suffix, isUtf8)); + } else if (!first.Prefix.empty()) { + const TString& prefix = first.Prefix; + TNodePtr prefixMatch; + if (Ctx.EmitStartsWith) { + prefixMatch = BuildBinaryOp(Ctx, pos, "StartsWith", res, BuildLiteralRawString(pos, prefix, isUtf8)); + } else { + prefixMatch = BuildBinaryOp(Ctx, pos, ">=", res, BuildLiteralRawString(pos, prefix, isUtf8)); + auto upperBound = isUtf8 ? NextValidUtf8(prefix) : NextLexicographicString(prefix); + if (upperBound) { + prefixMatch = BuildBinaryOp( + Ctx, + pos, + "And", + prefixMatch, + BuildBinaryOp(Ctx, pos, "<", res, BuildLiteralRawString(pos, TString(*upperBound), isUtf8)) + ); + } + } + + if (Ctx.AnsiLike && first.IsSimple && components.size() == 2 && components.back().IsSimple) { + const TString& suffix = components.back().Suffix; + // 'prefix%suffix' + if (suffix.empty()) { + isMatch = prefixMatch; + } else { + // len(str) >= len(prefix) + len(suffix) && StartsWith(str, prefix) && EndsWith(str, suffix) + TNodePtr sizePred = BuildBinaryOp(Ctx, pos, ">=", + TNodePtr(new TCallNodeImpl(pos, "Size", { res })), + TNodePtr(new TLiteralNumberNode<ui32>(pos, "Uint32", ToString(prefix.size() + suffix.size())))); + TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8)); + isMatch = new TCallNodeImpl(pos, "And", { + sizePred, + prefixMatch, + suffixMatch + }); + } + } else { + isMatch = BuildBinaryOp(Ctx, pos, "And", prefixMatch, isMatch); + } + } else if (Ctx.AnsiLike && AllOf(components, [](const auto& comp) { return comp.IsSimple; })) { + YQL_ENSURE(first.Prefix.empty()); + if (components.size() == 3 && components.back().Prefix.empty()) { + // '%foo%' + YQL_ENSURE(!components[1].Prefix.empty()); + isMatch = BuildBinaryOp(Ctx, pos, "StringContains", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8)); + } else if (components.size() == 2) { + 
// '%foo' + isMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8)); + } + } else if (Ctx.AnsiLike && !components.back().Suffix.empty()) { + const TString& suffix = components.back().Suffix; + TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8)); + isMatch = BuildBinaryOp(Ctx, pos, "And", suffixMatch, isMatch); + } + // TODO: more StringContains/StartsWith/EndsWith cases? + } + } + + Ctx.IncrementMonCounter("sql_features", notMatch ? "NotLike" : "Like"); + + } else if (opName == "regexp" || opName == "rlike" || opName == "match") { + if (matchOp.HasBlock4()) { + Ctx.IncrementMonCounter("sql_errors", "RegexpEscape"); + TString opNameUpper(opName); + opNameUpper.to_upper(); + /* report the operator by its upper-cased name, which is what opNameUpper is computed for */ + Error() << opNameUpper << " and ESCAPE clauses should not be used together"; + return nullptr; + } + + if (!Ctx.PragmaRegexUseRe2) { + Ctx.Warning(pos, TIssuesIds::CORE_LEGACY_REGEX_ENGINE) << "Legacy regex engine works incorrectly with unicode. Use PRAGMA RegexUseRe2='true';"; + } + + const auto& matcher = Ctx.PragmaRegexUseRe2 ? + BuildUdf(Ctx, pos, "Re2", opName == "match" ? "Match" : "Grep", {BuildTuple(pos, {pattern, BuildLiteralNull(pos)})}): + BuildUdf(Ctx, pos, "Pcre", opName == "match" ? "BacktrackingMatch" : "BacktrackingGrep", { pattern }); + isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res }); + if (opName != "match") { + Ctx.IncrementMonCounter("sql_features", notMatch ? "NotRegexp" : "Regexp"); + } else { + Ctx.IncrementMonCounter("sql_features", notMatch ? "NotMatch" : "Match"); + } + } else { + Ctx.IncrementMonCounter("sql_errors", "UnknownMatchOp"); + AltNotImplemented("match_op", cond); + return nullptr; + } + return (notMatch && isMatch) ? isMatch->ApplyUnaryOp(Ctx, pos, "Not") : isMatch; + } + case TRule_cond_expr::kAltCondExpr2: { + // | NOT? IN COMPACT? 
in_expr + auto altInExpr = cond.GetAlt_cond_expr2(); + const bool notIn = altInExpr.HasBlock1(); + auto hints = BuildTuple(pos, {}); + bool isCompact = altInExpr.HasBlock3(); + if (!isCompact) { + auto sqlHints = Ctx.PullHintForToken(Ctx.TokenPosition(altInExpr.GetToken2())); + isCompact = AnyOf(sqlHints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; }); + } + if (isCompact) { + Ctx.IncrementMonCounter("sql_features", "IsCompactHint"); + auto sizeHint = BuildTuple(pos, { BuildQuotedAtom(pos, "isCompact", NYql::TNodeFlags::Default) }); + hints = BuildTuple(pos, { sizeHint }); + } + TSqlExpression inSubexpr(Ctx, Mode); + auto inRight = inSubexpr.SqlInExpr(altInExpr.GetRule_in_expr4(), tail); + auto isIn = BuildBuiltinFunc(Ctx, pos, "In", {res, inRight, hints}); + Ctx.IncrementMonCounter("sql_features", notIn ? "NotIn" : "In"); + return (notIn && isIn) ? isIn->ApplyUnaryOp(Ctx, pos, "Not") : isIn; + } + case TRule_cond_expr::kAltCondExpr3: { + if (tail.Count) { + UnexpectedQuestionToken(tail); + return {}; + } + auto altCase = cond.GetAlt_cond_expr3().GetBlock1().Alt_case(); + const bool notNoll = + altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt2 || + altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4 + ; + + if (altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4 && + !cond.GetAlt_cond_expr3().GetBlock1().GetAlt4().HasBlock1()) + { + Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_MISSING_IS_BEFORE_NOT_NULL) << "Missing IS keyword before NOT NULL"; + } + + auto isNull = BuildIsNullOp(pos, res); + Ctx.IncrementMonCounter("sql_features", notNoll ? "NotNull" : "Null"); + return (notNoll && isNull) ? 
isNull->ApplyUnaryOp(Ctx, pos, "Not") : isNull; + } + case TRule_cond_expr::kAltCondExpr4: { + auto alt = cond.GetAlt_cond_expr4(); + const bool symmetric = alt.HasBlock3() && IS_TOKEN(alt.GetBlock3().GetToken1().GetId(), SYMMETRIC); + const bool negation = alt.HasBlock1(); + TNodePtr left = SubExpr(alt.GetRule_eq_subexpr4(), {}); + TNodePtr right = SubExpr(alt.GetRule_eq_subexpr6(), tail); + if (!left || !right) { + return {}; + } + + const bool bothArgNull = left->IsNull() && right->IsNull(); + const bool oneArgNull = left->IsNull() || right->IsNull(); + + if (res->IsNull() || bothArgNull || (symmetric && oneArgNull)) { + Ctx.Warning(pos, TIssuesIds::YQL_OPERATION_WILL_RETURN_NULL) + << "BETWEEN operation will return NULL here"; + } + + auto buildSubexpr = [&](const TNodePtr& left, const TNodePtr& right) { + if (negation) { + return BuildBinaryOpRaw( + pos, + "Or", + BuildBinaryOpRaw(pos, "<", res, left), + BuildBinaryOpRaw(pos, ">", res, right) + ); + } else { + return BuildBinaryOpRaw( + pos, + "And", + BuildBinaryOpRaw(pos, ">=", res, left), + BuildBinaryOpRaw(pos, "<=", res, right) + ); + } + }; + + if (symmetric) { + Ctx.IncrementMonCounter("sql_features", negation? "NotBetweenSymmetric" : "BetweenSymmetric"); + return BuildBinaryOpRaw( + pos, + negation? "And" : "Or", + buildSubexpr(left, right), + buildSubexpr(right, left) + ); + } else { + Ctx.IncrementMonCounter("sql_features", negation? 
"NotBetween" : "Between"); + return buildSubexpr(left, right); + } + } + case TRule_cond_expr::kAltCondExpr5: { + auto alt = cond.GetAlt_cond_expr5(); + auto getNode = [](const TRule_cond_expr::TAlt5::TBlock1& b) -> const TRule_eq_subexpr& { return b.GetRule_eq_subexpr2(); }; + return BinOpList(node.GetRule_eq_subexpr1(), getNode, alt.GetBlock1().begin(), alt.GetBlock1().end(), tail); + } + case TRule_cond_expr::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownConditionExpr"); + AltNotImplemented("cond_expr", cond); + return nullptr; + } + } + return res; +} + +/* Combines the operands in [begin, end) with opName. Two or three operands are chained directly; longer ranges are split in halves recursively. At least two operands are expected (checked by Y_DEBUG_ABORT_UNLESS). */ +TNodePtr TSqlExpression::BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const { + TPosition pos(Ctx.Pos()); + const size_t opCount = end - begin; + Y_DEBUG_ABORT_UNLESS(opCount >= 2); + if (opCount == 2) { + return BuildBinaryOp(Ctx, pos, opName, *begin, *(begin+1)); + } if (opCount == 3) { + return BuildBinaryOp(Ctx, pos, opName, BuildBinaryOp(Ctx, pos, opName, *begin, *(begin+1)), *(begin+2)); + } else { + auto mid = begin + opCount / 2; + return BuildBinaryOp(Ctx, pos, opName, BinOperList(opName, begin, mid), BinOperList(opName, mid, end)); + } +} + +/* Folds the branches in [begin, end) into one branch by recursive halving: Pred is an Or over clones of all predicates in the range and Value is If(left.Pred, left.Value, right.Value). A single branch is returned unchanged. */ +TSqlExpression::TCaseBranch TSqlExpression::ReduceCaseBranches(TVector<TCaseBranch>::const_iterator begin, TVector<TCaseBranch>::const_iterator end) const { + YQL_ENSURE(begin < end); + const size_t branchCount = end - begin; + if (branchCount == 1) { + return *begin; + } + + auto mid = begin + branchCount / 2; + auto left = ReduceCaseBranches(begin, mid); + auto right = ReduceCaseBranches(mid, end); + + TVector<TNodePtr> preds; + preds.reserve(branchCount); + for (auto it = begin; it != end; ++it) { + preds.push_back(it->Pred); + } + + TCaseBranch result; + result.Pred = new TCallNodeImpl(Ctx.Pos(), "Or", CloneContainer(preds)); + result.Value = BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { left.Pred, left.Value, right.Value }); + return result; +} + +/* Builds node and every element of [begin, end) as operands and joins them with opName through BinOperList. An empty range just returns SubExpr(node, tail); otherwise tail is passed to the last operand only. */ +template <typename TNode, 
typename TGetNode, typename TIter> +TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + if (begin == end) { + return SubExpr(node, tail); + } + // can't have top level smart_parenthesis node if any binary operation is present + MaybeUnnamedSmartParenOnTop = false; + Ctx.IncrementMonCounter("sql_binary_operations", opName); + const size_t listSize = end - begin; + TVector<TNodePtr> nodes; + nodes.reserve(1 + listSize); + nodes.push_back(SubExpr(node, {})); + for (; begin != end; ++begin) { + nodes.push_back(SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{})); + } + return BinOperList(opName, nodes.begin(), nodes.end()); +} + +/* Left-to-right fold of binary operators: the operator token of each element selects the operation name, the arithmetic operators switch to their Checked names under PragmaCheckedOps, and the right operand comes from getNode. For SLASH the left operand is cast to Double unless PragmaClassicDivision is set. An unknown token is reported and yields nullptr. */ +template <typename TNode, typename TGetNode, typename TIter> +TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end); + TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{}); + while (begin != end) { + Ctx.IncrementMonCounter("sql_features", "BinaryOperation"); + Token(begin->GetToken1()); + TPosition pos(Ctx.Pos()); + TString opName; + auto tokenId = begin->GetToken1().GetId(); + if (IS_TOKEN(tokenId, LESS)) { + opName = "<"; + Ctx.IncrementMonCounter("sql_binary_operations", "Less"); + } else if (IS_TOKEN(tokenId, LESS_OR_EQ)) { + opName = "<="; + Ctx.IncrementMonCounter("sql_binary_operations", "LessOrEq"); + } else if (IS_TOKEN(tokenId, GREATER)) { + opName = ">"; + Ctx.IncrementMonCounter("sql_binary_operations", "Greater"); + } else if (IS_TOKEN(tokenId, GREATER_OR_EQ)) { + opName = ">="; + Ctx.IncrementMonCounter("sql_binary_operations", "GreaterOrEq"); + } else if (IS_TOKEN(tokenId, PLUS)) { + opName = Ctx.Scoped->PragmaCheckedOps ? 
"CheckedAdd" : "+MayWarn"; + Ctx.IncrementMonCounter("sql_binary_operations", "Plus"); + } else if (IS_TOKEN(tokenId, MINUS)) { + opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedSub" : "-MayWarn"; + Ctx.IncrementMonCounter("sql_binary_operations", "Minus"); + } else if (IS_TOKEN(tokenId, ASTERISK)) { + opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMul" : "*MayWarn"; + Ctx.IncrementMonCounter("sql_binary_operations", "Multiply"); + } else if (IS_TOKEN(tokenId, SLASH)) { + opName = "/MayWarn"; + Ctx.IncrementMonCounter("sql_binary_operations", "Divide"); + if (!Ctx.Scoped->PragmaClassicDivision && partialResult) { + partialResult = new TCallNodeImpl(pos, "SafeCast", {std::move(partialResult), BuildDataType(pos, "Double")}); + } else if (Ctx.Scoped->PragmaCheckedOps) { + opName = "CheckedDiv"; + } + } else if (IS_TOKEN(tokenId, PERCENT)) { + opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMod" : "%MayWarn"; + Ctx.IncrementMonCounter("sql_binary_operations", "Mod"); + } else { + Ctx.IncrementMonCounter("sql_errors", "UnsupportedBinaryOperation"); + Error() << "Unsupported binary operation token: " << tokenId; + return nullptr; + } + + partialResult = BuildBinaryOp(Ctx, pos, opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{})); + ++begin; + } + + return partialResult; +} + +/* Same fold for the shift, rotate and bitwise operators that follow a bit_subexpr: ShiftLeft, ShiftRight, RotLeft, RotRight, BitAnd, BitOr and BitXor. */ +template <typename TGetNode, typename TIter> +TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end); + TNodePtr partialResult = SubExpr(node, (begin == end) ? 
tail : TTrailingQuestions{}); + while (begin != end) { + Ctx.IncrementMonCounter("sql_features", "BinaryOperation"); + TString opName; + switch (begin->GetBlock1().Alt_case()) { + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt1: { + Token(begin->GetBlock1().GetAlt1().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, SHIFT_LEFT)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + opName = "ShiftLeft"; + Ctx.IncrementMonCounter("sql_binary_operations", "ShiftLeft"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt2: { + opName = "ShiftRight"; + Ctx.IncrementMonCounter("sql_binary_operations", "ShiftRight"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt3: { + Token(begin->GetBlock1().GetAlt3().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, ROT_LEFT)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + opName = "RotLeft"; + Ctx.IncrementMonCounter("sql_binary_operations", "RotLeft"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt4: { + opName = "RotRight"; + Ctx.IncrementMonCounter("sql_binary_operations", "RotRight"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt5: { + Token(begin->GetBlock1().GetAlt5().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt5().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, AMPERSAND)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + opName = "BitAnd"; + Ctx.IncrementMonCounter("sql_binary_operations", "BitAnd"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt6: { + Token(begin->GetBlock1().GetAlt6().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt6().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, PIPE)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + opName = "BitOr"; + 
Ctx.IncrementMonCounter("sql_binary_operations", "BitOr"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::kAlt7: { + Token(begin->GetBlock1().GetAlt7().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt7().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, CARET)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + opName = "BitXor"; + Ctx.IncrementMonCounter("sql_binary_operations", "BitXor"); + break; + } + case TRule_neq_subexpr_TBlock2_TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{})); + ++begin; + } + + return partialResult; +} + +template <typename TGetNode, typename TIter> +TNodePtr TSqlExpression::BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) { + MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end); + TNodePtr partialResult = SubExpr(node, (begin == end) ? 
tail : TTrailingQuestions{}); + while (begin != end) { + Ctx.IncrementMonCounter("sql_features", "BinaryOperation"); + TString opName; + switch (begin->GetBlock1().Alt_case()) { + case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt1: { + Token(begin->GetBlock1().GetAlt1().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, EQUALS)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + Ctx.IncrementMonCounter("sql_binary_operations", "Equals"); + opName = "=="; + break; + } + case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt2: { + Token(begin->GetBlock1().GetAlt2().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt2().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, EQUALS2)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + Ctx.IncrementMonCounter("sql_binary_operations", "Equals2"); + opName = "=="; + break; + } + case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt3: { + Token(begin->GetBlock1().GetAlt3().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, NOT_EQUALS)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals"); + opName = "!="; + break; + } + case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt4: { + Token(begin->GetBlock1().GetAlt4().GetToken1()); + auto tokenId = begin->GetBlock1().GetAlt4().GetToken1().GetId(); + if (!IS_TOKEN(tokenId, NOT_EQUALS2)) { + Error() << "Unsupported binary operation token: " << tokenId; + return {}; + } + Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals2"); + opName = "!="; + break; + } + case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt5: { + Token(begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().GetToken1()); + opName = begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().HasBlock2() ? 
"IsNotDistinctFrom" : "IsDistinctFrom"; + Ctx.IncrementMonCounter("sql_binary_operations", opName); + break; + } + case TRule_cond_expr::TAlt5::TBlock1::TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{})); + ++begin; + } + + return partialResult; +} + +TNodePtr TSqlExpression::SqlInExpr(const TRule_in_expr& node, const TTrailingQuestions& tail) { + TSqlExpression expr(Ctx, Mode); + expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::InStatement); + auto result = expr.UnaryExpr(node.GetRule_in_unary_subexpr1(), tail); + return result; +} + +TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) { + TVector<TNodePtr> exprs; + Token(node.GetToken1()); + const TPosition pos(Ctx.Pos()); + const bool isTuple = node.HasBlock3(); + bool expectTuple = SmartParenthesisMode == ESmartParenthesis::InStatement; + EExpr mode = EExpr::Regular; + if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) { + mode = EExpr::SqlLambdaParams; + expectTuple = true; + } + if (node.HasBlock2() && !NamedExprList(node.GetBlock2().GetRule_named_expr_list1(), exprs, mode)) { + return {}; + } + + bool topLevelGroupBy = MaybeUnnamedSmartParenOnTop && SmartParenthesisMode == ESmartParenthesis::GroupBy; + + bool hasAliases = false; + bool hasUnnamed = false; + for (const auto& expr: exprs) { + if (expr->GetLabel()) { + hasAliases = true; + } else { + hasUnnamed = true; + } + if (hasAliases && hasUnnamed && !topLevelGroupBy) { + Ctx.IncrementMonCounter("sql_errors", "AnonymousStructMembers"); + Ctx.Error(pos) << "Structure does not allow anonymous members"; + return nullptr; + } + } + if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) { + return exprs.back(); + } + if (topLevelGroupBy) { + if (isTuple) { + Ctx.IncrementMonCounter("sql_errors", 
"SimpleTupleInGroupBy"); + Token(node.GetBlock3().GetToken1()); + Ctx.Error() << "Unexpected trailing comma in grouping elements list"; + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", "ListOfNamedNode"); + return BuildListOfNamedNodes(pos, std::move(exprs)); + } + Ctx.IncrementMonCounter("sql_features", hasUnnamed ? "SimpleTuple" : "SimpleStruct"); + return (hasUnnamed || expectTuple || exprs.size() == 0) ? BuildTuple(pos, exprs) : BuildStructure(pos, exprs); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_expression.h b/yql/essentials/sql/v1/sql_expression.h new file mode 100644 index 00000000000..64b9dd8a690 --- /dev/null +++ b/yql/essentials/sql/v1/sql_expression.h @@ -0,0 +1,147 @@ +#pragma once + +#include "sql_translation.h" +#include <yql/essentials/core/sql_types/yql_atom_enums.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TSqlExpression: public TSqlTranslation { +public: + enum class ESmartParenthesis { + Default, + GroupBy, + InStatement, + SqlLambdaParams, + }; + + TSqlExpression(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlTranslation(ctx, mode) + { + } + + TNodePtr Build(const TRule_expr& node); + + void SetSmartParenthesisMode(ESmartParenthesis mode) { + SmartParenthesisMode = mode; + } + + void MarkAsNamed() { + MaybeUnnamedSmartParenOnTop = false; + } + + TMaybe<TExprOrIdent> LiteralExpr(const TRule_literal_value& node); +private: + struct TTrailingQuestions { + size_t Count = 0; + TPosition Pos; + }; + + TNodePtr BindParameterRule(const TRule_bind_parameter& rule, const TTrailingQuestions& tail); + TNodePtr LambdaRule(const TRule_lambda& rule); + TNodePtr CastRule(const TRule_cast_expr& rule); + TNodePtr BitCastRule(const TRule_bitcast_expr& rule); + TNodePtr ExistsRule(const TRule_exists_expr& rule); + TNodePtr CaseRule(const TRule_case_expr& rule); + + TMaybe<TExprOrIdent> AtomExpr(const TRule_atom_expr& node, const TTrailingQuestions& tail); + 
TMaybe<TExprOrIdent> InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail); + + TNodePtr JsonInputArg(const TRule_json_common_args& node); + TNodePtr JsonPathSpecification(const TRule_jsonpath_spec& node); + TNodePtr JsonReturningTypeRule(const TRule_type_name_simple& node); + TNodePtr JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode); + void AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children); + void AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children); + void AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children); + TNodePtr JsonVariables(const TRule_json_common_args& node); + void AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children); + TNodePtr JsonValueExpr(const TRule_json_value& node); + void AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children); + TNodePtr JsonExistsExpr(const TRule_json_exists& node); + EJsonQueryWrap JsonQueryWrapper(const TRule_json_query& node); + EJsonQueryHandler JsonQueryHandler(const TRule_json_query_handler& node); + TNodePtr JsonQueryExpr(const TRule_json_query& node); + TNodePtr JsonApiExpr(const TRule_json_api_expr& node); + + template<typename TUnaryCasualExprRule> + TNodePtr UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail); + + template<typename TUnarySubExprRule> + TNodePtr UnaryExpr(const TUnarySubExprRule& node, const TTrailingQuestions& tail); + + bool SqlLambdaParams(const TNodePtr& node, TVector<TSymbolNameWithPos>& args, ui32& optionalArgumentsCount); + bool SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq); + bool SqlLambdaExprBody(TContext& ctx, const TRule_expr& node, TVector<TNodePtr>& exprSeq); + + TNodePtr KeyExpr(const TRule_key_expr& node) { + TSqlExpression expr(Ctx, Mode); + return expr.Build(node.GetRule_expr2()); + } + + TNodePtr 
SubExpr(const TRule_con_subexpr& node, const TTrailingQuestions& tail); + TNodePtr SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail); + + TNodePtr SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail); + + TNodePtr SubExpr(const TRule_add_subexpr& node, const TTrailingQuestions& tail); + + TNodePtr SubExpr(const TRule_bit_subexpr& node, const TTrailingQuestions& tail); + + TNodePtr SubExpr(const TRule_neq_subexpr& node, const TTrailingQuestions& tailExternal); + + TNodePtr SubExpr(const TRule_eq_subexpr& node, const TTrailingQuestions& tail); + + TNodePtr SubExpr(const TRule_or_subexpr& node, const TTrailingQuestions& tail); + + TNodePtr SubExpr(const TRule_and_subexpr& node, const TTrailingQuestions& tail); + + template <typename TNode, typename TGetNode, typename TIter> + TNodePtr BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail); + + template <typename TGetNode, typename TIter> + TNodePtr BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail); + + template <typename TGetNode, typename TIter> + TNodePtr BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail); + + TNodePtr BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const; + + TNodePtr RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block); + + struct TCaseBranch { + TNodePtr Pred; + TNodePtr Value; + }; + TCaseBranch ReduceCaseBranches(TVector<TCaseBranch>::const_iterator begin, TVector<TCaseBranch>::const_iterator end) const; + + template <typename TNode, typename TGetNode, typename TIter> + TNodePtr BinOper(const TString& operName, const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail); + + TNodePtr SqlInExpr(const TRule_in_expr& node, const 
TTrailingQuestions& tail); + + void UnexpectedQuestionToken(const TTrailingQuestions& tail) { + YQL_ENSURE(tail.Count > 0); + Ctx.Error(tail.Pos) << "Unexpected token '?' at the end of expression"; + } + + TNodePtr SmartParenthesis(const TRule_smart_parenthesis& node); + + ESmartParenthesis SmartParenthesisMode = ESmartParenthesis::Default; + bool MaybeUnnamedSmartParenOnTop = true; + + THashMap<TString, TNodePtr> ExprShortcuts; +}; + +bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter); + +bool ChangefeedSettings(const TRule_changefeed_settings& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter); + +bool CreateChangefeed(const TRule_changefeed& node, TSqlExpression& ctx, TVector<TChangefeedDescription>& changefeeds); + +bool Expr(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr& node); + +bool ExprList(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr_list& node); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_group_by.cpp b/yql/essentials/sql/v1/sql_group_by.cpp new file mode 100644 index 00000000000..46ae8ee52a8 --- /dev/null +++ b/yql/essentials/sql/v1/sql_group_by.cpp @@ -0,0 +1,475 @@ +#include "sql_group_by.h" +#include "sql_expression.h" +#include "source.h" +#include <yql/essentials/minikql/mkql_type_ops.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +const TString TGroupByClause::AutogenerateNamePrefix = "group"; + +bool TGroupByClause::Build(const TRule_group_by_clause& node) { + // group_by_clause: GROUP COMPACT? 
BY opt_set_quantifier grouping_element_list (WITH an_id)?; + if (Ctx.CompactGroupBy.Defined()) { + CompactGroupBy = *Ctx.CompactGroupBy; + } else { + CompactGroupBy = node.HasBlock2(); + if (!CompactGroupBy) { + auto hints = Ctx.PullHintForToken(Ctx.TokenPosition(node.GetToken1())); + CompactGroupBy = AnyOf(hints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; }); + } + } + TPosition distinctPos; + if (IsDistinctOptSet(node.GetRule_opt_set_quantifier4(), distinctPos)) { + Ctx.Error(distinctPos) << "DISTINCT is not supported in GROUP BY clause yet!"; + Ctx.IncrementMonCounter("sql_errors", "DistinctInGroupByNotSupported"); + return false; + } + if (!ParseList(node.GetRule_grouping_element_list5(), EGroupByFeatures::Ordinary)) { + return false; + } + + if (node.HasBlock6()) { + TString mode = Id(node.GetBlock6().GetRule_an_id2(), *this); + TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), mode); + if (!normalizeError.Empty()) { + Error() << normalizeError->GetMessage(); + Ctx.IncrementMonCounter("sql_errors", "NormalizeGroupByModeError"); + return false; + } + + if (mode == "combine") { + Suffix = "Combine"; + } else if (mode == "combinestate") { + Suffix = "CombineState"; + } else if (mode == "mergestate") { + Suffix = "MergeState"; + } else if (mode == "finalize") { + Suffix = "Finalize"; + } else if (mode == "mergefinalize") { + Suffix = "MergeFinalize"; + } else if (mode == "mergemanyfinalize") { + Suffix = "MergeManyFinalize"; + } else { + Ctx.Error() << "Unsupported group by mode: " << mode; + Ctx.IncrementMonCounter("sql_errors", "GroupByModeUnknown"); + return false; + } + } + + if (!ResolveGroupByAndGrouping()) { + return false; + } + return true; +} + +bool TGroupByClause::ParseList(const TRule_grouping_element_list& groupingListNode, EGroupByFeatures featureContext) { + if (!GroupingElement(groupingListNode.GetRule_grouping_element1(), featureContext)) { + return false; + } + for (auto b: 
groupingListNode.GetBlock2()) { + if (!GroupingElement(b.GetRule_grouping_element2(), featureContext)) { + return false; + } + } + return true; +} + +void TGroupByClause::SetFeatures(const TString& field) const { + Ctx.IncrementMonCounter(field, "GroupBy"); + const auto& features = Features(); + if (features.Test(EGroupByFeatures::Ordinary)) { + Ctx.IncrementMonCounter(field, "GroupByOrdinary"); + } + if (features.Test(EGroupByFeatures::Expression)) { + Ctx.IncrementMonCounter(field, "GroupByExpression"); + } + if (features.Test(EGroupByFeatures::Rollup)) { + Ctx.IncrementMonCounter(field, "GroupByRollup"); + } + if (features.Test(EGroupByFeatures::Cube)) { + Ctx.IncrementMonCounter(field, "GroupByCube"); + } + if (features.Test(EGroupByFeatures::GroupingSet)) { + Ctx.IncrementMonCounter(field, "GroupByGroupingSet"); + } + if (features.Test(EGroupByFeatures::Empty)) { + Ctx.IncrementMonCounter(field, "GroupByEmpty"); + } +} + +TVector<TNodePtr>& TGroupByClause::Content() { + return GroupBySet; +} + +TMap<TString, TNodePtr>& TGroupByClause::Aliases() { + return GroupSetContext->NodeAliases; +} + +TLegacyHoppingWindowSpecPtr TGroupByClause::GetLegacyHoppingWindow() const { + return LegacyHoppingWindowSpec; +} + +bool TGroupByClause::IsCompactGroupBy() const { + return CompactGroupBy; +} + +TString TGroupByClause::GetSuffix() const { + return Suffix; +} + +TMaybe<TVector<TNodePtr>> TGroupByClause::MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const { + TVector<TNodePtr> content; + for (const auto& leftNode: lhs) { + auto leftPtr = leftNode->ContentListPtr(); + if (!leftPtr) { + // TODO: shouldn't happen + Ctx.Error() << "Unable to multiply grouping sets"; + return {}; + } + for (const auto& rightNode: rhs) { + TVector<TNodePtr> mulItem(leftPtr->begin(), leftPtr->end()); + auto rightPtr = rightNode->ContentListPtr(); + if (!rightPtr) { + // TODO: shouldn't happen + Ctx.Error() << "Unable to multiply grouping sets"; + return {}; + } + 
mulItem.insert(mulItem.end(), rightPtr->begin(), rightPtr->end()); + content.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(mulItem))); + } + } + return content; +} + +bool TGroupByClause::ResolveGroupByAndGrouping() { + auto listPos = std::find_if(GroupBySet.begin(), GroupBySet.end(), [](const TNodePtr& node) { + return node->ContentListPtr(); + }); + if (listPos == GroupBySet.end()) { + return true; + } + auto curContent = *(*listPos)->ContentListPtr(); + if (listPos != GroupBySet.begin()) { + TVector<TNodePtr> emulate(GroupBySet.begin(), listPos); + TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate))); + auto mult = MultiplyGroupingSets(emulateContent, curContent); + if (!mult) { + return false; + } + curContent = *mult; + } + for (++listPos; listPos != GroupBySet.end(); ++listPos) { + auto newElem = (*listPos)->ContentListPtr(); + if (newElem) { + auto mult = MultiplyGroupingSets(curContent, *newElem); + if (!mult) { + return false; + } + curContent = *mult; + } else { + TVector<TNodePtr> emulate(1, *listPos); + TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate))); + auto mult = MultiplyGroupingSets(curContent, emulateContent); + if (!mult) { + return false; + } + curContent = *mult; + } + } + TVector<TNodePtr> result(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(curContent))); + std::swap(result, GroupBySet); + return true; +} + +bool TGroupByClause::GroupingElement(const TRule_grouping_element& node, EGroupByFeatures featureContext) { + TSourcePtr res; + TVector<TNodePtr> emptyContent; + switch (node.Alt_case()) { + case TRule_grouping_element::kAltGroupingElement1: + if (!OrdinaryGroupingSet(node.GetAlt_grouping_element1().GetRule_ordinary_grouping_set1(), featureContext)) { + return false; + } + Features().Set(EGroupByFeatures::Ordinary); + break; + case TRule_grouping_element::kAltGroupingElement2: { + TGroupByClause subClause(Ctx, Mode, GroupSetContext); + if 
(!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element2().GetRule_rollup_list1().GetRule_ordinary_grouping_set_list3(), + EGroupByFeatures::Rollup)) + { + return false; + } + auto& content = subClause.Content(); + TVector<TNodePtr> collection; + for (auto limit = content.end(), begin = content.begin(); limit != begin; --limit) { + TVector<TNodePtr> grouping(begin, limit); + collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping))); + } + collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent))); + GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection))); + Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByRollup" << content.size()); + Features().Set(EGroupByFeatures::Rollup); + break; + } + case TRule_grouping_element::kAltGroupingElement3: { + TGroupByClause subClause(Ctx, Mode, GroupSetContext); + if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element3().GetRule_cube_list1().GetRule_ordinary_grouping_set_list3(), + EGroupByFeatures::Cube)) + { + return false; + } + auto& content = subClause.Content(); + if (content.size() > Ctx.PragmaGroupByCubeLimit) { + Ctx.Error() << "GROUP BY CUBE is allowed only for " << Ctx.PragmaGroupByCubeLimit << " columns, but you use " << content.size(); + return false; + } + TVector<TNodePtr> collection; + for (unsigned mask = (1 << content.size()) - 1; mask > 0; --mask) { + TVector<TNodePtr> grouping; + for (unsigned index = 0; index < content.size(); ++index) { + if (mask & (1 << index)) { + grouping.push_back(content[content.size() - index - 1]); + } + } + collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping))); + } + collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent))); + GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection))); + Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByCube" << content.size()); + Features().Set(EGroupByFeatures::Cube); 
+ break; + } + case TRule_grouping_element::kAltGroupingElement4: { + auto listNode = node.GetAlt_grouping_element4().GetRule_grouping_sets_specification1().GetRule_grouping_element_list4(); + TGroupByClause subClause(Ctx, Mode, GroupSetContext); + if (!subClause.ParseList(listNode, EGroupByFeatures::GroupingSet)) { + return false; + } + auto& content = subClause.Content(); + TVector<TNodePtr> collection; + bool hasEmpty = false; + for (auto& elem: content) { + auto elemContent = elem->ContentListPtr(); + if (elemContent) { + if (!elemContent->empty() && elemContent->front()->ContentListPtr()) { + for (auto& sub: *elemContent) { + FeedCollection(sub, collection, hasEmpty); + } + } else { + FeedCollection(elem, collection, hasEmpty); + } + } else { + TVector<TNodePtr> elemList(1, std::move(elem)); + collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(elemList))); + } + } + GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection))); + Features().Set(EGroupByFeatures::GroupingSet); + break; + } + case TRule_grouping_element::kAltGroupingElement5: { + if (!HoppingWindow(node.GetAlt_grouping_element5().GetRule_hopping_window_specification1())) { + return false; + } + break; + } + case TRule_grouping_element::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; +} + +void TGroupByClause::FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const { + auto elemContentPtr = elem->ContentListPtr(); + if (elemContentPtr && elemContentPtr->empty()) { + if (hasEmpty) { + return; + } + hasEmpty = true; + } + collection.push_back(elem); +} + +bool TGroupByClause::OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node, EGroupByFeatures featureContext) { + TNodePtr namedExprNode; + { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + namedExprNode = NamedExpr(node.GetRule_named_expr1(), EExpr::GroupBy); + } + if (!namedExprNode) { + return false; + } + auto 
nodeLabel = namedExprNode->GetLabel(); + auto contentPtr = namedExprNode->ContentListPtr(); + if (contentPtr) { + if (nodeLabel && (contentPtr->size() != 1 || contentPtr->front()->GetLabel())) { + Ctx.Error() << "Unable to use aliases for list of named expressions"; + Ctx.IncrementMonCounter("sql_errors", "GroupByAliasForListOfExpressions"); + return false; + } + for (auto& content: *contentPtr) { + auto label = content->GetLabel(); + if (!label) { + if (content->GetColumnName()) { + namedExprNode->AssumeColumn(); + continue; + } + + if (!AllowUnnamed(content->GetPos(), featureContext)) { + return false; + } + + content->SetLabel(label = GenerateGroupByExprName()); + } + if (!AddAlias(label, content)) { + return false; + } + content = BuildColumn(content->GetPos(), label); + } + } else { + if (!nodeLabel && namedExprNode->GetColumnName()) { + namedExprNode->AssumeColumn(); + } + + if (!nodeLabel && !namedExprNode->GetColumnName()) { + if (!AllowUnnamed(namedExprNode->GetPos(), featureContext)) { + return false; + } + namedExprNode->SetLabel(nodeLabel = GenerateGroupByExprName()); + } + if (nodeLabel) { + if (!AddAlias(nodeLabel, namedExprNode)) { + return false; + } + namedExprNode = BuildColumn(namedExprNode->GetPos(), nodeLabel); + } + } + GroupBySet.emplace_back(std::move(namedExprNode)); + return true; +} + +bool TGroupByClause::OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node, EGroupByFeatures featureContext) { + if (!OrdinaryGroupingSet(node.GetRule_ordinary_grouping_set1(), featureContext)) { + return false; + } + for (auto& block: node.GetBlock2()) { + if (!OrdinaryGroupingSet(block.GetRule_ordinary_grouping_set2(), featureContext)) { + return false; + } + } + return true; +} + +bool TGroupByClause::HoppingWindow(const TRule_hopping_window_specification& node) { + if (LegacyHoppingWindowSpec) { + Ctx.Error() << "Duplicate hopping window specification."; + return false; + } + LegacyHoppingWindowSpec = new TLegacyHoppingWindowSpec; + { + 
TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + LegacyHoppingWindowSpec->TimeExtractor = expr.Build(node.GetRule_expr3()); + if (!LegacyHoppingWindowSpec->TimeExtractor) { + return false; + } + } + auto processIntervalParam = [&] (const TRule_expr& rule) -> TNodePtr { + TSqlExpression expr(Ctx, Mode); + auto node = expr.Build(rule); + if (!node) { + return nullptr; + } + + auto literal = node->GetLiteral("String"); + if (!literal) { + return new TAstListNodeImpl(Ctx.Pos(), { + new TAstAtomNodeImpl(Ctx.Pos(), "EvaluateExpr", TNodeFlags::Default), + node + }); + } + + const auto out = NKikimr::NMiniKQL::ValueFromString(NKikimr::NUdf::EDataSlot::Interval, *literal); + if (!out) { + Ctx.Error(node->GetPos()) << "Expected interval in ISO 8601 format"; + return nullptr; + } + + if ('T' == literal->back()) { + Ctx.Error(node->GetPos()) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent."; + return nullptr; + } + + return new TAstListNodeImpl(Ctx.Pos(), { + new TAstAtomNodeImpl(Ctx.Pos(), "Interval", TNodeFlags::Default), + new TAstListNodeImpl(Ctx.Pos(), { + new TAstAtomNodeImpl(Ctx.Pos(), "quote", TNodeFlags::Default), + new TAstAtomNodeImpl(Ctx.Pos(), ToString(out.Get<i64>()), TNodeFlags::Default) + }) + }); + }; + + LegacyHoppingWindowSpec->Hop = processIntervalParam(node.GetRule_expr5()); + if (!LegacyHoppingWindowSpec->Hop) { + return false; + } + LegacyHoppingWindowSpec->Interval = processIntervalParam(node.GetRule_expr7()); + if (!LegacyHoppingWindowSpec->Interval) { + return false; + } + LegacyHoppingWindowSpec->Delay = processIntervalParam(node.GetRule_expr9()); + if (!LegacyHoppingWindowSpec->Delay) { + return false; + } + LegacyHoppingWindowSpec->DataWatermarks = Ctx.PragmaDataWatermarks; + + return true; +} + +bool TGroupByClause::AllowUnnamed(TPosition pos, EGroupByFeatures featureContext) { + TStringBuf feature; + switch (featureContext) { + case 
EGroupByFeatures::Ordinary: + return true; + case EGroupByFeatures::Rollup: + feature = "ROLLUP"; + break; + case EGroupByFeatures::Cube: + feature = "CUBE"; + break; + case EGroupByFeatures::GroupingSet: + feature = "GROUPING SETS"; + break; + default: + YQL_ENSURE(false, "Unknown feature"); + } + + Ctx.Error(pos) << "Unnamed expressions are not supported in " << feature << ". Please use '<expr> AS <name>'."; + Ctx.IncrementMonCounter("sql_errors", "GroupBySetNoAliasOrColumn"); + return false; +} + +TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() { + return GroupSetContext->GroupFeatures; +} + +const TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() const { + return GroupSetContext->GroupFeatures; +} + +bool TGroupByClause::AddAlias(const TString& label, const TNodePtr& node) { + if (Aliases().contains(label)) { + Ctx.Error() << "Duplicated aliases not allowed"; + Ctx.IncrementMonCounter("sql_errors", "GroupByDuplicateAliases"); + return false; + } + Aliases().emplace(label, node); + return true; +} + +TString TGroupByClause::GenerateGroupByExprName() { + return TStringBuilder() << AutogenerateNamePrefix << GroupSetContext->UnnamedCount++; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_group_by.h b/yql/essentials/sql/v1/sql_group_by.h new file mode 100644 index 00000000000..83e602596c6 --- /dev/null +++ b/yql/essentials/sql/v1/sql_group_by.h @@ -0,0 +1,73 @@ +#pragma once + +#include "sql_translation.h" + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TGroupByClause: public TSqlTranslation { + enum class EGroupByFeatures { + Begin, + Ordinary = Begin, + Expression, + Rollup, + Cube, + GroupingSet, + Empty, + End, + }; + typedef TEnumBitSet<EGroupByFeatures, static_cast<int>(EGroupByFeatures::Begin), static_cast<int>(EGroupByFeatures::End)> TGroupingSetFeatures; + + class TGroupByClauseCtx: public TSimpleRefCount<TGroupByClauseCtx> { + public: + typedef 
TIntrusivePtr<TGroupByClauseCtx> TPtr; + + TGroupingSetFeatures GroupFeatures; + TMap<TString, TNodePtr> NodeAliases; + size_t UnnamedCount = 0; + }; + +public: + TGroupByClause(TContext& ctx, NSQLTranslation::ESqlMode mode, TGroupByClauseCtx::TPtr groupSetContext = {}) + : TSqlTranslation(ctx, mode) + , GroupSetContext(groupSetContext ? groupSetContext : TGroupByClauseCtx::TPtr(new TGroupByClauseCtx())) + , CompactGroupBy(false) + {} + + bool Build(const TRule_group_by_clause& node); + bool ParseList(const TRule_grouping_element_list& groupingListNode, EGroupByFeatures featureContext); + + void SetFeatures(const TString& field) const; + TVector<TNodePtr>& Content(); + TMap<TString, TNodePtr>& Aliases(); + TLegacyHoppingWindowSpecPtr GetLegacyHoppingWindow() const; + bool IsCompactGroupBy() const; + TString GetSuffix() const; + +private: + TMaybe<TVector<TNodePtr>> MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const; + bool ResolveGroupByAndGrouping(); + bool GroupingElement(const TRule_grouping_element& node, EGroupByFeatures featureContext); + void FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const; + bool OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node, EGroupByFeatures featureContext); + bool OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node, EGroupByFeatures featureContext); + bool HoppingWindow(const TRule_hopping_window_specification& node); + + bool AllowUnnamed(TPosition pos, EGroupByFeatures featureContext); + + TGroupingSetFeatures& Features(); + const TGroupingSetFeatures& Features() const; + bool AddAlias(const TString& label, const TNodePtr& node); + TString GenerateGroupByExprName(); + bool IsAutogenerated(const TString* name) const; + + TVector<TNodePtr> GroupBySet; + TGroupByClauseCtx::TPtr GroupSetContext; + TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec; // stream queries + static const TString AutogenerateNamePrefix; + bool 
CompactGroupBy; + TString Suffix; +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_into_tables.cpp b/yql/essentials/sql/v1/sql_into_tables.cpp new file mode 100644 index 00000000000..43cc08de0d7 --- /dev/null +++ b/yql/essentials/sql/v1/sql_into_tables.cpp @@ -0,0 +1,267 @@ +#include "sql_into_tables.h" +#include "sql_values.h" + +#include <util/string/join.h> + +using namespace NYql; + +namespace NSQLTranslationV1 { + +using NALPDefault::SQLv1LexerTokens; + +using namespace NSQLv1Generated; + +TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) { + static const TMap<TString, ESQLWriteColumnMode> str2Mode = { + {"InsertInto", ESQLWriteColumnMode::InsertInto}, + {"InsertOrAbortInto", ESQLWriteColumnMode::InsertOrAbortInto}, + {"InsertOrIgnoreInto", ESQLWriteColumnMode::InsertOrIgnoreInto}, + {"InsertOrRevertInto", ESQLWriteColumnMode::InsertOrRevertInto}, + {"UpsertInto", ESQLWriteColumnMode::UpsertInto}, + {"ReplaceInto", ESQLWriteColumnMode::ReplaceInto}, + {"InsertIntoWithTruncate", ESQLWriteColumnMode::InsertIntoWithTruncate} + }; + + auto& modeBlock = node.GetBlock1(); + + TVector<TToken> modeTokens; + switch (modeBlock.Alt_case()) { + case TRule_into_table_stmt_TBlock1::AltCase::kAlt1: + modeTokens = {modeBlock.GetAlt1().GetToken1()}; + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt2: + modeTokens = { + modeBlock.GetAlt2().GetToken1(), + modeBlock.GetAlt2().GetToken2(), + modeBlock.GetAlt2().GetToken3() + }; + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt3: + modeTokens = { + modeBlock.GetAlt3().GetToken1(), + modeBlock.GetAlt3().GetToken2(), + modeBlock.GetAlt3().GetToken3() + }; + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt4: + modeTokens = { + modeBlock.GetAlt4().GetToken1(), + modeBlock.GetAlt4().GetToken2(), + modeBlock.GetAlt4().GetToken3() + }; + break; + case TRule_into_table_stmt_TBlock1::AltCase::kAlt5: + modeTokens = {modeBlock.GetAlt5().GetToken1()}; + break; + case 
TRule_into_table_stmt_TBlock1::AltCase::kAlt6: + modeTokens = {modeBlock.GetAlt6().GetToken1()}; + break; + case TRule_into_table_stmt_TBlock1::AltCase::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + TVector<TString> modeStrings; + modeStrings.reserve(modeTokens.size()); + TVector<TString> userModeStrings; + userModeStrings.reserve(modeTokens.size()); + + for (auto& token : modeTokens) { + auto tokenStr = Token(token); + + auto modeStr = tokenStr; + modeStr.to_lower(); + modeStr.to_upper(0, 1); + modeStrings.push_back(modeStr); + + auto userModeStr = tokenStr; + userModeStr.to_upper(); + userModeStrings.push_back(userModeStr); + } + + modeStrings.push_back("Into"); + userModeStrings.push_back("INTO"); + + SqlIntoModeStr = JoinRange("", modeStrings.begin(), modeStrings.end()); + SqlIntoUserModeStr = JoinRange(" ", userModeStrings.begin(), userModeStrings.end()); + + const auto& intoTableRef = node.GetRule_into_simple_table_ref3(); + const auto& tableRef = intoTableRef.GetRule_simple_table_ref1(); + const auto& tableRefCore = tableRef.GetRule_simple_table_ref_core1(); + + auto service = Ctx.Scoped->CurrService; + auto cluster = Ctx.Scoped->CurrCluster; + std::pair<bool, TDeferredAtom> nameOrAt; + bool isBinding = false; + switch (tableRefCore.Alt_case()) { + case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore1: { + if (tableRefCore.GetAlt_simple_table_ref_core1().GetRule_object_ref1().HasBlock1()) { + const auto& clusterExpr = tableRefCore.GetAlt_simple_table_ref_core1().GetRule_object_ref1().GetBlock1().GetRule_cluster_expr1(); + bool hasAt = tableRefCore.GetAlt_simple_table_ref_core1().GetRule_object_ref1().GetRule_id_or_at2().HasBlock1(); + bool result = !hasAt ? 
+ ClusterExprOrBinding(clusterExpr, service, cluster, isBinding) : ClusterExpr(clusterExpr, false, service, cluster); + if (!result) { + return nullptr; + } + } + + if (!isBinding && cluster.Empty()) { + Ctx.Error() << "No cluster name given and no default cluster is selected"; + return nullptr; + } + + auto id = Id(tableRefCore.GetAlt_simple_table_ref_core1().GetRule_object_ref1().GetRule_id_or_at2(), *this); + nameOrAt = std::make_pair(id.first, TDeferredAtom(Ctx.Pos(), id.second)); + break; + } + case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore2: { + auto at = tableRefCore.GetAlt_simple_table_ref_core2().HasBlock1(); + TString name; + if (!NamedNodeImpl(tableRefCore.GetAlt_simple_table_ref_core2().GetRule_bind_parameter2(), name, *this)) { + return nullptr; + } + auto named = GetNamedNode(name); + if (!named) { + return nullptr; + } + + if (cluster.Empty()) { + Ctx.Error() << "No cluster name given and no default cluster is selected"; + return nullptr; + } + + TDeferredAtom table; + MakeTableFromExpression(Ctx.Pos(), Ctx, named, table); + nameOrAt = std::make_pair(at, table); + break; + } + case TRule_simple_table_ref_core::AltCase::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + bool withTruncate = false; + TTableHints tableHints; + if (tableRef.HasBlock2()) { + auto hints = TableHintsImpl(tableRef.GetBlock2().GetRule_table_hints1(), service); + if (!hints) { + Ctx.Error() << "Failed to parse table hints"; + return nullptr; + } + for (const auto& hint : *hints) { + if (to_upper(hint.first) == "TRUNCATE") { + withTruncate = true; + } + } + std::erase_if(*hints, [](const auto &hint) { return to_upper(hint.first) == "TRUNCATE"; }); + tableHints = std::move(*hints); + } + + TVector<TString> eraseColumns; + if (intoTableRef.HasBlock2()) { + if (service != StatProviderName) { + Ctx.Error() << "ERASE BY is unsupported for " << service; + return nullptr; + } + + PureColumnListStr( + 
intoTableRef.GetBlock2().GetRule_pure_column_list3(), *this, eraseColumns + ); + } + + if (withTruncate) { + if (SqlIntoModeStr != "InsertInto") { + Error() << "Unable " << SqlIntoUserModeStr << " with truncate mode"; + return nullptr; + } + SqlIntoModeStr += "WithTruncate"; + SqlIntoUserModeStr += " ... WITH TRUNCATE"; + } + const auto iterMode = str2Mode.find(SqlIntoModeStr); + YQL_ENSURE(iterMode != str2Mode.end(), "Invalid sql write mode string: " << SqlIntoModeStr); + const auto SqlIntoMode = iterMode->second; + + TPosition pos(Ctx.Pos()); + TTableRef table(Ctx.MakeName("table"), service, cluster, nullptr); + if (isBinding) { + const TString* binding = nameOrAt.second.GetLiteral(); + YQL_ENSURE(binding); + YQL_ENSURE(!nameOrAt.first); + if (!ApplyTableBinding(*binding, table, tableHints)) { + return nullptr; + } + } else { + table.Keys = BuildTableKey(pos, service, cluster, nameOrAt.second, {nameOrAt.first ? "@" : ""}); + } + + Ctx.IncrementMonCounter("sql_insert_clusters", table.Cluster.GetLiteral() ? 
*table.Cluster.GetLiteral() : "unknown"); + + auto values = TSqlIntoValues(Ctx, Mode).Build(node.GetRule_into_values_source4(), SqlIntoUserModeStr); + if (!values) { + return nullptr; + } + if (!ValidateServiceName(node, table, SqlIntoMode, GetPos(modeTokens[0]))) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", SqlIntoModeStr); + + auto options = BuildIntoTableOptions(pos, eraseColumns, tableHints); + + if (node.HasBlock5()) { + options = options->L(options, ReturningList(node.GetBlock5().GetRule_returning_columns_list1())); + } + + return BuildWriteColumns(pos, Ctx.Scoped, table, + ToWriteColumnsMode(SqlIntoMode), std::move(values), + options); +} + +bool TSqlIntoTable::ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table, + ESQLWriteColumnMode mode, const TPosition& pos) { + Y_UNUSED(node); + auto serviceName = table.Service; + const bool isMapReduce = serviceName == YtProviderName; + const bool isKikimr = serviceName == KikimrProviderName || serviceName == YdbProviderName; + const bool isRtmr = serviceName == RtmrProviderName; + const bool isStat = serviceName == StatProviderName; + + if (!isKikimr) { + if (mode == ESQLWriteColumnMode::InsertOrAbortInto || + mode == ESQLWriteColumnMode::InsertOrIgnoreInto || + mode == ESQLWriteColumnMode::InsertOrRevertInto || + mode == ESQLWriteColumnMode::UpsertInto && !isStat) + { + Ctx.Error(pos) << SqlIntoUserModeStr << " is not supported for " << serviceName << " tables"; + Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName); + return false; + } + } + + if (isMapReduce) { + if (mode == ESQLWriteColumnMode::ReplaceInto) { + Ctx.Error(pos) << "Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... 
for " << serviceName; + Ctx.IncrementMonCounter("sql_errors", "ReplaceIntoConflictUsage"); + return false; + } + } else if (isKikimr) { + if (mode == ESQLWriteColumnMode::InsertIntoWithTruncate) { + Ctx.Error(pos) << "INSERT INTO WITH TRUNCATE is not supported for " << serviceName << " tables"; + Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName); + return false; + } + } else if (isRtmr) { + if (mode != ESQLWriteColumnMode::InsertInto) { + Ctx.Error(pos) << SqlIntoUserModeStr << " is unsupported for " << serviceName; + Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName); + return false; + } + } else if (isStat) { + if (mode != ESQLWriteColumnMode::UpsertInto) { + Ctx.Error(pos) << SqlIntoUserModeStr << " is unsupported for " << serviceName; + Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName); + return false; + } + } + + return true; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_into_tables.h b/yql/essentials/sql/v1/sql_into_tables.h new file mode 100644 index 00000000000..0e40f5669bb --- /dev/null +++ b/yql/essentials/sql/v1/sql_into_tables.h @@ -0,0 +1,30 @@ +#pragma once + +#include "sql_translation.h" +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TSqlIntoTable: public TSqlTranslation { +public: + TSqlIntoTable(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlTranslation(ctx, mode) + { + } + + TNodePtr Build(const TRule_into_table_stmt& node); + +private: + //bool BuildValuesRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow); + //TSourcePtr ValuesSource(const TRule_values_source& node, TVector<TString>& columnsHint); + //TSourcePtr IntoValuesSource(const TRule_into_values_source& node); + + bool ValidateServiceName(const 
TRule_into_table_stmt& node, const TTableRef& table, ESQLWriteColumnMode mode, + const TPosition& pos); + TString SqlIntoModeStr; + TString SqlIntoUserModeStr; +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_match_recognize.cpp b/yql/essentials/sql/v1/sql_match_recognize.cpp new file mode 100644 index 00000000000..47e001efbb3 --- /dev/null +++ b/yql/essentials/sql/v1/sql_match_recognize.cpp @@ -0,0 +1,377 @@ +#include "sql_match_recognize.h" +#include "node.h" +#include "sql_expression.h" +#include <yql/essentials/core/sql_types/match_recognize.h> +#include <algorithm> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +namespace { + +TPosition TokenPosition(const TToken& token){ + return TPosition{token.GetColumn(), token.GetLine()}; +} + +TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecognizeClause& ctx){ + return Id(node.GetRule_identifier1(), ctx); +} + +} //namespace + +TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) { + TPosition pos(matchRecognizeClause.GetToken1().GetColumn(), matchRecognizeClause.GetToken1().GetLine()); + if (!Ctx.FeatureR010) { + Ctx.Error(pos, TIssuesIds::CORE) << "Unexpected MATCH_RECOGNIZE"; + return {}; + } + TVector<TNamedFunction> partitioners; + TPosition partitionsPos = pos; + if (matchRecognizeClause.HasBlock3()) { + const auto& partitionClause = matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1(); + partitionsPos = TokenPosition(partitionClause.GetToken1()); + partitioners = ParsePartitionBy(partitionClause); + if (!partitioners) + return {}; + } + TVector<TSortSpecificationPtr> sortSpecs; + TPosition orderByPos = pos; + if (matchRecognizeClause.HasBlock4()) { + const auto& orderByClause = matchRecognizeClause.GetBlock4().GetRule_order_by_clause1(); + orderByPos = TokenPosition(orderByClause.GetToken1()); + if 
(!OrderByClause(orderByClause, sortSpecs)) { + return {}; + } + } + + TPosition measuresPos = pos; + TVector<TNamedFunction> measures; + if (matchRecognizeClause.HasBlock5()) { + const auto& measuresClause = matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1(); + measuresPos = TokenPosition(measuresClause.GetToken1()); + measures = ParseMeasures(measuresClause.GetRule_row_pattern_measure_list2()); + } + + TPosition rowsPerMatchPos = pos; + ERowsPerMatch rowsPerMatch = ERowsPerMatch::OneRow; + if (matchRecognizeClause.HasBlock6()) { + std::tie(rowsPerMatchPos, rowsPerMatch) = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1()); + if (ERowsPerMatch::AllRows == rowsPerMatch) { + //https://st.yandex-team.ru/YQL-16213 + Ctx.Error(pos, TIssuesIds::CORE) << "ALL ROWS PER MATCH is not supported yet"; + return {}; + } + } + + const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7(); + + + if (commonSyntax.HasBlock2()) { + const auto& initialOrSeek = commonSyntax.GetBlock2().GetRule_row_pattern_initial_or_seek1(); + Ctx.Error(TokenPosition(initialOrSeek.GetToken1())) << "InitialOrSeek subclause is not allowed in FROM clause"; + return {}; + } + + auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5()); + const auto& patternPos = TokenPosition(commonSyntax.token3()); + + //this block is located before pattern block in grammar, + // but depends on it, so it is processed after pattern block + std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> skipTo { + pos, + NYql::NMatchRecognize::TAfterMatchSkipTo{ + NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, + TString() + } + }; + if (commonSyntax.HasBlock1()){ + skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3()); + const auto varRequired = + NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst == skipTo.second.To || + NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast == skipTo.second.To || + 
NYql::NMatchRecognize::EAfterMatchSkipTo::To == skipTo.second.To; + if (varRequired) { + const auto& allVars = NYql::NMatchRecognize::GetPatternVars(pattern); + if (allVars.find(skipTo.second.Var) == allVars.cend()) { + Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH"; + return {}; + } + } + } + + + TNodePtr subset; + TPosition subsetPos = pos; + if (commonSyntax.HasBlock7()) { + const auto& rowPatternSubset = commonSyntax.GetBlock7().GetRule_row_pattern_subset_clause1(); + subsetPos = TokenPosition(rowPatternSubset.GetToken1()); + Ctx.Error() << "SUBSET is not implemented yet"; + //TODO https://st.yandex-team.ru/YQL-16225 + return {}; + } + const auto& definitions = ParseDefinitions(commonSyntax.GetRule_row_pattern_definition_list9()); + const auto& definitionsPos = TokenPosition(commonSyntax.GetToken8()); + + const auto& rowPatternVariables = GetPatternVars(pattern); + for (const auto& [callable, name]: definitions) { + if (!rowPatternVariables.contains(name)) { + Ctx.Error(callable->GetPos()) << "ROW PATTERN VARIABLE " << name << " is defined, but not mentioned in the PATTERN"; + return {}; + } + } + + return new TMatchRecognizeBuilder{ + pos, + std::pair{partitionsPos, std::move(partitioners)}, + std::pair{orderByPos, std::move(sortSpecs)}, + std::pair{measuresPos, measures}, + std::pair{rowsPerMatchPos, rowsPerMatch}, + std::move(skipTo), + std::pair{patternPos, std::move(pattern)}, + std::pair{subsetPos, std::move(subset)}, + std::pair{definitionsPos, std::move(definitions)} + }; + + +} + +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_window_partition_clause& partitionClause) { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TVector<TNodePtr> partitionExprs; + if (!NamedExprList( + partitionClause.GetRule_named_expr_list4(), + partitionExprs)) { + return {}; + } + TVector<TNamedFunction> partitioners; + for (const auto& p: partitionExprs) { + auto label = p->GetLabel(); + if (!label && 
p->GetColumnName()) { + label = *p->GetColumnName(); + } + partitioners.push_back(TNamedFunction{p, label}); + } + return partitioners; +} + +TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_measure_definition& node) { + TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize); + const auto& expr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1()); + const auto& name = Id(node.GetRule_an_id3(), *this); + //TODO https://st.yandex-team.ru/YQL-16186 + //Each measure must be a lambda, that accepts 2 args: + // - List<InputTableColumns + _yql_Classifier, _yql_MatchNumber> + // - Struct that maps row pattern variables to ranges in the queue + return {expr, name}; +} + +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list& node) { + TVector<TNamedFunction> result{ ParseOneMeasure(node.GetRule_row_pattern_measure_definition1()) }; + for (const auto& m: node.GetBlock2()) { + result.push_back(ParseOneMeasure(m.GetRule_row_pattern_measure_definition2())); + } + return result; +} + +std::pair<TPosition, ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) { + + switch(rowsPerMatchClause.GetAltCase()) { + case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1: + return std::pair { + TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match1().GetToken1()), + ERowsPerMatch::OneRow + }; + case TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2: + return std::pair { + TokenPosition(rowsPerMatchClause.GetAlt_row_pattern_rows_per_match2().GetToken1()), + ERowsPerMatch::AllRows + }; + case TRule_row_pattern_rows_per_match::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause) { + switch 
(skipToClause.GetAltCase()) { + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo1: + return std::pair{ + TokenPosition(skipToClause.GetAlt_row_pattern_skip_to1().GetToken1()), + NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::NextRow, ""} + }; + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo2: + return std::pair{ + TokenPosition(skipToClause.GetAlt_row_pattern_skip_to2().GetToken1()), + NYql::NMatchRecognize::TAfterMatchSkipTo{NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow, ""} + }; + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo3: + return std::pair{ + TokenPosition(skipToClause.GetAlt_row_pattern_skip_to3().GetToken1()), + NYql::NMatchRecognize::TAfterMatchSkipTo{ + NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst, + skipToClause.GetAlt_row_pattern_skip_to3().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() + } + }; + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4: + return std::pair{ + TokenPosition(skipToClause.GetAlt_row_pattern_skip_to4().GetToken1()), + NYql::NMatchRecognize::TAfterMatchSkipTo{ + NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast, + skipToClause.GetAlt_row_pattern_skip_to4().GetRule_row_pattern_skip_to_variable_name4().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() + } + }; + case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5: + return std::pair{ + TokenPosition(skipToClause.GetAlt_row_pattern_skip_to5().GetToken1()), + NYql::NMatchRecognize::TAfterMatchSkipTo{ + NYql::NMatchRecognize::EAfterMatchSkipTo::To, + skipToClause.GetAlt_row_pattern_skip_to5().GetRule_row_pattern_skip_to_variable_name3().GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue() + } + }; + case TRule_row_pattern_skip_to::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +NYql::NMatchRecognize::TRowPatternTerm 
TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){ + NYql::NMatchRecognize::TRowPatternTerm term; + TPosition pos; + for (const auto& factor: node.GetBlock1()) { + const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1(); + NYql::NMatchRecognize::TRowPatternPrimary primary; + bool output = true; + switch (primaryVar.GetAltCase()) { + case TRule_row_pattern_primary::kAltRowPatternPrimary1: + primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this); + break; + case TRule_row_pattern_primary::kAltRowPatternPrimary2: + primary = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue(); + Y_ENSURE("$" == std::get<0>(primary)); + break; + case TRule_row_pattern_primary::kAltRowPatternPrimary3: + primary = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue(); + Y_ENSURE("^" == std::get<0>(primary)); + break; + case TRule_row_pattern_primary::kAltRowPatternPrimary4: { + if (++PatternNestingLevel <= NYql::NMatchRecognize::MaxPatternNesting) { + primary = ParsePattern(primaryVar.GetAlt_row_pattern_primary4().GetBlock2().GetRule_row_pattern1()); + --PatternNestingLevel; + } else { + Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) + << "To big nesting level in the pattern"; + return NYql::NMatchRecognize::TRowPatternTerm{}; + } + break; + } + case TRule_row_pattern_primary::kAltRowPatternPrimary5: + output = false; + Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) + << "ALL ROWS PER MATCH and {- -} are not supported yet"; //https://st.yandex-team.ru/YQL-16227 + break; + case TRule_row_pattern_primary::kAltRowPatternPrimary6: { + std::vector<NYql::NMatchRecognize::TRowPatternPrimary> items{ParsePattern( + primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetRule_row_pattern3()) + }; + for (const auto& p: 
primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1().GetBlock4()) { + items.push_back(ParsePattern(p.GetRule_row_pattern2())); + } + //Permutations now is a syntactic sugar and converted to all possible alternatives + if (items.size() > NYql::NMatchRecognize::MaxPermutedItems) { + Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary4().GetToken1())) + << "Too many items in permute"; + return NYql::NMatchRecognize::TRowPatternTerm{}; + } + std::vector<size_t> indexes(items.size()); + std::generate(begin(indexes), end(indexes), [n = 0] () mutable { return n++; }); + NYql::NMatchRecognize::TRowPattern permuted; + do { + NYql::NMatchRecognize::TRowPatternTerm term; + term.reserve(indexes.size()); + for (size_t i = 0; i != indexes.size(); ++i) { + term.push_back({items[indexes[i]], 1, 1, true, false, false}); + } + permuted.push_back(std::move(term)); + } while (std::next_permutation(indexes.begin(), indexes.end())); + primary = permuted; + break; + } + case TRule_row_pattern_primary::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + uint64_t quantityMin = 1; + uint64_t quantityMax = 1; + constexpr uint64_t infinity = std::numeric_limits<uint64_t>::max(); + bool greedy = true; + if (factor.GetRule_row_pattern_factor1().HasBlock2()) { + const auto& quantifier = factor.GetRule_row_pattern_factor1().GetBlock2().GetRule_row_pattern_quantifier1(); + switch(quantifier.GetAltCase()){ + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier1: //* + quantityMin = 0; + quantityMax = infinity; + greedy = !quantifier.GetAlt_row_pattern_quantifier1().HasBlock2(); + break; + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier2: //+ + quantityMax = infinity; + greedy = !quantifier.GetAlt_row_pattern_quantifier2().HasBlock2(); + break; + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier3: //? 
+ quantityMin = 0; + greedy = !quantifier.GetAlt_row_pattern_quantifier3().HasBlock2(); + break; + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier4: //{ 2?, 4?} + if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock2()) { + quantityMin = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock2().GetRule_integer1().GetToken1().GetValue()); + } + else { + quantityMin = 0;; + } + if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock4()) { + quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock4().GetRule_integer1().GetToken1().GetValue()); + } + else { + quantityMax = infinity; + } + greedy = !quantifier.GetAlt_row_pattern_quantifier4().HasBlock6(); + + break; + case TRule_row_pattern_quantifier::kAltRowPatternQuantifier5: + quantityMin = quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier5().GetRule_integer2().GetToken1().GetValue()); + break; + case TRule_row_pattern_quantifier::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + } + term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output, false}); + } + return term; +} + +NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){ + TVector<NYql::NMatchRecognize::TRowPatternTerm> result; + result.push_back(ParsePatternTerm(node.GetRule_row_pattern_term1())); + for (const auto& term: node.GetBlock2()) + result.push_back(ParsePatternTerm(term.GetRule_row_pattern_term2())); + return result; +} + +TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){ + const auto& varName = PatternVar(node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1(), *this); + TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize, true, varName); + const auto& searchCondition = TSqlExpression(Ctx, 
Mode).Build(node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1()); + return TNamedFunction{searchCondition, varName}; +} + +TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) { + TVector<TNamedFunction> result { ParseOneDefinition(node.GetRule_row_pattern_definition1())}; + for (const auto& d: node.GetBlock2()) { + //TODO https://st.yandex-team.ru/YQL-16186 + //Each define must be a predicate lambda, that accepts 3 args: + // - List<input table rows> + // - A struct that maps row pattern variables to ranges in the queue + // - An index of the current row + result.push_back(ParseOneDefinition(d.GetRule_row_pattern_definition2())); + } + return result; +} + +} //namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_match_recognize.h b/yql/essentials/sql/v1/sql_match_recognize.h new file mode 100644 index 00000000000..6766acc9537 --- /dev/null +++ b/yql/essentials/sql/v1/sql_match_recognize.h @@ -0,0 +1,30 @@ +#pragma once + +#include "sql_translation.h" +#include "match_recognize.h" + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TSqlMatchRecognizeClause: public TSqlTranslation { +public: + TSqlMatchRecognizeClause(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlTranslation(ctx, mode) + {} + TMatchRecognizeBuilderPtr CreateBuilder(const TRule_row_pattern_recognition_clause& node); +private: + TVector<TNamedFunction> ParsePartitionBy(const TRule_window_partition_clause& partitionClause); + TNamedFunction ParseOneMeasure(const TRule_row_pattern_measure_definition& node); + TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list& node); + std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause); + std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause); + 
NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node); + NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node); + TNamedFunction ParseOneDefinition(const TRule_row_pattern_definition& node); + TVector<TNamedFunction> ParseDefinitions(const TRule_row_pattern_definition_list& node); +private: + size_t PatternNestingLevel = 0; +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp new file mode 100644 index 00000000000..20c5e6ab7bb --- /dev/null +++ b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp @@ -0,0 +1,742 @@ +#include "sql_ut.h" +#include "match_recognize.h" +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/core/sql_types/match_recognize.h> +#include <yql/essentials/sql/sql.h> +#include <util/generic/map.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/split.h> + +using namespace NSQLTranslation; + +NYql::TAstParseResult MatchRecognizeSqlToYql(const TString& query) { + TString enablingPragma = R"( +pragma FeatureR010="prototype"; +)"; + return SqlToYql(enablingPragma + query); +} + +const NYql::TAstNode* FindMatchRecognizeParam(const NYql::TAstNode* root, TString name) { + auto matchRecognizeBlock = FindNodeByChildAtomContent(root, 1, "match_recognize"); + UNIT_ASSERT(matchRecognizeBlock); + auto paramNode = FindNodeByChildAtomContent(matchRecognizeBlock, 1, name); + return paramNode->GetChild(2); +} + +bool IsQuotedListOfSize(const NYql::TAstNode* node, ui32 size) { + UNIT_ASSERT(node->IsListOfSize(2)); + if (!node->IsListOfSize(2)) + return false; + UNIT_ASSERT_EQUAL(node->GetChild(0)->GetContent(), "quote"); + if (node->GetChild(0)->GetContent() != "quote") + return false; + UNIT_ASSERT_EQUAL(node->GetChild(1)->GetChildrenCount(), size); + return node->GetChild(1)->IsListOfSize(size); +} + +bool IsLambda(const 
NYql::TAstNode* node, ui32 numberOfArgs) { + if (!node->IsListOfSize(3)) { + return false; + } + if (!node->GetChild(0)->IsAtom() || node->GetChild(0)->GetContent() != "lambda") { + return false; + } + return IsQuotedListOfSize(node->GetChild(1), numberOfArgs); +} + +Y_UNIT_TEST_SUITE(MatchRecognize) { + auto minValidMatchRecognizeSql = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( A ) + DEFINE A as A + ) +)"; + Y_UNIT_TEST(EnabledWithPragma) { + UNIT_ASSERT(not SqlToYql(minValidMatchRecognizeSql).IsOk()); + UNIT_ASSERT(MatchRecognizeSqlToYql(minValidMatchRecognizeSql).IsOk()); + } + + Y_UNIT_TEST(InputTableName) { + auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql); + UNIT_ASSERT(r.IsOk()); + auto input = FindMatchRecognizeParam(r.Root, "input"); + UNIT_ASSERT(input->IsAtom() && input->GetContent() == "core"); + } + + Y_UNIT_TEST(MatchRecognizeAndSample) { + auto matchRecognizeAndSample = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( A ) + DEFINE A as A + ) TABLESAMPLE BERNOULLI(1.0) +)"; + UNIT_ASSERT(not MatchRecognizeSqlToYql(matchRecognizeAndSample).IsOk()); + } + + Y_UNIT_TEST(NoPartitionBy) { + auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql); + UNIT_ASSERT(r.IsOk()); + auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector"); + UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 0)); //empty tuple + auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns"); + UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 0)); //empty tuple + } + + Y_UNIT_TEST(PartitionBy) { + auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PARTITION BY col1 as c1, ~CAST(col1 as Int32) as invertedC1, c2 + PATTERN ( A ) + DEFINE A as A + ) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector"); + 
UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 3)); + auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns"); + UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 3)); + //TODO check partitioner lambdas(alias/no alias) + } + + Y_UNIT_TEST(NoOrderBy) { + auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql); + UNIT_ASSERT(r.IsOk()); + auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits"); + UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(1)); + UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "Void"); + } + + Y_UNIT_TEST(OrderBy) { + auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + ORDER BY col1, ~CAST(col1 as Int32), c2 + PATTERN ( A ) + DEFINE A as A + ) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits"); + UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(4)); + UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "SortTraits"); + UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(2), 3)); + UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(3)->GetChild(2), 3)); + } + Y_UNIT_TEST(Measures) { + auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + MEASURES + Last(Q.dt) as T, + First(Y.key) as Key + PATTERN ( Y Q ) + DEFINE Y as true +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto measures = FindMatchRecognizeParam(r.Root, "measures"); + UNIT_ASSERT_VALUES_EQUAL(6, measures->GetChildrenCount()); + const auto columnNames = measures->GetChild(3); + UNIT_ASSERT(IsQuotedListOfSize(columnNames, 2)); + UNIT_ASSERT_VALUES_EQUAL("T", columnNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Key", columnNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + UNIT_ASSERT(IsLambda(measures->GetChild(4), 2)); + UNIT_ASSERT(IsLambda(measures->GetChild(5), 2)); + } + Y_UNIT_TEST(RowsPerMatch) { + { + const auto stmt 
= R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + ONE ROW PER MATCH + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch"); + UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + ALL ROWS PER MATCH + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); ///https://st.yandex-team.ru/YQL-16213 + } + { //default + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch"); + UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent()); + } + + } + Y_UNIT_TEST(SkipAfterMatch) { + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_NextRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_PastLastRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO FIRST Y + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = 
MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToFirst", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO FIRST T -- unknown pattern var + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO LAST Y + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToLast", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO LAST T -- unknown pattern var + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO Y + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo"); + UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_To", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + } + { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + AFTER MATCH SKIP TO T -- 
unknown pattern var + PATTERN (A | (U | (Q | Y)) | ($ B)+ C D) + DEFINE A as A +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + } + Y_UNIT_TEST(row_pattern_initial) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + INITIAL + PATTERN (A+ B* C?) + DEFINE A as A + ) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + + Y_UNIT_TEST(row_pattern_seek) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + SEEK + PATTERN (A+ B* C?) + DEFINE A as A + ) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(not r.IsOk()); + } + + Y_UNIT_TEST(PatternSimple) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN (A+ B* C?) + DEFINE A as A + ) +)"; + const auto& r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 1); + const auto& term = patternCallable->GetChild(1); + UNIT_ASSERT(IsQuotedListOfSize(term, 3)); + } + + Y_UNIT_TEST(PatternMultiTerm) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? 
I J ^) + DEFINE A as A + ) +)"; + const auto& r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 4); + const auto& lastTerm = patternCallable->GetChild(4); + UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 5)); + } + + Y_UNIT_TEST(PatternWithParanthesis) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( + A | ($ B)+ C D + ) + DEFINE A as A + ) +)"; + const auto& r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 2); + const auto& firstTerm = patternCallable->GetChild(1); + UNIT_ASSERT(IsQuotedListOfSize(firstTerm, 1)); + const auto& lastTerm = patternCallable->GetChild(2); + UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3)); + const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0); + UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 6)); + const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0); + UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1); + UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern"); + UNIT_ASSERT(IsQuotedListOfSize(nestedPattern->GetChild(1), 2)); + } + + Y_UNIT_TEST(PatternManyAlternatives) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( +PATTERN ( + (A B C D ) | (B A C D ) | (C B A D ) | (B C A D ) | (C A B D ) | (A C B D ) | (D A B C ) | (A D B C ) | (B A D C ) | (A B D C ) | (B D A C ) | (D B A C ) | (C D A B ) | (D C A B ) | (A D C B ) | (D A C B ) | (A C D B ) | (C A D B ) | (B C D A ) | (C B D A ) | (D C B A ) | (C D B A ) | (D B C A ) | (B D 
C A ) + ) + DEFINE A as A +) +)"; + UNIT_ASSERT(MatchRecognizeSqlToYql(stmt).IsOk()); + } + + Y_UNIT_TEST(PatternLimitedNesting) { + const size_t MaxNesting = 20; + for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) { + std::string pattern; + for (size_t i = 0; i != MaxNesting + extraNesting; ++i) + pattern.push_back('('); + pattern.push_back('A'); + for (size_t i = 0; i != MaxNesting + extraNesting; ++i) + pattern.push_back(')'); + const auto stmt = TString(R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN( +)") + pattern + R"( + ) + DEFINE A as A + ) +)"; + const auto &r = MatchRecognizeSqlToYql(stmt); + if (not extraNesting) { + UNIT_ASSERT(r.IsOk()); + } else { + UNIT_ASSERT(not r.IsOk()); + } + } + } + + Y_UNIT_TEST(PatternFactorQuantifiers) { + auto makeRequest = [](const TString& factor) { + return TString(R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN( +)") + factor + R"( + ) + DEFINE A as A + ) +)"; + }; + auto getTheFactor = [](const NYql::TAstNode* root) { + const auto& patternCallable = FindMatchRecognizeParam(root, "pattern"); + const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1); + return NYql::NMatchRecognize::TRowPatternFactor{ + TString(), //primary var or subexpression, not used in this test + FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin + FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax + FromString<bool>(factor->GetChild(3)->GetChild(1)->GetContent()), //Greedy + false, //Output, not used in this test + false, // Flag "Unused", not used in this test + }; + }; + { + //no quantifiers + const auto stmt = makeRequest("A"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(1, factor.QuantityMin); + UNIT_ASSERT_EQUAL(1, factor.QuantityMax); + UNIT_ASSERT(factor.Greedy); + } + { + //optional greedy(default) + 
const auto stmt = makeRequest("A?"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(0, factor.QuantityMin); + UNIT_ASSERT_EQUAL(1, factor.QuantityMax); + UNIT_ASSERT(factor.Greedy); + } + { + //optional reluctant + const auto stmt = makeRequest("A??"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(0, factor.QuantityMin); + UNIT_ASSERT_EQUAL(1, factor.QuantityMax); + UNIT_ASSERT(!factor.Greedy); + } + { + //+ greedy(default) + const auto stmt = makeRequest("A+"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(1, factor.QuantityMin); + UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax); + UNIT_ASSERT(factor.Greedy); + } + { + //+ reluctant + const auto stmt = makeRequest("A+?"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(1, factor.QuantityMin); + UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax); + UNIT_ASSERT(!factor.Greedy); + } + { + //* greedy(default) + const auto stmt = makeRequest("A*"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(0, factor.QuantityMin); + UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax); + UNIT_ASSERT(factor.Greedy); + } + { + //* reluctant + const auto stmt = makeRequest("A*?"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(0, factor.QuantityMin); + UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax); + UNIT_ASSERT(!factor.Greedy); + } + { + //exact n + const auto stmt = makeRequest("A{4}"); + 
const auto &r = MatchRecognizeSqlToYql(stmt);
+            UNIT_ASSERT(r.IsOk());
+            const auto& factor = getTheFactor(r.Root);
+            UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+            UNIT_ASSERT_EQUAL(4, factor.QuantityMax);
+        }
+        {
+            //from n to m greedy(default)
+            const auto stmt = makeRequest("A{4, 7}");
+            const auto &r = MatchRecognizeSqlToYql(stmt);
+            UNIT_ASSERT(r.IsOk());
+            const auto& factor = getTheFactor(r.Root);
+            UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+            UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+            UNIT_ASSERT(factor.Greedy);
+        }
+        {
+            //from n to m reluctant
+            const auto stmt = makeRequest("A{4,7}?");
+            const auto &r = MatchRecognizeSqlToYql(stmt);
+            UNIT_ASSERT(r.IsOk());
+            const auto& factor = getTheFactor(r.Root);
+            UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+            UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+            UNIT_ASSERT(!factor.Greedy);
+        }
+        {
+            //at least n greedy(default)
+            const auto stmt = makeRequest("A{4,}");
+            const auto &r = MatchRecognizeSqlToYql(stmt);
+            UNIT_ASSERT(r.IsOk());
+            const auto& factor = getTheFactor(r.Root);
+            UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+            UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+            UNIT_ASSERT(factor.Greedy);
+        }
+        {
+            //at least n reluctant
+            const auto stmt = makeRequest("A{4,}?");
+            const auto &r = MatchRecognizeSqlToYql(stmt);
+            UNIT_ASSERT(r.IsOk());
+            const auto& factor = getTheFactor(r.Root);
+            UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+            UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+            UNIT_ASSERT(!factor.Greedy);
+        }
+        {
+            //at most m greedy(default)
+            const auto stmt = makeRequest("A{,7}");
+            const auto &r = MatchRecognizeSqlToYql(stmt);
+            UNIT_ASSERT(r.IsOk());
+            const auto& factor = getTheFactor(r.Root);
+            UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+            UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+            UNIT_ASSERT(factor.Greedy);
+        }
+        {
+            //at most m reluctant
+            const auto stmt = makeRequest("A{,7}?");
+            const auto &r = MatchRecognizeSqlToYql(stmt);
+
UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(0, factor.QuantityMin); + UNIT_ASSERT_EQUAL(7, factor.QuantityMax); + UNIT_ASSERT(!factor.Greedy); + } + + { + //quantifiers on subexpression + const auto stmt = makeRequest("(A B+ C | D | ^){4,7}?"); + const auto &r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto& factor = getTheFactor(r.Root); + UNIT_ASSERT_EQUAL(4, factor.QuantityMin); + UNIT_ASSERT_EQUAL(7, factor.QuantityMax); + UNIT_ASSERT(!factor.Greedy); + } + } + + Y_UNIT_TEST(Permute) { + const auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( + PERMUTE(A, B, C, D, E) --5 variables produce 5! permutations + ) + DEFINE A as A +) +)"; + const auto& r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + + const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern"); + const auto permutePattern = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1)->GetChild(0); + UNIT_ASSERT(permutePattern->IsListOfSize(1 + 120)); //CallableName + 5! 
+ } + + Y_UNIT_TEST(PermuteTooMuch) { + for (size_t n = 1; n <= NYql::NMatchRecognize::MaxPermutedItems + 1; ++n) { + std::vector<std::string> vars(n); + std::generate(begin(vars), end(vars), [n = 0] () mutable { return "A" + std::to_string(n++);}); + const auto stmt = TString(R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( + PERMUTE( )" + std::accumulate(cbegin(vars) + 1, cend(vars), vars.front(), + [](const std::string& acc, const std::string& v) { + return acc + ", " + v; + }) + + R"( + ) + ) + DEFINE A0 as A0 +) +)" + ); + const auto &r = MatchRecognizeSqlToYql(stmt); + if (n <= NYql::NMatchRecognize::MaxPermutedItems) { + UNIT_ASSERT(r.IsOk()); + } else { + UNIT_ASSERT(!r.IsOk()); + } + } + } + + + Y_UNIT_TEST(row_pattern_subset_clause) { + //TODO https://st.yandex-team.ru/YQL-16186 + } + + Y_UNIT_TEST(Defines) { + auto stmt = R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( + PATTERN ( Y Q L ) + DEFINE + Y as true, + Q as Q.V = "value", + L as L.V = LAST(Q.T) +) +)"; + auto r = MatchRecognizeSqlToYql(stmt); + UNIT_ASSERT(r.IsOk()); + const auto defines = FindMatchRecognizeParam(r.Root, "define"); + UNIT_ASSERT_VALUES_EQUAL(7, defines->GetChildrenCount()); + const auto varNames = defines->GetChild(3); + UNIT_ASSERT(IsQuotedListOfSize(varNames, 3)); + UNIT_ASSERT_VALUES_EQUAL("Y", varNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("Q", varNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent()); + UNIT_ASSERT_VALUES_EQUAL("L", varNames->GetChild(1)->GetChild(2)->GetChild(1)->GetContent()); + + UNIT_ASSERT(IsLambda(defines->GetChild(4), 3)); + UNIT_ASSERT(IsLambda(defines->GetChild(5), 3)); + UNIT_ASSERT(IsLambda(defines->GetChild(6), 3)); + } + + Y_UNIT_TEST(AbsentRowPatternVariableInDefines) { + auto getStatement = [](const TString &var) { + return TStringBuilder() << R"( +USE plato; +SELECT * +FROM Input MATCH_RECOGNIZE( +PATTERN ( Q ) +DEFINE +)" << var << " AS TRUE )"; + }; + 
UNIT_ASSERT(MatchRecognizeSqlToYql(getStatement("Q")).IsOk());
+        UNIT_ASSERT(!MatchRecognizeSqlToYql(getStatement("Y")).IsOk());
+    }
+
+    Y_UNIT_TEST(CheckRequiredNavigationFunction) {
+        TString stmtPrefix = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ( Y Q L )
+ DEFINE
+ L as L.V =
+)";
+        //Be aware that right parenthesis is added at the end of the query as required
+        UNIT_ASSERT(MatchRecognizeSqlToYql(stmtPrefix + "LAST(Q.dt) )").IsOk());
+        UNIT_ASSERT(!MatchRecognizeSqlToYql(stmtPrefix + "Q.dt )").IsOk());
+    }
+
+}
diff --git a/yql/essentials/sql/v1/sql_query.cpp b/yql/essentials/sql/v1/sql_query.cpp
new file mode 100644
index 00000000000..d0fb8737e15
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_query.cpp
@@ -0,0 +1,3446 @@
+#include "sql_query.h"
+#include "sql_expression.h"
+#include "sql_select.h"
+#include "sql_into_tables.h"
+#include "sql_values.h"
+#include "node.h"
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/sql/v1/object_processing.h>
+#include <yql/essentials/utils/yql_paths.h>
+#include <util/generic/scope.h>
+#include <util/string/join.h>
+#ifdef GetMessage
+#undef GetMessage
+#endif
+
+namespace NSQLTranslationV1 {
+
+using NALPDefault::SQLv1LexerTokens;
+using NALPDefaultAntlr4::SQLv1Antlr4Lexer;
+
+using namespace NSQLv1Generated;
+
+// Appends to `targetList` the column name of every target in `node`: first the
+// leading set_target, then one per entry of GetBlock3(). Each name is produced
+// by ColumnNameAsSingleStr; existing contents of `targetList` are kept.
+void FillTargetList(TTranslation& ctx, const TRule_set_target_list& node, TVector<TString>& targetList) {
+    targetList.push_back(ColumnNameAsSingleStr(ctx, node.GetRule_set_target2().GetRule_column_name1()));
+    for (auto& block: node.GetBlock3()) {
+        targetList.push_back(ColumnNameAsSingleStr(ctx, block.GetRule_set_target2().GetRule_column_name1()));
+    }
+}
+
+// Converts a package version string to its numeric form in `version`.
+// "release" maps to 0 and "draft" maps to 1; any other string is handed to
+// TryFromString. Returns true when `version` was set, otherwise whatever
+// TryFromString reports for the input.
+bool PackageVersionFromString(const TString& s, ui32& version) {
+    if (s == "release") {
+        version = 0;
+        return true;
+    }
+    if (s == "draft") {
+        version = 1;
+        return true;
+    }
+    return TryFromString(s, version);
+}
+
+void TSqlQuery::AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node) {
+    // `node` is taken by value, so hand it over instead of copying it again.
+    blocks.emplace_back(std::move(node));
+}
+
+// Validates one `key = value` entry of a replication settings list and stores
+// it in `out` under the lower-cased key.
+//
+// `create` is true when the entry belongs to a CREATE statement; the state
+// settings ("state", "failover_mode") are rejected in that case.
+//
+// Returns false, after reporting an error through ctx.Context(), when the key
+// is unknown, is not allowed in CREATE, or was already present in `out`.
+static bool AsyncReplicationSettingsEntry(std::map<TString, TNodePtr>& out,
+        const TRule_replication_settings_entry& in, TTranslation& ctx, bool create)
+{
+    auto key = IdEx(in.GetRule_an_id1(), ctx);
+    auto value = BuildLiteralSmartString(ctx.Context(), ctx.Token(in.GetToken3()));
+
+    // Both sets are constant, so build them once rather than on every entry.
+    static const TSet<TString> configSettings = {
+        "connection_string",
+        "endpoint",
+        "database",
+        "token",
+        "token_secret_name",
+        "user",
+        "password",
+        "password_secret_name",
+    };
+
+    static const TSet<TString> stateSettings = {
+        "state",
+        "failover_mode",
+    };
+
+    // Keys are matched case-insensitively; messages keep the user's spelling.
+    const auto keyName = to_lower(key.Name);
+    if (!configSettings.count(keyName) && !stateSettings.count(keyName)) {
+        ctx.Context().Error() << "Unknown replication setting: " << key.Name;
+        return false;
+    }
+
+    if (create && stateSettings.count(keyName)) {
+        ctx.Context().Error() << key.Name << " is not supported in CREATE";
+        return false;
+    }
+
+    if (!out.emplace(keyName, value).second) {
+        ctx.Context().Error() << "Duplicate replication setting: " << key.Name;
+        // A duplicate is an error like the two cases above: stop here instead
+        // of reporting success to the caller.
+        return false;
+    }
+
+    return true;
+}
+
+// Processes every entry of a replication settings list in order, stopping at
+// the first entry that AsyncReplicationSettingsEntry rejects.
+// Returns true only when all entries were accepted.
+static bool AsyncReplicationSettings(std::map<TString, TNodePtr>& out,
+        const TRule_replication_settings& in, TTranslation& ctx, bool create)
+{
+    if (!AsyncReplicationSettingsEntry(out, in.GetRule_replication_settings_entry1(), ctx, create)) {
+        return false;
+    }
+
+    for (auto& block : in.GetBlock2()) {
+        if (!AsyncReplicationSettingsEntry(out, block.GetRule_replication_settings_entry2(), ctx, create)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static bool AsyncReplicationTarget(std::vector<std::pair<TString, TString>>& out, TStringBuf prefixPath,
+        const TRule_replication_target& in, TTranslation& ctx)
+{
+    const TString remote = Id(in.GetRule_object_ref1().GetRule_id_or_at2(), ctx).second;
+    const TString local = Id(in.GetRule_object_ref3().GetRule_id_or_at2(), ctx).second;
+    out.emplace_back(remote, BuildTablePath(prefixPath,
local)); + return true; +} + +static bool AsyncReplicationAlterAction(std::map<TString, TNodePtr>& settings, + const TRule_alter_replication_action& in, TTranslation& ctx) +{ + // TODO(ilnaz): support other actions + return AsyncReplicationSettings(settings, in.GetRule_alter_replication_set_setting1().GetRule_replication_settings3(), ctx, false); +} + +bool TSqlQuery::Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& core) { + TString internalStatementName; + TString humanStatementName; + ParseStatementName(core, internalStatementName, humanStatementName); + const auto& altCase = core.Alt_case(); + if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && (altCase >= TRule_sql_stmt_core::kAltSqlStmtCore4 && + altCase != TRule_sql_stmt_core::kAltSqlStmtCore13)) { + Error() << humanStatementName << " statement is not supported in limited views"; + return false; + } + + if (Mode == NSQLTranslation::ESqlMode::SUBQUERY && (altCase >= TRule_sql_stmt_core::kAltSqlStmtCore4 && + altCase != TRule_sql_stmt_core::kAltSqlStmtCore13 && altCase != TRule_sql_stmt_core::kAltSqlStmtCore6 && + altCase != TRule_sql_stmt_core::kAltSqlStmtCore18)) { + Error() << humanStatementName << " statement is not supported in subqueries"; + return false; + } + + switch (altCase) { + case TRule_sql_stmt_core::kAltSqlStmtCore1: { + bool success = false; + TNodePtr nodeExpr = PragmaStatement(core.GetAlt_sql_stmt_core1().GetRule_pragma_stmt1(), success); + if (!success) { + return false; + } + if (nodeExpr) { + AddStatementToBlocks(blocks, nodeExpr); + } + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore2: { + if (Ctx.ParallelModeCount > 0) { + Error() << humanStatementName << " statement is not supported in parallel mode"; + return false; + } + + Ctx.BodyPart(); + TSqlSelect select(Ctx, Mode); + TPosition pos; + auto source = select.Build(core.GetAlt_sql_stmt_core2().GetRule_select_stmt1(), pos); + if (!source) { + return false; + } + blocks.emplace_back(BuildSelectResult(pos, 
std::move(source), + Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW && Mode != NSQLTranslation::ESqlMode::SUBQUERY, Mode == NSQLTranslation::ESqlMode::SUBQUERY, + Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore3: { + Ctx.BodyPart(); + TVector<TSymbolNameWithPos> names; + auto nodeExpr = NamedNode(core.GetAlt_sql_stmt_core3().GetRule_named_nodes_stmt1(), names); + if (!nodeExpr) { + return false; + } + TVector<TNodePtr> nodes; + auto subquery = nodeExpr->GetSource(); + if (subquery && Mode == NSQLTranslation::ESqlMode::LIBRARY && Ctx.ScopeLevel == 0) { + for (size_t i = 0; i < names.size(); ++i) { + nodes.push_back(BuildInvalidSubqueryRef(subquery->GetPos())); + } + } else if (subquery) { + const auto alias = Ctx.MakeName("subquerynode"); + const auto ref = Ctx.MakeName("subquery"); + blocks.push_back(BuildSubquery(subquery, alias, + Mode == NSQLTranslation::ESqlMode::SUBQUERY, names.size() == 1 ? -1 : names.size(), Ctx.Scoped)); + blocks.back()->SetLabel(ref); + + for (size_t i = 0; i < names.size(); ++i) { + nodes.push_back(BuildSubqueryRef(blocks.back(), ref, names.size() == 1 ? -1 : i)); + } + } else if (!Ctx.CompactNamedExprs || nodeExpr->GetUdfNode()) { + // Unlike other nodes, TUdfNode is not an independent node, but more like a set of parameters which should be + // applied on UDF call site. For example, TUdfNode can not be Translate()d + // So we can't add it to blocks and use reference, instead we store the TUdfNode itself as named node + // TODO: remove this special case + if (names.size() > 1) { + auto tupleRes = BuildTupleResult(nodeExpr, names.size()); + for (size_t i = 0; i < names.size(); ++i) { + nodes.push_back(nodeExpr->Y("Nth", tupleRes, nodeExpr->Q(ToString(i)))); + } + } else { + nodes.push_back(std::move(nodeExpr)); + } + } else { + const auto ref = Ctx.MakeName("namedexprnode"); + blocks.push_back(BuildNamedExpr(names.size() > 1 ? 
BuildTupleResult(nodeExpr, names.size()) : nodeExpr)); + blocks.back()->SetLabel(ref); + for (size_t i = 0; i < names.size(); ++i) { + nodes.push_back(BuildNamedExprReference(blocks.back(), ref, names.size() == 1 ? TMaybe<size_t>() : i)); + } + } + + for (size_t i = 0; i < names.size(); ++i) { + PushNamedNode(names[i].Pos, names[i].Name, nodes[i]); + } + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore4: { + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core4().GetRule_create_table_stmt1(); + + bool replaceIfExists = false; + if (rule.HasBlock2()) { // OR REPLACE + replaceIfExists = true; + Y_DEBUG_ABORT_UNLESS( + (IS_TOKEN(rule.GetBlock2().GetToken1().GetId(), OR) && + IS_TOKEN(rule.GetBlock2().GetToken2().GetId(), REPLACE)) + ); + } + + const bool isCreateTableAs = rule.HasBlock15(); + const auto& block = rule.GetBlock3(); + ETableType tableType = ETableType::Table; + bool temporary = false; + if (block.HasAlt2() && + IS_TOKEN(block.GetAlt2().GetToken1().GetId(), TABLESTORE) + ) { + tableType = ETableType::TableStore; + if (isCreateTableAs) { + Context().Error(GetPos(block.GetAlt2().GetToken1())) + << "CREATE TABLE AS is not supported for TABLESTORE"; + return false; + } + } else if (block.HasAlt3() && + IS_TOKEN(block.GetAlt3().GetToken1().GetId(), EXTERNAL) + ) { + tableType = ETableType::ExternalTable; + if (isCreateTableAs) { + Context().Error(GetPos(block.GetAlt3().GetToken1())) + << "CREATE TABLE AS is not supported for EXTERNAL TABLE"; + return false; + } + } else if (block.HasAlt4() && IS_TOKEN(block.GetAlt4().GetToken1().GetId(), TEMP) || + block.HasAlt5() && IS_TOKEN(block.GetAlt5().GetToken1().GetId(), TEMPORARY)) { + temporary = true; + } + + bool existingOk = false; + if (rule.HasBlock4()) { // IF NOT EXISTS + existingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(rule.GetBlock4().GetToken1().GetId(), IF) && + IS_TOKEN(rule.GetBlock4().GetToken2().GetId(), NOT) && + IS_TOKEN(rule.GetBlock4().GetToken3().GetId(), EXISTS) + ); + } + + 
if (replaceIfExists && tableType != ETableType::ExternalTable) { + Context().Error(GetPos(rule.GetBlock2().GetToken1())) + << "OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE"; + return false; + } + + TTableRef tr; + if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref5(), tr)) { + return false; + } + + TCreateTableParameters params{.TableType=tableType, .Temporary=temporary}; + if (!CreateTableEntry(rule.GetRule_create_table_entry7(), params, isCreateTableAs)) { + return false; + } + for (auto& block: rule.GetBlock8()) { + if (!CreateTableEntry(block.GetRule_create_table_entry2(), params, isCreateTableAs)) { + return false; + } + } + + if (rule.HasBlock11()) { + Context().Error(GetPos(rule.GetBlock11().GetRule_table_inherits1().GetToken1())) + << "INHERITS clause is not supported yet"; + return false; + } + + if (rule.HasBlock12()) { + if (tableType == ETableType::TableStore) { + Context().Error(GetPos(rule.GetBlock12().GetRule_table_partition_by1().GetToken1())) + << "PARTITION BY is not supported for TABLESTORE"; + return false; + } + const auto list = rule.GetBlock12().GetRule_table_partition_by1().GetRule_pure_column_list4(); + params.PartitionByColumns.push_back(IdEx(list.GetRule_an_id2(), *this)); + for (auto& node : list.GetBlock3()) { + params.PartitionByColumns.push_back(IdEx(node.GetRule_an_id2(), *this)); + } + } + + if (rule.HasBlock13()) { + if (!CreateTableSettings(rule.GetBlock13().GetRule_with_table_settings1(), params)) { + return false; + } + } + + if (rule.HasBlock14()) { + Context().Error(GetPos(rule.GetBlock14().GetRule_table_tablestore1().GetToken1())) + << "TABLESTORE clause is not supported yet"; + return false; + } + + TSourcePtr tableSource = nullptr; + if (isCreateTableAs) { + tableSource = TSqlAsValues(Ctx, Mode).Build(rule.GetBlock15().GetRule_table_as_source1().GetRule_values_source2(), "CreateTableAs"); + if (!tableSource) { + return false; + } + } + + if (!ValidateExternalTable(params)) { + return 
false; + } + + AddStatementToBlocks(blocks, BuildCreateTable(Ctx.Pos(), tr, existingOk, replaceIfExists, params, std::move(tableSource), Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore5: { + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core5().GetRule_drop_table_stmt1(); + const auto& block = rule.GetBlock2(); + ETableType tableType = ETableType::Table; + if (block.HasAlt2()) { + tableType = ETableType::TableStore; + } + if (block.HasAlt3()) { + tableType = ETableType::ExternalTable; + } + + bool missingOk = false; + if (rule.HasBlock3()) { // IF EXISTS + missingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(rule.GetBlock3().GetToken1().GetId(), IF) && + IS_TOKEN(rule.GetBlock3().GetToken2().GetId(), EXISTS) + ); + } + + TTableRef tr; + if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref4(), tr)) { + return false; + } + + AddStatementToBlocks(blocks, BuildDropTable(Ctx.Pos(), tr, missingOk, tableType, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore6: { + const auto& rule = core.GetAlt_sql_stmt_core6().GetRule_use_stmt1(); + Token(rule.GetToken1()); + if (!ClusterExpr(rule.GetRule_cluster_expr2(), true, Ctx.Scoped->CurrService, Ctx.Scoped->CurrCluster)) { + return false; + } + + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore7: { + Ctx.BodyPart(); + TSqlIntoTable intoTable(Ctx, Mode); + TNodePtr block(intoTable.Build(core.GetAlt_sql_stmt_core7().GetRule_into_table_stmt1())); + if (!block) { + return false; + } + blocks.emplace_back(block); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore8: { + if (Ctx.ParallelModeCount > 0) { + Error() << humanStatementName << " statement is not supported in parallel mode"; + return false; + } + + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core8().GetRule_commit_stmt1(); + Token(rule.GetToken1()); + blocks.emplace_back(BuildCommitClusters(Ctx.Pos())); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore9: { + Ctx.BodyPart(); + auto 
updateNode = Build(core.GetAlt_sql_stmt_core9().GetRule_update_stmt1()); + if (!updateNode) { + return false; + } + AddStatementToBlocks(blocks, updateNode); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore10: { + Ctx.BodyPart(); + auto deleteNode = Build(core.GetAlt_sql_stmt_core10().GetRule_delete_stmt1()); + if (!deleteNode) { + return false; + } + blocks.emplace_back(deleteNode); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore11: { + if (Ctx.ParallelModeCount > 0) { + Error() << humanStatementName << " statement is not supported in parallel mode"; + return false; + } + + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core11().GetRule_rollback_stmt1(); + Token(rule.GetToken1()); + blocks.emplace_back(BuildRollbackClusters(Ctx.Pos())); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore12: + if (!DeclareStatement(core.GetAlt_sql_stmt_core12().GetRule_declare_stmt1())) { + return false; + } + break; + case TRule_sql_stmt_core::kAltSqlStmtCore13: + if (!ImportStatement(core.GetAlt_sql_stmt_core13().GetRule_import_stmt1())) { + return false; + } + break; + case TRule_sql_stmt_core::kAltSqlStmtCore14: + if (!ExportStatement(core.GetAlt_sql_stmt_core14().GetRule_export_stmt1())) { + return false; + } + break; + case TRule_sql_stmt_core::kAltSqlStmtCore15: { + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core15().GetRule_alter_table_stmt1(); + const bool isTablestore = IS_TOKEN(rule.GetToken2().GetId(), TABLESTORE); + TTableRef tr; + if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref3(), tr)) { + return false; + } + + TAlterTableParameters params; + if (isTablestore) { + params.TableType = ETableType::TableStore; + } + if (!AlterTableAction(rule.GetRule_alter_table_action4(), params)) { + return false; + } + + for (auto& block : rule.GetBlock5()) { + if (!AlterTableAction(block.GetRule_alter_table_action2(), params)) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildAlterTable(Ctx.Pos(), tr, params, 
Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore16: { + // alter_external_table_stmt: ALTER EXTERNAL TABLE simple_table_ref alter_external_table_action (COMMA alter_external_table_action)* + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core16().GetRule_alter_external_table_stmt1(); + TTableRef tr; + if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref4(), tr)) { + return false; + } + + TAlterTableParameters params; + params.TableType = ETableType::ExternalTable; + if (!AlterExternalTableAction(rule.GetRule_alter_external_table_action5(), params)) { + return false; + } + + for (auto& block : rule.GetBlock6()) { + if (!AlterExternalTableAction(block.GetRule_alter_external_table_action2(), params)) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildAlterTable(Ctx.Pos(), tr, params, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore17: { + Ctx.BodyPart(); + auto node = DoStatement(core.GetAlt_sql_stmt_core17().GetRule_do_stmt1(), false); + if (!node) { + return false; + } + + blocks.push_back(node); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore18: { + Ctx.BodyPart(); + TNodePtr lambda; + TSymbolNameWithPos nameAndPos; + const auto& stmt = core.GetAlt_sql_stmt_core18().GetRule_define_action_or_subquery_stmt1(); + const TString kind = to_lower(Ctx.Token(stmt.GetToken2())); + YQL_ENSURE(kind == "action" || kind == "subquery"); + if (!DefineActionOrSubqueryStatement(stmt, nameAndPos, lambda)) { + return false; + } + + if (Ctx.CompactNamedExprs) { + const auto ref = Ctx.MakeName("named" + kind + "node"); + blocks.push_back(BuildNamedExpr(lambda)); + blocks.back()->SetLabel(ref); + lambda = BuildNamedExprReference(blocks.back(), ref, {}); + } + + PushNamedNode(nameAndPos.Pos, nameAndPos.Name, lambda); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore19: { + Ctx.BodyPart(); + auto node = IfStatement(core.GetAlt_sql_stmt_core19().GetRule_if_stmt1()); + if (!node) { + return false; + } + + 
blocks.push_back(node); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore20: { + Ctx.BodyPart(); + auto node = ForStatement(core.GetAlt_sql_stmt_core20().GetRule_for_stmt1()); + if (!node) { + return false; + } + + blocks.push_back(node); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore21: { + if (Ctx.ParallelModeCount > 0) { + Error() << humanStatementName << " statement is not supported in parallel mode"; + return false; + } + + Ctx.BodyPart(); + TSqlValues values(Ctx, Mode); + TPosition pos; + auto source = values.Build(core.GetAlt_sql_stmt_core21().GetRule_values_stmt1(), pos, {}, TPosition()); + if (!source) { + return false; + } + blocks.emplace_back(BuildSelectResult(pos, std::move(source), + Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW && Mode != NSQLTranslation::ESqlMode::SUBQUERY, Mode == NSQLTranslation::ESqlMode::SUBQUERY, + Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore22: { + // create_user_stmt: CREATE USER role_name create_user_option?; + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core22().GetRule_create_user_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + TDeferredAtom roleName; + bool allowSystemRoles = false; + if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) { + return false; + } + + TMaybe<TRoleParameters> roleParams; + if (node.HasBlock4()) { + roleParams.ConstructInPlace(); + if (!RoleParameters(node.GetBlock4().GetRule_create_user_option1(), *roleParams)) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildCreateUser(pos, service, cluster, roleName, roleParams, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore23: { + // alter_user_stmt: ALTER USER role_name (WITH? 
create_user_option | RENAME TO role_name); + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core23().GetRule_alter_user_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + TDeferredAtom roleName; + { + bool allowSystemRoles = true; + if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) { + return false; + } + } + + TNodePtr stmt; + switch (node.GetBlock4().Alt_case()) { + case TRule_alter_user_stmt_TBlock4::kAlt1: { + TRoleParameters roleParams; + if (!RoleParameters(node.GetBlock4().GetAlt1().GetRule_create_user_option2(), roleParams)) { + return false; + } + stmt = BuildAlterUser(pos, service, cluster, roleName, roleParams, Ctx.Scoped); + break; + } + case TRule_alter_user_stmt_TBlock4::kAlt2: { + TDeferredAtom tgtRoleName; + bool allowSystemRoles = false; + if (!RoleNameClause(node.GetBlock4().GetAlt2().GetRule_role_name3(), tgtRoleName, allowSystemRoles)) { + return false; + } + stmt = BuildRenameUser(pos, service, cluster, roleName, tgtRoleName,Ctx.Scoped); + break; + } + case TRule_alter_user_stmt_TBlock4::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + AddStatementToBlocks(blocks, stmt); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore24: { + // create_group_stmt: CREATE GROUP role_name (WITH USER role_name (COMMA role_name)* COMMA?)?; + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core24().GetRule_create_group_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + TDeferredAtom roleName; 
+ bool allowSystemRoles = false; + if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) { + return false; + } + + TRoleParameters roleParams; + if (node.HasBlock4()) { + auto& addDropNode = node.GetBlock4(); + TVector<TDeferredAtom> roles; + bool allowSystemRoles = false; + roleParams.Roles.emplace_back(); + if (!RoleNameClause(addDropNode.GetRule_role_name3(), roleParams.Roles.back(), allowSystemRoles)) { + return false; + } + + for (auto& item : addDropNode.GetBlock4()) { + roleParams.Roles.emplace_back(); + if (!RoleNameClause(item.GetRule_role_name2(), roleParams.Roles.back(), allowSystemRoles)) { + return false; + } + } + } + + AddStatementToBlocks(blocks, BuildCreateGroup(pos, service, cluster, roleName, roleParams, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore25: { + // alter_group_stmt: ALTER GROUP role_name ((ADD|DROP) USER role_name (COMMA role_name)* COMMA? | RENAME TO role_name); + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core25().GetRule_alter_group_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + TDeferredAtom roleName; + { + bool allowSystemRoles = true; + if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) { + return false; + } + } + + TNodePtr stmt; + switch (node.GetBlock4().Alt_case()) { + case TRule_alter_group_stmt_TBlock4::kAlt1: { + auto& addDropNode = node.GetBlock4().GetAlt1(); + const bool isDrop = IS_TOKEN(addDropNode.GetToken1().GetId(), DROP); + TVector<TDeferredAtom> roles; + bool allowSystemRoles = false; + roles.emplace_back(); + if (!RoleNameClause(addDropNode.GetRule_role_name3(), roles.back(), allowSystemRoles)) { + return false; + } + + for (auto& item : addDropNode.GetBlock4()) { + roles.emplace_back(); 
+ if (!RoleNameClause(item.GetRule_role_name2(), roles.back(), allowSystemRoles)) { + return false; + } + } + + stmt = BuildAlterGroup(pos, service, cluster, roleName, roles, isDrop, Ctx.Scoped); + break; + } + case TRule_alter_group_stmt_TBlock4::kAlt2: { + TDeferredAtom tgtRoleName; + bool allowSystemRoles = false; + if (!RoleNameClause(node.GetBlock4().GetAlt2().GetRule_role_name3(), tgtRoleName, allowSystemRoles)) { + return false; + } + stmt = BuildRenameGroup(pos, service, cluster, roleName, tgtRoleName, Ctx.Scoped); + break; + } + case TRule_alter_group_stmt_TBlock4::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + AddStatementToBlocks(blocks, stmt); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore26: { + // drop_role_stmt: DROP (USER|GROUP) (IF EXISTS)? role_name (COMMA role_name)* COMMA?; + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core26().GetRule_drop_role_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + const bool isUser = IS_TOKEN(node.GetToken2().GetId(), USER); + bool missingOk = false; + if (node.HasBlock3()) { // IF EXISTS + missingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) && + IS_TOKEN(node.GetBlock3().GetToken2().GetId(), EXISTS) + ); + } + + TVector<TDeferredAtom> roles; + bool allowSystemRoles = true; + roles.emplace_back(); + if (!RoleNameClause(node.GetRule_role_name4(), roles.back(), allowSystemRoles)) { + return false; + } + + for (auto& item : node.GetBlock5()) { + roles.emplace_back(); + if (!RoleNameClause(item.GetRule_role_name2(), roles.back(), allowSystemRoles)) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildDropRoles(pos, service, cluster, roles, isUser, 
missingOk, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore27: { + // create_object_stmt: CREATE OBJECT (IF NOT EXISTS)? name (TYPE type [WITH k=v,...]); + auto& node = core.GetAlt_sql_stmt_core27().GetRule_create_object_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + bool existingOk = false; + if (node.HasBlock3()) { // IF NOT EXISTS + existingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) && + IS_TOKEN(node.GetBlock3().GetToken2().GetId(), NOT) && + IS_TOKEN(node.GetBlock3().GetToken3().GetId(), EXISTS) + ); + } + + const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + const TString& typeId = Id(node.GetRule_object_type_ref7().GetRule_an_id_or_type1(), *this); + std::map<TString, TDeferredAtom> kv; + if (node.HasBlock9()) { + if (!ParseObjectFeatures(kv, node.GetBlock9().GetRule_create_object_features1().GetRule_object_features2())) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), objectId, typeId, existingOk, false, std::move(kv), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore28: { + // alter_object_stmt: ALTER OBJECT name (TYPE type [SET k=v,...]); + auto& node = core.GetAlt_sql_stmt_core28().GetRule_alter_object_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + const TString& typeId = Id(node.GetRule_object_type_ref6().GetRule_an_id_or_type1(), *this); + std::map<TString, 
TDeferredAtom> kv; + if (!ParseObjectFeatures(kv, node.GetRule_alter_object_features8().GetRule_object_features2())) { + return false; + } + + AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, typeId, std::move(kv), std::set<TString>(), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore29: { + // drop_object_stmt: DROP OBJECT (IF EXISTS)? name (TYPE type [WITH k=v,...]); + auto& node = core.GetAlt_sql_stmt_core29().GetRule_drop_object_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + bool missingOk = false; + if (node.HasBlock3()) { // IF EXISTS + missingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) && + IS_TOKEN(node.GetBlock3().GetToken2().GetId(), EXISTS) + ); + } + + const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + const TString& typeId = Id(node.GetRule_object_type_ref7().GetRule_an_id_or_type1(), *this); + std::map<TString, TDeferredAtom> kv; + if (node.HasBlock9()) { + if (!ParseObjectFeatures(kv, node.GetBlock9().GetRule_drop_object_features1().GetRule_object_features2())) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), objectId, typeId, missingOk, std::move(kv), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore30: { + // create_external_data_source_stmt: CREATE (OR REPLACE)? EXTERNAL DATA SOURCE (IF NOT EXISTS)? 
name WITH (k=v,...); + auto& node = core.GetAlt_sql_stmt_core30().GetRule_create_external_data_source_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref7().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref7().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + bool replaceIfExists = false; + if (node.HasBlock2()) { // OR REPLACE + replaceIfExists = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock2().GetToken1().GetId(), OR) && + IS_TOKEN(node.GetBlock2().GetToken2().GetId(), REPLACE) + ); + } + + bool existingOk = false; + if (node.HasBlock6()) { // IF NOT EXISTS + existingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock6().GetToken1().GetId(), IF) && + IS_TOKEN(node.GetBlock6().GetToken2().GetId(), NOT) && + IS_TOKEN(node.GetBlock6().GetToken3().GetId(), EXISTS) + ); + } + + const TString& objectId = Id(node.GetRule_object_ref7().GetRule_id_or_at2(), *this).second; + std::map<TString, TDeferredAtom> kv; + if (!ParseExternalDataSourceSettings(kv, node.GetRule_with_table_settings8())) { + return false; + } + + AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), "EXTERNAL_DATA_SOURCE", existingOk, replaceIfExists, std::move(kv), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore31: { + // alter_external_data_source_stmt: ALTER EXTERNAL DATA SOURCE object_ref alter_external_data_source_action (COMMA alter_external_data_source_action)* + Ctx.BodyPart(); + const auto& node = core.GetAlt_sql_stmt_core31().GetRule_alter_external_data_source_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref5().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = 
Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second; + std::map<TString, TDeferredAtom> kv; + std::set<TString> toReset; + if (!ParseExternalDataSourceSettings(kv, toReset, node.GetRule_alter_external_data_source_action6())) { + return false; + } + + for (const auto& action : node.GetBlock7()) { + if (!ParseExternalDataSourceSettings(kv, toReset, action.GetRule_alter_external_data_source_action2())) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, "EXTERNAL_DATA_SOURCE", std::move(kv), std::move(toReset), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore32: { + // drop_external_data_source_stmt: DROP EXTERNAL DATA SOURCE (IF EXISTS)? name; + auto& node = core.GetAlt_sql_stmt_core32().GetRule_drop_external_data_source_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref6().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref6().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + bool missingOk = false; + if (node.HasBlock5()) { // IF EXISTS + missingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock5().GetToken1().GetId(), IF) && + IS_TOKEN(node.GetBlock5().GetToken2().GetId(), EXISTS) + ); + } + + const TString& objectId = Id(node.GetRule_object_ref6().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), "EXTERNAL_DATA_SOURCE", missingOk, {}, context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore33: { + // create_replication_stmt: CREATE ASYNC REPLICATION + auto& node = core.GetAlt_sql_stmt_core33().GetRule_create_replication_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + const auto& cluster = node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(); + if 
(!ClusterExpr(cluster, false, context.ServiceId, context.Cluster)) { + return false; + } + } + + auto prefixPath = Ctx.GetPrefixPath(context.ServiceId, context.Cluster); + + std::vector<std::pair<TString, TString>> targets; + if (!AsyncReplicationTarget(targets, prefixPath, node.GetRule_replication_target6(), *this)) { + return false; + } + for (auto& block : node.GetBlock7()) { + if (!AsyncReplicationTarget(targets, prefixPath, block.GetRule_replication_target2(), *this)) { + return false; + } + } + + std::map<TString, TNodePtr> settings; + if (!AsyncReplicationSettings(settings, node.GetRule_replication_settings10(), *this, true)) { + return false; + } + + const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, BuildCreateAsyncReplication(Ctx.Pos(), BuildTablePath(prefixPath, id), + std::move(targets), std::move(settings), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore34: { + // drop_replication_stmt: DROP ASYNC REPLICATION + auto& node = core.GetAlt_sql_stmt_core34().GetRule_drop_replication_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + const auto& cluster = node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(); + if (!ClusterExpr(cluster, false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, BuildDropAsyncReplication(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), id), + node.HasBlock5(), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore35: { + Ctx.BodyPart(); + // create_topic_stmt: CREATE TOPIC (IF NOT EXISTS)? topic1 (CONSUMER ...)? [WITH (opt1 = val1, ...]? 
+ auto& rule = core.GetAlt_sql_stmt_core35().GetRule_create_topic_stmt1(); + TTopicRef tr; + if (!TopicRefImpl(rule.GetRule_topic_ref4(), tr)) { + return false; + } + bool existingOk = false; + if (rule.HasBlock3()) { // if not exists + existingOk = true; + } + + TCreateTopicParameters params; + params.ExistingOk = existingOk; + if (rule.HasBlock5()) { //create_topic_entry (consumers) + auto& entries = rule.GetBlock5().GetRule_create_topic_entries1(); + auto& firstEntry = entries.GetRule_create_topic_entry2(); + if (!CreateTopicEntry(firstEntry, params)) { + return false; + } + const auto& list = entries.GetBlock3(); + for (auto& node : list) { + if (!CreateTopicEntry(node.GetRule_create_topic_entry2(), params)) { + return false; + } + } + + } + if (rule.HasBlock6()) { // with_topic_settings + auto& topic_settings_node = rule.GetBlock6().GetRule_with_topic_settings1().GetRule_topic_settings3(); + CreateTopicSettings(topic_settings_node, params.TopicSettings); + } + + AddStatementToBlocks(blocks, BuildCreateTopic(Ctx.Pos(), tr, params, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore36: { +// alter_topic_stmt: ALTER TOPIC topic_ref alter_topic_action (COMMA alter_topic_action)*; +// alter_topic_stmt: ALTER TOPIC IF EXISTS topic_ref alter_topic_action (COMMA alter_topic_action)*; + + Ctx.BodyPart(); + auto& rule = core.GetAlt_sql_stmt_core36().GetRule_alter_topic_stmt1(); + TTopicRef tr; + bool missingOk = false; + if (rule.HasBlock3()) { // IF EXISTS + missingOk = true; + } + if (!TopicRefImpl(rule.GetRule_topic_ref4(), tr)) { + return false; + } + + TAlterTopicParameters params; + params.MissingOk = missingOk; + auto& firstEntry = rule.GetRule_alter_topic_action5(); + if (!AlterTopicAction(firstEntry, params)) { + return false; + } + const auto& list = rule.GetBlock6(); + for (auto& node : list) { + if (!AlterTopicAction(node.GetRule_alter_topic_action2(), params)) { + return false; + } + } + + AddStatementToBlocks(blocks, 
BuildAlterTopic(Ctx.Pos(), tr, params, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore37: { + // drop_topic_stmt: DROP TOPIC (IF EXISTS)? topic_ref; + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core37().GetRule_drop_topic_stmt1(); + + TDropTopicParameters params; + if (rule.HasBlock3()) { // IF EXISTS + params.MissingOk = true; + } else { + params.MissingOk = false; + } + + TTopicRef tr; + if (!TopicRefImpl(rule.GetRule_topic_ref4(), tr)) { + return false; + } + AddStatementToBlocks(blocks, BuildDropTopic(Ctx.Pos(), tr, params, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore38: { + // GRANT permission_name_target ON an_id_schema (COMMA an_id_schema)* TO role_name (COMMA role_name)* COMMA? (WITH GRANT OPTION)?; + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core38().GetRule_grant_permissions_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + TVector<TDeferredAtom> permissions; + if (!PermissionNameClause(node.GetRule_permission_name_target2(), permissions, node.has_block10())) { + return false; + } + + TVector<TDeferredAtom> schemaPaths; + schemaPaths.emplace_back(Ctx.Pos(), Id(node.GetRule_an_id_schema4(), *this)); + for (const auto& item : node.GetBlock5()) { + schemaPaths.emplace_back(Ctx.Pos(), Id(item.GetRule_an_id_schema2(), *this)); + } + + TVector<TDeferredAtom> roleNames; + const bool allowSystemRoles = false; + roleNames.emplace_back(); + if (!RoleNameClause(node.GetRule_role_name7(), roleNames.back(), allowSystemRoles)) { + return false; + } + for (const auto& item : node.GetBlock8()) { + roleNames.emplace_back(); + if (!RoleNameClause(item.GetRule_role_name2(), roleNames.back(), allowSystemRoles)) { + return false; + } + } + + 
AddStatementToBlocks(blocks, BuildGrantPermissions(pos, service, cluster, permissions, schemaPaths, roleNames, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore39: + { + // REVOKE (GRANT OPTION FOR)? permission_name_target ON an_id_schema (COMMA an_id_schema)* FROM role_name (COMMA role_name)*; + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core39().GetRule_revoke_permissions_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + + TVector<TDeferredAtom> permissions; + if (!PermissionNameClause(node.GetRule_permission_name_target3(), permissions, node.HasBlock2())) { + return false; + } + + TVector<TDeferredAtom> schemaPaths; + schemaPaths.emplace_back(Ctx.Pos(), Id(node.GetRule_an_id_schema5(), *this)); + for (const auto& item : node.GetBlock6()) { + schemaPaths.emplace_back(Ctx.Pos(), Id(item.GetRule_an_id_schema2(), *this)); + } + + TVector<TDeferredAtom> roleNames; + const bool allowSystemRoles = false; + roleNames.emplace_back(); + if (!RoleNameClause(node.GetRule_role_name8(), roleNames.back(), allowSystemRoles)) { + return false; + } + for (const auto& item : node.GetBlock9()) { + roleNames.emplace_back(); + if (!RoleNameClause(item.GetRule_role_name2(), roleNames.back(), allowSystemRoles)) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildRevokePermissions(pos, service, cluster, permissions, schemaPaths, roleNames, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore40: + { + // ALTER TABLESTORE object_ref alter_table_store_action (COMMA alter_table_store_action)*; + auto& node = core.GetAlt_sql_stmt_core40().GetRule_alter_table_store_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + + if (node.GetRule_object_ref3().HasBlock1()) { + if 
(!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + const TString& typeId = "TABLESTORE"; + std::map<TString, TDeferredAtom> kv; + if (!ParseTableStoreFeatures(kv, node.GetRule_alter_table_store_action4())) { + return false; + } + + AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, typeId, std::move(kv), std::set<TString>(), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore41: + { + // create_object_stmt: UPSERT OBJECT name (TYPE type [WITH k=v,...]); + auto& node = core.GetAlt_sql_stmt_core41().GetRule_upsert_object_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + const TString& typeId = Id(node.GetRule_object_type_ref6().GetRule_an_id_or_type1(), *this); + std::map<TString, TDeferredAtom> kv; + if (node.HasBlock8()) { + if (!ParseObjectFeatures(kv, node.GetBlock8().GetRule_create_object_features1().GetRule_object_features2())) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildUpsertObjectOperation(Ctx.Pos(), objectId, typeId, std::move(kv), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore42: { + // create_view_stmt: CREATE VIEW name WITH (k = v, ...) 
AS select_stmt; + auto& node = core.GetAlt_sql_stmt_core42().GetRule_create_view_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, + context.ServiceId, + context.Cluster)) { + return false; + } + } + + std::map<TString, TDeferredAtom> features; + if (node.HasBlock4()) { + if (!ParseObjectFeatures(features, node.GetBlock4().GetRule_create_object_features1().GetRule_object_features2())) { + return false; + } + } + if (!ParseViewQuery(features, node.GetRule_select_stmt6())) { + return false; + } + + const TString objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + constexpr const char* TypeId = "VIEW"; + AddStatementToBlocks(blocks, + BuildCreateObjectOperation(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TypeId, + false, + false, + std::move(features), + context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore43: { + // drop_view_stmt: DROP VIEW name; + auto& node = core.GetAlt_sql_stmt_core43().GetRule_drop_view_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, + context.ServiceId, + context.Cluster)) { + return false; + } + } + + const TString objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + constexpr const char* TypeId = "VIEW"; + AddStatementToBlocks(blocks, + BuildDropObjectOperation(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TypeId, + false, + {}, + context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore44: { + // alter_replication_stmt: ALTER ASYNC REPLICATION + auto& node = core.GetAlt_sql_stmt_core44().GetRule_alter_replication_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if 
(node.GetRule_object_ref4().HasBlock1()) { + const auto& cluster = node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(); + if (!ClusterExpr(cluster, false, context.ServiceId, context.Cluster)) { + return false; + } + } + + std::map<TString, TNodePtr> settings; + if (!AsyncReplicationAlterAction(settings, node.GetRule_alter_replication_action5(), *this)) { + return false; + } + for (auto& block : node.GetBlock6()) { + if (!AsyncReplicationAlterAction(settings, block.GetRule_alter_replication_action2(), *this)) { + return false; + } + } + + const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, BuildAlterAsyncReplication(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), id), + std::move(settings), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore45: { + // create_resource_pool_stmt: CREATE RESOURCE POOL name WITH (k=v,...); + auto& node = core.GetAlt_sql_stmt_core45().GetRule_create_resource_pool_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + std::map<TString, TDeferredAtom> kv; + if (!ParseResourcePoolSettings(kv, node.GetRule_with_table_settings5())) { + return false; + } + + AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL", false, false, std::move(kv), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore46: { + // alter_resource_pool_stmt: ALTER RESOURCE POOL object_ref alter_resource_pool_action (COMMA alter_external_data_source_action)* + Ctx.BodyPart(); + const auto& node = core.GetAlt_sql_stmt_core46().GetRule_alter_resource_pool_stmt1(); + TObjectOperatorContext 
context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + std::map<TString, TDeferredAtom> kv; + std::set<TString> toReset; + if (!ParseResourcePoolSettings(kv, toReset, node.GetRule_alter_resource_pool_action5())) { + return false; + } + + for (const auto& action : node.GetBlock6()) { + if (!ParseResourcePoolSettings(kv, toReset, action.GetRule_alter_resource_pool_action2())) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL", std::move(kv), std::move(toReset), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore47: { + // drop_resource_pool_stmt: DROP RESOURCE POOL name; + auto& node = core.GetAlt_sql_stmt_core47().GetRule_drop_resource_pool_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref4().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL", false, {}, context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore48: { + // create_backup_collection_stmt: CREATE BACKUP COLLECTION name WITH (k=v,...); + auto& node = core.GetAlt_sql_stmt_core48().GetRule_create_backup_collection_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_backup_collection2().GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_backup_collection2().GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, + context.ServiceId, + context.Cluster)) { + return 
false; + } + } + + std::map<TString, TDeferredAtom> kv; + if (!ParseBackupCollectionSettings(kv, node.GetRule_backup_collection_settings6())) { + return false; + } + + bool database = false; + TVector<TDeferredAtom> tables; + if (node.HasBlock3()) { + database = node.GetBlock3().GetRule_create_backup_collection_entries1().has_alt_create_backup_collection_entries1(); + if (node.GetBlock3().GetRule_create_backup_collection_entries1().has_alt_create_backup_collection_entries2()) { + if (!ParseBackupCollectionTables( + tables, + node + .GetBlock3() + .GetRule_create_backup_collection_entries1() + .alt_create_backup_collection_entries2() + .GetRule_create_backup_collection_entries_many1() + .GetRule_table_list2())) + { + return false; + } + } + } + + const TString& objectId = Id(node.GetRule_backup_collection2().GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, + BuildCreateBackupCollection(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TCreateBackupCollectionParameters { + .Settings = std::move(kv), + .Database = database, + .Tables = tables, + .ExistingOk = false, + }, + context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore49: { + // alter_backup_collection_stmt: ALTER BACKUP COLLECTION name alter_backup_collection_action (COMMA alter_backup_collection_action)*; + auto& node = core.GetAlt_sql_stmt_core49().GetRule_alter_backup_collection_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_backup_collection2().GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_backup_collection2().GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, + context.ServiceId, + context.Cluster)) { + return false; + } + } + + std::map<TString, TDeferredAtom> kv; + std::set<TString> toReset; + + bool addDatabase = false; + bool dropDatabase = false; + TVector<TDeferredAtom> addTables; + TVector<TDeferredAtom> removeTables; + + switch 
(node.GetBlock3().Alt_case()) { + case TRule_alter_backup_collection_stmt_TBlock3::kAlt1: { + if (!ParseBackupCollectionSettings(kv, toReset, node.GetBlock3().GetAlt1().GetRule_alter_backup_collection_actions1())) { + return false; + } + break; + } + case TRule_alter_backup_collection_stmt_TBlock3::kAlt2: { + if (!ParseBackupCollectionEntries( + addDatabase, + dropDatabase, + addTables, + removeTables, + node.GetBlock3().GetAlt2().GetRule_alter_backup_collection_entries1())) + { + return false; + } + break; + } + case TRule_alter_backup_collection_stmt_TBlock3::ALT_NOT_SET: {} // do nothing + } + + auto database = addDatabase ? + TAlterBackupCollectionParameters::EDatabase::Add : + dropDatabase ? + TAlterBackupCollectionParameters::EDatabase::Drop : + TAlterBackupCollectionParameters::EDatabase::Unchanged; + + const TString& objectId = Id(node.GetRule_backup_collection2().GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, + BuildAlterBackupCollection(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TAlterBackupCollectionParameters { + .Settings = std::move(kv), + .SettingsToReset = std::move(toReset), + .Database = database, + .TablesToAdd = addTables, + .TablesToDrop = removeTables, + .MissingOk = false, + }, + context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore50: { + // drop_backup_collection_stmt: DROP BACKUP COLLECTION name; + auto& node = core.GetAlt_sql_stmt_core50().GetRule_drop_backup_collection_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_backup_collection2().GetRule_object_ref3().HasBlock1()) { + if (!ClusterExpr(node.GetRule_backup_collection2().GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(), + false, + context.ServiceId, + context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_backup_collection2().GetRule_object_ref3().GetRule_id_or_at2(), *this).second; + 
AddStatementToBlocks(blocks, + BuildDropBackupCollection(Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TDropBackupCollectionParameters { + .MissingOk = false, + }, + context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore51: { + // analyze_stmt: ANALYZE table_ref + Ctx.BodyPart(); + const auto& rule = core.GetAlt_sql_stmt_core51().GetRule_analyze_stmt1(); + + if (!rule.GetRule_analyze_table_list2().GetBlock2().empty()) { + Error() << "ANALYZE with multitables hasn't been implemented yet"; + return false; + } + auto analyzeTable = rule.GetRule_analyze_table_list2().GetRule_analyze_table1(); + + TVector<TString> columns; + if (analyzeTable.HasBlock2()) { + auto columnsNode = + analyzeTable.GetBlock2().GetRule_column_list2(); + + if (columnsNode.HasRule_column_name1()) { + columns.push_back(Id(columnsNode.GetRule_column_name1().GetRule_an_id2(), *this)); + for (const auto& columnNode: columnsNode.GetBlock2()) { + columns.push_back(Id(columnNode.GetRule_column_name2().GetRule_an_id2(), *this)); + } + } + } + + TTableRef tr; + if (!SimpleTableRefImpl(rule.GetRule_analyze_table_list2().GetRule_analyze_table1().GetRule_simple_table_ref1(), tr)) { + return false; + } + + auto params = TAnalyzeParams{.Table = std::make_shared<TTableRef>(tr), .Columns = std::move(columns)}; + AddStatementToBlocks(blocks, BuildAnalyze(Ctx.Pos(), tr.Service, tr.Cluster, params, Ctx.Scoped)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore52: { + // create_resource_pool_classifier_stmt: CREATE RESOURCE POOL CLASSIFIER name WITH (k=v,...); + auto& node = core.GetAlt_sql_stmt_core52().GetRule_create_resource_pool_classifier_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref5().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = 
Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second; + std::map<TString, TDeferredAtom> kv; + if (!ParseResourcePoolClassifierSettings(kv, node.GetRule_with_table_settings6())) { + return false; + } + + AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL_CLASSIFIER", false, false, std::move(kv), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore53: { + // alter_resource_pool_classifier_stmt: ALTER RESOURCE POOL CLASSIFIER object_ref alter_resource_pool_classifier_action (COMMA alter_resource_pool_classifier_action)* + Ctx.BodyPart(); + const auto& node = core.GetAlt_sql_stmt_core53().GetRule_alter_resource_pool_classifier_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref5().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second; + std::map<TString, TDeferredAtom> kv; + std::set<TString> toReset; + if (!ParseResourcePoolClassifierSettings(kv, toReset, node.GetRule_alter_resource_pool_classifier_action6())) { + return false; + } + + for (const auto& action : node.GetBlock7()) { + if (!ParseResourcePoolClassifierSettings(kv, toReset, action.GetRule_alter_resource_pool_classifier_action2())) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL_CLASSIFIER", std::move(kv), std::move(toReset), context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore54: { + // drop_resource_pool_classifier_stmt: DROP RESOURCE POOL CLASSIFIER name; + auto& node = core.GetAlt_sql_stmt_core54().GetRule_drop_resource_pool_classifier_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref5().HasBlock1()) { + if 
(!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString& objectId = Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL_CLASSIFIER", false, {}, context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore55: { + // backup_stmt: BACKUP object_ref (INCREMENTAL)?; + auto& node = core.GetAlt_sql_stmt_core55().GetRule_backup_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref2().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref2().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + bool incremental = node.HasBlock3(); + + const TString& objectId = Id(node.GetRule_object_ref2().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, + BuildBackup( + Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TBackupParameters{ + .Incremental = incremental, + }, + context)); + break; + } + case TRule_sql_stmt_core::kAltSqlStmtCore56: { + // restore_stmt: RESTORE object_ref (AT STRING_VALUE)?; + auto& node = core.GetAlt_sql_stmt_core56().GetRule_restore_stmt1(); + TObjectOperatorContext context(Ctx.Scoped); + if (node.GetRule_object_ref2().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref2().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + TString at; + if (node.HasBlock3()) { + const TString stringValue = Ctx.Token(node.GetBlock3().GetToken2()); + const auto unescaped = StringContent(Ctx, Ctx.Pos(), stringValue); + if (!unescaped) { + return false; + } + at = unescaped->Content; + } + + const TString& objectId = Id(node.GetRule_object_ref2().GetRule_id_or_at2(), *this).second; + AddStatementToBlocks(blocks, + BuildRestore( + 
Ctx.Pos(), + BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), + TRestoreParameters{ + .At = at, + }, + context)); + break; + } + case TRule_sql_stmt_core::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownStatement" + internalStatementName); + AltNotImplemented("sql_stmt_core", core); + return false; + } + + Ctx.IncrementMonCounter("sql_features", internalStatementName); + return !Ctx.HasPendingErrors; +} + +bool TSqlQuery::DeclareStatement(const TRule_declare_stmt& stmt) { + TNodePtr defaultValue; + if (stmt.HasBlock5()) { + TSqlExpression sqlExpr(Ctx, Mode); + auto exprOrId = sqlExpr.LiteralExpr(stmt.GetBlock5().GetRule_literal_value2()); + if (!exprOrId) { + return false; + } + if (!exprOrId->Expr) { + Ctx.Error() << "Identifier is not expected here"; + return false; + } + defaultValue = exprOrId->Expr; + } + if (defaultValue) { + Error() << "DEFAULT value not supported yet"; + return false; + } + if (!Ctx.IsParseHeading()) { + Error() << "DECLARE statement should be in beginning of query, but it's possible to use PRAGMA or USE before it"; + return false; + } + + TString varName; + if (!NamedNodeImpl(stmt.GetRule_bind_parameter2(), varName, *this)) { + return false; + } + const auto varPos = Ctx.Pos(); + const auto typeNode = TypeNode(stmt.GetRule_type_name4()); + if (!typeNode) { + return false; + } + if (IsAnonymousName(varName)) { + Ctx.Error(varPos) << "Can not use anonymous name '" << varName << "' in DECLARE statement"; + return false; + } + + if (Ctx.IsAlreadyDeclared(varName)) { + Ctx.Warning(varPos, TIssuesIds::YQL_DUPLICATE_DECLARE) << "Duplicate declaration of '" << varName << "' will be ignored"; + } else { + PushNamedAtom(varPos, varName); + Ctx.DeclareVariable(varName, varPos, typeNode); + } + return true; +} + +bool TSqlQuery::ExportStatement(const TRule_export_stmt& stmt) { + if (Mode != NSQLTranslation::ESqlMode::LIBRARY || !TopLevel) { + Error() << "EXPORT statement should be used only in a library on 
the top level"; + return false; + } + + TVector<TSymbolNameWithPos> bindNames; + if (!BindList(stmt.GetRule_bind_parameter_list2(), bindNames)) { + return false; + } + + for (auto& bindName : bindNames) { + if (!Ctx.AddExport(bindName.Pos, bindName.Name)) { + return false; + } + } + return true; +} + +bool TSqlQuery::AlterTableAction(const TRule_alter_table_action& node, TAlterTableParameters& params) { + if (params.RenameTo) { + // rename action is followed by some other actions + Error() << "RENAME TO can not be used together with another table action"; + return false; + } + + switch (node.Alt_case()) { + case TRule_alter_table_action::kAltAlterTableAction1: { + // ADD COLUMN + const auto& addRule = node.GetAlt_alter_table_action1().GetRule_alter_table_add_column1(); + if (!AlterTableAddColumn(addRule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction2: { + // DROP COLUMN + const auto& dropRule = node.GetAlt_alter_table_action2().GetRule_alter_table_drop_column1(); + if (!AlterTableDropColumn(dropRule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction3: { + // ALTER COLUMN + const auto& alterRule = node.GetAlt_alter_table_action3().GetRule_alter_table_alter_column1(); + if (!AlterTableAlterColumn(alterRule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction4: { + // ADD FAMILY + const auto& familyEntry = node.GetAlt_alter_table_action4().GetRule_alter_table_add_column_family1() + .GetRule_family_entry2(); + if (!AlterTableAddFamily(familyEntry, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction5: { + // ALTER FAMILY + const auto& alterRule = node.GetAlt_alter_table_action5().GetRule_alter_table_alter_column_family1(); + if (!AlterTableAlterFamily(alterRule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction6: { + // SET (uncompat) 
+ const auto& setRule = node.GetAlt_alter_table_action6().GetRule_alter_table_set_table_setting_uncompat1(); + if (!AlterTableSetTableSetting(setRule, params.TableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction7: { + // SET (compat) + const auto& setRule = node.GetAlt_alter_table_action7().GetRule_alter_table_set_table_setting_compat1(); + if (!AlterTableSetTableSetting(setRule, params.TableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction8: { + // RESET + const auto& setRule = node.GetAlt_alter_table_action8().GetRule_alter_table_reset_table_setting1(); + if (!AlterTableResetTableSetting(setRule, params.TableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction9: { + // ADD INDEX + const auto& addIndex = node.GetAlt_alter_table_action9().GetRule_alter_table_add_index1(); + if (!AlterTableAddIndex(addIndex, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction10: { + // DROP INDEX + const auto& dropIndex = node.GetAlt_alter_table_action10().GetRule_alter_table_drop_index1(); + AlterTableDropIndex(dropIndex, params); + break; + } + case TRule_alter_table_action::kAltAlterTableAction11: { + // RENAME TO + if (!params.IsEmpty()) { + // rename action follows some other actions + Error() << "RENAME TO can not be used together with another table action"; + return false; + } + + const auto& renameTo = node.GetAlt_alter_table_action11().GetRule_alter_table_rename_to1(); + AlterTableRenameTo(renameTo, params); + break; + } + case TRule_alter_table_action::kAltAlterTableAction12: { + // ADD CHANGEFEED + const auto& rule = node.GetAlt_alter_table_action12().GetRule_alter_table_add_changefeed1(); + if (!AlterTableAddChangefeed(rule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction13: { + 
// ALTER CHANGEFEED + const auto& rule = node.GetAlt_alter_table_action13().GetRule_alter_table_alter_changefeed1(); + if (!AlterTableAlterChangefeed(rule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction14: { + // DROP CHANGEFEED + const auto& rule = node.GetAlt_alter_table_action14().GetRule_alter_table_drop_changefeed1(); + AlterTableDropChangefeed(rule, params); + break; + } + case TRule_alter_table_action::kAltAlterTableAction15: { + // RENAME INDEX TO + if (!params.IsEmpty()) { + // rename action follows some other actions + Error() << "RENAME INDEX TO can not be used together with another table action"; + return false; + } + + const auto& renameTo = node.GetAlt_alter_table_action15().GetRule_alter_table_rename_index_to1(); + AlterTableRenameIndexTo(renameTo, params); + break; + } + case TRule_alter_table_action::kAltAlterTableAction16: { + // ALTER INDEX + const auto& rule = node.GetAlt_alter_table_action16().GetRule_alter_table_alter_index1(); + if (!AlterTableAlterIndex(rule, params)) { + return false; + } + break; + } + case TRule_alter_table_action::kAltAlterTableAction17: { + // ALTER COLUMN id DROP NOT NULL + const auto& alterRule = node.GetAlt_alter_table_action17().GetRule_alter_table_alter_column_drop_not_null1(); + + if (!AlterTableAlterColumnDropNotNull(alterRule, params)) { + return false; + } + + break; + } + + case TRule_alter_table_action::ALT_NOT_SET: { + AltNotImplemented("alter_table_action", node); + return false; + } + } + return true; +} + +bool TSqlQuery::AlterExternalTableAction(const TRule_alter_external_table_action& node, TAlterTableParameters& params) { + if (params.RenameTo) { + // rename action is followed by some other actions + Error() << "RENAME TO can not be used together with another table action"; + return false; + } + + switch (node.Alt_case()) { + case TRule_alter_external_table_action::kAltAlterExternalTableAction1: { + // ADD COLUMN + const auto& addRule = 
node.GetAlt_alter_external_table_action1().GetRule_alter_table_add_column1(); + if (!AlterTableAddColumn(addRule, params)) { + return false; + } + break; + } + case TRule_alter_external_table_action::kAltAlterExternalTableAction2: { + // DROP COLUMN + const auto& dropRule = node.GetAlt_alter_external_table_action2().GetRule_alter_table_drop_column1(); + if (!AlterTableDropColumn(dropRule, params)) { + return false; + } + break; + } + case TRule_alter_external_table_action::kAltAlterExternalTableAction3: { + // SET (uncompat) + const auto& setRule = node.GetAlt_alter_external_table_action3().GetRule_alter_table_set_table_setting_uncompat1(); + if (!AlterTableSetTableSetting(setRule, params.TableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_external_table_action::kAltAlterExternalTableAction4: { + // SET (compat) + const auto& setRule = node.GetAlt_alter_external_table_action4().GetRule_alter_table_set_table_setting_compat1(); + if (!AlterTableSetTableSetting(setRule, params.TableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_external_table_action::kAltAlterExternalTableAction5: { + // RESET + const auto& setRule = node.GetAlt_alter_external_table_action5().GetRule_alter_table_reset_table_setting1(); + if (!AlterTableResetTableSetting(setRule, params.TableSettings, params.TableType)) { + return false; + } + break; + } + + case TRule_alter_external_table_action::ALT_NOT_SET: + AltNotImplemented("alter_external_table_action", node); + return false; + } + return true; +} + +bool TSqlQuery::AlterTableAddColumn(const TRule_alter_table_add_column& node, TAlterTableParameters& params) { + auto columnSchema = ColumnSchemaImpl(node.GetRule_column_schema3()); + if (!columnSchema) { + return false; + } + if (columnSchema->Families.size() > 1) { + Ctx.Error() << "Several column families for a single column are not yet supported"; + return false; + } + params.AddColumns.push_back(*columnSchema); + return 
true; +} + +bool TSqlQuery::AlterTableDropColumn(const TRule_alter_table_drop_column& node, TAlterTableParameters& params) { + TString name = Id(node.GetRule_an_id3(), *this); + params.DropColumns.push_back(name); + return true; +} + +bool TSqlQuery::AlterTableAlterColumn(const TRule_alter_table_alter_column& node, + TAlterTableParameters& params) +{ + TString name = Id(node.GetRule_an_id3(), *this); + const TPosition pos(Context().Pos()); + TVector<TIdentifier> families; + const auto& familyRelation = node.GetRule_family_relation5(); + families.push_back(IdEx(familyRelation.GetRule_an_id2(), *this)); + params.AlterColumns.emplace_back(pos, name, nullptr, false, families, false, nullptr, TColumnSchema::ETypeOfChange::SetFamily); + return true; +} + +bool TSqlQuery::AlterTableAddFamily(const TRule_family_entry& node, TAlterTableParameters& params) { + TFamilyEntry family(IdEx(node.GetRule_an_id2(), *this)); + if (!FillFamilySettings(node.GetRule_family_settings3(), family)) { + return false; + } + params.AddColumnFamilies.push_back(family); + return true; +} + +bool TSqlQuery::AlterTableAlterFamily(const TRule_alter_table_alter_column_family& node, + TAlterTableParameters& params) +{ + TFamilyEntry* entry = nullptr; + TIdentifier name = IdEx(node.GetRule_an_id3(), *this); + for (auto& family : params.AlterColumnFamilies) { + if (family.Name.Name == name.Name) { + entry = &family; + break; + } + } + if (!entry) { + entry = &params.AlterColumnFamilies.emplace_back(name); + } + TIdentifier settingName = IdEx(node.GetRule_an_id5(), *this); + const TRule_family_setting_value& value = node.GetRule_family_setting_value6(); + if (to_lower(settingName.Name) == "data") { + if (entry->Data) { + Ctx.Error() << "Redefinition of 'data' setting for column family '" << name.Name + << "' in one alter"; + return false; + } + const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1())); + entry->Data = BuildLiteralSmartString(Ctx, stringValue); + } else if 
(to_lower(settingName.Name) == "compression") { + if (entry->Compression) { + Ctx.Error() << "Redefinition of 'compression' setting for column family '" << name.Name + << "' in one alter"; + return false; + } + const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1())); + entry->Compression = BuildLiteralSmartString(Ctx, stringValue); + } else if (to_lower(settingName.Name) == "compression_level") { + if (entry->CompressionLevel) { + Ctx.Error() << "Redefinition of 'compression_level' setting for column family '" << name.Name << "' in one alter"; + return false; + } + entry->CompressionLevel = LiteralNumber(Ctx, value.GetAlt_family_setting_value2().GetRule_integer1()); + } else { + Ctx.Error() << "Unknown table setting: " << settingName.Name; + return false; + } + return true; +} + +bool TSqlQuery::AlterTableSetTableSetting( + const TRule_alter_table_set_table_setting_uncompat& node, TTableSettings& tableSettings, ETableType tableType +) { + return StoreTableSettingsEntry( + IdEx(node.GetRule_an_id2(), *this), + node.GetRule_table_setting_value3(), + tableSettings, + tableType, + true + ); +} + +bool TSqlQuery::AlterTableSetTableSetting( + const TRule_alter_table_set_table_setting_compat& node, TTableSettings& tableSettings, ETableType tableType +) { + const auto storeSetting = [&](const TRule_alter_table_setting_entry& entry) { + return StoreTableSettingsEntry( + IdEx(entry.GetRule_an_id1(), *this), + entry.GetRule_table_setting_value3(), + tableSettings, + tableType, + true + ); + }; + + const auto& firstEntry = node.GetRule_alter_table_setting_entry3(); + if (!storeSetting(firstEntry)) { + return false; + } + for (const auto& block : node.GetBlock4()) { + const auto& entry = block.GetRule_alter_table_setting_entry2(); + if (!storeSetting(entry)) { + return false; + } + } + return true; +} + +bool TSqlQuery::AlterTableResetTableSetting( + const TRule_alter_table_reset_table_setting& node, TTableSettings& tableSettings, ETableType 
tableType +) { + const auto resetSetting = [&](const TRule_an_id& id) { + return ResetTableSettingsEntry(IdEx(id, *this), tableSettings, tableType); + }; + + const auto& firstEntry = node.GetRule_an_id3(); + if (!resetSetting(firstEntry)) { + return false; + } + for (const auto& block : node.GetBlock4()) { + const auto& entry = block.GetRule_an_id2(); + if (!resetSetting(entry)) { + return false; + } + } + return true; +} + +bool TSqlQuery::AlterTableAddIndex(const TRule_alter_table_add_index& node, TAlterTableParameters& params) { + if (!CreateTableIndex(node.GetRule_table_index2(), params.AddIndexes)) { + return false; + } + return true; +} + +void TSqlQuery::AlterTableDropIndex(const TRule_alter_table_drop_index& node, TAlterTableParameters& params) { + params.DropIndexes.emplace_back(IdEx(node.GetRule_an_id3(), *this)); +} + +void TSqlQuery::AlterTableRenameTo(const TRule_alter_table_rename_to& node, TAlterTableParameters& params) { + params.RenameTo = IdEx(node.GetRule_an_id_table3(), *this); +} + +void TSqlQuery::AlterTableRenameIndexTo(const TRule_alter_table_rename_index_to& node, TAlterTableParameters& params) { + auto src = IdEx(node.GetRule_an_id3(), *this); + auto dst = IdEx(node.GetRule_an_id5(), *this); + + params.RenameIndexTo = std::make_pair(src, dst); +} + +bool TSqlQuery::AlterTableAlterIndex(const TRule_alter_table_alter_index& node, TAlterTableParameters& params) { + const auto indexName = IdEx(node.GetRule_an_id3(), *this); + params.AlterIndexes.emplace_back(indexName); + TTableSettings& indexTableSettings = params.AlterIndexes.back().TableSettings; + + const auto& action = node.GetRule_alter_table_alter_index_action4(); + + switch (action.Alt_case()) { + case TRule_alter_table_alter_index_action::kAltAlterTableAlterIndexAction1: { + // SET setting value + const auto& rule = action.GetAlt_alter_table_alter_index_action1().GetRule_alter_table_set_table_setting_uncompat1(); + if (!AlterTableSetTableSetting(rule, indexTableSettings, 
params.TableType)) { + return false; + } + break; + } + case TRule_alter_table_alter_index_action::kAltAlterTableAlterIndexAction2: { + // SET (setting1 = value1, ...) + const auto& rule = action.GetAlt_alter_table_alter_index_action2().GetRule_alter_table_set_table_setting_compat1(); + if (!AlterTableSetTableSetting(rule, indexTableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_table_alter_index_action::kAltAlterTableAlterIndexAction3: { + // RESET (setting1, ...) + const auto& rule = action.GetAlt_alter_table_alter_index_action3().GetRule_alter_table_reset_table_setting1(); + if (!AlterTableResetTableSetting(rule, indexTableSettings, params.TableType)) { + return false; + } + break; + } + case TRule_alter_table_alter_index_action::ALT_NOT_SET: + AltNotImplemented("alter_table_alter_index_action", action); + return false; + } + + return true; +} + +bool TSqlQuery::AlterTableAlterColumnDropNotNull(const TRule_alter_table_alter_column_drop_not_null& node, TAlterTableParameters& params) { + TString name = Id(node.GetRule_an_id3(), *this); + const TPosition pos(Context().Pos()); + params.AlterColumns.emplace_back(pos, name, nullptr, false, TVector<TIdentifier>(), false, nullptr, TColumnSchema::ETypeOfChange::DropNotNullConstraint); + return true; +} + +bool TSqlQuery::AlterTableAddChangefeed(const TRule_alter_table_add_changefeed& node, TAlterTableParameters& params) { + TSqlExpression expr(Ctx, Mode); + return CreateChangefeed(node.GetRule_changefeed2(), expr, params.AddChangefeeds); +} + +bool TSqlQuery::AlterTableAlterChangefeed(const TRule_alter_table_alter_changefeed& node, TAlterTableParameters& params) { + params.AlterChangefeeds.emplace_back(IdEx(node.GetRule_an_id3(), *this)); + + const auto& alter = node.GetRule_changefeed_alter_settings4(); + switch (alter.Alt_case()) { + case TRule_changefeed_alter_settings::kAltChangefeedAlterSettings1: { + // DISABLE + params.AlterChangefeeds.back().Disable = true; + break; + } + case 
TRule_changefeed_alter_settings::kAltChangefeedAlterSettings2: { + // SET + const auto& rule = alter.GetAlt_changefeed_alter_settings2().GetRule_changefeed_settings3(); + TSqlExpression expr(Ctx, Mode); + if (!ChangefeedSettings(rule, expr, params.AlterChangefeeds.back().Settings, true)) { + return false; + } + break; + } + + case TRule_changefeed_alter_settings::ALT_NOT_SET: + AltNotImplemented("changefeed_alter_settings", alter); + return false; + } + + return true; +} + +void TSqlQuery::AlterTableDropChangefeed(const TRule_alter_table_drop_changefeed& node, TAlterTableParameters& params) { + params.DropChangefeeds.emplace_back(IdEx(node.GetRule_an_id3(), *this)); +} + +TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success) { + success = false; + const TString& prefix = OptIdPrefixAsStr(stmt.GetRule_opt_id_prefix_or_type2(), *this); + const TString& lowerPrefix = to_lower(prefix); + const TString pragma(Id(stmt.GetRule_an_id3(), *this)); + TString normalizedPragma(pragma); + TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), normalizedPragma); + if (!normalizeError.Empty()) { + Error() << normalizeError->GetMessage(); + Ctx.IncrementMonCounter("sql_errors", "NormalizePragmaError"); + return {}; + } + + TVector<TDeferredAtom> values; + TVector<const TRule_pragma_value*> pragmaValues; + bool pragmaValueDefault = false; + if (stmt.GetBlock4().HasAlt1()) { + pragmaValues.push_back(&stmt.GetBlock4().GetAlt1().GetRule_pragma_value2()); + } + else if (stmt.GetBlock4().HasAlt2()) { + pragmaValues.push_back(&stmt.GetBlock4().GetAlt2().GetRule_pragma_value2()); + for (auto& additionalValue : stmt.GetBlock4().GetAlt2().GetBlock3()) { + pragmaValues.push_back(&additionalValue.GetRule_pragma_value2()); + } + } + + const bool withConfigure = prefix || normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "udf"; + static const THashSet<TStringBuf> lexicalScopePragmas = { + "classicdivision", + "strictjoinkeytypes", + 
"disablestrictjoinkeytypes", + "checkedops", + "unicodeliterals", + "disableunicodeliterals", + "warnuntypedstringliterals", + "disablewarnuntypedstringliterals", + }; + const bool hasLexicalScope = withConfigure || lexicalScopePragmas.contains(normalizedPragma); + const bool withFileAlias = normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "library" || normalizedPragma == "udf"; + for (auto pragmaValue : pragmaValues) { + if (pragmaValue->HasAlt_pragma_value3()) { + auto value = Token(pragmaValue->GetAlt_pragma_value3().GetToken1()); + auto parsed = StringContentOrIdContent(Ctx, Ctx.Pos(), value); + if (!parsed) { + return {}; + } + + TString prefix; + if (withFileAlias && (values.size() == 0)) { + prefix = Ctx.Settings.FileAliasPrefix; + } + + values.push_back(TDeferredAtom(Ctx.Pos(), prefix + parsed->Content)); + } + else if (pragmaValue->HasAlt_pragma_value2() + && pragmaValue->GetAlt_pragma_value2().GetRule_id1().HasAlt_id2() + && "default" == to_lower(Id(pragmaValue->GetAlt_pragma_value2().GetRule_id1(), *this))) + { + pragmaValueDefault = true; + } + else if (withConfigure && pragmaValue->HasAlt_pragma_value5()) { + TString bindName; + if (!NamedNodeImpl(pragmaValue->GetAlt_pragma_value5().GetRule_bind_parameter1(), bindName, *this)) { + return {}; + } + auto namedNode = GetNamedNode(bindName); + if (!namedNode) { + return {}; + } + + TString prefix; + if (withFileAlias && (values.size() == 0)) { + prefix = Ctx.Settings.FileAliasPrefix; + } + + TDeferredAtom atom; + MakeTableFromExpression(Ctx.Pos(), Ctx, namedNode, atom, prefix); + values.push_back(atom); + } else { + Error() << "Expected string" << (withConfigure ? 
", named parameter" : "") << " or 'default' keyword as pragma value for pragma: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } + + if (prefix.empty()) { + if (!TopLevel && !hasLexicalScope) { + Error() << "This pragma '" << pragma << "' is not allowed to be used in actions or subqueries"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return{}; + } + + if (normalizedPragma == "refselect") { + Ctx.PragmaRefSelect = true; + Ctx.IncrementMonCounter("sql_pragma", "RefSelect"); + } else if (normalizedPragma == "sampleselect") { + Ctx.PragmaSampleSelect = true; + Ctx.IncrementMonCounter("sql_pragma", "SampleSelect"); + } else if (normalizedPragma == "allowdotinalias") { + Ctx.PragmaAllowDotInAlias = true; + Ctx.IncrementMonCounter("sql_pragma", "AllowDotInAlias"); + } else if (normalizedPragma == "udf") { + if ((values.size() != 1 && values.size() != 2) || pragmaValueDefault) { + Error() << "Expected file alias as pragma value"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + if (Ctx.Settings.FileAliasPrefix) { + if (values.size() == 1) { + values.emplace_back(TDeferredAtom(Ctx.Pos(), "")); + } + + TString prefix; + if (!values[1].GetLiteral(prefix, Ctx)) { + Error() << "Expected literal UDF module prefix in views"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + values[1] = TDeferredAtom(Ctx.Pos(), Ctx.Settings.FileAliasPrefix + prefix); + } + + Ctx.IncrementMonCounter("sql_pragma", "udf"); + success = true; + return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "ImportUdfs", values, false); + } else if (normalizedPragma == "packageversion") { + if (values.size() != 2 || pragmaValueDefault) { + Error() << "Expected package name and version"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + ui32 version = 0; + TString versionString; + TString packageName; + if (!values[0].GetLiteral(packageName, Ctx) || 
!values[1].GetLiteral(versionString, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + if (!PackageVersionFromString(versionString, version)) { + Error() << "Unable to parse package version, possible values 0, 1, draft, release"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.SetPackageVersion(packageName, version); + Ctx.IncrementMonCounter("sql_pragma", "PackageVersion"); + success = true; + return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "SetPackageVersion", TVector<TDeferredAtom>{ values[0], TDeferredAtom(values[1].Build()->GetPos(), ToString(version)) }, false); + } else if (normalizedPragma == "file") { + if (values.size() < 2U || values.size() > 3U || pragmaValueDefault) { + Error() << "Expected file alias, url and optional token name as pragma values"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.IncrementMonCounter("sql_pragma", "file"); + success = true; + return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "AddFileByUrl", values, false); + } else if (normalizedPragma == "fileoption") { + if (values.size() < 3U) { + Error() << "Expected file alias, option key and value"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.IncrementMonCounter("sql_pragma", "FileOption"); + success = true; + return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "SetFileOption", values, false); + } else if (normalizedPragma == "folder") { + if (values.size() < 2U || values.size() > 3U || pragmaValueDefault) { + Error() << "Expected folder alias, url and optional token name as pragma values"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.IncrementMonCounter("sql_pragma", "folder"); + success = true; + return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "AddFolderByUrl", values, false); + } else if (normalizedPragma == "library") { + if (values.size() < 1) { + Error() 
<< "Expected non-empty file alias"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return{}; + } + if (values.size() > 3) { + Error() << "Expected file alias and optional url and token name as pragma values"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return{}; + } + + TString alias; + if (!values.front().GetLiteral(alias, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return{}; + } + + TContext::TLibraryStuff library; + std::get<TPosition>(library) = values.front().Build()->GetPos(); + if (values.size() > 1) { + auto& first = std::get<1U>(library); + first.emplace(); + first->second = values[1].Build()->GetPos(); + if (!values[1].GetLiteral(first->first, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return{}; + } + + TSet<TString> names; + SubstParameters(first->first, Nothing(), &names); + for (const auto& name : names) { + auto namedNode = GetNamedNode(name); + if (!namedNode) { + return{}; + } + } + if (values.size() > 2) { + auto& second = std::get<2U>(library); + second.emplace(); + second->second = values[2].Build()->GetPos(); + if (!values[2].GetLiteral(second->first, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return{}; + } + } + } + + Ctx.Libraries[alias] = std::move(library); + Ctx.IncrementMonCounter("sql_pragma", "library"); + } else if (normalizedPragma == "package") { + if (values.size() < 2U || values.size() > 3U) { + Error() << "Expected package name, url and optional token name as pragma values"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + TString packageName; + if (!values.front().GetLiteral(packageName, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + TContext::TPackageStuff package; + std::get<TPosition>(package) = values.front().Build()->GetPos(); + + auto fillLiteral = [&](auto& literal, size_t index) { + if (values.size() <= index) { + return true; + } + + constexpr 
bool optional = std::is_base_of_v< + std::optional<TContext::TLiteralWithPosition>, + std::decay_t<decltype(literal)> + >; + + TContext::TLiteralWithPosition* literalPtr; + + if constexpr (optional) { + literal.emplace(); + literalPtr = &*literal; + } else { + literalPtr = &literal; + } + + literalPtr->second = values[index].Build()->GetPos(); + + if (!values[index].GetLiteral(literalPtr->first, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return false; + } + + return true; + }; + + // fill url + auto& urlLiteral = std::get<1U>(package); + if (!fillLiteral(urlLiteral, 1U)) { + return {}; + } + + TSet<TString> names; + SubstParameters(urlLiteral.first, Nothing(), &names); + for (const auto& name : names) { + auto namedNode = GetNamedNode(name); + if (!namedNode) { + return {}; + } + } + + // fill token + if (!fillLiteral(std::get<2U>(package), 2U)) { + return {}; + } + + Ctx.Packages[packageName] = std::move(package); + Ctx.IncrementMonCounter("sql_pragma", "package"); + } else if (normalizedPragma == "overridelibrary") { + if (values.size() != 1U) { + Error() << "Expected override library alias as pragma value"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + TString alias; + if (!values.front().GetLiteral(alias, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + TContext::TOverrideLibraryStuff overrideLibrary; + std::get<TPosition>(overrideLibrary) = values.front().Build()->GetPos(); + + Ctx.OverrideLibraries[alias] = std::move(overrideLibrary); + Ctx.IncrementMonCounter("sql_pragma", "overridelibrary"); + } else if (normalizedPragma == "directread") { + Ctx.PragmaDirectRead = true; + Ctx.IncrementMonCounter("sql_pragma", "DirectRead"); + } else if (normalizedPragma == "equijoin") { + Ctx.IncrementMonCounter("sql_pragma", "EquiJoin"); + } else if (normalizedPragma == "autocommit") { + Ctx.PragmaAutoCommit = true; + Ctx.IncrementMonCounter("sql_pragma", "AutoCommit"); + } 
else if (normalizedPragma == "usetableprefixforeach") { + Ctx.PragmaUseTablePrefixForEach = true; + Ctx.IncrementMonCounter("sql_pragma", "UseTablePrefixForEach"); + } else if (normalizedPragma == "tablepathprefix") { + TString value; + TMaybe<TString> arg; + + if (values.size() == 1 || values.size() == 2) { + if (!values.front().GetLiteral(value, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + if (values.size() == 2) { + arg = value; + if (!values.back().GetLiteral(value, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } + + if (!Ctx.SetPathPrefix(value, arg)) { + return {}; + } + } else { + Error() << "Expected path prefix or tuple of (Provider, PathPrefix) or" + << " (Cluster, PathPrefix) as pragma value"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.IncrementMonCounter("sql_pragma", "PathPrefix"); + } else if (normalizedPragma == "groupbylimit") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaGroupByLimit)) { + Error() << "Expected unsigned integer literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.IncrementMonCounter("sql_pragma", "GroupByLimit"); + } else if (normalizedPragma == "groupbycubelimit") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaGroupByCubeLimit)) { + Error() << "Expected unsigned integer literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.IncrementMonCounter("sql_pragma", "GroupByCubeLimit"); + } else if (normalizedPragma == "simplecolumns") { + Ctx.SimpleColumns = true; + Ctx.IncrementMonCounter("sql_pragma", "SimpleColumns"); + } else if (normalizedPragma == "disablesimplecolumns") { + Ctx.SimpleColumns = false; + Ctx.IncrementMonCounter("sql_pragma", 
"DisableSimpleColumns"); + } else if (normalizedPragma == "coalescejoinkeysonqualifiedall") { + Ctx.CoalesceJoinKeysOnQualifiedAll = true; + Ctx.IncrementMonCounter("sql_pragma", "CoalesceJoinKeysOnQualifiedAll"); + } else if (normalizedPragma == "disablecoalescejoinkeysonqualifiedall") { + Ctx.CoalesceJoinKeysOnQualifiedAll = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableCoalesceJoinKeysOnQualifiedAll"); + } else if (normalizedPragma == "resultrowslimit") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.ResultRowsLimit)) { + Error() << "Expected unsigned integer literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.IncrementMonCounter("sql_pragma", "ResultRowsLimit"); + } else if (normalizedPragma == "resultsizelimit") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.ResultSizeLimit)) { + Error() << "Expected unsigned integer literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.IncrementMonCounter("sql_pragma", "ResultSizeLimit"); + } else if (normalizedPragma == "warning") { + if (values.size() != 2U || values.front().Empty() || values.back().Empty()) { + Error() << "Expected arguments <action>, <issueId> for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + TString action; + TString codePattern; + if (!values[0].GetLiteral(action, Ctx) || !values[1].GetLiteral(codePattern, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + TWarningRule rule; + TString parseError; + auto parseResult = TWarningRule::ParseFrom(codePattern, action, rule, parseError); + switch (parseResult) { + case TWarningRule::EParseResult::PARSE_OK: + break; + case TWarningRule::EParseResult::PARSE_PATTERN_FAIL: + case 
TWarningRule::EParseResult::PARSE_ACTION_FAIL: + Ctx.Error() << parseError; + return {}; + default: + Y_ENSURE(false, "Unknown parse result"); + } + + Ctx.WarningPolicy.AddRule(rule); + if (rule.GetPattern() == "*" && rule.GetAction() == EWarningAction::ERROR) { + // Keep 'unused symbol' warning as warning unless explicitly set to error + Ctx.SetWarningPolicyFor(TIssuesIds::YQL_UNUSED_SYMBOL, EWarningAction::DEFAULT); + } + + Ctx.IncrementMonCounter("sql_pragma", "warning"); + } else if (normalizedPragma == "greetings") { + if (values.size() > 1) { + Error() << "Multiple arguments are not expected for " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + if (values.empty()) { + values.emplace_back(TDeferredAtom(Ctx.Pos(), "Hello, world! And best wishes from the YQL Team!")); + } + + TString arg; + if (!values.front().GetLiteral(arg, Ctx)) { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.Info(Ctx.Pos()) << arg; + } else if (normalizedPragma == "warningmsg") { + if (values.size() != 1 || !values[0].GetLiteral()) { + Error() << "Expected string literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_PRAGMA_WARNING_MSG) << *values[0].GetLiteral(); + } else if (normalizedPragma == "errormsg") { + if (values.size() != 1 || !values[0].GetLiteral()) { + Error() << "Expected string literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.Error(Ctx.Pos()) << *values[0].GetLiteral(); + } else if (normalizedPragma == "classicdivision") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.Scoped->PragmaClassicDivision)) { + Error() << "Expected boolean literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + 
Ctx.IncrementMonCounter("sql_pragma", "ClassicDivision"); + } else if (normalizedPragma == "checkedops") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.Scoped->PragmaCheckedOps)) { + Error() << "Expected boolean literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.IncrementMonCounter("sql_pragma", "CheckedOps"); + } else if (normalizedPragma == "disableunordered") { + Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_PRAGMA) + << "Use of deprecated DisableUnordered pragma. It will be dropped soon"; + } else if (normalizedPragma == "pullupflatmapoverjoin") { + Ctx.PragmaPullUpFlatMapOverJoin = true; + Ctx.IncrementMonCounter("sql_pragma", "PullUpFlatMapOverJoin"); + } else if (normalizedPragma == "disablepullupflatmapoverjoin") { + Ctx.PragmaPullUpFlatMapOverJoin = false; + Ctx.IncrementMonCounter("sql_pragma", "DisablePullUpFlatMapOverJoin"); + } else if (normalizedPragma == "filterpushdownoverjoinoptionalside") { + Ctx.FilterPushdownOverJoinOptionalSide = true; + Ctx.IncrementMonCounter("sql_pragma", "FilterPushdownOverJoinOptionalSide"); + } else if (normalizedPragma == "disablefilterpushdownoverjoinoptionalside") { + Ctx.FilterPushdownOverJoinOptionalSide = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableFilterPushdownOverJoinOptionalSide"); + } else if (normalizedPragma == "rotatejointree") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.RotateJoinTree)) { + Error() << "Expected boolean literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } else if (normalizedPragma == "allowunnamedcolumns") { + Ctx.WarnUnnamedColumns = false; + Ctx.IncrementMonCounter("sql_pragma", "AllowUnnamedColumns"); + } else if (normalizedPragma == "warnunnamedcolumns") { + Ctx.WarnUnnamedColumns = true; + 
Ctx.IncrementMonCounter("sql_pragma", "WarnUnnamedColumns"); + } else if (normalizedPragma == "discoverymode") { + Ctx.DiscoveryMode = true; + Ctx.IncrementMonCounter("sql_pragma", "DiscoveryMode"); + } else if (normalizedPragma == "enablesystemcolumns") { + if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.EnableSystemColumns)) { + Error() << "Expected boolean literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.IncrementMonCounter("sql_pragma", "EnableSystemColumns"); + } else if (normalizedPragma == "ansiinforemptyornullableitemscollections") { + Ctx.AnsiInForEmptyOrNullableItemsCollections = true; + Ctx.IncrementMonCounter("sql_pragma", "AnsiInForEmptyOrNullableItemsCollections"); + } else if (normalizedPragma == "disableansiinforemptyornullableitemscollections") { + Ctx.AnsiInForEmptyOrNullableItemsCollections = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiInForEmptyOrNullableItemsCollections"); + } else if (normalizedPragma == "dqengine" || normalizedPragma == "blockengine") { + Ctx.IncrementMonCounter("sql_pragma", "DqEngine"); + if (values.size() != 1 || !values[0].GetLiteral() + || ! (*values[0].GetLiteral() == "disable" || *values[0].GetLiteral() == "auto" || *values[0].GetLiteral() == "force")) + { + Error() << "Expected `disable|auto|force' argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + const bool isDqEngine = normalizedPragma == "dqengine"; + auto& enable = isDqEngine ? Ctx.DqEngineEnable : Ctx.BlockEngineEnable; + auto& force = isDqEngine ? 
Ctx.DqEngineForce : Ctx.BlockEngineForce; + if (*values[0].GetLiteral() == "disable") { + enable = false; + force = false; + } else if (*values[0].GetLiteral() == "force") { + enable = true; + force = true; + } else if (*values[0].GetLiteral() == "auto") { + enable = true; + force = false; + } + } else if (normalizedPragma == "ansirankfornullablekeys") { + Ctx.AnsiRankForNullableKeys = true; + Ctx.IncrementMonCounter("sql_pragma", "AnsiRankForNullableKeys"); + } else if (normalizedPragma == "disableansirankfornullablekeys") { + Ctx.AnsiRankForNullableKeys = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiRankForNullableKeys"); + } else if (normalizedPragma == "ansiorderbylimitinunionall") { + Ctx.IncrementMonCounter("sql_pragma", "AnsiOrderByLimitInUnionAll"); + } else if (normalizedPragma == "disableansiorderbylimitinunionall") { + Error() << "DisableAnsiOrderByLimitInUnionAll pragma is deprecated and no longer supported"; + Ctx.IncrementMonCounter("sql_errors", "DeprecatedPragma"); + return {}; + } else if (normalizedPragma == "ansioptionalas") { + Ctx.AnsiOptionalAs = true; + Ctx.IncrementMonCounter("sql_pragma", "AnsiOptionalAs"); + } else if (normalizedPragma == "disableansioptionalas") { + Ctx.AnsiOptionalAs = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiOptionalAs"); + } else if (normalizedPragma == "warnonansialiasshadowing") { + Ctx.WarnOnAnsiAliasShadowing = true; + Ctx.IncrementMonCounter("sql_pragma", "WarnOnAnsiAliasShadowing"); + } else if (normalizedPragma == "disablewarnonansialiasshadowing") { + Ctx.WarnOnAnsiAliasShadowing = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableWarnOnAnsiAliasShadowing"); + } else if (normalizedPragma == "regexusere2") { + if (values.size() != 1U || !values.front().GetLiteral() || !TryFromString(*values.front().GetLiteral(), Ctx.PragmaRegexUseRe2)) { + Error() << "Expected 'true' or 'false' for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + 
Ctx.IncrementMonCounter("sql_pragma", "RegexUseRe2"); + } else if (normalizedPragma == "jsonqueryreturnsjsondocument") { + Ctx.JsonQueryReturnsJsonDocument = true; + Ctx.IncrementMonCounter("sql_pragma", "JsonQueryReturnsJsonDocument"); + } else if (normalizedPragma == "disablejsonqueryreturnsjsondocument") { + Ctx.JsonQueryReturnsJsonDocument = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableJsonQueryReturnsJsonDocument"); + } else if (normalizedPragma == "orderedcolumns") { + Ctx.OrderedColumns = true; + Ctx.IncrementMonCounter("sql_pragma", "OrderedColumns"); + } else if (normalizedPragma == "disableorderedcolumns") { + Ctx.OrderedColumns = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableOrderedColumns"); + } else if (normalizedPragma == "positionalunionall") { + Ctx.PositionalUnionAll = true; + // PositionalUnionAll implies OrderedColumns + Ctx.OrderedColumns = true; + Ctx.IncrementMonCounter("sql_pragma", "PositionalUnionAll"); + } else if (normalizedPragma == "pqreadby") { + if (values.size() != 1 || !values[0].GetLiteral()) { + Error() << "Expected string literal as a single argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + // special guard to raise error on situation: + // use cluster1; + // pragma PqReadPqBy="cluster2"; + const TString* currentClusterLiteral = Ctx.Scoped->CurrCluster.GetLiteral(); + if (currentClusterLiteral && *values[0].GetLiteral() != "dq" && *currentClusterLiteral != *values[0].GetLiteral()) { + Error() << "Cluster in PqReadPqBy pragma differs from cluster specified in USE statement: " << *values[0].GetLiteral() << " != " << *currentClusterLiteral; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + Ctx.PqReadByRtmrCluster = *values[0].GetLiteral(); + Ctx.IncrementMonCounter("sql_pragma", "PqReadBy"); + } else if (normalizedPragma == "bogousstaringroupbyoverjoin") { + Ctx.BogousStarInGroupByOverJoin = true; + 
Ctx.IncrementMonCounter("sql_pragma", "BogousStarInGroupByOverJoin"); + } else if (normalizedPragma == "strictjoinkeytypes") { + Ctx.Scoped->StrictJoinKeyTypes = true; + Ctx.IncrementMonCounter("sql_pragma", "StrictJoinKeyTypes"); + } else if (normalizedPragma == "disablestrictjoinkeytypes") { + Ctx.Scoped->StrictJoinKeyTypes = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableStrictJoinKeyTypes"); + } else if (normalizedPragma == "unicodeliterals") { + Ctx.Scoped->UnicodeLiterals = true; + Ctx.IncrementMonCounter("sql_pragma", "UnicodeLiterals"); + } else if (normalizedPragma == "disableunicodeliterals") { + Ctx.Scoped->UnicodeLiterals = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableUnicodeLiterals"); + } else if (normalizedPragma == "warnuntypedstringliterals") { + Ctx.Scoped->WarnUntypedStringLiterals = true; + Ctx.IncrementMonCounter("sql_pragma", "WarnUntypedStringLiterals"); + } else if (normalizedPragma == "disablewarnuntypedstringliterals") { + Ctx.Scoped->WarnUntypedStringLiterals = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableWarnUntypedStringLiterals"); + } else if (normalizedPragma == "unorderedsubqueries") { + Ctx.UnorderedSubqueries = true; + Ctx.IncrementMonCounter("sql_pragma", "UnorderedSubqueries"); + } else if (normalizedPragma == "disableunorderedsubqueries") { + Ctx.UnorderedSubqueries = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableUnorderedSubqueries"); + } else if (normalizedPragma == "datawatermarks") { + if (values.size() != 1 || !values[0].GetLiteral() + || ! 
(*values[0].GetLiteral() == "enable" || *values[0].GetLiteral() == "disable")) + { + Error() << "Expected `enable|disable' argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + if (*values[0].GetLiteral() == "enable") { + Ctx.PragmaDataWatermarks = true; + } else if (*values[0].GetLiteral() == "disable") { + Ctx.PragmaDataWatermarks = false; + } + + Ctx.IncrementMonCounter("sql_pragma", "DataWatermarks"); + } else if (normalizedPragma == "flexibletypes") { + Ctx.FlexibleTypes = true; + Ctx.IncrementMonCounter("sql_pragma", "FlexibleTypes"); + } else if (normalizedPragma == "disableflexibletypes") { + Ctx.FlexibleTypes = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableFlexibleTypes"); + } else if (normalizedPragma == "ansicurrentrow") { + Ctx.AnsiCurrentRow = true; + Ctx.IncrementMonCounter("sql_pragma", "AnsiCurrentRow"); + } else if (normalizedPragma == "disableansicurrentrow") { + Ctx.AnsiCurrentRow = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiCurrentRow"); + } else if (normalizedPragma == "emitaggapply") { + Ctx.EmitAggApply = true; + Ctx.IncrementMonCounter("sql_pragma", "EmitAggApply"); + } else if (normalizedPragma == "disableemitaggapply") { + Ctx.EmitAggApply = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableEmitAggApply"); + } else if (normalizedPragma == "useblocks") { + Ctx.UseBlocks = true; + Ctx.IncrementMonCounter("sql_pragma", "UseBlocks"); + } else if (normalizedPragma == "disableuseblocks") { + Ctx.UseBlocks = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableUseBlocks"); + } else if (normalizedPragma == "ansilike") { + Ctx.AnsiLike = true; + Ctx.IncrementMonCounter("sql_pragma", "AnsiLike"); + } else if (normalizedPragma == "disableansilike") { + Ctx.AnsiLike = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiLike"); + } else if (normalizedPragma == "unorderedresult") { + Ctx.UnorderedResult = true; + Ctx.IncrementMonCounter("sql_pragma", 
"UnorderedResult"); + } else if (normalizedPragma == "disableunorderedresult") { + Ctx.UnorderedResult = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableUnorderedResult"); + } else if (normalizedPragma == "featurer010") { + if (values.size() == 1 && values[0].GetLiteral()) { + const auto& value = *values[0].GetLiteral(); + if ("prototype" == value) + Ctx.FeatureR010 = true; + else { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } + else { + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + Ctx.IncrementMonCounter("sql_pragma", "FeatureR010"); + } else if (normalizedPragma == "compactgroupby") { + Ctx.CompactGroupBy = true; + Ctx.IncrementMonCounter("sql_pragma", "CompactGroupBy"); + } else if (normalizedPragma == "disablecompactgroupby") { + Ctx.CompactGroupBy = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableCompactGroupBy"); + } else if (normalizedPragma == "costbasedoptimizer") { + Ctx.IncrementMonCounter("sql_pragma", "CostBasedOptimizer"); + if (values.size() == 1 && values[0].GetLiteral()) { + Ctx.CostBasedOptimizer = to_lower(*values[0].GetLiteral()); + } + if (values.size() != 1 || !values[0].GetLiteral() + || ! 
(Ctx.CostBasedOptimizer == "disable" || Ctx.CostBasedOptimizer == "pg" || Ctx.CostBasedOptimizer == "native")) + { + Error() << "Expected `disable|pg|native' argument for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } else if (normalizedPragma == "compactnamedexprs") { + Ctx.CompactNamedExprs = true; + Ctx.IncrementMonCounter("sql_pragma", "CompactNamedExprs"); + } else if (normalizedPragma == "disablecompactnamedexprs") { + Ctx.CompactNamedExprs = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableCompactNamedExprs"); + } else if (normalizedPragma == "validateunusedexprs") { + Ctx.ValidateUnusedExprs = true; + Ctx.IncrementMonCounter("sql_pragma", "ValidateUnusedExprs"); + } else if (normalizedPragma == "disablevalidateunusedexprs") { + Ctx.ValidateUnusedExprs = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableValidateUnusedExprs"); + } else if (normalizedPragma == "ansiimplicitcrossjoin") { + Ctx.AnsiImplicitCrossJoin = true; + Ctx.IncrementMonCounter("sql_pragma", "AnsiImplicitCrossJoin"); + } else if (normalizedPragma == "disableansiimplicitcrossjoin") { + Ctx.AnsiImplicitCrossJoin = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiImplicitCrossJoin"); + } else if (normalizedPragma == "distinctoverwindow") { + Ctx.DistinctOverWindow = true; + Ctx.IncrementMonCounter("sql_pragma", "DistinctOverWindow"); + } else if (normalizedPragma == "disabledistinctoverwindow") { + Ctx.DistinctOverWindow = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableDistinctOverWindow"); + } else { + Error() << "Unknown pragma: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "UnknownPragma"); + return {}; + } + } else { + if (lowerPrefix == "yson") { + if (!TopLevel) { + Error() << "This pragma '" << pragma << "' is not allowed to be used in actions"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + if (normalizedPragma == "fast") { + Ctx.Warning(Ctx.Pos(), 
TIssuesIds::YQL_DEPRECATED_PRAGMA) + << "Use of deprecated yson.Fast pragma. It will be dropped soon"; + success = true; + return {}; + } else if (normalizedPragma == "autoconvert") { + Ctx.PragmaYsonAutoConvert = true; + success = true; + return {}; + } else if (normalizedPragma == "strict") { + if (values.size() == 0U) { + Ctx.PragmaYsonStrict = true; + success = true; + } else if (values.size() == 1U && values.front().GetLiteral() && TryFromString(*values.front().GetLiteral(), Ctx.PragmaYsonStrict)) { + success = true; + } else { + Error() << "Expected 'true', 'false' or no parameter for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + } + return {}; + } else if (normalizedPragma == "disablestrict") { + if (values.size() == 0U) { + Ctx.PragmaYsonStrict = false; + success = true; + return {}; + } + bool pragmaYsonDisableStrict; + if (values.size() == 1U && values.front().GetLiteral() && TryFromString(*values.front().GetLiteral(), pragmaYsonDisableStrict)) { + Ctx.PragmaYsonStrict = !pragmaYsonDisableStrict; + success = true; + } else { + Error() << "Expected 'true', 'false' or no parameter for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + } + return {}; + } else if (normalizedPragma == "casttostring" || normalizedPragma == "disablecasttostring") { + const bool allow = normalizedPragma == "casttostring"; + if (values.size() == 0U) { + Ctx.YsonCastToString = allow; + success = true; + return {}; + } + bool pragmaYsonCastToString; + if (values.size() == 1U && values.front().GetLiteral() && TryFromString(*values.front().GetLiteral(), pragmaYsonCastToString)) { + Ctx.PragmaYsonStrict = allow ? 
pragmaYsonCastToString : !pragmaYsonCastToString; + success = true; + } else { + Error() << "Expected 'true', 'false' or no parameter for: " << pragma; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + } + return {}; + } else { + Error() << "Unknown pragma: '" << pragma << "'"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + + } else if (std::find(Providers.cbegin(), Providers.cend(), lowerPrefix) == Providers.cend()) { + if (!Ctx.HasCluster(prefix)) { + Error() << "Unknown pragma prefix: " << prefix << ", please use cluster name or one of provider " << + JoinRange(", ", Providers.cbegin(), Providers.cend()); + Ctx.IncrementMonCounter("sql_errors", "UnknownPragma"); + return {}; + } + } + + if (normalizedPragma != "flags" && normalizedPragma != "packageversion") { + if (values.size() > 1) { + Error() << "Expected at most one value in the pragma"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } else { + if (pragmaValueDefault || values.size() < 1) { + Error() << "Expected at least one value in the pragma"; + Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue"); + return {}; + } + } + + success = true; + Ctx.IncrementMonCounter("sql_pragma", pragma); + return BuildPragma(Ctx.Pos(), lowerPrefix, normalizedPragma, values, pragmaValueDefault); + } + success = true; + return {}; +} + +TNodePtr TSqlQuery::Build(const TRule_delete_stmt& stmt) { + TTableRef table; + if (!SimpleTableRefImpl(stmt.GetRule_simple_table_ref3(), table)) { + return nullptr; + } + + const bool isKikimr = table.Service == KikimrProviderName; + if (!isKikimr) { + Ctx.Error(GetPos(stmt.GetToken1())) << "DELETE is unsupported for " << table.Service; + return nullptr; + } + + TSourcePtr source = BuildTableSource(Ctx.Pos(), table); + + TNodePtr options = nullptr; + if (stmt.HasBlock5()) { + options = ReturningList(stmt.GetBlock5().GetRule_returning_columns_list1()); + options = options->Y(options); + } + + if (stmt.HasBlock4()) { 
+ switch (stmt.GetBlock4().Alt_case()) { + case TRule_delete_stmt_TBlock4::kAlt1: { + const auto& alt = stmt.GetBlock4().GetAlt1(); + + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression sqlExpr(Ctx, Mode); + auto whereExpr = sqlExpr.Build(alt.GetRule_expr2()); + if (!whereExpr) { + return nullptr; + } + source->AddFilter(Ctx, whereExpr); + break; + } + + case TRule_delete_stmt_TBlock4::kAlt2: { + const auto& alt = stmt.GetBlock4().GetAlt2(); + + auto values = TSqlIntoValues(Ctx, Mode).Build(alt.GetRule_into_values_source2(), "DELETE ON"); + if (!values) { + return nullptr; + } + + return BuildWriteColumns(Ctx.Pos(), Ctx.Scoped, table, EWriteColumnMode::DeleteOn, std::move(values), options); + } + + case TRule_delete_stmt_TBlock4::ALT_NOT_SET: + return nullptr; + } + } + + return BuildDelete(Ctx.Pos(), Ctx.Scoped, table, std::move(source), options); +} + +TNodePtr TSqlQuery::Build(const TRule_update_stmt& stmt) { + TTableRef table; + if (!SimpleTableRefImpl(stmt.GetRule_simple_table_ref2(), table)) { + return nullptr; + } + + const bool isKikimr = table.Service == KikimrProviderName; + + if (!isKikimr) { + Ctx.Error(GetPos(stmt.GetToken1())) << "UPDATE is unsupported for " << table.Service; + return nullptr; + } + + TNodePtr options = nullptr; + if (stmt.HasBlock4()) { + options = ReturningList(stmt.GetBlock4().GetRule_returning_columns_list1()); + options = options->Y(options); + } + + switch (stmt.GetBlock3().Alt_case()) { + case TRule_update_stmt_TBlock3::kAlt1: { + const auto& alt = stmt.GetBlock3().GetAlt1(); + TSourcePtr values = Build(alt.GetRule_set_clause_choice2()); + auto source = BuildTableSource(Ctx.Pos(), table); + + if (alt.HasBlock3()) { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression sqlExpr(Ctx, Mode); + auto whereExpr = sqlExpr.Build(alt.GetBlock3().GetRule_expr2()); + if (!whereExpr) { + return nullptr; + } + source->AddFilter(Ctx, whereExpr); + } + + return BuildUpdateColumns(Ctx.Pos(), Ctx.Scoped, 
table, std::move(values), std::move(source), options); + } + + case TRule_update_stmt_TBlock3::kAlt2: { + const auto& alt = stmt.GetBlock3().GetAlt2(); + + auto values = TSqlIntoValues(Ctx, Mode).Build(alt.GetRule_into_values_source2(), "UPDATE ON"); + if (!values) { + return nullptr; + } + + return BuildWriteColumns(Ctx.Pos(), Ctx.Scoped, table, EWriteColumnMode::UpdateOn, std::move(values), options); + } + + case TRule_update_stmt_TBlock3::ALT_NOT_SET: + return nullptr; + } +} + +TSourcePtr TSqlQuery::Build(const TRule_set_clause_choice& stmt) { + switch (stmt.Alt_case()) { + case TRule_set_clause_choice::kAltSetClauseChoice1: + return Build(stmt.GetAlt_set_clause_choice1().GetRule_set_clause_list1()); + case TRule_set_clause_choice::kAltSetClauseChoice2: + return Build(stmt.GetAlt_set_clause_choice2().GetRule_multiple_column_assignment1()); + case TRule_set_clause_choice::ALT_NOT_SET: + AltNotImplemented("set_clause_choice", stmt); + return nullptr; + } +} + +bool TSqlQuery::FillSetClause(const TRule_set_clause& node, TVector<TString>& targetList, TVector<TNodePtr>& values) { + targetList.push_back(ColumnNameAsSingleStr(*this, node.GetRule_set_target1().GetRule_column_name1())); + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression sqlExpr(Ctx, Mode); + if (!Expr(sqlExpr, values, node.GetRule_expr3())) { + return false; + } + return true; +} + +TSourcePtr TSqlQuery::Build(const TRule_set_clause_list& stmt) { + TVector<TString> targetList; + TVector<TNodePtr> values; + const TPosition pos(Ctx.Pos()); + if (!FillSetClause(stmt.GetRule_set_clause1(), targetList, values)) { + return nullptr; + } + for (auto& block: stmt.GetBlock2()) { + if (!FillSetClause(block.GetRule_set_clause2(), targetList, values)) { + return nullptr; + } + } + Y_DEBUG_ABORT_UNLESS(targetList.size() == values.size()); + return BuildUpdateValues(pos, targetList, values); +} + +TSourcePtr TSqlQuery::Build(const TRule_multiple_column_assignment& stmt) { + TVector<TString> 
targetList; + FillTargetList(*this, stmt.GetRule_set_target_list1(), targetList); + auto simpleValuesNode = stmt.GetRule_simple_values_source4(); + const TPosition pos(Ctx.Pos()); + switch (simpleValuesNode.Alt_case()) { + case TRule_simple_values_source::kAltSimpleValuesSource1: { + TVector<TNodePtr> values; + TSqlExpression sqlExpr(Ctx, Mode); + if (!ExprList(sqlExpr, values, simpleValuesNode.GetAlt_simple_values_source1().GetRule_expr_list1())) { + return nullptr; + } + return BuildUpdateValues(pos, targetList, values); + } + case TRule_simple_values_source::kAltSimpleValuesSource2: { + TSqlSelect select(Ctx, Mode); + TPosition selectPos; + auto source = select.Build(simpleValuesNode.GetAlt_simple_values_source2().GetRule_select_stmt1(), selectPos); + if (!source) { + return nullptr; + } + return BuildWriteValues(pos, "UPDATE", targetList, std::move(source)); + } + case TRule_simple_values_source::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownSimpleValuesSourceAlt"); + AltNotImplemented("simple_values_source", simpleValuesNode); + return nullptr; + } +} + +TNodePtr TSqlQuery::Build(const TSQLv1ParserAST& ast) { + if (Mode == NSQLTranslation::ESqlMode::QUERY) { + // inject externally declared named expressions + for (auto [name, type] : Ctx.Settings.DeclaredNamedExprs) { + if (name.empty()) { + Error() << "Empty names for externally declared expressions are not allowed"; + return nullptr; + } + TString varName = "$" + name; + if (IsAnonymousName(varName)) { + Error() << "Externally declared name '" << name << "' is anonymous"; + return nullptr; + } + + auto parsed = ParseType(type, *Ctx.Pool, Ctx.Issues, Ctx.Pos()); + if (!parsed) { + Error() << "Failed to parse type for externally declared name '" << name << "'"; + return nullptr; + } + + TNodePtr typeNode = BuildBuiltinFunc(Ctx, Ctx.Pos(), "ParseType", { BuildLiteralRawString(Ctx.Pos(), type) }); + PushNamedAtom(Ctx.Pos(), varName); + // no duplicates are possible at this stage + bool isWeak = 
true; + Ctx.DeclareVariable(varName, {}, typeNode, isWeak); + // avoid 'Symbol is not used' warning for externally declared expression + YQL_ENSURE(GetNamedNode(varName)); + } + } + + const auto& query = ast.GetRule_sql_query(); + TVector<TNodePtr> blocks; + Ctx.PushCurrentBlocks(&blocks); + Y_DEFER { + Ctx.PopCurrentBlocks(); + }; + if (query.Alt_case() == TRule_sql_query::kAltSqlQuery1) { + const auto& statements = query.GetAlt_sql_query1().GetRule_sql_stmt_list1(); + if (!Statement(blocks, statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2())) { + return nullptr; + } + for (auto block: statements.GetBlock3()) { + if (!Statement(blocks, block.GetRule_sql_stmt2().GetRule_sql_stmt_core2())) { + return nullptr; + } + } + } + + ui32 topLevelSelects = 0; + bool hasTailOps = false; + for (auto& block : blocks) { + if (block->SubqueryAlias()) { + continue; + } + + if (block->HasSelectResult()) { + ++topLevelSelects; + } else if (topLevelSelects) { + hasTailOps = true; + } + } + + if ((Mode == NSQLTranslation::ESqlMode::SUBQUERY || Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) && (topLevelSelects != 1 || hasTailOps)) { + Error() << "Strictly one select/process/reduce statement is expected at the end of " + << (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW ? 
"view" : "subquery"); + return nullptr; + } + + if (!Ctx.PragmaAutoCommit && Ctx.Settings.EndOfQueryCommit && IsQueryMode(Mode)) { + AddStatementToBlocks(blocks, BuildCommitClusters(Ctx.Pos())); + } + + auto result = BuildQuery(Ctx.Pos(), blocks, true, Ctx.Scoped); + WarnUnusedNodes(); + return result; +} + +TNodePtr TSqlQuery::Build(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& statements) { + if (Mode == NSQLTranslation::ESqlMode::QUERY) { + // inject externally declared named expressions + for (auto [name, type] : Ctx.Settings.DeclaredNamedExprs) { + if (name.empty()) { + Error() << "Empty names for externally declared expressions are not allowed"; + return nullptr; + } + TString varName = "$" + name; + if (IsAnonymousName(varName)) { + Error() << "Externally declared name '" << name << "' is anonymous"; + return nullptr; + } + + auto parsed = ParseType(type, *Ctx.Pool, Ctx.Issues, Ctx.Pos()); + if (!parsed) { + Error() << "Failed to parse type for externally declared name '" << name << "'"; + return nullptr; + } + + TNodePtr typeNode = BuildBuiltinFunc(Ctx, Ctx.Pos(), "ParseType", { BuildLiteralRawString(Ctx.Pos(), type) }); + PushNamedAtom(Ctx.Pos(), varName); + // no duplicates are possible at this stage + bool isWeak = true; + Ctx.DeclareVariable(varName, {}, typeNode, isWeak); + // avoid 'Symbol is not used' warning for externally declared expression + YQL_ENSURE(GetNamedNode(varName)); + } + } + + TVector<TNodePtr> blocks; + Ctx.PushCurrentBlocks(&blocks); + Y_DEFER { + Ctx.PopCurrentBlocks(); + }; + for (const auto& statement : statements) { + if (!Statement(blocks, statement)) { + return nullptr; + } + } + + ui32 topLevelSelects = 0; + bool hasTailOps = false; + for (auto& block : blocks) { + if (block->SubqueryAlias()) { + continue; + } + + if (block->HasSelectResult()) { + ++topLevelSelects; + } else if (topLevelSelects) { + hasTailOps = true; + } + } + + if ((Mode == NSQLTranslation::ESqlMode::SUBQUERY || Mode == 
NSQLTranslation::ESqlMode::LIMITED_VIEW) && (topLevelSelects != 1 || hasTailOps)) { + Error() << "Strictly one select/process/reduce statement is expected at the end of " + << (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW ? "view" : "subquery"); + return nullptr; + } + + if (!Ctx.PragmaAutoCommit && Ctx.Settings.EndOfQueryCommit && IsQueryMode(Mode)) { + AddStatementToBlocks(blocks, BuildCommitClusters(Ctx.Pos())); + } + + auto result = BuildQuery(Ctx.Pos(), blocks, true, Ctx.Scoped); + return result; +} +namespace { + + static bool BuildColumnFeatures(std::map<TString, TDeferredAtom>& result, const TRule_column_schema& columnSchema, const NYql::TPosition& pos, TSqlTranslation& translation) { + const TString columnName(Id(columnSchema.GetRule_an_id_schema1(), translation)); + TString columnType; + + const auto constraints = ColumnConstraints(columnSchema, translation); + if (!constraints) { + return false; + } + + auto& typeBind = columnSchema.GetRule_type_name_or_bind2(); + switch (typeBind.Alt_case()) { + case TRule_type_name_or_bind::kAltTypeNameOrBind1: + { + auto& typeNameOrBind = typeBind.GetAlt_type_name_or_bind1().GetRule_type_name1(); + if (typeNameOrBind.Alt_case() != TRule_type_name::kAltTypeName2) { + return false; + } + auto& alt = typeNameOrBind.GetAlt_type_name2(); + auto& block = alt.GetBlock1(); + auto& simpleType = block.GetAlt2().GetRule_type_name_simple1(); + columnType = Id(simpleType.GetRule_an_id_pure1(), translation); + if (columnType.empty()) { + return false; + } + break; + } + case TRule_type_name_or_bind::kAltTypeNameOrBind2: + return false; + case TRule_type_name_or_bind::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + result["NAME"] = TDeferredAtom(pos, columnName); + YQL_ENSURE(columnType, "Unknown column type"); + result["TYPE"] = TDeferredAtom(pos, columnType); + if (!constraints->Nullable) { + result["NOT_NULL"] = TDeferredAtom(pos, "true"); + } + return true; + } +} + +bool 
TSqlQuery::ParseTableStoreFeatures(std::map<TString, TDeferredAtom> & result, const TRule_alter_table_store_action & actions) { + switch (actions.Alt_case()) { + case TRule_alter_table_store_action::kAltAlterTableStoreAction1: { + // ADD COLUMN + const auto& addRule = actions.GetAlt_alter_table_store_action1().GetRule_alter_table_add_column1(); + if (!BuildColumnFeatures(result, addRule.GetRule_column_schema3(), Ctx.Pos(), *this)) { + return false; + } + result["ACTION"] = TDeferredAtom(Ctx.Pos(), "NEW_COLUMN"); + break; + } + case TRule_alter_table_store_action::kAltAlterTableStoreAction2: { + // DROP COLUMN + const auto& dropRule = actions.GetAlt_alter_table_store_action2().GetRule_alter_table_drop_column1(); + TString columnName = Id(dropRule.GetRule_an_id3(), *this); + if (!columnName) { + return false; + } + result["NAME"] = TDeferredAtom(Ctx.Pos(), columnName); + result["ACTION"] = TDeferredAtom(Ctx.Pos(), "DROP_COLUMN"); + break; + } + case TRule_alter_table_store_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_query.h b/yql/essentials/sql/v1/sql_query.h new file mode 100644 index 00000000000..99e1a9c4efd --- /dev/null +++ b/yql/essentials/sql/v1/sql_query.h @@ -0,0 +1,86 @@ +#pragma once + +#include "sql_translation.h" + +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> +#include <util/string/split.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TSqlQuery: public TSqlTranslation { +public: + TSqlQuery(TContext& ctx, NSQLTranslation::ESqlMode mode, bool topLevel) + : TSqlTranslation(ctx, mode) + , TopLevel(topLevel) + { + } + + TNodePtr Build(const TSQLv1ParserAST& ast); + TNodePtr Build(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& ast); + + bool Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& core); +private: + bool 
DeclareStatement(const TRule_declare_stmt& stmt); + bool ExportStatement(const TRule_export_stmt& stmt); + bool AlterTableAction(const TRule_alter_table_action& node, TAlterTableParameters& params); + bool AlterExternalTableAction(const TRule_alter_external_table_action& node, TAlterTableParameters& params); + bool AlterTableAddColumn(const TRule_alter_table_add_column& node, TAlterTableParameters& params); + bool AlterTableDropColumn(const TRule_alter_table_drop_column& node, TAlterTableParameters& params); + bool AlterTableAlterColumn(const TRule_alter_table_alter_column& node, TAlterTableParameters& params); + bool AlterTableAddFamily(const TRule_family_entry& node, TAlterTableParameters& params); + bool AlterTableAlterFamily(const TRule_alter_table_alter_column_family& node, TAlterTableParameters& params); + bool AlterTableSetTableSetting(const TRule_alter_table_set_table_setting_uncompat& node, TTableSettings& tableSettings, ETableType tableType); + bool AlterTableSetTableSetting(const TRule_alter_table_set_table_setting_compat& node, TTableSettings& tableSettings, ETableType tableType); + bool AlterTableResetTableSetting(const TRule_alter_table_reset_table_setting& node, TTableSettings& tableSettings, ETableType tableType); + bool AlterTableAddIndex(const TRule_alter_table_add_index& node, TAlterTableParameters& params); + void AlterTableDropIndex(const TRule_alter_table_drop_index& node, TAlterTableParameters& params); + void AlterTableRenameTo(const TRule_alter_table_rename_to& node, TAlterTableParameters& params); + bool AlterTableAddChangefeed(const TRule_alter_table_add_changefeed& node, TAlterTableParameters& params); + bool AlterTableAlterChangefeed(const TRule_alter_table_alter_changefeed& node, TAlterTableParameters& params); + void AlterTableDropChangefeed(const TRule_alter_table_drop_changefeed& node, TAlterTableParameters& params); + void AlterTableRenameIndexTo(const TRule_alter_table_rename_index_to& node, TAlterTableParameters& params); + bool 
AlterTableAlterIndex(const TRule_alter_table_alter_index& node, TAlterTableParameters& params); + TNodePtr PragmaStatement(const TRule_pragma_stmt& stmt, bool& success); + void AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node); + bool ParseTableStoreFeatures(std::map<TString, TDeferredAtom> & result, const TRule_alter_table_store_action & actions); + bool AlterTableAlterColumnDropNotNull(const TRule_alter_table_alter_column_drop_not_null& node, TAlterTableParameters& params); + + TNodePtr Build(const TRule_delete_stmt& stmt); + + TNodePtr Build(const TRule_update_stmt& stmt); + TSourcePtr Build(const TRule_set_clause_choice& stmt); + bool FillSetClause(const TRule_set_clause& node, TVector<TString>& targetList, TVector<TNodePtr>& values); + TSourcePtr Build(const TRule_set_clause_list& stmt); + TSourcePtr Build(const TRule_multiple_column_assignment& stmt); + + template<class TNode> + void ParseStatementName(const TNode& node, TString& internalStatementName, TString& humanStatementName) { + internalStatementName.clear(); + humanStatementName.clear(); + const auto& descr = AltDescription(node); + TVector<TString> parts; + if (!Ctx.Settings.Antlr4Parser) { + const auto pos = descr.find(": "); + Y_DEBUG_ABORT_UNLESS(pos != TString::npos); + Split(TString(descr.begin() + pos + 2, descr.end()), "_", parts); + } else { + Split(descr, "_", parts); + } + Y_DEBUG_ABORT_UNLESS(parts.size() > 1); + parts.pop_back(); + for (auto& part: parts) { + part.to_upper(0, 1); + internalStatementName += part; + if (!humanStatementName.empty()) { + humanStatementName += ' '; + } + humanStatementName += to_upper(part); + } + } + + const bool TopLevel; +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_select.cpp b/yql/essentials/sql/v1/sql_select.cpp new file mode 100644 index 00000000000..4a06f8e51b1 --- /dev/null +++ b/yql/essentials/sql/v1/sql_select.cpp @@ -0,0 +1,1470 @@ +#include "sql_select.h" +#include "sql_call_expr.h" +#include 
"sql_expression.h" +#include "sql_group_by.h" +#include "sql_values.h" +#include "sql_match_recognize.h" + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +namespace { + +bool IsColumnsOnly(const TVector<TSortSpecificationPtr>& container) { + for (const auto& elem: container) { + if (!elem->OrderExpr->GetColumnName()) { + return false; + } + } + return true; +} + +bool CollectJoinLinkSettings(TPosition pos, TJoinLinkSettings& linkSettings, TContext& ctx) { + linkSettings = {}; + auto hints = ctx.PullHintForToken(pos); + for (const auto& hint: hints) { + const auto canonizedName = to_lower(hint.Name); + auto newStrategy = TJoinLinkSettings::EStrategy::Default; + if (canonizedName == "merge") { + newStrategy = TJoinLinkSettings::EStrategy::SortedMerge; + } else if (canonizedName == "streamlookup") { + newStrategy = TJoinLinkSettings::EStrategy::StreamLookup; + } else if (canonizedName == "map") { + newStrategy = TJoinLinkSettings::EStrategy::ForceMap; + } else if (canonizedName == "grace") { + newStrategy = TJoinLinkSettings::EStrategy::ForceGrace; + } else if (canonizedName == "compact") { + linkSettings.Compact = true; + continue; + } else { + ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Unsupported join hint: " << hint.Name; + } + + if (TJoinLinkSettings::EStrategy::Default == linkSettings.Strategy) { + linkSettings.Strategy = newStrategy; + } else if (newStrategy == linkSettings.Strategy) { + ctx.Error() << "Duplicate join strategy hint"; + return false; + } else { + ctx.Error() << "Conflicting join strategy hints"; + return false; + } + } + return true; +} + +} // namespace + +bool TSqlSelect::JoinOp(ISource* join, const TRule_join_source::TBlock3& block, TMaybe<TPosition> anyPos) { + // block: (join_op (ANY)? flatten_source join_constraint?) + // join_op: + // COMMA + // | (NATURAL)? ((LEFT (ONLY | SEMI)? | RIGHT (ONLY | SEMI)? | EXCLUSION | FULL)? (OUTER)? 
| INNER | CROSS) JOIN + //; + const auto& node = block.GetRule_join_op1(); + TString joinOp("Inner"); + TJoinLinkSettings linkSettings; + switch (node.Alt_case()) { + case TRule_join_op::kAltJoinOp1: { + joinOp = "Cross"; + if (!Ctx.AnsiImplicitCrossJoin) { + Error() << "Cartesian product of tables is disabled. Please use " + "explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin"; + return false; + } + auto alt = node.GetAlt_join_op1(); + if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken1()), linkSettings, Ctx)) { + return false; + } + Ctx.IncrementMonCounter("sql_join_operations", "CartesianProduct"); + break; + } + case TRule_join_op::kAltJoinOp2: { + auto alt = node.GetAlt_join_op2(); + if (alt.HasBlock1()) { + Ctx.IncrementMonCounter("sql_join_operations", "Natural"); + Error() << "Natural join is not implemented yet"; + return false; + } + if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken3()), linkSettings, Ctx)) { + return false; + } + switch (alt.GetBlock2().Alt_case()) { + case TRule_join_op::TAlt2::TBlock2::kAlt1: + if (alt.GetBlock2().GetAlt1().HasBlock1()) { + auto block = alt.GetBlock2().GetAlt1().GetBlock1(); + switch (block.Alt_case()) { + case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt1: + // left + joinOp = Token(block.GetAlt1().GetToken1()); + if (block.GetAlt1().HasBlock2()) { + joinOp += " " + Token(block.GetAlt1().GetBlock2().GetToken1()); + } + break; + case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt2: + // right + joinOp = Token(block.GetAlt2().GetToken1()); + if (block.GetAlt2().HasBlock2()) { + joinOp += " " + Token(block.GetAlt2().GetBlock2().GetToken1()); + } + + break; + case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt3: + // exclusion + joinOp = Token(block.GetAlt3().GetToken1()); + break; + case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt4: + // full + joinOp = Token(block.GetAlt4().GetToken1()); + break; + case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::ALT_NOT_SET: + 
Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation"); + AltNotImplemented("join_op", node); + return false; + } + } + if (alt.GetBlock2().GetAlt1().HasBlock2()) { + TString normalizedOp = alt.GetBlock2().GetAlt1().HasBlock1() ? joinOp : ""; + normalizedOp.to_upper(); + if (!(normalizedOp == "LEFT" || normalizedOp == "RIGHT" || normalizedOp == "FULL")) { + Token(alt.GetBlock2().GetAlt1().GetBlock2().GetToken1()); + Error() << "Invalid join type: " << normalizedOp << (normalizedOp.empty() ? "" : " ") << "OUTER JOIN. " + << "OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL"; + Ctx.IncrementMonCounter("sql_errors", "BadJoinType"); + return false; + } + } + break; + case TRule_join_op::TAlt2::TBlock2::kAlt2: + joinOp = Token(alt.GetBlock2().GetAlt2().GetToken1()); + break; + case TRule_join_op::TAlt2::TBlock2::kAlt3: + joinOp = Token(alt.GetBlock2().GetAlt3().GetToken1()); + break; + case TRule_join_op::TAlt2::TBlock2::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation"); + AltNotImplemented("join_op", node); + return false; + } + Ctx.IncrementMonCounter("sql_features", "Join"); + Ctx.IncrementMonCounter("sql_join_operations", joinOp); + break; + } + case TRule_join_op::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation2"); + AltNotImplemented("join_op", node); + return false; + } + joinOp = NormalizeJoinOp(joinOp); + if (linkSettings.Strategy != TJoinLinkSettings::EStrategy::Default && joinOp == "Cross") { + Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_UNUSED_HINT) << "Non-default join strategy will not be used for CROSS JOIN"; + linkSettings.Strategy = TJoinLinkSettings::EStrategy::Default; + } + + TNodePtr joinKeyExpr; + if (block.HasBlock4()) { + if (joinOp == "Cross") { + Error() << "Cross join should not have ON or USING expression"; + Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr"); + return false; + } + + joinKeyExpr = JoinExpr(join, block.GetBlock4().GetRule_join_constraint1()); + 
if (!joinKeyExpr) { + Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr"); + return false; + } + } + else { + if (joinOp != "Cross") { + Error() << "Expected ON or USING expression"; + Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr"); + return false; + } + } + + if (joinOp == "Cross" && anyPos) { + Ctx.Error(*anyPos) << "ANY should not be used with Cross JOIN"; + Ctx.IncrementMonCounter("sql_errors", "BadJoinAny"); + return false; + } + + Y_DEBUG_ABORT_UNLESS(join->GetJoin()); + join->GetJoin()->SetupJoin(joinOp, joinKeyExpr, linkSettings); + + return true; +} + +TNodePtr TSqlSelect::JoinExpr(ISource* join, const TRule_join_constraint& node) { + switch (node.Alt_case()) { + case TRule_join_constraint::kAltJoinConstraint1: { + auto& alt = node.GetAlt_join_constraint1(); + Token(alt.GetToken1()); + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + return expr.Build(alt.GetRule_expr2()); + } + case TRule_join_constraint::kAltJoinConstraint2: { + auto& alt = node.GetAlt_join_constraint2(); + Token(alt.GetToken1()); + TPosition pos(Ctx.Pos()); + TVector<TDeferredAtom> names; + if (!PureColumnOrNamedListStr(alt.GetRule_pure_column_or_named_list2(), *this, names)) { + return nullptr; + } + + Y_DEBUG_ABORT_UNLESS(join->GetJoin()); + return join->GetJoin()->BuildJoinKeys(Ctx, names); + } + case TRule_join_constraint::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownJoinConstraint"); + AltNotImplemented("join_constraint", node); + break; + } + return nullptr; +} + +bool TSqlSelect::FlattenByArg(const TString& sourceLabel, TVector<TNodePtr>& flattenByColumns, TVector<TNodePtr>& flattenByExprs, + const TRule_flatten_by_arg& node) +{ + // flatten_by_arg: + // named_column + // | LPAREN named_expr_list COMMA? 
RPAREN + // ; + + flattenByColumns.clear(); + flattenByExprs.clear(); + + TVector<TNodePtr> namedExprs; + switch (node.Alt_case()) { + case TRule_flatten_by_arg::kAltFlattenByArg1: { + TVector<TNodePtr> columns; + if (!NamedColumn(columns, node.GetAlt_flatten_by_arg1().GetRule_named_column1())) { + return false; + } + YQL_ENSURE(columns.size() == 1); + auto& column = columns.back(); + auto columnNamePtr = column->GetColumnName(); + YQL_ENSURE(columnNamePtr && *columnNamePtr); + + auto sourcePtr = column->GetSourceName(); + const bool isEmptySource = !sourcePtr || !*sourcePtr; + if (isEmptySource || *sourcePtr == sourceLabel) { + // select * from T flatten by x + // select * from T as s flatten by x + // select * from T as s flatten by s.x + flattenByColumns.emplace_back(std::move(column)); + } else { + // select * from T as s flatten by x.y as z + if (!column->GetLabel()) { + Ctx.Error(column->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed"; + return false; + } + flattenByColumns.emplace_back(BuildColumn(column->GetPos(), column->GetLabel())); + + TVector<INode::TIdPart> ids; + ids.push_back(BuildColumn(column->GetPos())); + ids.push_back(*sourcePtr); + ids.push_back(*columnNamePtr); + auto node = BuildAccess(column->GetPos(), ids, false); + node->SetLabel(column->GetLabel()); + flattenByExprs.emplace_back(std::move(node)); + } + + break; + } + case TRule_flatten_by_arg::kAltFlattenByArg2: { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + if (!NamedExprList(node.GetAlt_flatten_by_arg2().GetRule_named_expr_list2(), namedExprs) || Ctx.HasPendingErrors) { + return false; + } + for (auto& namedExprNode : namedExprs) { + YQL_ENSURE(!namedExprNode->ContentListPtr()); + + auto sourcePtr = namedExprNode->GetSourceName(); + const bool isEmptySource = !sourcePtr || !*sourcePtr; + auto columnNamePtr = namedExprNode->GetColumnName(); + if (columnNamePtr && (isEmptySource || *sourcePtr == sourceLabel)) { + namedExprNode->AssumeColumn(); + 
flattenByColumns.emplace_back(std::move(namedExprNode)); + } else { + auto nodeLabel = namedExprNode->GetLabel(); + if (!nodeLabel) { + Ctx.Error(namedExprNode->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed"; + return false; + } + flattenByColumns.emplace_back(BuildColumn(namedExprNode->GetPos(), nodeLabel)); + flattenByExprs.emplace_back(std::move(namedExprNode)); + } + } + break; + } + case TRule_flatten_by_arg::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownFlattenByArg"); + AltNotImplemented("flatten_by_arg", node); + return false; + } + return true; +} + +TSourcePtr TSqlSelect::FlattenSource(const TRule_flatten_source& node) { + auto source = NamedSingleSource(node.GetRule_named_single_source1(), true); + if (!source) { + return nullptr; + } + if (node.HasBlock2()) { + auto flatten = node.GetBlock2(); + auto flatten2 = flatten.GetBlock2(); + switch (flatten2.Alt_case()) { + case TRule_flatten_source::TBlock2::TBlock2::kAlt1: { + TString mode = "auto"; + if (flatten2.GetAlt1().HasBlock1()) { + mode = to_lower(Token(flatten2.GetAlt1().GetBlock1().GetToken1())); + } + + TVector<TNodePtr> flattenByColumns; + TVector<TNodePtr> flattenByExprs; + if (!FlattenByArg(source->GetLabel(), flattenByColumns, flattenByExprs, flatten2.GetAlt1().GetRule_flatten_by_arg3())) { + return nullptr; + } + + Ctx.IncrementMonCounter("sql_features", "FlattenByColumns"); + if (!source->AddExpressions(Ctx, flattenByColumns, EExprSeat::FlattenBy)) { + return nullptr; + } + + if (!source->AddExpressions(Ctx, flattenByExprs, EExprSeat::FlattenByExpr)) { + return nullptr; + } + + source->SetFlattenByMode(mode); + break; + } + case TRule_flatten_source::TBlock2::TBlock2::kAlt2: { + Ctx.IncrementMonCounter("sql_features", "FlattenColumns"); + source->MarkFlattenColumns(); + break; + } + + case TRule_flatten_source::TBlock2::TBlock2::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownOrdinaryNamedColumn"); + AltNotImplemented("flatten_source", 
flatten2); + } + } + return source; +} + +TSourcePtr TSqlSelect::JoinSource(const TRule_join_source& node) { + // join_source: (ANY)? flatten_source (join_op (ANY)? flatten_source join_constraint?)*; + if (node.HasBlock1() && !node.Block3Size()) { + Error() << "ANY is not allowed without JOIN"; + return nullptr; + } + + TSourcePtr source(FlattenSource(node.GetRule_flatten_source2())); + if (!source) { + return nullptr; + } + + if (node.Block3Size()) { + TPosition pos(Ctx.Pos()); + TVector<TSourcePtr> sources; + TVector<TMaybe<TPosition>> anyPositions; + TVector<bool> anyFlags; + + sources.emplace_back(std::move(source)); + anyPositions.emplace_back(node.HasBlock1() ? Ctx.TokenPosition(node.GetBlock1().GetToken1()) : TMaybe<TPosition>()); + anyFlags.push_back(bool(anyPositions.back())); + + for (auto& block: node.GetBlock3()) { + sources.emplace_back(FlattenSource(block.GetRule_flatten_source3())); + if (!sources.back()) { + Ctx.IncrementMonCounter("sql_errors", "NoJoinWith"); + return nullptr; + } + + anyPositions.emplace_back(block.HasBlock2() ? Ctx.TokenPosition(block.GetBlock2().GetToken1()) : TMaybe<TPosition>()); + anyFlags.push_back(bool(anyPositions.back())); + } + + source = BuildEquiJoin(pos, std::move(sources), std::move(anyFlags), Ctx.Scoped->StrictJoinKeyTypes); + size_t idx = 1; + for (auto& block: node.GetBlock3()) { + YQL_ENSURE(idx < anyPositions.size()); + TMaybe<TPosition> leftAny = (idx == 1) ? anyPositions[0] : Nothing(); + TMaybe<TPosition> rightAny = anyPositions[idx]; + + if (!JoinOp(source.Get(), block, leftAny ? leftAny : rightAny)) { + Ctx.IncrementMonCounter("sql_errors", "NoJoinOp"); + return nullptr; + } + ++idx; + } + } + + return source; +} + +bool TSqlSelect::SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node) { + // result_column: + // opt_id_prefix ASTERISK + // | expr ((AS an_id) | an_id_pure)? 
+ // ; + switch (node.Alt_case()) { + case TRule_result_column::kAltResultColumn1: { + auto alt = node.GetAlt_result_column1(); + + Token(alt.GetToken2()); + auto idAsteriskQualify = OptIdPrefixAsStr(alt.GetRule_opt_id_prefix1(), *this); + Ctx.IncrementMonCounter("sql_features", idAsteriskQualify ? "QualifyAsterisk" : "Asterisk"); + terms.push_back(BuildColumn(Ctx.Pos(), "*", idAsteriskQualify)); + break; + } + case TRule_result_column::kAltResultColumn2: { + auto alt = node.GetAlt_result_column2(); + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + TNodePtr term(expr.Build(alt.GetRule_expr1())); + if (!term) { + Ctx.IncrementMonCounter("sql_errors", "NoTerm"); + return false; + } + if (alt.HasBlock2()) { + TString label; + bool implicitLabel = false; + switch (alt.GetBlock2().Alt_case()) { + case TRule_result_column_TAlt2_TBlock2::kAlt1: + label = Id(alt.GetBlock2().GetAlt1().GetRule_an_id_or_type2(), *this); + break; + case TRule_result_column_TAlt2_TBlock2::kAlt2: + label = Id(alt.GetBlock2().GetAlt2().GetRule_an_id_as_compat1(), *this); + if (!Ctx.AnsiOptionalAs) { + // AS is mandatory + Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? 
Please add PRAGMA AnsiOptionalAs; for ANSI compatibility"; + return false; + } + implicitLabel = true; + break; + case TRule_result_column_TAlt2_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + term->SetLabel(label, Ctx.Pos()); + term->MarkImplicitLabel(implicitLabel); + } + terms.push_back(term); + break; + } + case TRule_result_column::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownResultColumn"); + AltNotImplemented("result_column", node); + return false; + } + return true; +} + +bool TSqlSelect::ValidateSelectColumns(const TVector<TNodePtr>& terms) { + TSet<TString> labels; + TSet<TString> asteriskSources; + for (const auto& term: terms) { + const auto& label = term->GetLabel(); + if (!Ctx.PragmaAllowDotInAlias && label.find('.') != TString::npos) { + Ctx.Error(term->GetPos()) << "Unable to use '.' in column name. Invalid column name: " << label; + return false; + } + if (!label.empty()) { + if (!labels.insert(label).second) { + Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << label; + return false; + } + } + if (term->IsAsterisk()) { + const auto& source = *term->GetSourceName(); + if (source.empty() && terms.ysize() > 1) { + Ctx.Error(term->GetPos()) << "Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias)."; + return false; + } else if (!asteriskSources.insert(source).second) { + Ctx.Error(term->GetPos()) << "Unable to use twice same quialified asterisk. Invalid source: " << source; + return false; + } + } else if (label.empty()) { + const auto* column = term->GetColumnName(); + if (column && !column->empty()) { + const auto& source = *term->GetSourceName(); + const auto usedName = source.empty() ? *column : source + '.' + *column; + if (!labels.insert(usedName).second) { + Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. 
Collision in name: " << usedName; + return false; + } + } + } + } + return true; +} + +TSourcePtr TSqlSelect::SingleSource(const TRule_single_source& node, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos, bool unorderedSubquery) { + switch (node.Alt_case()) { + case TRule_single_source::kAltSingleSource1: { + const auto& alt = node.GetAlt_single_source1(); + const auto& table_ref = alt.GetRule_table_ref1(); + + if (auto maybeSource = AsTableImpl(table_ref)) { + auto source = *maybeSource; + if (!source) { + return nullptr; + } + + return source; + } else { + TTableRef table; + if (!TableRefImpl(alt.GetRule_table_ref1(), table, unorderedSubquery)) { + return nullptr; + } + + if (table.Source) { + return table.Source; + } + + TPosition pos(Ctx.Pos()); + Ctx.IncrementMonCounter("sql_select_clusters", table.Cluster.GetLiteral() ? *table.Cluster.GetLiteral() : "unknown"); + return BuildTableSource(pos, table); + } + } + case TRule_single_source::kAltSingleSource2: { + const auto& alt = node.GetAlt_single_source2(); + Token(alt.GetToken1()); + TSqlSelect innerSelect(Ctx, Mode); + TPosition pos; + auto source = innerSelect.Build(alt.GetRule_select_stmt2(), pos); + if (!source) { + return nullptr; + } + return BuildInnerSource(pos, BuildSourceNode(pos, std::move(source)), Ctx.Scoped->CurrService, Ctx.Scoped->CurrCluster); + } + case TRule_single_source::kAltSingleSource3: { + const auto& alt = node.GetAlt_single_source3(); + TPosition pos; + return TSqlValues(Ctx, Mode).Build(alt.GetRule_values_stmt2(), pos, derivedColumns, derivedColumnsPos); + } + case TRule_single_source::ALT_NOT_SET: + AltNotImplemented("single_source", node); + Ctx.IncrementMonCounter("sql_errors", "UnknownSingleSource"); + return nullptr; + } +} + +TSourcePtr TSqlSelect::NamedSingleSource(const TRule_named_single_source& node, bool unorderedSubquery) { + // named_single_source: single_source match_recognize_clause? (((AS an_id) | an_id_as_compat) pure_column_list?)? 
(sample_clause | tablesample_clause)?; + TVector<TString> derivedColumns; + TPosition derivedColumnsPos; + if (node.HasBlock3() && node.GetBlock3().HasBlock2()) { + const auto& columns = node.GetBlock3().GetBlock2().GetRule_pure_column_list1(); + Token(columns.GetToken1()); + derivedColumnsPos = Ctx.Pos(); + + if (node.GetRule_single_source1().Alt_case() != TRule_single_source::kAltSingleSource3) { + Error() << "Derived column list is only supported for VALUES"; + return nullptr; + } + + PureColumnListStr(columns, *this, derivedColumns); + } + + auto singleSource = SingleSource(node.GetRule_single_source1(), derivedColumns, derivedColumnsPos, unorderedSubquery); + if (!singleSource) { + return nullptr; + } + if (node.HasBlock2()) { + if (node.HasBlock4()) { + //CAN/CSA-ISO/IEC 9075-2:18 7.6 <table reference> + //4) TF shall not simply contain both a <sample clause> and a <row pattern recognition clause and name>. + Ctx.Error() << "Source shall not simply contain both a sample clause and a row pattern recognition clause"; + return {}; + } + auto matchRecognizeClause = TSqlMatchRecognizeClause(Ctx, Mode); + auto matchRecognize = matchRecognizeClause.CreateBuilder(node.GetBlock2().GetRule_row_pattern_recognition_clause1()); + singleSource->SetMatchRecognize(matchRecognize); + } + if (node.HasBlock3()) { + TString label; + switch (node.GetBlock3().GetBlock1().Alt_case()) { + case TRule_named_single_source_TBlock3_TBlock1::kAlt1: + label = Id(node.GetBlock3().GetBlock1().GetAlt1().GetRule_an_id2(), *this); + break; + case TRule_named_single_source_TBlock3_TBlock1::kAlt2: + label = Id(node.GetBlock3().GetBlock1().GetAlt2().GetRule_an_id_as_compat1(), *this); + if (!Ctx.AnsiOptionalAs) { + // AS is mandatory + Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? 
Please add PRAGMA AnsiOptionalAs; for ANSI compatibility"; + return {}; + } + break; + case TRule_named_single_source_TBlock3_TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + singleSource->SetLabel(label); + } + if (node.HasBlock4()) { + ESampleClause sampleClause; + ESampleMode mode; + TSqlExpression expr(Ctx, Mode); + TNodePtr samplingRateNode; + TNodePtr samplingSeedNode; + const auto& sampleBlock = node.GetBlock4(); + TPosition pos; + switch (sampleBlock.Alt_case()) { + case TRule_named_single_source::TBlock4::kAlt1: + { + sampleClause = ESampleClause::Sample; + mode = ESampleMode::Bernoulli; + const auto& sampleExpr = sampleBlock.GetAlt1().GetRule_sample_clause1().GetRule_expr2(); + samplingRateNode = expr.Build(sampleExpr); + if (!samplingRateNode) { + return nullptr; + } + pos = GetPos(sampleBlock.GetAlt1().GetRule_sample_clause1().GetToken1()); + Ctx.IncrementMonCounter("sql_features", "SampleClause"); + } + break; + case TRule_named_single_source::TBlock4::kAlt2: + { + sampleClause = ESampleClause::TableSample; + const auto& tableSampleClause = sampleBlock.GetAlt2().GetRule_tablesample_clause1(); + const auto& modeToken = tableSampleClause.GetRule_sampling_mode2().GetToken1(); + const TCiString& token = Token(modeToken); + if (token == "system") { + mode = ESampleMode::System; + } else if (token == "bernoulli") { + mode = ESampleMode::Bernoulli; + } else { + Ctx.Error(GetPos(modeToken)) << "Unsupported sampling mode: " << token; + Ctx.IncrementMonCounter("sql_errors", "UnsupportedSamplingMode"); + return nullptr; + } + const auto& tableSampleExpr = tableSampleClause.GetRule_expr4(); + samplingRateNode = expr.Build(tableSampleExpr); + if (!samplingRateNode) { + return nullptr; + } + if (tableSampleClause.HasBlock6()) { + const auto& repeatableExpr = tableSampleClause.GetBlock6().GetRule_repeatable_clause1().GetRule_expr3(); + samplingSeedNode = expr.Build(repeatableExpr); + if (!samplingSeedNode) { + 
return nullptr; + } + } + pos = GetPos(sampleBlock.GetAlt2().GetRule_tablesample_clause1().GetToken1()); + Ctx.IncrementMonCounter("sql_features", "SampleClause"); + } + break; + case TRule_named_single_source::TBlock4::ALT_NOT_SET: + Y_ABORT("SampleClause: does not corresond to grammar changes"); + } + if (!singleSource->SetSamplingOptions(Ctx, pos, sampleClause, mode, samplingRateNode, samplingSeedNode)) { + Ctx.IncrementMonCounter("sql_errors", "IncorrectSampleClause"); + return nullptr; + } + } + return singleSource; +}

// Appends a column node for `column_name` (opt_id_prefix an_id) to keys.
// Returns false if the identifier resolves to an empty string; an error is
// emitted here only when none is pending already.
bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node) {
    const auto sourceName = OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), *this);
    const auto columnName = Id(node.GetRule_an_id2(), *this);
    if (columnName.empty()) {
        // TODO: Id() should return TMaybe<TString>
        if (!Ctx.HasPendingErrors) {
            Ctx.Error() << "Empty column name is not allowed";
        }
        return false;
    }
    keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
    return true;
}

// Same as above for the WITHOUT-list form of a column reference.
// Returns false on an empty column name.
bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_without_column_name& node) {
    // without_column_name: (an_id DOT an_id) | an_id_without;
    TString sourceName;
    TString columnName;
    switch (node.Alt_case()) {
        case TRule_without_column_name::kAltWithoutColumnName1:
            sourceName = Id(node.GetAlt_without_column_name1().GetRule_an_id1(), *this);
            columnName = Id(node.GetAlt_without_column_name1().GetRule_an_id3(), *this);
            break;
        case TRule_without_column_name::kAltWithoutColumnName2:
            columnName = Id(node.GetAlt_without_column_name2().GetRule_an_id_without1(), *this);
            break;
        case TRule_without_column_name::ALT_NOT_SET:
            Y_ABORT("You should change implementation according to grammar changes");
    }

    if (columnName.empty()) {
        // TODO: Id() should return TMaybe<TString>
        if (!Ctx.HasPendingErrors) {
            Ctx.Error() << "Empty column name is not allowed";
        }
        return false;
    }
    keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
    return true;
}

// Appends every column of a comma-separated list to keys.
// TRule is either TRule_column_list or the WITHOUT-list rule; the matching
// ColumnName overload is selected at compile time.
// Stops and returns false at the first column that fails.
template<typename TRule>
bool TSqlSelect::ColumnList(TVector<TNodePtr>& keys, const TRule& node) {
    bool result;
    if constexpr (std::is_same_v<TRule, TRule_column_list>) {
        result = ColumnName(keys, node.GetRule_column_name1());
    } else {
        result = ColumnName(keys, node.GetRule_without_column_name1());
    }

    if (!result) {
        return false;
    }

    // Bind by reference: iterating by value copies each sub-message.
    for (const auto& b: node.GetBlock2()) {
        Token(b.GetToken1());
        if constexpr (std::is_same_v<TRule, TRule_column_list>) {
            result = ColumnName(keys, b.GetRule_column_name2());
        } else {
            result = ColumnName(keys, b.GetRule_without_column_name2());
        }
        if (!result) {
            return false;
        }
    }
    return true;
}

// Appends one column to columnList and, when the optional block with an_id
// is present, sets that identifier as the label of the appended node.
bool TSqlSelect::NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node) {
    if (!ColumnName(columnList, node.GetRule_column_name1())) {
        return false;
    }
    if (node.HasBlock2()) {
        const auto label = Id(node.GetBlock2().GetRule_an_id2(), *this);
        columnList.back()->SetLabel(label);
    }
    return true;
}

+TSourcePtr TSqlSelect::ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos) { + // PROCESS STREAM? named_single_source (COMMA named_single_source)* (USING using_call_expr (AS an_id)? + // (WITH external_call_settings)? + // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?)? 
+ + Token(node.GetToken1()); + TPosition startPos(Ctx.Pos()); + + if (!selectPos) { + selectPos = startPos; + } + + const bool hasUsing = node.HasBlock5(); + const bool unorderedSubquery = hasUsing; + TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source3(), unorderedSubquery)); + if (!source) { + return nullptr; + } + if (node.GetBlock4().size()) { + TVector<TSourcePtr> sources(1, source); + for (auto& s: node.GetBlock4()) { + sources.push_back(NamedSingleSource(s.GetRule_named_single_source2(), unorderedSubquery)); + if (!sources.back()) { + return nullptr; + } + } + auto pos = source->GetPos(); + source = BuildMuxSource(pos, std::move(sources)); + } + + const bool processStream = node.HasBlock2(); + + if (!hasUsing) { + return BuildProcess(startPos, std::move(source), nullptr, false, {}, false, processStream, settings, {}); + } + + const auto& block5 = node.GetBlock5(); + if (block5.HasBlock5()) { + TSqlExpression expr(Ctx, Mode); + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TNodePtr where = expr.Build(block5.GetBlock5().GetRule_expr2()); + if (!where || !source->AddFilter(Ctx, where)) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", "ProcessWhere"); + } else { + Ctx.IncrementMonCounter("sql_features", processStream ? "ProcessStream" : "Process"); + } + + if (block5.HasBlock6()) { + Ctx.Error() << "PROCESS does not allow HAVING yet! 
You may request it on yql@ maillist."; + return nullptr; + } + + bool listCall = false; + TSqlCallExpr call(Ctx, Mode); + bool initRet = call.Init(block5.GetRule_using_call_expr2()); + if (initRet) { + call.IncCounters(); + } + + if (!initRet) { + return nullptr; + } + + auto args = call.GetArgs(); + for (auto& arg: args) { + if (auto placeholder = dynamic_cast<TTableRows*>(arg.Get())) { + if (listCall) { + Ctx.Error() << "Only one TableRows() argument is allowed."; + return nullptr; + } + listCall = true; + } + } + + if (!call.IsExternal() && block5.HasBlock4()) { + Ctx.Error() << "PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block"; + return nullptr; + } + + if (block5.HasBlock4()) { + const auto& block54 = block5.GetBlock4(); + if (!call.ConfigureExternalCall(block54.GetRule_external_call_settings2())) { + return nullptr; + } + } + + TSqlCallExpr finalCall(call, args); + TNodePtr with(finalCall.IsExternal() ? finalCall.BuildCall() : finalCall.BuildUdf(/* forReduce = */ false)); + if (!with) { + return {}; + } + args = finalCall.GetArgs(); + if (call.IsExternal()) + listCall = true; + + if (block5.HasBlock3()) { + with->SetLabel(Id(block5.GetBlock3().GetRule_an_id2(), *this)); + } + + if (call.IsExternal() && block5.HasBlock7()) { + Ctx.Error() << "PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block"; + return nullptr; + } + + TVector<TSortSpecificationPtr> assumeOrderBy; + if (block5.HasBlock7()) { + if (!OrderByClause(block5.GetBlock7().GetRule_order_by_clause2(), assumeOrderBy)) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? 
"AssumeOrderBy" : "AssumeOrderByExpr"); + } + + return BuildProcess(startPos, std::move(source), with, finalCall.IsExternal(), std::move(args), listCall, processStream, settings, assumeOrderBy); +} + +TSourcePtr TSqlSelect::ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos) { + // REDUCE named_single_source (COMMA named_single_source)* (PRESORT sort_specification_list)? + // ON column_list USING ALL? using_call_expr (AS an_id)? + // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)? + Token(node.GetToken1()); + TPosition startPos(Ctx.Pos()); + if (!selectPos) { + selectPos = startPos; + } + + TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source2(), true)); + if (!source) { + return {}; + } + if (node.GetBlock3().size()) { + TVector<TSourcePtr> sources(1, source); + for (auto& s: node.GetBlock3()) { + sources.push_back(NamedSingleSource(s.GetRule_named_single_source2(), true)); + if (!sources.back()) { + return nullptr; + } + } + auto pos = source->GetPos(); + source = BuildMuxSource(pos, std::move(sources)); + } + + TVector<TSortSpecificationPtr> orderBy; + if (node.HasBlock4()) { + if (!SortSpecificationList(node.GetBlock4().GetRule_sort_specification_list2(), orderBy)) { + return {}; + } + } + + TVector<TNodePtr> keys; + if (!ColumnList(keys, node.GetRule_column_list6())) { + return nullptr; + } + + if (node.HasBlock11()) { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + TNodePtr where = expr.Build(node.GetBlock11().GetRule_expr2()); + if (!where || !source->AddFilter(Ctx, where)) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", "ReduceWhere"); + } else { + Ctx.IncrementMonCounter("sql_features", "Reduce"); + } + + TNodePtr having; + if (node.HasBlock12()) { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + having = expr.Build(node.GetBlock12().GetRule_expr2()); + if (!having) { + return nullptr; + } + 
} + + bool listCall = false; + TSqlCallExpr call(Ctx, Mode); + bool initRet = call.Init(node.GetRule_using_call_expr9()); + if (initRet) { + call.IncCounters(); + } + + if (!initRet) { + return nullptr; + } + + auto args = call.GetArgs(); + for (auto& arg: args) { + if (auto placeholder = dynamic_cast<TTableRows*>(arg.Get())) { + if (listCall) { + Ctx.Error() << "Only one TableRows() argument is allowed."; + return nullptr; + } + listCall = true; + } + } + + TSqlCallExpr finalCall(call, args); + + TNodePtr udf(finalCall.BuildUdf(/* forReduce = */ true)); + if (!udf) { + return {}; + } + + if (node.HasBlock10()) { + udf->SetLabel(Id(node.GetBlock10().GetRule_an_id2(), *this)); + } + + const auto reduceMode = node.HasBlock8() ? ReduceMode::ByAll : ReduceMode::ByPartition; + + TVector<TSortSpecificationPtr> assumeOrderBy; + if (node.HasBlock13()) { + if (!OrderByClause(node.GetBlock13().GetRule_order_by_clause2(), assumeOrderBy)) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr"); + } + + return BuildReduce(startPos, reduceMode, std::move(source), std::move(orderBy), std::move(keys), std::move(args), udf, having, + settings, assumeOrderBy, listCall); +} + +TSourcePtr TSqlSelect::SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos, + TMaybe<TSelectKindPlacement> placement, TVector<TSortSpecificationPtr>& selectOpOrderBy, bool& selectOpAssumeOrderBy) +{ + // (FROM join_source)? SELECT STREAM? opt_set_quantifier result_column (COMMA result_column)* COMMA? (WITHOUT column_list)? (FROM join_source)? (WHERE expr)? + // group_by_clause? (HAVING expr)? window_clause? ext_order_by_clause? 
+ selectOpOrderBy = {}; + selectOpAssumeOrderBy = false; + if (node.HasBlock1()) { + Token(node.GetBlock1().GetToken1()); + } else { + Token(node.GetToken2()); + } + + TPosition startPos(Ctx.Pos()); + if (!selectPos) { + selectPos = Ctx.Pos(); + } + + const auto hints = Ctx.PullHintForToken(selectPos); + TColumnsSets uniqueSets, distinctSets; + for (const auto& hint : hints) { + if (const auto& name = to_lower(hint.Name); name == "unique") + uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend())); + else if (name == "distinct") { + uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend())); + distinctSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend())); + } else { + Ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Hint " << hint.Name << " will not be used"; + } + } + + const bool distinct = IsDistinctOptSet(node.GetRule_opt_set_quantifier4()); + if (distinct) { + Ctx.IncrementMonCounter("sql_features", "DistinctInSelect"); + } + + TSourcePtr source(BuildFakeSource(selectPos, /* missingFrom = */ true, Mode == NSQLTranslation::ESqlMode::SUBQUERY)); + if (node.HasBlock1() && node.HasBlock9()) { + Token(node.GetBlock9().GetToken1()); + Ctx.IncrementMonCounter("sql_errors", "DoubleFrom"); + Ctx.Error() << "Only one FROM clause is allowed"; + return nullptr; + } + if (node.HasBlock1()) { + source = JoinSource(node.GetBlock1().GetRule_join_source2()); + Ctx.IncrementMonCounter("sql_features", "FromInFront"); + } else if (node.HasBlock9()) { + source = JoinSource(node.GetBlock9().GetRule_join_source2()); + } + if (!source) { + return nullptr; + } + + const bool selectStream = node.HasBlock3(); + TVector<TNodePtr> without; + if (node.HasBlock8()) { + if (!ColumnList(without, node.GetBlock8().GetRule_without_column_list2())) { + return nullptr; + } + } + if (node.HasBlock10()) { + auto block = node.GetBlock10(); + Token(block.GetToken1()); + TPosition 
pos(Ctx.Pos()); + TNodePtr where; + { + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TSqlExpression expr(Ctx, Mode); + where = expr.Build(block.GetRule_expr2()); + } + if (!where) { + Ctx.IncrementMonCounter("sql_errors", "WhereInvalid"); + return nullptr; + } + if (!source->AddFilter(Ctx, where)) { + Ctx.IncrementMonCounter("sql_errors", "WhereNotSupportedBySource"); + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", "Where"); + } + + /// \todo merge gtoupByExpr and groupBy in one + TVector<TNodePtr> groupByExpr, groupBy; + TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec; + bool compactGroupBy = false; + TString groupBySuffix; + if (node.HasBlock11()) { + TGroupByClause clause(Ctx, Mode); + if (!clause.Build(node.GetBlock11().GetRule_group_by_clause1())) { + return nullptr; + } + bool hasHopping = (bool)clause.GetLegacyHoppingWindow(); + for (const auto& exprAlias: clause.Aliases()) { + YQL_ENSURE(exprAlias.first == exprAlias.second->GetLabel()); + groupByExpr.emplace_back(exprAlias.second); + hasHopping |= (bool)dynamic_cast<THoppingWindow*>(exprAlias.second.Get()); + } + groupBy = std::move(clause.Content()); + clause.SetFeatures("sql_features"); + legacyHoppingWindowSpec = clause.GetLegacyHoppingWindow(); + compactGroupBy = clause.IsCompactGroupBy(); + groupBySuffix = clause.GetSuffix(); + + if (source->IsStream() && !hasHopping) { + Ctx.Error() << "Streaming group by query must have a hopping window specification."; + return nullptr; + } + } + + TNodePtr having; + if (node.HasBlock12()) { + TSqlExpression expr(Ctx, Mode); + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + having = expr.Build(node.GetBlock12().GetRule_expr2()); + if (!having) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", "Having"); + } + + TWinSpecs windowSpec; + if (node.HasBlock13()) { + if (source->IsStream()) { + Ctx.Error() << "WINDOW is not allowed in streaming queries"; + return nullptr; + } + if 
(!WindowClause(node.GetBlock13().GetRule_window_clause1(), windowSpec)) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", "WindowClause"); + } + + bool assumeSorted = false; + TVector<TSortSpecificationPtr> orderBy; + if (node.HasBlock14()) { + auto& orderBlock = node.GetBlock14().GetRule_ext_order_by_clause1(); + assumeSorted = orderBlock.HasBlock1(); + + Token(orderBlock.GetRule_order_by_clause2().GetToken1()); + + if (source->IsStream()) { + Ctx.Error() << "ORDER BY is not allowed in streaming queries"; + return nullptr; + } + + if (!ValidateLimitOrderByWithSelectOp(placement, "ORDER BY")) { + return nullptr; + } + + if (!OrderByClause(orderBlock.GetRule_order_by_clause2(), orderBy)) { + return nullptr; + } + Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(orderBy) + ? (assumeSorted ? "AssumeOrderBy" : "OrderBy") + : (assumeSorted ? "AssumeOrderByExpr" : "OrderByExpr") + ); + + if (!NeedPassLimitOrderByToUnderlyingSelect(placement)) { + selectOpOrderBy.swap(orderBy); + std::swap(selectOpAssumeOrderBy, assumeSorted); + } + } + + TVector<TNodePtr> terms; + { + class TScopedWinSpecs { + public: + TScopedWinSpecs(TContext& ctx, TWinSpecs& specs) + : Ctx(ctx) + { + Ctx.WinSpecsScopes.push_back(std::ref(specs)); + } + ~TScopedWinSpecs() { + Ctx.WinSpecsScopes.pop_back(); + } + private: + TContext& Ctx; + }; + + + TScopedWinSpecs scoped(Ctx, windowSpec); + if (!SelectTerm(terms, node.GetRule_result_column5())) { + return nullptr; + } + for (auto block: node.GetBlock6()) { + if (!SelectTerm(terms, block.GetRule_result_column2())) { + return nullptr; + } + } + + } + if (!ValidateSelectColumns(terms)) { + return nullptr; + } + return BuildSelectCore(Ctx, startPos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy, having, + std::move(windowSpec), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets)); +} + +bool 
TSqlSelect::WindowDefinition(const TRule_window_definition& rule, TWinSpecs& winSpecs) { + const TString windowName = Id(rule.GetRule_new_window_name1().GetRule_window_name1().GetRule_an_id_window1(), *this); + if (winSpecs.contains(windowName)) { + Ctx.Error() << "Unable to declare window with same name: " << windowName; + return false; + } + auto windowSpec = WindowSpecification(rule.GetRule_window_specification3().GetRule_window_specification_details2()); + if (!windowSpec) { + return false; + } + winSpecs.emplace(windowName, std::move(windowSpec)); + return true; +} + +bool TSqlSelect::WindowClause(const TRule_window_clause& rule, TWinSpecs& winSpecs) { + auto windowList = rule.GetRule_window_definition_list2(); + if (!WindowDefinition(windowList.GetRule_window_definition1(), winSpecs)) { + return false; + } + for (auto& block: windowList.GetBlock2()) { + if (!WindowDefinition(block.GetRule_window_definition2(), winSpecs)) { + return false; + } + } + return true; +} + +bool TSqlTranslation::OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy) { + return SortSpecificationList(node.GetRule_sort_specification_list3(), orderBy); +} + +bool TSqlSelect::ValidateLimitOrderByWithSelectOp(TMaybe<TSelectKindPlacement> placement, TStringBuf what) { + if (!placement.Defined()) { + // not in select_op chain + return true; + } + + if (!placement->IsLastInSelectOp) { + Ctx.Error() << what << " within UNION ALL is only allowed after last subquery"; + return false; + } + return true; +} + +bool TSqlSelect::NeedPassLimitOrderByToUnderlyingSelect(TMaybe<TSelectKindPlacement> placement) { + return !placement.Defined() || !placement->IsLastInSelectOp; +} + +TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos, + TMaybe<TSelectKindPlacement> placement) +{ + auto res = SelectKind(node.GetRule_select_kind1(), selectPos, placement); + if (!res) { + return {}; + } + TPosition 
startPos(Ctx.Pos()); + /// LIMIT INTEGER block + TNodePtr skipTake; + if (node.HasBlock2()) { + auto block = node.GetBlock2(); + + Token(block.GetToken1()); + TPosition pos(Ctx.Pos()); + + if (!ValidateLimitOrderByWithSelectOp(placement, "LIMIT")) { + return {}; + } + + TSqlExpression takeExpr(Ctx, Mode); + auto take = takeExpr.Build(block.GetRule_expr2()); + if (!take) { + return{}; + } + + TNodePtr skip; + if (block.HasBlock3()) { + TSqlExpression skipExpr(Ctx, Mode); + skip = skipExpr.Build(block.GetBlock3().GetRule_expr2()); + if (!skip) { + return {}; + } + if (Token(block.GetBlock3().GetToken1()) == ",") { + // LIMIT skip, take + skip.Swap(take); + Ctx.IncrementMonCounter("sql_features", "LimitSkipTake"); + } else { + Ctx.IncrementMonCounter("sql_features", "LimitOffset"); + } + } + + auto st = BuildSkipTake(pos, skip, take); + if (NeedPassLimitOrderByToUnderlyingSelect(placement)) { + skipTake = st; + } else { + res.SelectOpSkipTake = st; + } + + Ctx.IncrementMonCounter("sql_features", "Limit"); + } + + res.Source = BuildSelect(startPos, std::move(res.Source), skipTake); + return res; +} + +TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind& node, TPosition& selectPos, + TMaybe<TSelectKindPlacement> placement) +{ + const bool discard = node.HasBlock1(); + const bool hasLabel = node.HasBlock3(); + if (hasLabel && (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW || Mode == NSQLTranslation::ESqlMode::SUBQUERY)) { + Ctx.Error() << "INTO RESULT is not allowed in current mode"; + return {}; + } + + if (discard && hasLabel) { + Ctx.Error() << "DISCARD and INTO RESULT cannot be used at the same time"; + return {}; + } + + if (discard && !selectPos) { + selectPos = Ctx.TokenPosition(node.GetBlock1().GetToken1()); + } + + TWriteSettings settings; + settings.Discard = discard; + if (hasLabel) { + settings.Label = PureColumnOrNamed(node.GetBlock3().GetRule_pure_column_or_named3(), *this); + } + + TSelectKindResult res; + if 
(placement.Defined()) { + if (placement->IsFirstInSelectOp) { + res.Settings.Discard = settings.Discard; + } else if (settings.Discard) { + auto discardPos = Ctx.TokenPosition(node.GetBlock1().GetToken1()); + Ctx.Error(discardPos) << "DISCARD within UNION ALL is only allowed before first subquery"; + return {}; + } + + if (placement->IsLastInSelectOp) { + res.Settings.Label = settings.Label; + } else if (!settings.Label.Empty()) { + auto labelPos = Ctx.TokenPosition(node.GetBlock3().GetToken1()); + Ctx.Error(labelPos) << "INTO RESULT within UNION ALL is only allowed after last subquery"; + return {}; + } + + settings = {}; + } + + switch (node.GetBlock2().Alt_case()) { + case TRule_select_kind_TBlock2::kAlt1: + res.Source = ProcessCore(node.GetBlock2().GetAlt1().GetRule_process_core1(), settings, selectPos); + break; + case TRule_select_kind_TBlock2::kAlt2: + res.Source = ReduceCore(node.GetBlock2().GetAlt2().GetRule_reduce_core1(), settings, selectPos); + break; + case TRule_select_kind_TBlock2::kAlt3: { + res.Source = SelectCore(node.GetBlock2().GetAlt3().GetRule_select_core1(), settings, selectPos, + placement, res.SelectOpOrderBy, res.SelectOpAssumeOrderBy); + break; + } + case TRule_select_kind_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + return res; +} + +TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos, + TMaybe<TSelectKindPlacement> placement) +{ + if (node.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis1) { + return SelectKind(node.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1(), selectPos, placement); + } else { + return SelectKind(node.GetAlt_select_kind_parenthesis2().GetRule_select_kind_partial2(), selectPos, {}); + } +} + +template<typename TRule> +TSourcePtr TSqlSelect::Build(const TRule& node, TPosition pos, TSelectKindResult&& first) { + if (node.GetBlock2().empty()) { + return 
std::move(first.Source); + } + + auto blocks = node.GetBlock2(); + + TPosition unionPos = pos; // Position of first select + TVector<TSortSpecificationPtr> orderBy; + bool assumeOrderBy = false; + TNodePtr skipTake; + TWriteSettings outermostSettings; + outermostSettings.Discard = first.Settings.Discard; + + TVector<TSourcePtr> sources{ std::move(first.Source)}; + bool currentQuantifier = false; + + for (int i = 0; i < blocks.size(); ++i) { + auto& b = blocks[i]; + const bool second = (i == 0); + const bool last = (i + 1 == blocks.size()); + TSelectKindPlacement placement; + placement.IsLastInSelectOp = last; + + TSelectKindResult next = SelectKind(b.GetRule_select_kind_parenthesis2(), pos, placement); + if (!next) { + return nullptr; + } + + if (last) { + orderBy = next.SelectOpOrderBy; + assumeOrderBy = next.SelectOpAssumeOrderBy; + skipTake = next.SelectOpSkipTake; + outermostSettings.Label = next.Settings.Label; + } + + switch (b.GetRule_select_op1().Alt_case()) { + case TRule_select_op::kAltSelectOp1: + break; + case TRule_select_op::kAltSelectOp2: + case TRule_select_op::kAltSelectOp3: + Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet"; + return nullptr; + case TRule_select_op::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + const bool quantifier = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2(); + + if (!second && quantifier != currentQuantifier) { + auto source = BuildUnion(pos, std::move(sources), currentQuantifier, {}); + sources.clear(); + sources.emplace_back(std::move(source)); + } + + sources.emplace_back(std::move(next.Source)); + currentQuantifier = quantifier; + } + + auto result = BuildUnion(pos, std::move(sources), currentQuantifier, outermostSettings); + + if (orderBy) { + TVector<TNodePtr> groupByExpr; + TVector<TNodePtr> groupBy; + bool compactGroupBy = false; + TString groupBySuffix = ""; + TNodePtr having; + TWinSpecs winSpecs; + TLegacyHoppingWindowSpecPtr 
legacyHoppingWindowSpec; + bool distinct = false; + TVector<TNodePtr> without; + bool stream = false; + + TVector<TNodePtr> terms; + terms.push_back(BuildColumn(unionPos, "*", "")); + + result = BuildSelectCore(Ctx, unionPos, std::move(result), groupByExpr, groupBy, compactGroupBy, groupBySuffix, + assumeOrderBy, orderBy, having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms), + distinct, std::move(without), stream, outermostSettings, {}, {}); + + result = BuildSelect(unionPos, std::move(result), skipTake); + } else if (skipTake) { + result = BuildSelect(unionPos, std::move(result), skipTake); + } + + return result; +} + +TSourcePtr TSqlSelect::Build(const TRule_select_stmt& node, TPosition& selectPos) { + TMaybe<TSelectKindPlacement> placement; + if (!node.GetBlock2().empty()) { + placement.ConstructInPlace(); + placement->IsFirstInSelectOp = true; + } + + auto res = SelectKind(node.GetRule_select_kind_parenthesis1(), selectPos, placement); + if (!res) { + return nullptr; + } + + return Build(node, selectPos, std::move(res)); +} + +TSourcePtr TSqlSelect::Build(const TRule_select_unparenthesized_stmt& node, TPosition& selectPos) { + TMaybe<TSelectKindPlacement> placement; + if (!node.GetBlock2().empty()) { + placement.ConstructInPlace(); + placement->IsFirstInSelectOp = true; + } + + auto res = SelectKind(node.GetRule_select_kind_partial1(), selectPos, placement); + if (!res) { + return nullptr; + } + + return Build(node, selectPos, std::move(res)); +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_select.h b/yql/essentials/sql/v1/sql_select.h new file mode 100644 index 00000000000..fd6f0bece52 --- /dev/null +++ b/yql/essentials/sql/v1/sql_select.h @@ -0,0 +1,74 @@ +#pragma once + +#include "sql_translation.h" +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TSqlSelect: public TSqlTranslation { +public: + 
TSqlSelect(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlTranslation(ctx, mode) + { + } + + TSourcePtr Build(const TRule_select_stmt& node, TPosition& selectPos); + TSourcePtr Build(const TRule_select_unparenthesized_stmt& node, TPosition& selectPos); + +private: + bool SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node); + bool ValidateSelectColumns(const TVector<TNodePtr>& terms); + bool ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node); + bool ColumnName(TVector<TNodePtr>& keys, const TRule_without_column_name& node); + template<typename TRule> + bool ColumnList(TVector<TNodePtr>& keys, const TRule& node); + bool NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node); + TSourcePtr SingleSource(const TRule_single_source& node, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos, bool unorderedSubquery); + TSourcePtr NamedSingleSource(const TRule_named_single_source& node, bool unorderedSubquery); + bool FlattenByArg(const TString& sourceLabel, TVector<TNodePtr>& flattenByColumns, TVector<TNodePtr>& flattenByExprs, const TRule_flatten_by_arg& node); + TSourcePtr FlattenSource(const TRule_flatten_source& node); + TSourcePtr JoinSource(const TRule_join_source& node); + bool JoinOp(ISource* join, const TRule_join_source::TBlock3& block, TMaybe<TPosition> anyPos); + TNodePtr JoinExpr(ISource*, const TRule_join_constraint& node); + TSourcePtr ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos); + TSourcePtr ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos); + + struct TSelectKindPlacement { + bool IsFirstInSelectOp = false; + bool IsLastInSelectOp = false; + }; + + TSourcePtr SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos, + TMaybe<TSelectKindPlacement> placement, TVector<TSortSpecificationPtr>& selectOpOrederBy, bool& selectOpAssumeOrderBy); + + bool 
WindowDefinition(const TRule_window_definition& node, TWinSpecs& winSpecs); + bool WindowClause(const TRule_window_clause& node, TWinSpecs& winSpecs); + + struct TSelectKindResult { + TSourcePtr Source; + TWriteSettings Settings; + + TVector<TSortSpecificationPtr> SelectOpOrderBy; + bool SelectOpAssumeOrderBy = false; + TNodePtr SelectOpSkipTake; + + inline explicit operator bool() const { + return static_cast<bool>(Source); + } + }; + + bool ValidateLimitOrderByWithSelectOp(TMaybe<TSelectKindPlacement> placement, TStringBuf what); + bool NeedPassLimitOrderByToUnderlyingSelect(TMaybe<TSelectKindPlacement> placement); + + template<typename TRule> + TSourcePtr Build(const TRule& node, TPosition pos, TSelectKindResult&& first); + + + TSelectKindResult SelectKind(const TRule_select_kind& node, TPosition& selectPos, TMaybe<TSelectKindPlacement> placement); + TSelectKindResult SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos, TMaybe<TSelectKindPlacement> placement); + TSelectKindResult SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos, TMaybe<TSelectKindPlacement> placement); +}; + +} //namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp new file mode 100644 index 00000000000..4bcfb7de847 --- /dev/null +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -0,0 +1,5149 @@ +#include "sql_translation.h" +#include "sql_expression.h" +#include "sql_call_expr.h" +#include "sql_query.h" +#include "sql_values.h" +#include "sql_select.h" +#include "source.h" + +#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h> +#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h> +#include <yql/essentials/sql/settings/partitioning.h> +#include <yql/essentials/sql/v1/proto_parser/proto_parser.h> + +#include <util/generic/scope.h> +#include <util/string/join.h> + +#include <library/cpp/protobuf/util/simple_reflection.h> + +namespace { + +using 
namespace NSQLTranslationV1; + +template <typename Callback> +void VisitAllFields(const NProtoBuf::Message& msg, Callback& callback) { + const auto* descr = msg.GetDescriptor(); + for (int i = 0; i < descr->field_count(); ++i) { + const auto* fd = descr->field(i); + NProtoBuf::TConstField field(msg, fd); + if (field.IsMessage()) { + for (size_t j = 0; j < field.Size(); ++j) { + const auto& message = *field.Get<NProtoBuf::Message>(j); + callback(message); + VisitAllFields(message, callback); + } + } + } +} + +struct TTokenCollector { + void operator()(const NProtoBuf::Message& message) { + if (const auto* token = dynamic_cast<const NSQLv1Generated::TToken*>(&message)) { + if (!Tokens.empty()) { + Tokens << ' '; + } + Tokens << token->GetValue(); + } + } + + TStringBuilder Tokens; +}; + +TString CollectTokens(const TRule_select_stmt& selectStatement) { + TTokenCollector tokenCollector; + VisitAllFields(selectStatement, tokenCollector); + return tokenCollector.Tokens; +} + +bool RecreateContext( + TContext& ctx, const NSQLTranslation::TTranslationSettings& settings, const TString& recreationQuery +) { + if (!recreationQuery) { + return true; + } + const TString queryName = "context recreation query"; + + const auto* ast = NSQLTranslationV1::SqlAST( + recreationQuery, queryName, ctx.Issues, + settings.MaxErrors, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena + ); + if (!ast) { + return false; + } + + TSqlQuery queryTranslator(ctx, ctx.Settings.Mode, true); + auto node = queryTranslator.Build(static_cast<const TSQLv1ParserAST&>(*ast)); + + return node && node->Init(ctx, nullptr) && node->Translate(ctx); +} + +TNodePtr BuildViewSelect( + const TRule_select_stmt& selectStatement, + TContext& parentContext, + const TString& contextRecreationQuery +) { + TIssues issues; + TContext context(parentContext.Settings, {}, issues); + if (!RecreateContext(context, context.Settings, contextRecreationQuery)) { + parentContext.Issues.AddIssues(issues); 
+ return nullptr; + } + issues.Clear(); + + // Holds (among other things) subquery references. + // These references need to be passed to the parent context + // to be able to compile view queries with subqueries. + context.PushCurrentBlocks(&parentContext.GetCurrentBlocks()); + + context.Settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; + + TSqlSelect selectTranslator(context, context.Settings.Mode); + TPosition pos = parentContext.Pos(); + auto source = selectTranslator.Build(selectStatement, pos); + if (!source) { + parentContext.Issues.AddIssues(issues); + return nullptr; + } + auto node = BuildSelectResult( + pos, + std::move(source), + false, + false, + context.Scoped + ); + if (!node) { + parentContext.Issues.AddIssues(issues); + return nullptr; + } + return node; +} + +} + +namespace NSQLTranslationV1 { + +using NALPDefault::SQLv1LexerTokens; +using NALPDefaultAntlr4::SQLv1Antlr4Lexer; + +using namespace NSQLv1Generated; + +TIdentifier GetKeywordId(TTranslation& ctx, const TRule_keyword& node) { + // keyword: + // keyword_compat + // | keyword_expr_uncompat + // | keyword_table_uncompat + // | keyword_select_uncompat + // | keyword_alter_uncompat + // | keyword_in_uncompat + // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_keyword::kAltKeyword1: + return GetIdentifier(ctx, node.GetAlt_keyword1().GetRule_keyword_compat1()); + case TRule_keyword::kAltKeyword2: + return GetIdentifier(ctx, node.GetAlt_keyword2().GetRule_keyword_expr_uncompat1()); + case TRule_keyword::kAltKeyword3: + return GetIdentifier(ctx, node.GetAlt_keyword3().GetRule_keyword_table_uncompat1()); + case TRule_keyword::kAltKeyword4: + return GetIdentifier(ctx, node.GetAlt_keyword4().GetRule_keyword_select_uncompat1()); + case TRule_keyword::kAltKeyword5: + return GetIdentifier(ctx, node.GetAlt_keyword5().GetRule_keyword_alter_uncompat1()); + case TRule_keyword::kAltKeyword6: + return GetIdentifier(ctx, 
node.GetAlt_keyword6().GetRule_keyword_in_uncompat1()); + case TRule_keyword::kAltKeyword7: + return GetIdentifier(ctx, node.GetAlt_keyword7().GetRule_keyword_window_uncompat1()); + case TRule_keyword::kAltKeyword8: + return GetIdentifier(ctx, node.GetAlt_keyword8().GetRule_keyword_hint_uncompat1()); + case TRule_keyword::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id& node, TTranslation& ctx) { + // id: identifier | keyword; + switch (node.Alt_case()) { + case TRule_id::kAltId1: + return Id(node.GetAlt_id1().GetRule_identifier1(), ctx); + case TRule_id::kAltId2: + return GetKeyword(ctx, node.GetAlt_id2().GetRule_keyword1()); + case TRule_id::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_or_type& node, TTranslation& ctx) { + switch (node.Alt_case()) { + case TRule_id_or_type::kAltIdOrType1: + return Id(node.GetAlt_id_or_type1().GetRule_id1(), ctx); + case TRule_id_or_type::kAltIdOrType2: + return ctx.Identifier(node.GetAlt_id_or_type2().GetRule_type_id1().GetToken1()); + case TRule_id_or_type::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_as_compat& node, TTranslation& ctx) { + switch (node.Alt_case()) { + case TRule_id_as_compat::kAltIdAsCompat1: + return Id(node.GetAlt_id_as_compat1().GetRule_identifier1(), ctx); + case TRule_id_as_compat::kAltIdAsCompat2: + return ctx.Token(node.GetAlt_id_as_compat2().GetRule_keyword_as_compat1().GetToken1()); + case TRule_id_as_compat::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_as_compat& node, TTranslation& ctx) { + switch (node.Alt_case()) { + case TRule_an_id_as_compat::kAltAnIdAsCompat1: + return Id(node.GetAlt_an_id_as_compat1().GetRule_id_as_compat1(), ctx); + case TRule_an_id_as_compat::kAltAnIdAsCompat2: 
+ return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_as_compat2().GetToken1())); + case TRule_an_id_as_compat::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_schema& node, TTranslation& ctx) { + //id_schema: + // identifier + // | keyword_compat + // | keyword_expr_uncompat + // // | keyword_table_uncompat + // | keyword_select_uncompat + // // | keyword_alter_uncompat + // | keyword_in_uncompat + // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_schema::kAltIdSchema1: + return Id(node.GetAlt_id_schema1().GetRule_identifier1(), ctx); + case TRule_id_schema::kAltIdSchema2: + return GetKeyword(ctx, node.GetAlt_id_schema2().GetRule_keyword_compat1()); + case TRule_id_schema::kAltIdSchema3: + return GetKeyword(ctx, node.GetAlt_id_schema3().GetRule_keyword_expr_uncompat1()); + case TRule_id_schema::kAltIdSchema4: + return GetKeyword(ctx, node.GetAlt_id_schema4().GetRule_keyword_select_uncompat1()); + case TRule_id_schema::kAltIdSchema5: + return GetKeyword(ctx, node.GetAlt_id_schema5().GetRule_keyword_in_uncompat1()); + case TRule_id_schema::kAltIdSchema6: + return GetKeyword(ctx, node.GetAlt_id_schema6().GetRule_keyword_window_uncompat1()); + case TRule_id_schema::kAltIdSchema7: + return GetKeyword(ctx, node.GetAlt_id_schema7().GetRule_keyword_hint_uncompat1()); + case TRule_id_schema::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_or_type& node, TTranslation& ctx) { + // an_id_or_type: id_or_type | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_or_type::kAltAnIdOrType1: + return Id(node.GetAlt_an_id_or_type1().GetRule_id_or_type1(), ctx); + case TRule_an_id_or_type::kAltAnIdOrType2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_or_type2().GetToken1())); + case TRule_an_id_or_type::ALT_NOT_SET: + 
Y_ABORT("You should change implementation according to grammar changes"); + } +} + +std::pair<bool, TString> Id(const TRule_id_or_at& node, TTranslation& ctx) { + bool hasAt = node.HasBlock1(); + return std::make_pair(hasAt, Id(node.GetRule_an_id_or_type2(), ctx) ); +} + +TString Id(const TRule_id_table& node, TTranslation& ctx) { + //id_table: + // identifier + // | keyword_compat + // | keyword_expr_uncompat + // // | keyword_table_uncompat + // | keyword_select_uncompat + // // | keyword_alter_uncompat + // | keyword_in_uncompat + // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_table::kAltIdTable1: + return Id(node.GetAlt_id_table1().GetRule_identifier1(), ctx); + case TRule_id_table::kAltIdTable2: + return GetKeyword(ctx, node.GetAlt_id_table2().GetRule_keyword_compat1()); + case TRule_id_table::kAltIdTable3: + return GetKeyword(ctx, node.GetAlt_id_table3().GetRule_keyword_expr_uncompat1()); + case TRule_id_table::kAltIdTable4: + return GetKeyword(ctx, node.GetAlt_id_table4().GetRule_keyword_select_uncompat1()); + case TRule_id_table::kAltIdTable5: + return GetKeyword(ctx, node.GetAlt_id_table5().GetRule_keyword_in_uncompat1()); + case TRule_id_table::kAltIdTable6: + return GetKeyword(ctx, node.GetAlt_id_table6().GetRule_keyword_window_uncompat1()); + case TRule_id_table::kAltIdTable7: + return GetKeyword(ctx, node.GetAlt_id_table7().GetRule_keyword_hint_uncompat1()); + case TRule_id_table::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_table& node, TTranslation& ctx) { + // an_id_table: id_table | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_table::kAltAnIdTable1: + return Id(node.GetAlt_an_id_table1().GetRule_id_table1(), ctx); + case TRule_an_id_table::kAltAnIdTable2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_table2().GetToken1())); + case TRule_an_id_table::ALT_NOT_SET: + 
Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_table_or_type& node, TTranslation& ctx) { + switch (node.Alt_case()) { + case TRule_id_table_or_type::kAltIdTableOrType1: + return Id(node.GetAlt_id_table_or_type1().GetRule_an_id_table1(), ctx); + case TRule_id_table_or_type::kAltIdTableOrType2: + return ctx.Identifier(node.GetAlt_id_table_or_type2().GetRule_type_id1().GetToken1()); + case TRule_id_table_or_type::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_expr& node, TTranslation& ctx) { + //id_expr: + // identifier + // | keyword_compat + // // | keyword_expr_uncompat + // // | keyword_table_uncompat + // // | keyword_select_uncompat + // | keyword_alter_uncompat + // | keyword_in_uncompat + // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_expr::kAltIdExpr1: + return Id(node.GetAlt_id_expr1().GetRule_identifier1(), ctx); + case TRule_id_expr::kAltIdExpr2: + return GetKeyword(ctx, node.GetAlt_id_expr2().GetRule_keyword_compat1()); + case TRule_id_expr::kAltIdExpr3: + return GetKeyword(ctx, node.GetAlt_id_expr3().GetRule_keyword_alter_uncompat1()); + case TRule_id_expr::kAltIdExpr4: + return GetKeyword(ctx, node.GetAlt_id_expr4().GetRule_keyword_in_uncompat1()); + case TRule_id_expr::kAltIdExpr5: + return GetKeyword(ctx, node.GetAlt_id_expr5().GetRule_keyword_window_uncompat1()); + case TRule_id_expr::kAltIdExpr6: + return GetKeyword(ctx, node.GetAlt_id_expr6().GetRule_keyword_hint_uncompat1()); + case TRule_id_expr::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +bool IsQuotedId(const TRule_id_expr& node, TTranslation& ctx) { + if (node.Alt_case() != TRule_id_expr::kAltIdExpr1) { + return false; + } + const auto& id = ctx.Token(node.GetAlt_id_expr1().GetRule_identifier1().GetToken1()); + // identifier: ID_PLAIN | 
ID_QUOTED; + return id.StartsWith('`'); +} + +TString Id(const TRule_id_expr_in& node, TTranslation& ctx) { + //id_expr_in: + // identifier + // | keyword_compat + // // | keyword_expr_uncompat + // // | keyword_table_uncompat + // // | keyword_select_uncompat + // | keyword_alter_uncompat + // // | keyword_in_uncompat + // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_expr_in::kAltIdExprIn1: + return Id(node.GetAlt_id_expr_in1().GetRule_identifier1(), ctx); + case TRule_id_expr_in::kAltIdExprIn2: + return GetKeyword(ctx, node.GetAlt_id_expr_in2().GetRule_keyword_compat1()); + case TRule_id_expr_in::kAltIdExprIn3: + return GetKeyword(ctx, node.GetAlt_id_expr_in3().GetRule_keyword_alter_uncompat1()); + case TRule_id_expr_in::kAltIdExprIn4: + return GetKeyword(ctx, node.GetAlt_id_expr_in4().GetRule_keyword_window_uncompat1()); + case TRule_id_expr_in::kAltIdExprIn5: + return GetKeyword(ctx, node.GetAlt_id_expr_in5().GetRule_keyword_hint_uncompat1()); + case TRule_id_expr_in::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_window& node, TTranslation& ctx) { + //id_window: + // identifier + // | keyword_compat + // | keyword_expr_uncompat + // | keyword_table_uncompat + // | keyword_select_uncompat + // | keyword_alter_uncompat + // | keyword_in_uncompat + // // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_window::kAltIdWindow1: + return Id(node.GetAlt_id_window1().GetRule_identifier1(), ctx); + case TRule_id_window::kAltIdWindow2: + return GetKeyword(ctx, node.GetAlt_id_window2().GetRule_keyword_compat1()); + case TRule_id_window::kAltIdWindow3: + return GetKeyword(ctx, node.GetAlt_id_window3().GetRule_keyword_expr_uncompat1()); + case TRule_id_window::kAltIdWindow4: + return GetKeyword(ctx, node.GetAlt_id_window4().GetRule_keyword_table_uncompat1()); + case 
TRule_id_window::kAltIdWindow5: + return GetKeyword(ctx, node.GetAlt_id_window5().GetRule_keyword_select_uncompat1()); + case TRule_id_window::kAltIdWindow6: + return GetKeyword(ctx, node.GetAlt_id_window6().GetRule_keyword_alter_uncompat1()); + case TRule_id_window::kAltIdWindow7: + return GetKeyword(ctx, node.GetAlt_id_window7().GetRule_keyword_in_uncompat1()); + case TRule_id_window::kAltIdWindow8: + return GetKeyword(ctx, node.GetAlt_id_window8().GetRule_keyword_hint_uncompat1()); + case TRule_id_window::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_without& node, TTranslation& ctx) { + //id_without: + // identifier + // | keyword_compat + // // | keyword_expr_uncompat + // | keyword_table_uncompat + // // | keyword_select_uncompat + // | keyword_alter_uncompat + // | keyword_in_uncompat + // | keyword_window_uncompat + // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_without::kAltIdWithout1: + return Id(node.GetAlt_id_without1().GetRule_identifier1(), ctx); + case TRule_id_without::kAltIdWithout2: + return GetKeyword(ctx, node.GetAlt_id_without2().GetRule_keyword_compat1()); + case TRule_id_without::kAltIdWithout3: + return GetKeyword(ctx, node.GetAlt_id_without3().GetRule_keyword_table_uncompat1()); + case TRule_id_without::kAltIdWithout4: + return GetKeyword(ctx, node.GetAlt_id_without4().GetRule_keyword_alter_uncompat1()); + case TRule_id_without::kAltIdWithout5: + return GetKeyword(ctx, node.GetAlt_id_without5().GetRule_keyword_in_uncompat1()); + case TRule_id_without::kAltIdWithout6: + return GetKeyword(ctx, node.GetAlt_id_without6().GetRule_keyword_window_uncompat1()); + case TRule_id_without::kAltIdWithout7: + return GetKeyword(ctx, node.GetAlt_id_without7().GetRule_keyword_hint_uncompat1()); + case TRule_id_without::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_id_hint& 
node, TTranslation& ctx) { + //id_hint: + // identifier + // | keyword_compat + // | keyword_expr_uncompat + // | keyword_table_uncompat + // | keyword_select_uncompat + // | keyword_alter_uncompat + // | keyword_in_uncompat + // | keyword_window_uncompat + // // | keyword_hint_uncompat + //; + switch (node.Alt_case()) { + case TRule_id_hint::kAltIdHint1: + return Id(node.GetAlt_id_hint1().GetRule_identifier1(), ctx); + case TRule_id_hint::kAltIdHint2: + return GetKeyword(ctx, node.GetAlt_id_hint2().GetRule_keyword_compat1()); + case TRule_id_hint::kAltIdHint3: + return GetKeyword(ctx, node.GetAlt_id_hint3().GetRule_keyword_expr_uncompat1()); + case TRule_id_hint::kAltIdHint4: + return GetKeyword(ctx, node.GetAlt_id_hint4().GetRule_keyword_table_uncompat1()); + case TRule_id_hint::kAltIdHint5: + return GetKeyword(ctx, node.GetAlt_id_hint5().GetRule_keyword_select_uncompat1()); + case TRule_id_hint::kAltIdHint6: + return GetKeyword(ctx, node.GetAlt_id_hint6().GetRule_keyword_alter_uncompat1()); + case TRule_id_hint::kAltIdHint7: + return GetKeyword(ctx, node.GetAlt_id_hint7().GetRule_keyword_in_uncompat1()); + case TRule_id_hint::kAltIdHint8: + return GetKeyword(ctx, node.GetAlt_id_hint8().GetRule_keyword_window_uncompat1()); + case TRule_id_hint::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id& node, TTranslation& ctx) { + // an_id: id | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id::kAltAnId1: + return Id(node.GetAlt_an_id1().GetRule_id1(), ctx); + case TRule_an_id::kAltAnId2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id2().GetToken1())); + case TRule_an_id::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_schema& node, TTranslation& ctx) { + // an_id_schema: id_schema | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_schema::kAltAnIdSchema1: + return 
Id(node.GetAlt_an_id_schema1().GetRule_id_schema1(), ctx); + case TRule_an_id_schema::kAltAnIdSchema2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_schema2().GetToken1())); + case TRule_an_id_schema::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_expr& node, TTranslation& ctx) { + // an_id_expr: id_expr | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_expr::kAltAnIdExpr1: + return Id(node.GetAlt_an_id_expr1().GetRule_id_expr1(), ctx); + case TRule_an_id_expr::kAltAnIdExpr2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_expr2().GetToken1())); + case TRule_an_id_expr::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_window& node, TTranslation& ctx) { + // an_id_window: id_window | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_window::kAltAnIdWindow1: + return Id(node.GetAlt_an_id_window1().GetRule_id_window1(), ctx); + case TRule_an_id_window::kAltAnIdWindow2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_window2().GetToken1())); + case TRule_an_id_window::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_without& node, TTranslation& ctx) { + // an_id_without: id_without | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_without::kAltAnIdWithout1: + return Id(node.GetAlt_an_id_without1().GetRule_id_without1(), ctx); + case TRule_an_id_without::kAltAnIdWithout2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_without2().GetToken1())); + case TRule_an_id_without::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_hint& node, TTranslation& ctx) { + // an_id_hint: id_hint | STRING_VALUE; + switch (node.Alt_case()) { + 
case TRule_an_id_hint::kAltAnIdHint1: + return Id(node.GetAlt_an_id_hint1().GetRule_id_hint1(), ctx); + case TRule_an_id_hint::kAltAnIdHint2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_hint2().GetToken1())); + case TRule_an_id_hint::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TString Id(const TRule_an_id_pure& node, TTranslation& ctx) { + // an_id_pure: identifier | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_an_id_pure::kAltAnIdPure1: + return Id(node.GetAlt_an_id_pure1().GetRule_identifier1(), ctx); + case TRule_an_id_pure::kAltAnIdPure2: + return IdContentFromString(ctx.Context(), ctx.Token(node.GetAlt_an_id_pure2().GetToken1())); + case TRule_an_id_pure::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TViewDescription Id(const TRule_view_name& node, TTranslation& ctx) { + switch (node.Alt_case()) { + case TRule_view_name::kAltViewName1: + return {Id(node.GetAlt_view_name1().GetRule_an_id1(), ctx)}; + case TRule_view_name::kAltViewName2: + return {"", true}; + case TRule_view_name::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +bool NamedNodeImpl(const TRule_bind_parameter& node, TString& name, TTranslation& ctx) { + // bind_parameter: DOLLAR (an_id_or_type | TRUE | FALSE); + TString id; + switch (node.GetBlock2().Alt_case()) { + case TRule_bind_parameter::TBlock2::kAlt1: + id = Id(node.GetBlock2().GetAlt1().GetRule_an_id_or_type1(), ctx); + break; + case TRule_bind_parameter::TBlock2::kAlt2: + id = ctx.Token(node.GetBlock2().GetAlt2().GetToken1()); + break; + case TRule_bind_parameter::TBlock2::kAlt3: + id = ctx.Token(node.GetBlock2().GetAlt3().GetToken1()); + break; + case TRule_bind_parameter::TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + auto dollar = ctx.Token(node.GetToken1()); + if (id.empty()) { + 
ctx.Error() << "Empty symbol name is not allowed"; + return false; + } + + name = dollar + id; + return true; +} + +TString OptIdPrefixAsStr(const TRule_opt_id_prefix& node, TTranslation& ctx, const TString& defaultStr) { + if (!node.HasBlock1()) { + return defaultStr; + } + return Id(node.GetBlock1().GetRule_an_id1(), ctx); +} + +TString OptIdPrefixAsStr(const TRule_opt_id_prefix_or_type& node, TTranslation& ctx, const TString& defaultStr) { + if (!node.HasBlock1()) { + return defaultStr; + } + return Id(node.GetBlock1().GetRule_an_id_or_type1(), ctx); +} + +void PureColumnListStr(const TRule_pure_column_list& node, TTranslation& ctx, TVector<TString>& outList) { + outList.push_back(Id(node.GetRule_an_id2(), ctx)); + for (auto& block: node.GetBlock3()) { + outList.push_back(Id(block.GetRule_an_id2(), ctx)); + } +} + +bool NamedNodeImpl(const TRule_opt_bind_parameter& node, TString& name, bool& isOptional, TTranslation& ctx) { + // opt_bind_parameter: bind_parameter QUESTION?; + isOptional = false; + if (!NamedNodeImpl(node.GetRule_bind_parameter1(), name, ctx)) { + return false; + } + isOptional = node.HasBlock2(); + return true; +} + +TDeferredAtom PureColumnOrNamed(const TRule_pure_column_or_named& node, TTranslation& ctx) { + switch (node.Alt_case()) { + case TRule_pure_column_or_named::kAltPureColumnOrNamed1: { + TString named; + if (!NamedNodeImpl(node.GetAlt_pure_column_or_named1().GetRule_bind_parameter1(), named, ctx)) { + return {}; + } + auto namedNode = ctx.GetNamedNode(named); + if (!namedNode) { + return {}; + } + + return TDeferredAtom(namedNode, ctx.Context()); + } + + case TRule_pure_column_or_named::kAltPureColumnOrNamed2: + return TDeferredAtom(ctx.Context().Pos(), Id(node.GetAlt_pure_column_or_named2().GetRule_an_id1(), ctx)); + case TRule_pure_column_or_named::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +bool PureColumnOrNamedListStr(const TRule_pure_column_or_named_list& node, TTranslation& 
ctx, TVector<TDeferredAtom>& outList) { + outList.push_back(PureColumnOrNamed(node.GetRule_pure_column_or_named2(), ctx)); + if (outList.back().Empty()) { + return false; + } + + for (auto& block : node.GetBlock3()) { + outList.push_back(PureColumnOrNamed(block.GetRule_pure_column_or_named2(), ctx)); + if (outList.back().Empty()) { + return false; + } + } + + return true; +} + +bool TSqlTranslation::CreateTableIndex(const TRule_table_index& node, TVector<TIndexDescription>& indexes) { + indexes.emplace_back(IdEx(node.GetRule_an_id2(), *this)); + + const auto& indexType = node.GetRule_table_index_type3().GetBlock1(); + switch (indexType.Alt_case()) { + // "GLOBAL" + case TRule_table_index_type_TBlock1::kAlt1: { + auto globalIndex = indexType.GetAlt1().GetRule_global_index1(); + bool uniqIndex = false; + if (globalIndex.HasBlock2()) { + uniqIndex = true; + } + if (globalIndex.HasBlock3()) { + const TString token = to_lower(Ctx.Token(globalIndex.GetBlock3().GetToken1())); + if (token == "sync") { + if (uniqIndex) { + indexes.back().Type = TIndexDescription::EType::GlobalSyncUnique; + } else { + indexes.back().Type = TIndexDescription::EType::GlobalSync; + } + } else if (token == "async") { + if (uniqIndex) { + AltNotImplemented("unique", indexType); + return false; + } + indexes.back().Type = TIndexDescription::EType::GlobalAsync; + } else { + Y_ABORT("You should change implementation according to grammar changes"); + } + } + } + break; + // "LOCAL" + case TRule_table_index_type_TBlock1::kAlt2: + AltNotImplemented("local", indexType); + return false; + case TRule_table_index_type_TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + if (node.GetRule_table_index_type3().HasBlock2()) { + const TString subType = to_upper(IdEx(node.GetRule_table_index_type3().GetBlock2().GetRule_index_subtype2().GetRule_an_id1(), *this).Name) ; + if (subType == "VECTOR_KMEANS_TREE") { + if (indexes.back().Type != 
TIndexDescription::EType::GlobalSync) { + Ctx.Error() << subType << " index can only be GLOBAL [SYNC]"; + return false; + } + + indexes.back().Type = TIndexDescription::EType::GlobalVectorKmeansTree; + } else { + Ctx.Error() << subType << " index subtype is not supported"; + return false; + } + } + + // WITH + if (node.HasBlock10()) { + //const auto& with = node.GetBlock4(); + auto& index = indexes.back(); + if (index.Type == TIndexDescription::EType::GlobalVectorKmeansTree) { + auto& vectorSettings = index.IndexSettings.emplace<TVectorIndexSettings>(); + if (!CreateIndexSettings(node.GetBlock10().GetRule_with_index_settings1(), index.Type, index.IndexSettings)) { + return false; + } + if (!vectorSettings.Validate(Ctx)) { + return false; + } + + } else { + AltNotImplemented("with", indexType); + return false; + } + } + + indexes.back().IndexColumns.emplace_back(IdEx(node.GetRule_an_id_schema6(), *this)); + for (const auto& block : node.GetBlock7()) { + indexes.back().IndexColumns.emplace_back(IdEx(block.GetRule_an_id_schema2(), *this)); + } + + if (node.HasBlock9()) { + const auto& block = node.GetBlock9(); + indexes.back().DataColumns.emplace_back(IdEx(block.GetRule_an_id_schema3(), *this)); + for (const auto& inner : block.GetBlock4()) { + indexes.back().DataColumns.emplace_back(IdEx(inner.GetRule_an_id_schema2(), *this)); + } + } + + return true; +} + +bool TSqlTranslation::CreateIndexSettings(const TRule_with_index_settings& settingsNode, + TIndexDescription::EType indexType, + TIndexDescription::TIndexSettings& indexSettings) { + const auto& firstEntry = settingsNode.GetRule_index_setting_entry3(); + if (!CreateIndexSettingEntry(IdEx(firstEntry.GetRule_an_id1(), *this), firstEntry.GetRule_index_setting_value3(), indexType, indexSettings)) { + return false; + } + for (auto& block : settingsNode.GetBlock4()) { + const auto& entry = block.GetRule_index_setting_entry2(); + if (!CreateIndexSettingEntry(IdEx(entry.GetRule_an_id1(), *this), 
entry.GetRule_index_setting_value3(), indexType, indexSettings)) { + return false; + } + } + return true; +} + +template<typename T> +std::tuple<bool, T, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) { + T value{}; + // id_or_type + if (node.HasAlt_index_setting_value1()) { + const TString stringValue = to_lower(IdEx(node.GetAlt_index_setting_value1().GetRule_id_or_type1(), *this).Name); + if (!TryFromString<T>(stringValue, value)) { + return {false, value, stringValue}; + } + return {true, value, stringValue}; + } + // STRING_VALUE + else if (node.HasAlt_index_setting_value2()) { + const TString stringValue = to_lower(Token(node.GetAlt_index_setting_value2().GetToken1())); + const auto unescaped = StringContent(Ctx, Ctx.Pos(), stringValue); + if (!unescaped) { + return {false, value, stringValue}; + } + if (!TryFromString<T>(unescaped->Content, value)) { + return {false, value, stringValue}; + } + return {true, value, unescaped->Content}; + } else { + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +template<> +std::tuple<bool, ui64, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) { + const auto& intNode = node.GetAlt_index_setting_value3().GetRule_integer1(); + const TString stringValue = Token(intNode.GetToken1()); + ui64 value = 0; + TString suffix; + if (!ParseNumbers(Ctx, stringValue, value, suffix)) { + return {false, value, stringValue}; + } + return {true, value, stringValue}; +} + +template<> +std::tuple<bool, bool, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) { + bool value = false; + const TString stringValue = to_lower(Token(node.GetAlt_index_setting_value4().GetRule_bool_value1().GetToken1()));; + if (!TryFromString<bool>(stringValue, value)) { + return {false, value, stringValue}; + } + return {true, value, stringValue}; +} + +bool TSqlTranslation::CreateIndexSettingEntry(const TIdentifier &id, + 
const TRule_index_setting_value& node, + TIndexDescription::EType indexType, + TIndexDescription::TIndexSettings& indexSettings) { + + + if (indexType == TIndexDescription::EType::GlobalVectorKmeansTree) { + TVectorIndexSettings &vectorIndexSettings = std::get<TVectorIndexSettings>(indexSettings); + + if (to_lower(id.Name) == "distance") { + const auto [success, value, stringValue] = GetIndexSettingValue<TVectorIndexSettings::EDistance>(node); + if (!success) { + Ctx.Error() << "Invalid distance: " << stringValue; + return false; + } + vectorIndexSettings.Distance = value; + } else if (to_lower(id.Name) == "similarity") { + const auto [success, value, stringValue] = GetIndexSettingValue<TVectorIndexSettings::ESimilarity>(node); + if (!success) { + Ctx.Error() << "Invalid similarity: " << stringValue; + return false; + } + vectorIndexSettings.Similarity = value; + } else if (to_lower(id.Name) == "vector_type") { + const auto [success, value, stringValue] = GetIndexSettingValue<TVectorIndexSettings::EVectorType>(node); + if (!success) { + Ctx.Error() << "Invalid vector_type: " << stringValue; + return false; + } + vectorIndexSettings.VectorType = value; + } else if (to_lower(id.Name) == "vector_dimension") { + const auto [success, value, stringValue] = GetIndexSettingValue<ui64>(node); + if (!success || value > Max<ui32>()) { + Ctx.Error() << "Invalid vector_dimension: " << stringValue; + return false; + } + vectorIndexSettings.VectorDimension = value; + } else if (to_lower(id.Name) == "clusters") { + const auto [success, value, stringValue] = GetIndexSettingValue<ui64>(node); + if (!success || value > Max<ui32>()) { + Ctx.Error() << "Invalid clusters: " << stringValue; + return false; + } + vectorIndexSettings.Clusters = value; + } else if (to_lower(id.Name) == "levels") { + const auto [success, value, stringValue] = GetIndexSettingValue<ui64>(node); + if (!success || value > Max<ui32>()) { + Ctx.Error() << "Invalid levels: " << stringValue; + return false; + } + 
vectorIndexSettings.Levels = value; + } else { + Ctx.Error() << "Unknown index setting: " << id.Name; + return false; + } + } else { + Ctx.Error() << "Unknown index setting: " << id.Name; + return false; + } + return true; + +} + +std::pair<TString, TViewDescription> TableKeyImpl(const std::pair<bool, TString>& nameWithAt, TViewDescription view, TTranslation& ctx) { + if (nameWithAt.first) { + view = {"@"}; + ctx.Context().IncrementMonCounter("sql_features", "AnonymousTable"); + } + + return std::make_pair(nameWithAt.second, view); +} + +std::pair<TString, TViewDescription> TableKeyImpl(const TRule_table_key& node, TTranslation& ctx, bool hasAt) { + auto name(Id(node.GetRule_id_table_or_type1(), ctx)); + TViewDescription view; + if (node.HasBlock2()) { + view = Id(node.GetBlock2().GetRule_view_name2(), ctx); + ctx.Context().IncrementMonCounter("sql_features", "View"); + } + + return TableKeyImpl(std::make_pair(hasAt, name), view, ctx); +} + +/// \return optional prefix +TString ColumnNameAsStr(TTranslation& ctx, const TRule_column_name& node, TString& id) { + id = Id(node.GetRule_an_id2(), ctx); + return OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), ctx); +} + +TString ColumnNameAsSingleStr(TTranslation& ctx, const TRule_column_name& node) { + TString body; + const TString prefix = ColumnNameAsStr(ctx, node, body); + return prefix ? prefix + '.' 
+ body : body; +} + +TTableHints GetContextHints(TContext& ctx) { + TTableHints hints; + if (ctx.PragmaInferSchema) { + hints["infer_schema"] = {}; + } + if (ctx.PragmaDirectRead) { + hints["direct_read"] = {}; + } + + return hints; +} + +TTableHints GetTableFuncHints(TStringBuf funcName) { + TCiString func(funcName); + TTableHints res; + if (func.StartsWith("range") || func.StartsWith("like") || func.StartsWith("regexp") || func.StartsWith("filter")) { + res.emplace("ignore_non_existing", TVector<TNodePtr>{}); + } else if (func.StartsWith("each")) { + res.emplace("ignore_non_existing", TVector<TNodePtr>{}); + res.emplace("warn_non_existing", TVector<TNodePtr>{}); + } + + return res; +} + + +TNodePtr TSqlTranslation::NamedExpr(const TRule_named_expr& node, EExpr exprMode) { + TSqlExpression expr(Ctx, Mode); + if (exprMode == EExpr::GroupBy) { + expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::GroupBy); + } else if (exprMode == EExpr::SqlLambdaParams) { + expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::SqlLambdaParams); + } + if (node.HasBlock2()) { + expr.MarkAsNamed(); + } + TNodePtr exprNode(expr.Build(node.GetRule_expr1())); + if (!exprNode) { + Ctx.IncrementMonCounter("sql_errors", "NamedExprInvalid"); + return nullptr; + } + if (node.HasBlock2()) { + exprNode = SafeClone(exprNode); + exprNode->SetLabel(Id(node.GetBlock2().GetRule_an_id_or_type2(), *this)); + } + return exprNode; +} + +bool TSqlTranslation::NamedExprList(const TRule_named_expr_list& node, TVector<TNodePtr>& exprs, EExpr exprMode) { + exprs.emplace_back(NamedExpr(node.GetRule_named_expr1(), exprMode)); + if (!exprs.back()) { + return false; + } + for (auto& b: node.GetBlock2()) { + exprs.emplace_back(NamedExpr(b.GetRule_named_expr2(), exprMode)); + if (!exprs.back()) { + return false; + } + } + return true; +} + +bool TSqlTranslation::BindList(const TRule_bind_parameter_list& node, TVector<TSymbolNameWithPos>& bindNames) { + bindNames.clear(); + + TString name; + 
if (!NamedNodeImpl(node.GetRule_bind_parameter1(), name, *this)) { + return false; + } + + bindNames.emplace_back(TSymbolNameWithPos{name, Ctx.Pos()}); + for (auto& b: node.GetBlock2()) { + if (!NamedNodeImpl(b.GetRule_bind_parameter2(), name, *this)) { + return false; + } + + bindNames.emplace_back(TSymbolNameWithPos{name, Ctx.Pos()}); + } + return true; +} + +bool TSqlTranslation::ActionOrSubqueryArgs(const TRule_action_or_subquery_args& node, TVector<TSymbolNameWithPos>& bindNames, ui32& optionalArgsCount) { + bindNames.clear(); + optionalArgsCount = 0; + + TString name; + bool isOptional = false; + if (!NamedNodeImpl(node.GetRule_opt_bind_parameter1(), name, isOptional, *this)) { + return false; + } + + if (isOptional) { + optionalArgsCount++; + } + bindNames.emplace_back(TSymbolNameWithPos{name, Ctx.Pos()}); + + for (auto& b: node.GetBlock2()) { + if (!NamedNodeImpl(b.GetRule_opt_bind_parameter2(), name, isOptional, *this)) { + return false; + } + + if (isOptional) { + optionalArgsCount++; + } else if (optionalArgsCount > 0) { + Context().Error() << "Non-optional argument can not follow optional one"; + return false; + } + bindNames.emplace_back(TSymbolNameWithPos{name, Ctx.Pos()}); + } + return true; +} + +bool TSqlTranslation::ModulePath(const TRule_module_path& node, TVector<TString>& path) { + if (node.HasBlock1()) { + path.emplace_back(TString()); + } + path.emplace_back(Id(node.GetRule_an_id2(), *this)); + for (auto& b: node.GetBlock3()) { + path.emplace_back(Id(b.GetRule_an_id2(), *this)); + } + return true; +} + +bool TSqlTranslation::NamedBindList(const TRule_named_bind_parameter_list& node, TVector<TSymbolNameWithPos>& names, + TVector<TSymbolNameWithPos>& aliases) +{ + names.clear(); + aliases.clear(); + TSymbolNameWithPos name; + TSymbolNameWithPos alias; + + if (!NamedBindParam(node.GetRule_named_bind_parameter1(), name, alias)) { + return false; + } + names.push_back(name); + aliases.push_back(alias); + + for (auto& b: node.GetBlock2()) { + if 
(!NamedBindParam(b.GetRule_named_bind_parameter2(), name, alias)) { + return false; + } + names.push_back(name); + aliases.push_back(alias); + } + return true; +} + +bool TSqlTranslation::NamedBindParam(const TRule_named_bind_parameter& node, TSymbolNameWithPos& name, TSymbolNameWithPos& alias) { + name = alias = {}; + if (!NamedNodeImpl(node.GetRule_bind_parameter1(), name.Name, *this)) { + return false; + } + name.Pos = Ctx.Pos(); + if (node.HasBlock2()) { + if (!NamedNodeImpl(node.GetBlock2().GetRule_bind_parameter2(), alias.Name, *this)) { + return false; + } + alias.Pos = Ctx.Pos(); + } + return true; +} + +TMaybe<TTableArg> TSqlTranslation::TableArgImpl(const TRule_table_arg& node) { + TTableArg ret; + ret.HasAt = node.HasBlock1(); + TColumnRefScope scope(Ctx, EColumnRefState::AsStringLiteral); + ret.Expr = NamedExpr(node.GetRule_named_expr2()); + if (!ret.Expr) { + return Nothing(); + } + + if (node.HasBlock3()) { + ret.View = Id(node.GetBlock3().GetRule_view_name2(), *this); + Context().IncrementMonCounter("sql_features", "View"); + } + + return ret; +} + +bool TSqlTranslation::ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, TString& service, TDeferredAtom& cluster) { + bool allowBinding = false; + bool isBinding; + return ClusterExpr(node, allowWildcard, allowBinding, service, cluster, isBinding); +} + +bool TSqlTranslation::ClusterExprOrBinding(const TRule_cluster_expr& node, TString& service, TDeferredAtom& cluster, bool& isBinding) { + bool allowWildcard = false; + bool allowBinding = true; + return ClusterExpr(node, allowWildcard, allowBinding, service, cluster, isBinding); +} + +bool TSqlTranslation::ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, bool allowBinding, TString& service, + TDeferredAtom& cluster, bool& isBinding) +{ + service = ""; + cluster = TDeferredAtom(); + isBinding = false; + if (node.HasBlock1()) { + service = to_lower(Id(node.GetBlock1().GetRule_an_id1(), *this)); + allowBinding = false; + if 
(service != YtProviderName && + service != KikimrProviderName && + service != RtmrProviderName && service != StatProviderName) { + Ctx.Error() << "Unknown service: " << service; + return false; + } + } + + switch (node.GetBlock2().Alt_case()) { + case TRule_cluster_expr::TBlock2::kAlt1: { + auto value = PureColumnOrNamed(node.GetBlock2().GetAlt1().GetRule_pure_column_or_named1(), *this); + if (value.Empty()) { + return false; + } + + if (value.GetLiteral()) { + TString clusterName = *value.GetLiteral(); + if (allowBinding && to_lower(clusterName) == "bindings") { + switch (Ctx.Settings.BindingsMode) { + case NSQLTranslation::EBindingsMode::DISABLED: + Ctx.Error(Ctx.Pos(), TIssuesIds::YQL_DISABLED_BINDINGS) << "Please remove 'bindings.' from your query, the support for this syntax has ended"; + Ctx.IncrementMonCounter("sql_errors", "DisabledBinding"); + return false; + case NSQLTranslation::EBindingsMode::ENABLED: + isBinding = true; + break; + case NSQLTranslation::EBindingsMode::DROP_WITH_WARNING: + Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_BINDINGS) << "Please remove 'bindings.' 
from your query, the support for this syntax will be dropped soon"; + Ctx.IncrementMonCounter("sql_errors", "DeprecatedBinding"); + [[fallthrough]]; + case NSQLTranslation::EBindingsMode::DROP: + service = Context().Scoped->CurrService; + cluster = Context().Scoped->CurrCluster; + break; + } + + return true; + } + TString normalizedClusterName; + auto foundProvider = Ctx.GetClusterProvider(clusterName, normalizedClusterName); + if (!foundProvider) { + Ctx.Error() << "Unknown cluster: " << clusterName; + return false; + } + + if (service && *foundProvider != service) { + Ctx.Error() << "Mismatch of cluster " << clusterName << " service, expected: " + << *foundProvider << ", got: " << service; + return false; + } + + if (!service) { + service = *foundProvider; + } + + value = TDeferredAtom(Ctx.Pos(), normalizedClusterName); + } else { + if (!service) { + Ctx.Error() << "Cluster service is not set"; + return false; + } + } + + cluster = value; + return true; + } + case TRule_cluster_expr::TBlock2::kAlt2: { + if (!allowWildcard) { + Ctx.Error() << "Cluster wildcards allowed only in USE statement"; + return false; + } + + return true; + } + case TRule_cluster_expr::TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + + +bool TSqlTranslation::ApplyTableBinding(const TString& binding, TTableRef& tr, TTableHints& hints) { + NSQLTranslation::TBindingInfo bindingInfo; + if (const auto& error = ExtractBindingInfo(Context().Settings, binding, bindingInfo)) { + Ctx.Error() << error; + return false; + } + + if (bindingInfo.Schema) { + TNodePtr schema = BuildQuotedAtom(Ctx.Pos(), bindingInfo.Schema); + + TNodePtr type = new TCallNodeImpl(Ctx.Pos(), "SqlTypeFromYson", { schema }); + TNodePtr columns = new TCallNodeImpl(Ctx.Pos(), "SqlColumnOrderFromYson", { schema }); + + hints["user_schema"] = { type, columns }; + } + + for (auto& [key, values] : bindingInfo.Attributes) { + TVector<TNodePtr> hintValue; + for (auto& column : 
values) { + hintValue.push_back(BuildQuotedAtom(Ctx.Pos(), column)); + } + hints[key] = std::move(hintValue); + } + + tr.Service = bindingInfo.ClusterType; + tr.Cluster = TDeferredAtom(Ctx.Pos(), bindingInfo.Cluster); + + const TString view = ""; + tr.Keys = BuildTableKey(Ctx.Pos(), tr.Service, tr.Cluster, TDeferredAtom(Ctx.Pos(), bindingInfo.Path), {view}); + + return true; +} + +bool TSqlTranslation::TableRefImpl(const TRule_table_ref& node, TTableRef& result, bool unorderedSubquery) { + // table_ref: + // (cluster_expr DOT)? AT? + // (table_key | an_id_expr LPAREN (table_arg (COMMA table_arg)*)? RPAREN | + // bind_parameter (LPAREN expr_list? RPAREN)? (VIEW an_id)?) + // table_hints?; + if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && node.HasBlock1()) { + Ctx.Error() << "Cluster should not be used in limited view"; + return false; + } + auto service = Context().Scoped->CurrService; + auto cluster = Context().Scoped->CurrCluster; + const bool hasAt = node.HasBlock2(); + bool isBinding = false; + if (node.HasBlock1()) { + const auto& clusterExpr = node.GetBlock1().GetRule_cluster_expr1(); + bool result = !hasAt ? 
+ ClusterExprOrBinding(clusterExpr, service, cluster, isBinding) : ClusterExpr(clusterExpr, false, service, cluster); + if (!result) { + return false; + } + } + + TTableRef tr(Context().MakeName("table"), service, cluster, nullptr); + TPosition pos(Context().Pos()); + TTableHints hints = GetContextHints(Ctx); + TTableHints tableHints; + + TMaybe<TString> keyFunc; + + auto& block = node.GetBlock3(); + switch (block.Alt_case()) { + case TRule_table_ref::TBlock3::kAlt1: { + if (!isBinding && cluster.Empty()) { + Ctx.Error() << "No cluster name given and no default cluster is selected"; + return false; + } + + auto pair = TableKeyImpl(block.GetAlt1().GetRule_table_key1(), *this, hasAt); + if (isBinding) { + TString binding = pair.first; + auto view = pair.second; + if (!view.ViewName.empty()) { + YQL_ENSURE(view != TViewDescription{"@"}); + Ctx.Error() << "VIEW is not supported for table bindings"; + return false; + } + + if (!ApplyTableBinding(binding, tr, tableHints)) { + return false; + } + } else { + tr.Keys = BuildTableKey(pos, service, cluster, TDeferredAtom(pos, pair.first), pair.second); + } + break; + } + case TRule_table_ref::TBlock3::kAlt2: { + if (cluster.Empty()) { + Ctx.Error() << "No cluster name given and no default cluster is selected"; + return false; + } + + auto& alt = block.GetAlt2(); + keyFunc = Id(alt.GetRule_an_id_expr1(), *this); + TVector<TTableArg> args; + if (alt.HasBlock3()) { + auto& argsBlock = alt.GetBlock3(); + auto arg = TableArgImpl(argsBlock.GetRule_table_arg1()); + if (!arg) { + return false; + } + + args.push_back(std::move(*arg)); + for (auto& b : argsBlock.GetBlock2()) { + arg = TableArgImpl(b.GetRule_table_arg2()); + if (!arg) { + return false; + } + + args.push_back(std::move(*arg)); + } + } + tableHints = GetTableFuncHints(*keyFunc); + tr.Keys = BuildTableKeys(pos, service, cluster, *keyFunc, args); + break; + } + case TRule_table_ref::TBlock3::kAlt3: { + auto& alt = block.GetAlt3(); + Ctx.IncrementMonCounter("sql_features", 
"NamedNodeUseSource"); + TString named; + if (!NamedNodeImpl(alt.GetRule_bind_parameter1(), named, *this)) { + return false; + } + if (hasAt) { + if (alt.HasBlock2()) { + Ctx.Error() << "Subquery must not be used as anonymous table name"; + return false; + } + + if (alt.HasBlock3()) { + Ctx.Error() << "View is not supported for anonymous tables"; + return false; + } + + if (node.HasBlock4()) { + Ctx.Error() << "Hints are not supported for anonymous tables"; + return false; + } + + auto namedNode = GetNamedNode(named); + if (!namedNode) { + return false; + } + + auto source = TryMakeSourceFromExpression(Ctx.Pos(), Ctx, service, cluster, namedNode, "@"); + if (!source) { + Ctx.Error() << "Cannot infer cluster and table name"; + return false; + } + + result.Source = source; + return true; + } + auto nodePtr = GetNamedNode(named); + if (!nodePtr) { + Ctx.IncrementMonCounter("sql_errors", "NamedNodeSourceError"); + return false; + } + if (alt.HasBlock2()) { + if (alt.HasBlock3()) { + Ctx.Error() << "View is not supported for subqueries"; + return false; + } + + if (node.HasBlock4()) { + Ctx.Error() << "Hints are not supported for subqueries"; + return false; + } + + TVector<TNodePtr> values; + values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "Apply", TNodeFlags::Default)); + values.push_back(nodePtr); + values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "world", TNodeFlags::Default)); + + TSqlExpression sqlExpr(Ctx, Mode); + if (alt.GetBlock2().HasBlock2() && !ExprList(sqlExpr, values, alt.GetBlock2().GetBlock2().GetRule_expr_list1())) { + return false; + } + + TNodePtr apply = new TAstListNodeImpl(Ctx.Pos(), std::move(values)); + if (unorderedSubquery && Ctx.UnorderedSubqueries) { + apply = new TCallNodeImpl(Ctx.Pos(), "UnorderedSubquery", { apply }); + } + result.Source = BuildNodeSource(Ctx.Pos(), apply); + return true; + } + + TTableHints hints; + TTableHints contextHints = GetContextHints(Ctx); + auto ret = BuildInnerSource(Ctx.Pos(), nodePtr, service, cluster); + if 
(alt.HasBlock3()) { + auto view = Id(alt.GetBlock3().GetRule_view_name2(), *this); + Ctx.IncrementMonCounter("sql_features", "View"); + bool result = view.PrimaryFlag + ? ret->SetPrimaryView(Ctx, Ctx.Pos()) + : ret->SetViewName(Ctx, Ctx.Pos(), view.ViewName); + if (!result) { + return false; + } + } + + if (node.HasBlock4()) { + auto tmp = TableHintsImpl(node.GetBlock4().GetRule_table_hints1(), service, keyFunc.GetOrElse("")); + if (!tmp) { + return false; + } + + hints = *tmp; + } + + if (hints || contextHints) { + if (!ret->SetTableHints(Ctx, Ctx.Pos(), hints, contextHints)) { + return false; + } + } + + result.Source = ret; + return true; + } + case TRule_table_ref::TBlock3::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + MergeHints(hints, tableHints); + + if (node.HasBlock4()) { + auto tmp = TableHintsImpl(node.GetBlock4().GetRule_table_hints1(), service, keyFunc.GetOrElse("")); + if (!tmp) { + Ctx.Error() << "Failed to parse table hints"; + return false; + } + + MergeHints(hints, *tmp); + } + + if (!hints.empty()) { + tr.Options = BuildInputOptions(pos, hints); + } + + if (!tr.Keys) { + return false; + } + + result = tr; + return true; +} + +TMaybe<TSourcePtr> TSqlTranslation::AsTableImpl(const TRule_table_ref& node) { + const auto& block = node.GetBlock3(); + + if (block.Alt_case() == TRule_table_ref::TBlock3::kAlt2) { + auto& alt = block.GetAlt2(); + TCiString func(Id(alt.GetRule_an_id_expr1(), *this)); + + if (func == "as_table") { + if (node.HasBlock1()) { + Ctx.Error() << "Cluster shouldn't be specified for AS_TABLE source"; + return TMaybe<TSourcePtr>(nullptr); + } + + if (!alt.HasBlock3() || !alt.GetBlock3().GetBlock2().empty()) { + Ctx.Error() << "Expected single argument for AS_TABLE source"; + return TMaybe<TSourcePtr>(nullptr); + } + + if (node.HasBlock4()) { + Ctx.Error() << "No hints expected for AS_TABLE source"; + return TMaybe<TSourcePtr>(nullptr); + } + + auto arg = 
TableArgImpl(alt.GetBlock3().GetRule_table_arg1()); + if (!arg) { + return TMaybe<TSourcePtr>(nullptr); + } + + if (arg->Expr->GetSource()) { + Ctx.Error() << "AS_TABLE shouldn't be used for table sources"; + return TMaybe<TSourcePtr>(nullptr); + } + + return BuildNodeSource(Ctx.Pos(), arg->Expr, true); + } + } + + return Nothing(); +} + +TMaybe<TColumnConstraints> ColumnConstraints(const TRule_column_schema& node, TTranslation& ctx) { + TNodePtr defaultExpr = nullptr; + bool nullable = true; + + auto constraintsNode = node.GetRule_opt_column_constraints4(); + if (constraintsNode.HasBlock1()) { + nullable = !constraintsNode.GetBlock1().HasBlock1(); + } + if (constraintsNode.HasBlock2()) { + TSqlExpression expr(ctx.Context(), ctx.Context().Settings.Mode); + defaultExpr = expr.Build(constraintsNode.GetBlock2().GetRule_expr2()); + if (!defaultExpr) { + return {}; + } + } + + return TColumnConstraints(defaultExpr, nullable); +} + +TMaybe<TColumnSchema> TSqlTranslation::ColumnSchemaImpl(const TRule_column_schema& node) { + const TString name(Id(node.GetRule_an_id_schema1(), *this)); + const TPosition pos(Context().Pos()); + TNodePtr type = SerialTypeNode(node.GetRule_type_name_or_bind2()); + const bool serial = (type != nullptr); + + const auto constraints = ColumnConstraints(node, *this); + if (!constraints){ + return {}; + } + + if (!type) { + type = TypeNodeOrBind(node.GetRule_type_name_or_bind2()); + } + + if (!type) { + return {}; + } + TVector<TIdentifier> families; + if (node.HasBlock3()) { + const auto& familyRelation = node.GetBlock3().GetRule_family_relation1(); + families.push_back(IdEx(familyRelation.GetRule_an_id2(), *this)); + } + return TColumnSchema(pos, name, type, constraints->Nullable, families, serial, constraints->DefaultExpr); +} + +TNodePtr TSqlTranslation::SerialTypeNode(const TRule_type_name_or_bind& node) { + if (node.Alt_case() != TRule_type_name_or_bind::kAltTypeNameOrBind1) { + return nullptr; + } + + TPosition pos = Ctx.Pos(); + + auto 
typeNameNode = node.GetAlt_type_name_or_bind1().GetRule_type_name1(); + if (typeNameNode.Alt_case() != TRule_type_name::kAltTypeName2) { + return nullptr; + } + + auto alt = typeNameNode.GetAlt_type_name2(); + auto& block = alt.GetBlock1(); + if (block.Alt_case() != TRule_type_name::TAlt2::TBlock1::kAlt2) { + return nullptr; + } + + auto alt2 = block.GetAlt2().GetRule_type_name_simple1(); + const TString name = Id(alt2.GetRule_an_id_pure1(), *this); + if (name.empty()) { + return nullptr; + } + + const auto res = to_lower(name); + if (res == "bigserial" || res == "serial8") { + return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Int64", TNodeFlags::Default) }); + } else if (res == "serial" || res == "serial4") { + return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Int32", TNodeFlags::Default) }); + } else if (res == "smallserial" || res == "serial2") { + return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Int16", TNodeFlags::Default) }); + } + + return nullptr; +} + +bool TSqlTranslation::FillFamilySettingsEntry(const TRule_family_settings_entry& settingNode, TFamilyEntry& family) { + TIdentifier id = IdEx(settingNode.GetRule_an_id1(), *this); + const TRule_family_setting_value& value = settingNode.GetRule_family_setting_value3(); + if (to_lower(id.Name) == "data") { + const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1())); + family.Data = BuildLiteralSmartString(Ctx, stringValue); + } else if (to_lower(id.Name) == "compression") { + const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1())); + family.Compression = BuildLiteralSmartString(Ctx, stringValue); + } else if (to_lower(id.Name) == "compression_level") { + family.CompressionLevel = LiteralNumber(Ctx, value.GetAlt_family_setting_value2().GetRule_integer1()); + } else { + Ctx.Error() << "Unknown table setting: " << id.Name; + return false; + } + return true; +} + +bool 
TSqlTranslation::FillFamilySettings(const TRule_family_settings& settingsNode, TFamilyEntry& family) { + // family_settings: LPAREN (family_settings_entry (COMMA family_settings_entry)*)? RPAREN; + if (settingsNode.HasBlock2()) { + auto& settings = settingsNode.GetBlock2(); + if (!FillFamilySettingsEntry(settings.GetRule_family_settings_entry1(), family)) { + return false; + } + for (auto& block : settings.GetBlock2()) { + if (!FillFamilySettingsEntry(block.GetRule_family_settings_entry2(), family)) { + return false; + } + } + } + return true; +} + + + +bool TSqlTranslation::CreateTableEntry(const TRule_create_table_entry& node, TCreateTableParameters& params, const bool isCreateTableAs) +{ + switch (node.Alt_case()) { + case TRule_create_table_entry::kAltCreateTableEntry1: + { + if (isCreateTableAs) { + Ctx.Error() << "Column types are not supported for CREATE TABLE AS"; + return false; + } + // column_schema + auto columnSchema = ColumnSchemaImpl(node.GetAlt_create_table_entry1().GetRule_column_schema1()); + if (!columnSchema) { + return false; + } + if (columnSchema->Families.size() > 1) { + Ctx.Error() << "Several column families for a single column are not yet supported"; + return false; + } + params.Columns.push_back(*columnSchema); + break; + } + case TRule_create_table_entry::kAltCreateTableEntry2: + { + // table_constraint + auto& constraint = node.GetAlt_create_table_entry2().GetRule_table_constraint1(); + switch (constraint.Alt_case()) { + case TRule_table_constraint::kAltTableConstraint1: { + if (!params.PkColumns.empty()) { + Ctx.Error() << "PRIMARY KEY statement must be specified only once"; + return false; + } + auto& pkConstraint = constraint.GetAlt_table_constraint1(); + params.PkColumns.push_back(IdEx(pkConstraint.GetRule_an_id4(), *this)); + for (auto& block : pkConstraint.GetBlock5()) { + params.PkColumns.push_back(IdEx(block.GetRule_an_id2(), *this)); + } + break; + } + case TRule_table_constraint::kAltTableConstraint2: { + if 
(!params.PartitionByColumns.empty()) { + Ctx.Error() << "PARTITION BY statement must be specified only once"; + return false; + } + auto& pbConstraint = constraint.GetAlt_table_constraint2(); + params.PartitionByColumns.push_back(IdEx(pbConstraint.GetRule_an_id4(), *this)); + for (auto& block : pbConstraint.GetBlock5()) { + params.PartitionByColumns.push_back(IdEx(block.GetRule_an_id2(), *this)); + } + break; + } + case TRule_table_constraint::kAltTableConstraint3: { + if (!params.OrderByColumns.empty()) { + Ctx.Error() << "ORDER BY statement must be specified only once"; + return false; + } + auto& obConstraint = constraint.GetAlt_table_constraint3(); + auto extractDirection = [this] (const TRule_column_order_by_specification& spec, bool& desc) { + desc = false; + if (!spec.HasBlock2()) { + return true; + } + + auto& token = spec.GetBlock2().GetToken1(); + auto tokenId = token.GetId(); + if (IS_TOKEN(tokenId, ASC)) { + return true; + } else if (IS_TOKEN(tokenId, DESC)) { + desc = true; + return true; + } else { + Ctx.Error() << "Unsupported direction token: " << token.GetId(); + return false; + } + }; + + bool desc = false; + auto& obSpec = obConstraint.GetRule_column_order_by_specification4(); + if (!extractDirection(obSpec, desc)) { + return false; + } + params.OrderByColumns.push_back(std::make_pair(IdEx(obSpec.GetRule_an_id1(), *this), desc)); + + for (auto& block : obConstraint.GetBlock5()) { + auto& obSpec = block.GetRule_column_order_by_specification2(); + if (!extractDirection(obSpec, desc)) { + return false; + } + params.OrderByColumns.push_back(std::make_pair(IdEx(obSpec.GetRule_an_id1(), *this), desc)); + } + break; + } + default: + AltNotImplemented("table_constraint", constraint); + return false; + } + break; + } + case TRule_create_table_entry::kAltCreateTableEntry3: + { + // table_index + auto& table_index = node.GetAlt_create_table_entry3().GetRule_table_index1(); + if (!CreateTableIndex(table_index, params.Indexes)) { + return false; + } + break; 
+ } + case TRule_create_table_entry::kAltCreateTableEntry4: + { + if (isCreateTableAs) { + Ctx.Error() << "Column families are not supported for CREATE TABLE AS"; + return false; + } + // family_entry + auto& family_entry = node.GetAlt_create_table_entry4().GetRule_family_entry1(); + TFamilyEntry family(IdEx(family_entry.GetRule_an_id2(), *this)); + if (!FillFamilySettings(family_entry.GetRule_family_settings3(), family)) { + return false; + } + params.ColumnFamilies.push_back(family); + break; + } + case TRule_create_table_entry::kAltCreateTableEntry5: + { + // changefeed + auto& changefeed = node.GetAlt_create_table_entry5().GetRule_changefeed1(); + TSqlExpression expr(Ctx, Mode); + if (!CreateChangefeed(changefeed, expr, params.Changefeeds)) { + return false; + } + break; + } + case TRule_create_table_entry::kAltCreateTableEntry6: + { + if (!isCreateTableAs) { + Ctx.Error() << "Column requires a type"; + return false; + } + // an_id_schema + const TString name(Id(node.GetAlt_create_table_entry6().GetRule_an_id_schema1(), *this)); + const TPosition pos(Context().Pos()); + + params.Columns.push_back(TColumnSchema(pos, name, nullptr, true, {}, false, nullptr)); + break; + } + default: + AltNotImplemented("create_table_entry", node); + return false; + } + return true; +} + +namespace { + bool StoreId(const TRule_table_setting_value& from, TMaybe<TIdentifier>& to, TTranslation& ctx) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue1: { + // id + to = IdEx(from.GetAlt_table_setting_value1().GetRule_id1(), ctx); + break; + } + default: + return false; + } + return true; + } + + bool StoreString(const TRule_table_setting_value& from, TNodePtr& to, TContext& ctx) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue2: { + // STRING_VALUE + const TString stringValue(ctx.Token(from.GetAlt_table_setting_value2().GetToken1())); + to = BuildLiteralSmartString(ctx, stringValue); + break; + } + default: + 
return false; + } + return true; + } + + bool StoreString(const TRule_table_setting_value& from, TDeferredAtom& to, TContext& ctx, const TString& errorPrefix = {}) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue2: { + // STRING_VALUE + const TString stringValue(ctx.Token(from.GetAlt_table_setting_value2().GetToken1())); + auto unescaped = StringContent(ctx, ctx.Pos(), stringValue); + if (!unescaped) { + ctx.Error() << errorPrefix << " value cannot be unescaped"; + return false; + } + to = TDeferredAtom(ctx.Pos(), unescaped->Content); + break; + } + default: + ctx.Error() << errorPrefix << " value should be a string literal"; + return false; + } + return true; + } + + bool StoreInt(const TRule_table_setting_value& from, TNodePtr& to, TContext& ctx) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue3: { + // integer + to = LiteralNumber(ctx, from.GetAlt_table_setting_value3().GetRule_integer1()); + break; + } + default: + return false; + } + return true; + } + + bool StoreInt(const TRule_table_setting_value& from, TDeferredAtom& to, TContext& ctx, const TString& errorPrefix = {}) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue3: { + // integer + to = TDeferredAtom(LiteralNumber(ctx, from.GetAlt_table_setting_value3().GetRule_integer1()), ctx); + break; + } + default: + ctx.Error() << errorPrefix << " value should be an integer"; + return false; + } + return true; + } + + bool StoreSplitBoundary(const TRule_literal_value_list& boundary, TVector<TVector<TNodePtr>>& to, + TSqlExpression& expr, TContext& ctx) { + TVector<TNodePtr> boundaryKeys; + auto first_key = expr.LiteralExpr(boundary.GetRule_literal_value2()); + if (!first_key) { + ctx.Error() << "Empty key in partition at keys"; + return false; + } + if (!first_key->Expr) { + ctx.Error() << "Identifier is not expected in partition at keys"; + return false; + } + 
boundaryKeys.emplace_back(first_key->Expr); + for (auto& key : boundary.GetBlock3()) { + auto keyExprOrIdent = expr.LiteralExpr(key.GetRule_literal_value2()); + if (!keyExprOrIdent) { + ctx.Error() << "Empty key in partition at keys"; + return false; + } + if (!keyExprOrIdent->Expr) { + ctx.Error() << "Identifier is not expected in partition at keys"; + return false; + } + boundaryKeys.emplace_back(keyExprOrIdent->Expr); + } + to.push_back(boundaryKeys); + return true; + } + + bool StoreSplitBoundaries(const TRule_table_setting_value& from, TVector<TVector<TNodePtr>>& to, + TSqlExpression& expr, TContext& ctx) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue4: { + // split_boundaries + const auto& boundariesNode = from.GetAlt_table_setting_value4().GetRule_split_boundaries1(); + switch (boundariesNode.Alt_case()) { + case TRule_split_boundaries::kAltSplitBoundaries1: { + // literal_value_list (COMMA literal_value_list)* + auto& complexBoundaries = boundariesNode.GetAlt_split_boundaries1(); + + auto& first_boundary = complexBoundaries.GetRule_literal_value_list2(); + if (!StoreSplitBoundary(first_boundary, to, expr, ctx)) { + return false; + } + + for (auto& boundary : complexBoundaries.GetBlock3()) { + if (!StoreSplitBoundary(boundary.GetRule_literal_value_list2(), to, expr, ctx)) { + return false; + } + } + break; + } + case TRule_split_boundaries::kAltSplitBoundaries2: { + // literal_value_list + auto& simpleBoundaries = boundariesNode.GetAlt_split_boundaries2().GetRule_literal_value_list1(); + auto first_key = expr.LiteralExpr(simpleBoundaries.GetRule_literal_value2()); + if (!first_key) { + ctx.Error() << "Empty key in partition at keys"; + return false; + } + if (!first_key->Expr) { + ctx.Error() << "Identifier is not expected in partition at keys"; + return false; + } + to.push_back(TVector<TNodePtr>(1, first_key->Expr)); + for (auto& key : simpleBoundaries.GetBlock3()) { + auto keyExprOrIdent = 
expr.LiteralExpr(key.GetRule_literal_value2()); + if (!keyExprOrIdent) { + ctx.Error() << "Empty key in partition at keys"; + return false; + } + if (!first_key->Expr) { + ctx.Error() << "Identifier is not expected in partition at keys"; + return false; + } + to.push_back( + TVector<TNodePtr>(1, keyExprOrIdent->Expr) + ); + } + break; + } + default: + return false; + } + break; + } + default: + return false; + } + return true; + } + + bool StoreTtlSettings(const TRule_table_setting_value& from, TResetableSetting<TTtlSettings, void>& to, + TSqlExpression& expr, TContext& ctx, TTranslation& txc) { + switch (from.Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue5: { + auto columnName = IdEx(from.GetAlt_table_setting_value5().GetRule_an_id3(), txc); + auto exprNode = expr.Build(from.GetAlt_table_setting_value5().GetRule_expr1()); + if (!exprNode) { + return false; + } + + if (exprNode->GetOpName() != "Interval") { + ctx.Error() << "Literal of Interval type is expected for TTL"; + return false; + } + + TMaybe<TTtlSettings::EUnit> columnUnit; + if (from.GetAlt_table_setting_value5().HasBlock4()) { + const TString unit = to_lower(ctx.Token(from.GetAlt_table_setting_value5().GetBlock4().GetToken2())); + columnUnit.ConstructInPlace(); + if (!TryFromString<TTtlSettings::EUnit>(unit, *columnUnit)) { + ctx.Error() << "Invalid unit: " << unit; + return false; + } + } + + to.Set(TTtlSettings(columnName, exprNode, columnUnit)); + break; + } + default: + return false; + } + return true; + } + + template<typename TChar> + struct TPatternComponent { + TBasicString<TChar> Prefix; + TBasicString<TChar> Suffix; + bool IsSimple = true; + + void AppendPlain(TChar c) { + if (IsSimple) { + Prefix.push_back(c); + } + Suffix.push_back(c); + } + + void AppendAnyChar() { + IsSimple = false; + Suffix.clear(); + } + }; + + template<typename TChar> + TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& pattern, TMaybe<char> escape, bool& inEscape) { + 
inEscape = false; + TVector<TPatternComponent<TChar>> result; + TPatternComponent<TChar> current; + bool prevIsPercentChar = false; + for (const TChar c : pattern) { + if (inEscape) { + current.AppendPlain(c); + inEscape = false; + prevIsPercentChar = false; + } else if (escape && c == static_cast<TChar>(*escape)) { + inEscape = true; + } else if (c == '%') { + if (!prevIsPercentChar) { + result.push_back(std::move(current)); + } + current = {}; + prevIsPercentChar = true; + } else if (c == '_') { + current.AppendAnyChar(); + prevIsPercentChar = false; + } else { + current.AppendPlain(c); + prevIsPercentChar = false; + } + } + result.push_back(std::move(current)); + return result; + } +} + +bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, + TTableSettings& settings, ETableType tableType, bool alter, bool reset) { + switch (tableType) { + case ETableType::ExternalTable: + return StoreExternalTableSettingsEntry(id, value, settings, alter, reset); + case ETableType::Table: + case ETableType::TableStore: + return StoreTableSettingsEntry(id, value, settings, alter, reset); + } +} + +bool TSqlTranslation::StoreExternalTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, + TTableSettings& settings, bool alter, bool reset) { + YQL_ENSURE(value || reset); + YQL_ENSURE(!reset || reset && alter); + if (to_lower(id.Name) == "data_source") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + TDeferredAtom dataSource; + if (!StoreString(*value, dataSource, Ctx, to_upper(id.Name))) { + return false; + } + TString service = Context().Scoped->CurrService; + TDeferredAtom cluster = Context().Scoped->CurrCluster; + TNodePtr root = new TAstListNodeImpl(Ctx.Pos()); + root->Add("String", Ctx.GetPrefixedPath(service, cluster, dataSource)); + settings.DataSourcePath = root; + } else if (to_lower(id.Name) == "location") { + if (reset) { + 
settings.Location.Reset(); + } else { + TNodePtr location; + if (!StoreString(*value, location, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + settings.Location.Set(location); + } + } else { + auto& setting = settings.ExternalSourceParameters.emplace_back(); + if (reset) { + setting.Reset(id); + } else { + TNodePtr node; + if (!StoreString(*value, node, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + setting.Set(std::pair<TIdentifier, TNodePtr>{id, std::move(node)}); + } + } + return true; +} + +bool TSqlTranslation::ValidateTableSettings(const TTableSettings& settings) { + if (settings.PartitionCount) { + if (!settings.StoreType || to_lower(settings.StoreType->Name) != "column") { + Ctx.Error() << " PARTITION_COUNT can be used only with STORE=COLUMN"; + return false; + } + } + + return true; +} + +bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, + TTableSettings& settings, bool alter, bool reset) { + YQL_ENSURE(value || reset); + YQL_ENSURE(!reset || reset && alter); + if (to_lower(id.Name) == "compaction_policy") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreString(*value, settings.CompactionPolicy, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + } else if (to_lower(id.Name) == "auto_partitioning_by_size") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreId(*value, settings.AutoPartitioningBySize, *this)) { + Ctx.Error() << to_upper(id.Name) << " value should be an identifier"; + return false; + } + } else if (to_lower(id.Name) == "auto_partitioning_partition_size_mb") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreInt(*value, 
settings.PartitionSizeMb, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + } else if (to_lower(id.Name) == "auto_partitioning_by_load") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreId(*value, settings.AutoPartitioningByLoad, *this)) { + Ctx.Error() << to_upper(id.Name) << " value should be an identifier"; + return false; + } + } else if (to_lower(id.Name) == "auto_partitioning_min_partitions_count") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreInt(*value, settings.MinPartitions, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + } else if (to_lower(id.Name) == "auto_partitioning_max_partitions_count") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreInt(*value, settings.MaxPartitions, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + } else if (to_lower(id.Name) == "partition_count") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + + if (!StoreInt(*value, settings.PartitionCount, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + } else if (to_lower(id.Name) == "uniform_partitions") { + if (alter) { + Ctx.Error() << to_upper(id.Name) << " alter is not supported"; + return false; + } + if (!StoreInt(*value, settings.UniformPartitions, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + } else if (to_lower(id.Name) == "partition_at_keys") { + if (alter) { + Ctx.Error() << to_upper(id.Name) << " alter is not supported"; + return false; + } + TSqlExpression expr(Ctx, Mode); + if (!StoreSplitBoundaries(*value, settings.PartitionAtKeys, expr, Ctx)) { + Ctx.Error() << 
to_upper(id.Name) << " value should be a list of keys. " + << "Example1: (10, 1000) Example2: ((10), (1000, \"abc\"))"; + return false; + } + } else if (to_lower(id.Name) == "key_bloom_filter") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreId(*value, settings.KeyBloomFilter, *this)) { + Ctx.Error() << to_upper(id.Name) << " value should be an identifier"; + return false; + } + } else if (to_lower(id.Name) == "read_replicas_settings") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreString(*value, settings.ReadReplicasSettings, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + } else if (to_lower(id.Name) == "ttl") { + if (!reset) { + TSqlExpression expr(Ctx, Mode); + if (!StoreTtlSettings(*value, settings.TtlSettings, expr, Ctx, *this)) { + Ctx.Error() << "Invalid TTL settings"; + return false; + } + } else { + settings.TtlSettings.Reset(); + } + } else if (to_lower(id.Name) == "tiering") { + if (!reset) { + TNodePtr tieringNode; + if (!StoreString(*value, tieringNode, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + settings.Tiering.Set(tieringNode); + } else { + settings.Tiering.Reset(); + } + } else if (to_lower(id.Name) == "store") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreId(*value, settings.StoreType, *this)) { + Ctx.Error() << to_upper(id.Name) << " value should be an identifier"; + return false; + } + } else if (to_lower(id.Name) == "partition_by_hash_function") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreString(*value, settings.PartitionByHashFunction, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + } else if 
(to_lower(id.Name) == "store_external_blobs") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!StoreId(*value, settings.StoreExternalBlobs, *this)) { + Ctx.Error() << to_upper(id.Name) << " value should be an identifier"; + return false; + } + } else { + Ctx.Error() << "Unknown table setting: " << id.Name; + return false; + } + + return ValidateTableSettings(settings); +} + +bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value& value, + TTableSettings& settings, ETableType tableType, bool alter) { + return StoreTableSettingsEntry(id, &value, settings, tableType, alter, false); +} + +bool TSqlTranslation::ResetTableSettingsEntry(const TIdentifier& id, TTableSettings& settings, ETableType tableType) { + return StoreTableSettingsEntry(id, nullptr, settings, tableType, true, true); +} + +bool TSqlTranslation::CreateTableSettings(const TRule_with_table_settings& settingsNode, TCreateTableParameters& params) { + const auto& firstEntry = settingsNode.GetRule_table_settings_entry3(); + if (!StoreTableSettingsEntry(IdEx(firstEntry.GetRule_an_id1(), *this), firstEntry.GetRule_table_setting_value3(), + params.TableSettings, params.TableType)) { + return false; + } + for (auto& block : settingsNode.GetBlock4()) { + const auto& entry = block.GetRule_table_settings_entry2(); + if (!StoreTableSettingsEntry(IdEx(entry.GetRule_an_id1(), *this), entry.GetRule_table_setting_value3(), params.TableSettings, params.TableType)) { + return false; + } + } + return true; +} + +bool StoreConsumerSettingsEntry( + const TIdentifier& id, const TRule_topic_consumer_setting_value* value, TSqlExpression& ctx, + TTopicConsumerSettings& settings, + bool reset +) { + YQL_ENSURE(value || reset); + TNodePtr valueExprNode; + if (value) { + valueExprNode = ctx.Build(value->GetRule_expr1()); + if (!valueExprNode) { + ctx.Error() << "invalid value for setting: " << id.Name; + return false; + } + } + 
if (to_lower(id.Name) == "important") { + if (settings.Important) { + ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer"; + return false; + } + if (reset) { + ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + if (!valueExprNode->IsLiteral() || valueExprNode->GetLiteralType() != "Bool") { + ctx.Error() << to_upper(id.Name) << " value should be boolean"; + return false; + } + settings.Important = valueExprNode; + + } else if (to_lower(id.Name) == "read_from") { + if (settings.ReadFromTs) { + ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer"; + return false; + } + if (reset) { + settings.ReadFromTs.Reset(); + } else { + //ToDo: !! validate + settings.ReadFromTs.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "supported_codecs") { + if (settings.SupportedCodecs) { + ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer"; + return false; + } + if (reset) { + settings.SupportedCodecs.Reset(); + } else { + if (!valueExprNode->IsLiteral() || valueExprNode->GetLiteralType() != "String") { + ctx.Error() << to_upper(id.Name) << " value should be a string literal"; + return false; + } + settings.SupportedCodecs.Set(valueExprNode); + } + } else { + ctx.Error() << to_upper(id.Name) << ": unknown option for consumer"; + return false; + } + return true; +} + +TIdentifier TSqlTranslation::GetTopicConsumerId(const TRule_topic_consumer_ref& node) { + return IdEx(node.GetRule_an_id_pure1(), *this); +} + +bool TSqlTranslation::CreateConsumerSettings( + const TRule_topic_consumer_settings& node, TTopicConsumerSettings& settings +) { + const auto& firstEntry = node.GetRule_topic_consumer_settings_entry1(); + TSqlExpression expr(Ctx, Mode); + if (!StoreConsumerSettingsEntry( + IdEx(firstEntry.GetRule_an_id1(), *this), + 
&firstEntry.GetRule_topic_consumer_setting_value3(), + expr, settings, false + )) { + return false; + } + for (auto& block : node.GetBlock2()) { + const auto& entry = block.GetRule_topic_consumer_settings_entry2(); + if (!StoreConsumerSettingsEntry( + IdEx(entry.GetRule_an_id1(), *this), + &entry.GetRule_topic_consumer_setting_value3(), + expr, settings, false + )) { + return false; + } + } + return true; +} + +bool TSqlTranslation::CreateTopicConsumer( + const TRule_topic_create_consumer_entry& node, + TVector<TTopicConsumerDescription>& consumers +) { + consumers.emplace_back(IdEx(node.GetRule_an_id2(), *this)); + + if (node.HasBlock3()) { + auto& settings = node.GetBlock3().GetRule_topic_consumer_with_settings1().GetRule_topic_consumer_settings3(); + if (!CreateConsumerSettings(settings, consumers.back().Settings)) { + return false; + } + } + + return true; +} + +bool TSqlTranslation::AlterTopicConsumerEntry( + const TRule_alter_topic_alter_consumer_entry& node, TTopicConsumerDescription& alterConsumer +) { + switch (node.Alt_case()) { + case TRule_alter_topic_alter_consumer_entry::kAltAlterTopicAlterConsumerEntry1: + return CreateConsumerSettings( + node.GetAlt_alter_topic_alter_consumer_entry1().GetRule_topic_alter_consumer_set1() + .GetRule_topic_consumer_settings3(), + alterConsumer.Settings + ); + //case TRule_alter_topic_alter_consumer_entry::ALT_NOT_SET: + case TRule_alter_topic_alter_consumer_entry::kAltAlterTopicAlterConsumerEntry2: { + auto& resetNode = node.GetAlt_alter_topic_alter_consumer_entry2().GetRule_topic_alter_consumer_reset1(); + TSqlExpression expr(Ctx, Mode); + if (!StoreConsumerSettingsEntry( + IdEx(resetNode.GetRule_an_id3(), *this), + nullptr, + expr, alterConsumer.Settings, true + )) { + return false; + } + + for (auto& resetItem: resetNode.GetBlock4()) { + if (!StoreConsumerSettingsEntry( + IdEx(resetItem.GetRule_an_id2(), *this), + nullptr, + expr, alterConsumer.Settings, true + )) { + return false; + } + } + return true; + } + 
default: + Ctx.Error() << "unknown alter consumer action"; + return false; + } + return true; +} + +bool TSqlTranslation::AlterTopicConsumer( + const TRule_alter_topic_alter_consumer& node, + THashMap<TString, TTopicConsumerDescription>& alterConsumers +) { + auto consumerId = GetTopicConsumerId(node.GetRule_topic_consumer_ref3()); + TString name = to_lower(consumerId.Name); + auto iter = alterConsumers.insert(std::make_pair( + name, TTopicConsumerDescription(std::move(consumerId)) + )).first; + if (!AlterTopicConsumerEntry(node.GetRule_alter_topic_alter_consumer_entry4(), iter->second)) { + return false; + } + return true; +} + +bool TSqlTranslation::CreateTopicEntry(const TRule_create_topic_entry& node, TCreateTopicParameters& params) { + // Will need a switch() here if (ever) create_topic_entry gets more than 1 type of statement + auto& consumer = node.GetRule_topic_create_consumer_entry1(); + if (!CreateTopicConsumer(consumer, params.Consumers)) { + return false; + } + return true; +} + +static bool StoreTopicSettingsEntry( + const TIdentifier& id, const TRule_topic_setting_value* value, TSqlExpression& ctx, + TTopicSettings& settings, bool reset +) { + YQL_ENSURE(value || reset); + TNodePtr valueExprNode; + if (value) { + valueExprNode = ctx.Build(value->GetRule_expr1()); + if (!valueExprNode) { + ctx.Error() << "invalid value for setting: " << id.Name; + return false; + } + } + + if (to_lower(id.Name) == "min_active_partitions") { + if (reset) { + settings.MinPartitions.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + settings.MinPartitions.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "partition_count_limit" || to_lower(id.Name) == "max_active_partitions") { + if (reset) { + settings.MaxPartitions.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return 
false; + } + settings.MaxPartitions.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "retention_period") { + if (reset) { + settings.RetentionPeriod.Reset(); + } else { + if (valueExprNode->GetOpName() != "Interval") { + ctx.Error() << "Literal of Interval type is expected for retention"; + return false; + } + settings.RetentionPeriod.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "retention_storage_mb") { + if (reset) { + settings.RetentionStorage.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + settings.RetentionStorage.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "partition_write_speed_bytes_per_second") { + if (reset) { + settings.PartitionWriteSpeed.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + settings.PartitionWriteSpeed.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "partition_write_burst_bytes") { + if (reset) { + settings.PartitionWriteBurstSpeed.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + settings.PartitionWriteBurstSpeed.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "metering_mode") { + if (reset) { + settings.MeteringMode.Reset(); + } else { + if (!valueExprNode->IsLiteral() || valueExprNode->GetLiteralType() != "String") { + ctx.Error() << to_upper(id.Name) << " value should be string"; + return false; + } + settings.MeteringMode.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "supported_codecs") { + if (reset) { + settings.SupportedCodecs.Reset(); + } else { + if (!valueExprNode->IsLiteral() || valueExprNode->GetLiteralType() != "String") { + ctx.Error() << to_upper(id.Name) << " value should be string"; + return false; + } + 
settings.SupportedCodecs.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "auto_partitioning_stabilization_window") { + if (reset) { + settings.AutoPartitioningStabilizationWindow.Reset(); + } else { + if (valueExprNode->GetOpName() != "Interval") { + ctx.Error() << "Literal of Interval type is expected for retention"; + return false; + } + settings.AutoPartitioningStabilizationWindow.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "auto_partitioning_up_utilization_percent") { + if (reset) { + settings.AutoPartitioningUpUtilizationPercent.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + settings.AutoPartitioningUpUtilizationPercent.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "auto_partitioning_down_utilization_percent") { + if (reset) { + settings.AutoPartitioningDownUtilizationPercent.Reset(); + } else { + if (!valueExprNode->IsIntegerLiteral()) { + ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } + settings.AutoPartitioningDownUtilizationPercent.Set(valueExprNode); + } + } else if (to_lower(id.Name) == "auto_partitioning_strategy") { + if (reset) { + settings.AutoPartitioningStrategy.Reset(); + } else { + if (!valueExprNode->IsLiteral() || valueExprNode->GetLiteralType() != "String") { + ctx.Error() << to_upper(id.Name) << " value should be string"; + return false; + } + settings.AutoPartitioningStrategy.Set(valueExprNode); + } + } else { + ctx.Error() << "unknown topic setting: " << id.Name; + return false; + } + return true; +} + +bool TSqlTranslation::AlterTopicAction(const TRule_alter_topic_action& node, TAlterTopicParameters& params) { +// alter_topic_action: +// alter_topic_add_consumer +// | alter_topic_alter_consumer +// | alter_topic_drop_consumer +// | alter_topic_set_settings +// | alter_topic_reset_settings + + switch (node.Alt_case()) { + case 
TRule_alter_topic_action::kAltAlterTopicAction1: // alter_topic_add_consumer + return CreateTopicConsumer( + node.GetAlt_alter_topic_action1().GetRule_alter_topic_add_consumer1() + .GetRule_topic_create_consumer_entry2(), + params.AddConsumers + ); + + case TRule_alter_topic_action::kAltAlterTopicAction2: // alter_topic_alter_consumer + return AlterTopicConsumer( + node.GetAlt_alter_topic_action2().GetRule_alter_topic_alter_consumer1(), + params.AlterConsumers + ); + + case TRule_alter_topic_action::kAltAlterTopicAction3: // drop_consumer + params.DropConsumers.emplace_back(GetTopicConsumerId( + node.GetAlt_alter_topic_action3().GetRule_alter_topic_drop_consumer1() + .GetRule_topic_consumer_ref3() + )); + return true; + + case TRule_alter_topic_action::kAltAlterTopicAction4: // set_settings + return CreateTopicSettings( + node.GetAlt_alter_topic_action4().GetRule_alter_topic_set_settings1() + .GetRule_topic_settings3(), + params.TopicSettings + ); + + case TRule_alter_topic_action::kAltAlterTopicAction5: { // reset_settings + auto& resetNode = node.GetAlt_alter_topic_action5().GetRule_alter_topic_reset_settings1(); + TSqlExpression expr(Ctx, Mode); + if (!StoreTopicSettingsEntry( + IdEx(resetNode.GetRule_an_id3(), *this), + nullptr, expr, + params.TopicSettings, true + )) { + return false; + } + + for (auto& resetItem: resetNode.GetBlock4()) { + if (!StoreTopicSettingsEntry( + IdEx(resetItem.GetRule_an_id_pure2(), *this), + nullptr, expr, + params.TopicSettings, true + )) { + return false; + } + } + return true; + } + default: + Ctx.Error() << "unknown alter topic action"; + return false; + } + return true; +} + +bool TSqlTranslation::CreateTopicSettings(const TRule_topic_settings& node, TTopicSettings& settings) { + const auto& firstEntry = node.GetRule_topic_settings_entry1(); + TSqlExpression expr(Ctx, Mode); + + if (!StoreTopicSettingsEntry( + IdEx(firstEntry.GetRule_an_id1(), *this), + &firstEntry.GetRule_topic_setting_value3(), + expr, settings, false + )) { 
+ return false; + } + for (auto& block : node.GetBlock2()) { + const auto& entry = block.GetRule_topic_settings_entry2(); + if (!StoreTopicSettingsEntry( + IdEx(entry.GetRule_an_id1(), *this), + &entry.GetRule_topic_setting_value3(), + expr, settings, false + )) { + return false; + } + } + return true; +} + +TNodePtr TSqlTranslation::IntegerOrBind(const TRule_integer_or_bind& node) { + switch (node.Alt_case()) { + case TRule_integer_or_bind::kAltIntegerOrBind1: { + const TString intString = Ctx.Token(node.GetAlt_integer_or_bind1().GetRule_integer1().GetToken1()); + ui64 value; + TString suffix; + if (!ParseNumbers(Ctx, intString, value, suffix)) { + return {}; + } + return BuildQuotedAtom(Ctx.Pos(), ToString(value), TNodeFlags::ArbitraryContent); + } + case TRule_integer_or_bind::kAltIntegerOrBind2: { + TString bindName; + if (!NamedNodeImpl(node.GetAlt_integer_or_bind2().GetRule_bind_parameter1(), bindName, *this)) { + return {}; + } + auto namedNode = GetNamedNode(bindName); + if (!namedNode) { + return {}; + } + auto atom = MakeAtomFromExpression(Ctx.Pos(), Ctx, namedNode); + return atom.Build(); + } + case TRule_integer_or_bind::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TNodePtr TSqlTranslation::TypeNameTag(const TRule_type_name_tag& node) { + switch (node.Alt_case()) { + case TRule_type_name_tag::kAltTypeNameTag1: { + auto content = Id(node.GetAlt_type_name_tag1().GetRule_id1(), *this); + auto atom = TDeferredAtom(Ctx.Pos(), content); + return atom.Build(); + } + case TRule_type_name_tag::kAltTypeNameTag2: { + auto value = Token(node.GetAlt_type_name_tag2().GetToken1()); + auto parsed = StringContentOrIdContent(Ctx, Ctx.Pos(), value); + if (!parsed) { + return {}; + } + auto atom = TDeferredAtom(Ctx.Pos(), parsed->Content); + return atom.Build(); + } + case TRule_type_name_tag::kAltTypeNameTag3: { + TString bindName; + if (!NamedNodeImpl(node.GetAlt_type_name_tag3().GetRule_bind_parameter1(), bindName, 
*this)) { + return {}; + } + auto namedNode = GetNamedNode(bindName); + if (!namedNode) { + return {}; + } + TDeferredAtom atom; + MakeTableFromExpression(Ctx.Pos(), Ctx, namedNode, atom); + return atom.Build(); + } + case TRule_type_name_tag::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TNodePtr TSqlTranslation::TypeSimple(const TRule_type_name_simple& node, bool onlyDataAllowed) { + const TString origName = Id(node.GetRule_an_id_pure1(), *this); + if (origName.empty()) { + return {}; + } + return BuildSimpleType(Ctx, Ctx.Pos(), origName, onlyDataAllowed); +} + +TNodePtr TSqlTranslation::TypeDecimal(const TRule_type_name_decimal& node) { + auto pos = Ctx.Pos(); + auto flags = TNodeFlags::Default; + + auto paramOne = IntegerOrBind(node.GetRule_integer_or_bind3()); + if (!paramOne) { + return {}; + } + auto paramTwo = IntegerOrBind(node.GetRule_integer_or_bind5()); + if (!paramTwo) { + return {}; + } + return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Decimal", flags), paramOne, paramTwo }); +} + +TNodePtr TSqlTranslation::AddOptionals(const TNodePtr& node, size_t optionalCount) { + TNodePtr result = node; + if (node) { + TPosition pos = node->GetPos(); + for (size_t i = 0; i < optionalCount; ++i) { + result = new TCallNodeImpl(pos, "OptionalType", { result }); + } + } + return result; +} + + +TMaybe<std::pair<TVector<TNodePtr>, bool>> TSqlTranslation::CallableArgList(const TRule_callable_arg_list& argList, bool namedArgsStarted) { + auto pos = Ctx.Pos(); + auto flags = TNodeFlags::Default; + auto& arg1 = argList.GetRule_callable_arg1(); + auto& varArg = arg1.GetRule_variant_arg1(); + TVector<TNodePtr> result; + TVector<TNodePtr> items; + auto typeNode = TypeNodeOrBind(varArg.GetRule_type_name_or_bind2()); + if (!typeNode) { + return {}; + } + items.push_back(typeNode); + if (varArg.HasBlock1()) { + namedArgsStarted = true; + auto tag = TypeNameTag(varArg.GetBlock1().GetRule_type_name_tag1()); + 
if (!tag) { + return {}; + } + items.push_back(tag); + } + if (arg1.HasBlock2()) { + if (!varArg.HasBlock1()) { + items.push_back(BuildQuotedAtom(pos, "", flags)); + } + items.push_back(BuildQuotedAtom(pos, "1", flags)); + } + result.push_back(new TAstListNodeImpl(pos, items)); + + for (auto& arg : argList.GetBlock2()) { + auto& varArg = arg.GetRule_callable_arg2().GetRule_variant_arg1(); + TVector<TNodePtr> items; + auto typeNode = TypeNodeOrBind(varArg.GetRule_type_name_or_bind2()); + if (!typeNode) { + return {}; + } + items.push_back(typeNode); + if (varArg.HasBlock1()) { + auto tag = TypeNameTag(varArg.GetBlock1().GetRule_type_name_tag1()); + if (!tag) { + return {}; + } + items.push_back(tag); + } else { + if (namedArgsStarted) { + Ctx.Error() << "Expected named argument, previous argument was named"; + return {}; + } + items.push_back(BuildQuotedAtom(pos, "", flags)); + } + if (arg.GetRule_callable_arg2().HasBlock2()) { + if (!varArg.HasBlock1()) { + items.push_back(BuildQuotedAtom(pos, "", flags)); + } + items.push_back(BuildQuotedAtom(pos, "1", flags)); + } + result.push_back(new TAstListNodeImpl(pos, items)); + } + return std::make_pair(result, namedArgsStarted); +} + +TNodePtr TSqlTranslation::TypeNodeOrBind(const TRule_type_name_or_bind& node) { + switch (node.Alt_case()) { + case TRule_type_name_or_bind::kAltTypeNameOrBind1: { + return TypeNode(node.GetAlt_type_name_or_bind1().GetRule_type_name1()); + } + case TRule_type_name_or_bind::kAltTypeNameOrBind2: { + TString bindName; + if (!NamedNodeImpl(node.GetAlt_type_name_or_bind2().GetRule_bind_parameter1(), bindName, *this)) { + return {}; + } + return GetNamedNode(bindName); + } + case TRule_type_name_or_bind::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +TNodePtr TSqlTranslation::TypeNode(const TRule_type_name& node) { + //type_name: + // type_name_composite + // | (type_name_decimal | type_name_simple) QUESTION*; + if (node.Alt_case() == 
TRule_type_name::kAltTypeName1) {
        return TypeNode(node.GetAlt_type_name1().GetRule_type_name_composite1());
    }

    TNodePtr result;
    TPosition pos = Ctx.Pos();

    auto& alt = node.GetAlt_type_name2();
    auto& block = alt.GetBlock1();
    switch (block.Alt_case()) {
        case TRule_type_name::TAlt2::TBlock1::kAlt1: {
            auto& decimalType = block.GetAlt1().GetRule_type_name_decimal1();
            result = TypeDecimal(decimalType);
            break;
        }
        case TRule_type_name::TAlt2::TBlock1::kAlt2: {
            auto& simpleType = block.GetAlt2().GetRule_type_name_simple1();
            result = TypeSimple(simpleType, false);
            break;
        }
        case TRule_type_name::TAlt2::TBlock1::ALT_NOT_SET:
            Y_ABORT("You should change implementation according to grammar changes");
    }

    // One optional wrapper is requested per element of the trailing block.
    return AddOptionals(result, alt.GetBlock2().size());
}

// Builds the AST type node for a composite type (see the grammar below).
// Returns an empty pointer as soon as any nested type or tag fails to build.
// The finished node is passed to AddOptionals() together with the number of
// elements in node.GetBlock2() (the trailing QUESTION* of the rule).
TNodePtr TSqlTranslation::TypeNode(const TRule_type_name_composite& node) {
    //type_name_composite:
    //  ( type_name_optional
    //  | type_name_tuple
    //  | type_name_struct
    //  | type_name_variant
    //  | type_name_list
    //  | type_name_stream
    //  | type_name_flow
    //  | type_name_dict
    //  | type_name_set
    //  | type_name_enum
    //  | type_name_resource
    //  | type_name_tagged
    //  | type_name_callable
    //  ) QUESTION*;
    TNodePtr result;
    TPosition pos = Ctx.Pos();
    auto flags = TNodeFlags::Default;

    // (<type> <param>) for single-parameter types; nullptr if the parameter fails to build.
    auto wrapOneParamType = [&] (const TRule_type_name_or_bind& param, const char* type) -> TNodePtr {
        auto node = TypeNodeOrBind(param);
        return node ? new TAstListNodeImpl(pos, { BuildAtom(pos, type, flags), node }) : nullptr;
    };
    // (VoidType)
    auto makeVoid = [&] () -> TNodePtr {
        return new TAstListNodeImpl(pos, { BuildAtom(pos, "VoidType", flags) });
    };
    // (quote <node>)
    auto makeQuote = [&] (const TNodePtr& node) -> TNodePtr {
        return new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), node });
    };

    auto& block = node.GetBlock1();
    switch (block.Alt_case()) {
        case TRule_type_name_composite_TBlock1::kAlt1: {
            auto& optionalType = block.GetAlt1().GetRule_type_name_optional1();
            result = wrapOneParamType(optionalType.GetRule_type_name_or_bind3(), "OptionalType");
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt2: {
            // Tuple: (TupleType <item>...), possibly with no items at all.
            auto& tupleType = block.GetAlt2().GetRule_type_name_tuple1();
            TVector<TNodePtr> items;
            items.push_back(BuildAtom(pos, "TupleType", flags));

            switch (tupleType.GetBlock2().Alt_case()) {
                case TRule_type_name_tuple::TBlock2::kAlt1: {
                    if (tupleType.GetBlock2().GetAlt1().HasBlock2()) {
                        auto typeNode = TypeNodeOrBind(tupleType.GetBlock2().GetAlt1().GetBlock2().GetRule_type_name_or_bind1());
                        if (!typeNode) {
                            return {};
                        }
                        items.push_back(typeNode);
                        for (auto& arg : tupleType.GetBlock2().GetAlt1().GetBlock2().GetBlock2()) {
                            auto typeNode = TypeNodeOrBind(arg.GetRule_type_name_or_bind2());
                            if (!typeNode) {
                                return {};
                            }
                            items.push_back(typeNode);
                        }
                    }
                    // The next label only breaks, so falling through is equivalent to a break here.
                    [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME
                }
                case TRule_type_name_tuple::TBlock2::kAlt2:
                    break;
                case TRule_type_name_tuple::TBlock2::ALT_NOT_SET:
                    Y_ABORT("You should change implementation according to grammar changes");
            }

            result = new TAstListNodeImpl(pos, items);
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt3: {
            // Struct: (StructType '(<tag> <type>)...), possibly with no members.
            auto& structType = block.GetAlt3().GetRule_type_name_struct1();
            TVector<TNodePtr> items;
            items.push_back(BuildAtom(pos, "StructType", flags));

            switch (structType.GetBlock2().Alt_case()) {
                case TRule_type_name_struct::TBlock2::kAlt1: {
                    if (structType.GetBlock2().GetAlt1().HasBlock2()) {
                        auto& structArg = structType.GetBlock2().GetAlt1().GetBlock2().GetRule_struct_arg1();
                        auto typeNode = TypeNodeOrBind(structArg.GetRule_type_name_or_bind3());
                        if (!typeNode) {
                            return {};
                        }
                        auto tag = TypeNameTag(structArg.GetRule_type_name_tag1());
                        if (!tag) {
                            return {};
                        }

                        items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
                        for (auto& arg : structType.GetBlock2().GetAlt1().GetBlock2().GetBlock2()) {
                            auto typeNode = TypeNodeOrBind(arg.GetRule_struct_arg2().GetRule_type_name_or_bind3());
                            if (!typeNode) {
                                return {};
                            }
                            auto tag = TypeNameTag(arg.GetRule_struct_arg2().GetRule_type_name_tag1());
                            if (!tag) {
                                return {};
                            }
                            items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
                        }
                    }
                    // The next label only breaks, so falling through is equivalent to a break here.
                    [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME
                }
                case TRule_type_name_struct::TBlock2::kAlt2:
                    break;
                case TRule_type_name_struct::TBlock2::ALT_NOT_SET:
                    Y_ABORT("You should change implementation according to grammar changes");
            }

            result = new TAstListNodeImpl(pos, items);
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt4: {
            // Variant: (VariantType <underlying>), where the underlying type is a
            // StructType when the first argument carries a tag and a TupleType otherwise.
            auto& variantType = block.GetAlt4().GetRule_type_name_variant1();
            TVector<TNodePtr> items;
            bool overStruct = false;
            auto& variantArg = variantType.GetRule_variant_arg3();
            auto typeNode = TypeNodeOrBind(variantArg.GetRule_type_name_or_bind2());
            if (!typeNode) {
                return {};
            }
            if (variantArg.HasBlock1()) {
                items.push_back(BuildAtom(pos, "StructType", flags));
                overStruct = true;
                auto tag = TypeNameTag(variantArg.GetBlock1().GetRule_type_name_tag1());
                if (!tag) {
                    return {};
                }
                items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
            } else {
                items.push_back(BuildAtom(pos, "TupleType", flags));
                items.push_back(typeNode);
            }

            // Every further argument must match the first one: all tagged or all untagged.
            for (auto& arg : variantType.GetBlock4()) {
                auto typeNode = TypeNodeOrBind(arg.GetRule_variant_arg2().GetRule_type_name_or_bind2());
                if (!typeNode) {
                    return {};
                }
                if (overStruct) {
                    if (!arg.GetRule_variant_arg2().HasBlock1()) {
                        Ctx.Error() << "Variant over struct and tuple mixture";
                        return {};
                    }
                    auto tag = TypeNameTag(arg.GetRule_variant_arg2().GetBlock1().GetRule_type_name_tag1());
                    if (!tag) {
                        return {};
                    }
                    items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
                } else {
                    if (arg.GetRule_variant_arg2().HasBlock1()) {
                        Ctx.Error() << "Variant over struct and tuple mixture";
                        return {};
                    }
                    items.push_back(typeNode);
                }
            }
            typeNode = new TAstListNodeImpl(pos, items);
            result = new TAstListNodeImpl(pos, { BuildAtom(pos, "VariantType", flags), typeNode });
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt5: {
            auto& listType = block.GetAlt5().GetRule_type_name_list1();
            result = wrapOneParamType(listType.GetRule_type_name_or_bind3(), "ListType");
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt6: {
            auto& streamType = block.GetAlt6().GetRule_type_name_stream1();
            result = wrapOneParamType(streamType.GetRule_type_name_or_bind3(), "StreamType");
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt7: {
            auto& flowType = block.GetAlt7().GetRule_type_name_flow1();
            result = wrapOneParamType(flowType.GetRule_type_name_or_bind3(), "FlowType");
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt8: {
            // Dict: (DictType <key> <value>)
            auto& dictType = block.GetAlt8().GetRule_type_name_dict1();
            TVector<TNodePtr> items;
            items.push_back(BuildAtom(pos, "DictType", flags));
            auto typeNode = TypeNodeOrBind(dictType.GetRule_type_name_or_bind3());
            if (!typeNode) {
                return {};
            }
            items.push_back(typeNode);
            typeNode = TypeNodeOrBind(dictType.GetRule_type_name_or_bind5());
            if (!typeNode) {
                return {};
            }
            items.push_back(typeNode);
            result = new TAstListNodeImpl(pos, items);
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt9: {
            // Set is emitted as (DictType <key> (VoidType)).
            auto& setType = block.GetAlt9().GetRule_type_name_set1();
            auto typeNode = TypeNodeOrBind(setType.GetRule_type_name_or_bind3());
            if (!typeNode) {
                return {};
            }
            result = new TAstListNodeImpl(pos, { BuildAtom(pos, "DictType", flags), typeNode, makeVoid() });
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt10: {
            // Enum is emitted as (VariantType (StructType '(<tag> (VoidType))...)).
            auto& enumType = block.GetAlt10().GetRule_type_name_enum1();
            TVector<TNodePtr> items;
            items.push_back(BuildAtom(pos, "StructType", flags));
            auto tag = TypeNameTag(enumType.GetRule_type_name_tag3());
            if (!tag) {
                return {};
            }
            items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, makeVoid() })));
            for (auto& arg : enumType.GetBlock4()) {
                auto tag = TypeNameTag(arg.GetRule_type_name_tag2());
                if (!tag) {
                    return {};
                }
                items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, makeVoid() })));
            }
            auto typeNode = new TAstListNodeImpl(pos, items);
            result = new TAstListNodeImpl(pos, { BuildAtom(pos, "VariantType", flags), typeNode });
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt11: {
            // Resource: (ResourceType <tag>)
            auto& resourceType = block.GetAlt11().GetRule_type_name_resource1();
            auto tag = TypeNameTag(resourceType.GetRule_type_name_tag3());
            if (!tag) {
                return {};
            }
            result = new TAstListNodeImpl(pos, { BuildAtom(pos, "ResourceType", flags), tag });
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt12: {
            // Tagged: (TaggedType <type> <tag>)
            auto& taggedType = block.GetAlt12().GetRule_type_name_tagged1();
            auto typeNode = TypeNodeOrBind(taggedType.GetRule_type_name_or_bind3());
            if (!typeNode) {
                return {};
            }
            auto tag = TypeNameTag(taggedType.GetRule_type_name_tag5());
            if (!tag) {
                return {};
            }
            result = new TAstListNodeImpl(pos, { BuildAtom(pos, "TaggedType", flags), typeNode, tag });
            break;
        }
        case TRule_type_name_composite_TBlock1::kAlt13: {
            // Callable: two optional argument lists (Block4, then Block6) and a return type.
            auto& callableType = block.GetAlt13().GetRule_type_name_callable1();
            TMaybe<std::pair<TVector<TNodePtr>, bool>> requiredArgs, optionalArgs;
            bool namedArgsStarted = false;
            size_t optionalArgsCount = 0;
            if (callableType.HasBlock4()) {
                auto& argList = callableType.GetBlock4().GetRule_callable_arg_list1();
                requiredArgs = CallableArgList(argList, namedArgsStarted);
                if (!requiredArgs) {
                    return {};
                }
                // Carry the flag returned for the first list into the parsing of the second list.
                namedArgsStarted = requiredArgs->second;
            }
            if (callableType.HasBlock6()) {
                auto& argList = callableType.GetBlock6().GetRule_callable_arg_list2();
                optionalArgs = CallableArgList(argList, namedArgsStarted);
                if (!optionalArgs) {
                    return {};
                }
                optionalArgsCount = optionalArgs->first.size();
            }
            auto returnType = TypeNodeOrBind(callableType.GetRule_type_name_or_bind9());
            if (!returnType) {
                return {};
            }
            // Emitted layout: CallableType, quoted list holding the optional-argument count
            // (empty list when the count is zero), quoted (return type), then each argument
            // from the first list followed by each argument from the second list, all quoted.
            TVector<TNodePtr> items;
            items.push_back(BuildAtom(pos, "CallableType", flags));
            if (optionalArgsCount) {
                items.push_back(makeQuote(new TAstListNodeImpl(pos,
                    { BuildQuotedAtom(pos, ToString(optionalArgsCount), flags) })));
            } else {
                items.push_back(makeQuote(new TAstListNodeImpl(pos, {})));
            }
            items.push_back(makeQuote(new TAstListNodeImpl(pos, { returnType })));
            if (requiredArgs) {
                for (auto& arg: requiredArgs->first) {
                    items.push_back(makeQuote(arg));
                }
            }
            if (optionalArgs) {
                for (auto& arg: optionalArgs->first) {
                    items.push_back(makeQuote(arg));
                }
            }
            result = new TAstListNodeImpl(pos, items);
            break;
        }
        case TRule_type_name_composite_TBlock1::ALT_NOT_SET:
            Y_ABORT("You should change implementation according to grammar changes");
    }

    return AddOptionals(result, node.GetBlock2().size());
}

// Builds a literal node from the rule's single token via BuildLiteralSmartString().
TNodePtr TSqlTranslation::ValueConstructorLiteral(const TRule_value_constructor_literal& node) {
    return BuildLiteralSmartString(Ctx, Token(node.GetToken1()));
}

// Delegates to TSqlCallExpr; returns an empty pointer when its Init() fails.
TNodePtr TSqlTranslation::ValueConstructor(const TRule_value_constructor& node) {
    TSqlCallExpr call(Ctx, Mode);
    if (!call.Init(node)) {
        return {};
    }
    return call.BuildCall();
}

// Builds (AsListMayWarn <expr>...) from a list literal; the list may be empty.
TNodePtr TSqlTranslation::ListLiteral(const TRule_list_literal& node) {
    TVector<TNodePtr> values;
    values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "AsListMayWarn", TNodeFlags::Default));

    TSqlExpression
sqlExpr(Ctx, Mode); + if (node.HasBlock2() && !ExprList(sqlExpr, values, node.GetBlock2().GetRule_expr_list1())) { + return nullptr; + } + + return new TAstListNodeImpl(Ctx.Pos(), std::move(values)); +} + +TNodePtr TSqlTranslation::DictLiteral(const TRule_dict_literal& node) { + TVector<TNodePtr> values; + if (node.HasBlock2()) { + const auto& list = node.GetBlock2().GetRule_expr_dict_list1(); + const bool isSet = !list.HasBlock2(); + values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), isSet ? "AsSet" : "AsDict", TNodeFlags::Default)); + TSqlExpression sqlExpr(Ctx, Mode); + if (isSet) { + if (!Expr(sqlExpr, values, list.GetRule_expr1())) { + return nullptr; + } + } else { + TVector<TNodePtr> tupleItems; + if (!Expr(sqlExpr, tupleItems, list.GetRule_expr1())) { + return nullptr; + } + + if (!Expr(sqlExpr, tupleItems, list.GetBlock2().GetRule_expr2())) { + return nullptr; + } + + values.push_back(new TTupleNode(Ctx.Pos(), std::move(tupleItems))); + } + + for (auto& b : list.GetBlock3()) { + sqlExpr.Token(b.GetToken1()); + const bool isSetCurr = !b.HasBlock3(); + if (isSetCurr != isSet) { + Error() << "Expected keys/values pair or keys, but got mix of them"; + return nullptr; + } + + if (isSet) { + if (!Expr(sqlExpr, values, b.GetRule_expr2())) { + return nullptr; + } + } else { + TVector<TNodePtr> tupleItems; + if (!Expr(sqlExpr, tupleItems, b.GetRule_expr2())) { + return nullptr; + } + + if (!Expr(sqlExpr, tupleItems, b.GetBlock3().GetRule_expr2())) { + return nullptr; + } + + values.push_back(new TTupleNode(Ctx.Pos(), std::move(tupleItems))); + } + } + } else { + values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "AsDict", TNodeFlags::Default)); + } + + return new TAstListNodeImpl(Ctx.Pos(), std::move(values)); +} + +bool TSqlTranslation::StructLiteralItem(TVector<TNodePtr>& labels, const TRule_expr& label, TVector<TNodePtr>& values, const TRule_expr& value) { + // label expr + { + TColumnRefScope scope(Ctx, EColumnRefState::AsStringLiteral, /* topLevel */ false); + 
TSqlExpression sqlExpr(Ctx, Mode); + if (!Expr(sqlExpr, labels, label)) { + return false; + } + + TDeferredAtom atom; + MakeTableFromExpression(Ctx.Pos(), Ctx, labels.back(), atom); + labels.back() = atom.Build(); + if (!labels.back()) { + return false; + } + } + + // value expr + { + TSqlExpression sqlExpr(Ctx, Mode); + if (!Expr(sqlExpr, values, value)) { + return false; + } + } + + return true; +} + +TNodePtr TSqlTranslation::StructLiteral(const TRule_struct_literal& node) { + TVector<TNodePtr> labels; + TVector<TNodePtr> values; + TPosition pos = Ctx.TokenPosition(node.GetToken1()); + if (node.HasBlock2()) { + const auto& list = node.GetBlock2().GetRule_expr_struct_list1(); + + if (!StructLiteralItem(labels, list.GetRule_expr1(), values, list.GetRule_expr3())) { + return {}; + } + + for (auto& b : list.GetBlock4()) { + if (!StructLiteralItem(labels, b.GetRule_expr2(), values, b.GetRule_expr4())) { + return {}; + } + } + } + return BuildStructure(pos, values, labels); +} + +bool TSqlTranslation::TableHintImpl(const TRule_table_hint& rule, TTableHints& hints, const TString& provider, const TString& keyFunc) { + // table_hint: + // an_id_hint (EQUALS (type_name_tag | LPAREN type_name_tag (COMMA type_name_tag)* COMMA? RPAREN))? + // | (SCHEMA | COLUMNS) EQUALS? type_name_or_bind + // | SCHEMA EQUALS? LPAREN (struct_arg_positional (COMMA struct_arg_positional)*)? COMMA? 
RPAREN + switch (rule.Alt_case()) { + case TRule_table_hint::kAltTableHint1: { + const auto& alt = rule.GetAlt_table_hint1(); + const TString id = Id(alt.GetRule_an_id_hint1(), *this); + const auto idLower = to_lower(id); + if (idLower == "schema" || idLower == "columns") { + Error() << "Expected type after " << to_upper(id); + return false; + } + TVector<TNodePtr> hint_val; + if (alt.HasBlock2()) { + auto& tags = alt.GetBlock2().GetBlock2(); + switch (tags.Alt_case()) { + case TRule_table_hint_TAlt1_TBlock2_TBlock2::kAlt1: + hint_val.push_back(TypeNameTag(tags.GetAlt1().GetRule_type_name_tag1())); + break; + case TRule_table_hint_TAlt1_TBlock2_TBlock2::kAlt2: { + hint_val.push_back(TypeNameTag(tags.GetAlt2().GetRule_type_name_tag2())); + for (auto& tag : tags.GetAlt2().GetBlock3()) { + hint_val.push_back(TypeNameTag(tag.GetRule_type_name_tag2())); + } + break; + } + case TRule_table_hint_TAlt1_TBlock2_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + } + hints[id] = hint_val; + break; + } + + case TRule_table_hint::kAltTableHint2: { + const auto& alt2 = rule.GetAlt_table_hint2(); + auto node = TypeNodeOrBind(alt2.GetRule_type_name_or_bind3()); + if (!node) { + return false; + } + + hints["user_" + to_lower(alt2.GetToken1().GetValue())] = { node }; + break; + } + + case TRule_table_hint::kAltTableHint3: { + const auto& alt = rule.GetAlt_table_hint3(); + TVector<TNodePtr> labels; + TVector<TNodePtr> structTypeItems; + if (alt.HasBlock4()) { + bool warn = false; + auto processItem = [&](const TRule_struct_arg_positional& arg) { + // struct_arg_positional: + // type_name_tag type_name_or_bind (NOT? NULL)? + // | type_name_or_bind AS type_name_tag; //deprecated + const bool altCurrent = arg.Alt_case() == TRule_struct_arg_positional::kAltStructArgPositional1; + auto& typeNameOrBind = altCurrent ? 
+ arg.GetAlt_struct_arg_positional1().GetRule_type_name_or_bind2() : + arg.GetAlt_struct_arg_positional2().GetRule_type_name_or_bind1(); + auto typeNode = TypeNodeOrBind(typeNameOrBind); + if (!typeNode) { + return false; + } + + auto pos = Ctx.Pos(); + if (!altCurrent && !warn) { + Ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_POSITIONAL_SCHEMA) + << "Deprecated syntax for positional schema: please use 'column type' instead of 'type AS column'"; + warn = true; + } + + if (altCurrent) { + bool notNull = arg.GetAlt_struct_arg_positional1().HasBlock3() && arg.GetAlt_struct_arg_positional1().GetBlock3().HasBlock1(); + if (!notNull) { + typeNode = new TCallNodeImpl(pos, "AsOptionalType", { typeNode }); + } + } + + auto& typeNameTag = altCurrent ? + arg.GetAlt_struct_arg_positional1().GetRule_type_name_tag1() : + arg.GetAlt_struct_arg_positional2().GetRule_type_name_tag3(); + auto tag = TypeNameTag(typeNameTag); + if (!tag) { + return false; + } + + labels.push_back(tag); + structTypeItems.push_back(BuildTuple(pos, { tag, typeNode })); + return true; + }; + + if (!processItem(alt.GetBlock4().GetRule_struct_arg_positional1())) { + return false; + } + + for (auto& entry : alt.GetBlock4().GetBlock2()) { + if (!processItem(entry.GetRule_struct_arg_positional2())) { + return false; + } + } + } + + TPosition pos = Ctx.TokenPosition(alt.GetToken1()); + TNodePtr structType = new TCallNodeImpl(pos, "StructType", structTypeItems); + bool shouldEmitLabel = provider != YtProviderName || TCiString(keyFunc) == "object"; + if (shouldEmitLabel) { + auto labelsTuple = BuildTuple(pos, labels); + hints["user_" + to_lower(alt.GetToken1().GetValue())] = { structType, labelsTuple }; + break; + } else { + hints["user_" + to_lower(alt.GetToken1().GetValue())] = { structType }; + break; + } + } + + case TRule_table_hint::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + return true; +} + +TMaybe<TTableHints> TSqlTranslation::TableHintsImpl(const 
TRule_table_hints& node, const TString& provider, const TString& keyFunc) {
    // Collects all hints of a table_hints clause (a single hint, or a list of them).
    // Returns Nothing() if any processed hint reported an error.
    TTableHints hints;
    auto& block = node.GetBlock2();
    bool hasErrors = false;
    switch (block.Alt_case()) {
    case TRule_table_hints::TBlock2::kAlt1: {
        hasErrors = !TableHintImpl(block.GetAlt1().GetRule_table_hint1(), hints, provider, keyFunc);
        break;
    }
    case TRule_table_hints::TBlock2::kAlt2: {
        hasErrors = !TableHintImpl(block.GetAlt2().GetRule_table_hint2(), hints, provider, keyFunc);
        for (const auto& x : block.GetAlt2().GetBlock3()) {
            // Short-circuit: once an error is recorded, the remaining hints are not translated.
            hasErrors = hasErrors || !TableHintImpl(x.GetRule_table_hint2(), hints, provider, keyFunc);
        }

        break;
    }
    case TRule_table_hints::TBlock2::ALT_NOT_SET:
        Y_ABORT("You should change implementation according to grammar changes");
    }
    if (hasErrors) {
        return Nothing();
    }

    return hints;
}

// Translates a simple table reference with optional hints into `result`.
// Hints parsed from the statement replace the hints taken from the context;
// when the final hint set is non-empty it is stored in result.Options.
bool TSqlTranslation::SimpleTableRefImpl(const TRule_simple_table_ref& node, TTableRef& result) {
    // simple_table_ref: simple_table_ref_core table_hints?;
    if (!SimpleTableRefCoreImpl(node.GetRule_simple_table_ref_core1(), result)) {
        return false;
    }

    TTableHints hints = GetContextHints(Context());
    if (node.HasBlock2()) {
        const TString& service = Context().Scoped->CurrService;
        auto tmp = TableHintsImpl(node.GetBlock2().GetRule_table_hints1(), service);
        if (!tmp) {
            Error() << "Failed to parse table hints";
            return false;
        }

        hints = *tmp;
    }

    if (!hints.empty()) {
        result.Options = BuildInputOptions(Context().Pos(), hints);
    }

    return true;
}

// Resolves the table part of a simple table reference: either an explicit
// (optionally cluster-qualified) name or a bind parameter, optionally prefixed by AT.
// Starts from the current service and cluster of the scoped context.
// Returns false on error or when no table key was built.
bool TSqlTranslation::SimpleTableRefCoreImpl(const TRule_simple_table_ref_core& node, TTableRef& result) {
    // simple_table_ref_core: ((cluster_expr DOT)? id_or_at) | AT? bind_parameter;
    TString service = Context().Scoped->CurrService;
    TDeferredAtom cluster = Context().Scoped->CurrCluster;
    switch (node.Alt_case()) {
    case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore1: {
        if (node.GetAlt_simple_table_ref_core1().GetRule_object_ref1().HasBlock1()) {
            // An explicit cluster is rejected in LIMITED_VIEW mode.
            if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) {
                Error() << "Cluster should not be used in limited view";
                return false;
            }

            if (!ClusterExpr(node.GetAlt_simple_table_ref_core1().GetRule_object_ref1().GetBlock1().GetRule_cluster_expr1(), false, service, cluster)) {
                return false;
            }
        }

        if (cluster.Empty()) {
            Error() << "No cluster name given and no default cluster is selected";
            return false;
        }

        result = TTableRef(Context().MakeName("table"), service, cluster, nullptr);
        auto tableOrAt = Id(node.GetAlt_simple_table_ref_core1().GetRule_object_ref1().GetRule_id_or_at2(), *this);
        auto tableAndView = TableKeyImpl(tableOrAt, {}, *this);
        result.Keys = BuildTableKey(Context().Pos(), result.Service, result.Cluster,
            TDeferredAtom(Context().Pos(), tableAndView.first), tableAndView.second);
        break;
    }
    case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore2: {
        if (cluster.Empty()) {
            Error() << "No cluster name given and no default cluster is selected";
            return false;
        }

        // Presence of the optional AT prefix; it selects "@" or "" in the key built below.
        auto at = node.GetAlt_simple_table_ref_core2().HasBlock1();
        TString bindName;
        if (!NamedNodeImpl(node.GetAlt_simple_table_ref_core2().GetRule_bind_parameter2(), bindName, *this)) {
            return false;
        }
        auto named = GetNamedNode(bindName);
        if (!named) {
            return false;
        }

        TDeferredAtom table;
        MakeTableFromExpression(Context().Pos(), Context(), named, table);
        result = TTableRef(Context().MakeName("table"), service, cluster, nullptr);
        result.Keys = BuildTableKey(Context().Pos(), result.Service, result.Cluster, table, {at ? "@" : ""});
        break;
    }
    case TRule_simple_table_ref_core::AltCase::ALT_NOT_SET:
        Y_ABORT("You should change implementation according to grammar changes");
    }

    return result.Keys != nullptr;
}

// Resolves a topic reference (optionally cluster-qualified) into `result`.
// Starts from the current service and cluster of the scoped context.
bool TSqlTranslation::TopicRefImpl(const TRule_topic_ref& node, TTopicRef& result) {
    TString service = Context().Scoped->CurrService;
    TDeferredAtom cluster = Context().Scoped->CurrCluster;
    if (node.HasBlock1()) {
        // An explicit cluster is rejected in LIMITED_VIEW mode.
        if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) {
            Error() << "Cluster should not be used in limited view";
            return false;
        }

        if (!ClusterExpr(node.GetBlock1().GetRule_cluster_expr1(), false, service, cluster)) {
            return false;
        }
    }

    if (cluster.Empty()) {
        Error() << "No cluster name given and no default cluster is selected";
        return false;
    }

    result = TTopicRef(Context().MakeName("topic"), cluster, nullptr);
    auto topic = Id(node.GetRule_an_id2(), *this);
    result.Keys = BuildTopicKey(Context().Pos(), result.Cluster, TDeferredAtom(Context().Pos(), topic));

    return true;
}

// Translates the right-hand side of a named-nodes statement and fills `names`
// with the bound names from the left-hand side. Returns an empty pointer on error.
TNodePtr TSqlTranslation::NamedNode(const TRule_named_nodes_stmt& rule, TVector<TSymbolNameWithPos>& names) {
    // named_nodes_stmt: bind_parameter_list EQUALS (expr | subselect_stmt);
    // subselect_stmt: (LPAREN select_stmt RPAREN | select_unparenthesized_stmt);
    if (!BindList(rule.GetRule_bind_parameter_list1(), names)) {
        return {};
    }

    TNodePtr nodeExpr = nullptr;
    switch (rule.GetBlock3().Alt_case()) {
    case TRule_named_nodes_stmt::TBlock3::kAlt1: {
        TSqlExpression expr(Ctx, Mode);
        auto result = expr.Build(rule.GetBlock3().GetAlt1().GetRule_expr1());
        return result;
    }

    case TRule_named_nodes_stmt::TBlock3::kAlt2:{
        const auto& subselect_rule = rule.GetBlock3().GetAlt2().GetRule_subselect_stmt1();

        TSqlSelect expr(Ctx, Mode);
        TPosition pos;
        TSourcePtr source = nullptr;
        switch (subselect_rule.GetBlock1().Alt_case()) {
            case TRule_subselect_stmt::TBlock1::kAlt1:
                source =
expr.Build(subselect_rule.GetBlock1().GetAlt1().GetRule_select_stmt2(), pos); + break; + + case TRule_subselect_stmt::TBlock1::kAlt2: + source = expr.Build(subselect_rule.GetBlock1().GetAlt2().GetRule_select_unparenthesized_stmt1(), pos); + break; + + case TRule_subselect_stmt::TBlock1::ALT_NOT_SET: + AltNotImplemented("subselect_stmt", subselect_rule.GetBlock1()); + Ctx.IncrementMonCounter("sql_errors", "UnknownNamedNode"); + return nullptr; + } + + if (!source) { + return {}; + } + + return BuildSourceNode(pos, std::move(source)); + } + + case TRule_named_nodes_stmt::TBlock3::ALT_NOT_SET: + AltNotImplemented("named_node", rule.GetBlock3()); + Ctx.IncrementMonCounter("sql_errors", "UnknownNamedNode"); + return nullptr; + } +} + +bool TSqlTranslation::ImportStatement(const TRule_import_stmt& stmt, TVector<TString>* namesPtr) { + TVector<TString> modulePath; + if (!ModulePath(stmt.GetRule_module_path2(), modulePath)) { + return false; + } + + TVector<TSymbolNameWithPos> names; + TVector<TSymbolNameWithPos> aliases; + if (!NamedBindList(stmt.GetRule_named_bind_parameter_list4(), names, aliases)) { + return false; + } + YQL_ENSURE(names.size() == aliases.size()); + const TString moduleAlias = Ctx.AddImport(std::move(modulePath)); + if (!moduleAlias) { + return false; + } + + for (size_t i = 0; i < names.size(); ++i) { + auto& name = names[i]; + auto& alias = aliases[i]; + + auto& var = alias.Name ? 
alias : name; + if (IsAnonymousName(var.Name)) { + Ctx.Error(var.Pos) << "Can not import anonymous name " << var.Name; + return false; + } + + auto builder = [&](const TString& realName) { + YQL_ENSURE(realName == var.Name); + auto atom = BuildQuotedAtom(name.Pos, name.Name); + return atom->Y("bind", moduleAlias, atom); + }; + + var.Name = PushNamedNode(var.Pos, var.Name, builder); + if (namesPtr) { + namesPtr->push_back(var.Name); + } + } + return true; +} + +bool TSqlTranslation::SortSpecification(const TRule_sort_specification& node, TVector<TSortSpecificationPtr>& sortSpecs) { + bool asc = true; + TSqlExpression expr(Ctx, Mode); + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + TNodePtr exprNode = expr.Build(node.GetRule_expr1()); + if (!exprNode) { + return false; + } + if (node.HasBlock2()) { + const auto& token = node.GetBlock2().GetToken1(); + Token(token); + auto tokenId = token.GetId(); + if (IS_TOKEN(tokenId, ASC)) { + Ctx.IncrementMonCounter("sql_features", "OrderByAsc"); + } else if (IS_TOKEN(tokenId, DESC)) { + asc = false; + Ctx.IncrementMonCounter("sql_features", "OrderByDesc"); + } else { + Ctx.IncrementMonCounter("sql_errors", "UnknownOrderBy"); + Error() << "Unsupported direction token: " << token.GetId(); + return false; + } + } else { + Ctx.IncrementMonCounter("sql_features", "OrderByDefault"); + } + sortSpecs.emplace_back(MakeIntrusive<TSortSpecification>(exprNode, asc)); + return true; +} + +bool TSqlTranslation::SortSpecificationList(const TRule_sort_specification_list& node, TVector<TSortSpecificationPtr>& sortSpecs) { + if (!SortSpecification(node.GetRule_sort_specification1(), sortSpecs)) { + return false; + } + for (auto sortSpec: node.GetBlock2()) { + Token(sortSpec.GetToken1()); + if (!SortSpecification(sortSpec.GetRule_sort_specification2(), sortSpecs)) { + return false; + } + } + return true; +} + +bool TSqlTranslation::IsDistinctOptSet(const TRule_opt_set_quantifier& node) const { + TPosition pos; + return node.HasBlock1() && 
IS_TOKEN(node.GetBlock1().GetToken1().GetId(), DISTINCT); +} + +bool TSqlTranslation::IsDistinctOptSet(const TRule_opt_set_quantifier& node, TPosition& distinctPos) const { + if (node.HasBlock1() && IS_TOKEN(node.GetBlock1().GetToken1().GetId(), DISTINCT)) { + distinctPos = Ctx.TokenPosition(node.GetBlock1().GetToken1()); + return true; + } + return false; +} + +bool TSqlTranslation::RoleNameClause(const TRule_role_name& node, TDeferredAtom& result, bool allowSystemRoles) { + // role_name: an_id_or_type | bind_parameter; + switch (node.Alt_case()) { + case TRule_role_name::kAltRoleName1: + { + TString name = Id(node.GetAlt_role_name1().GetRule_an_id_or_type1(), *this); + result = TDeferredAtom(Ctx.Pos(), name); + break; + } + case TRule_role_name::kAltRoleName2: + { + if (!BindParameterClause(node.GetAlt_role_name2().GetRule_bind_parameter1(), result)) { + return false; + } + break; + } + case TRule_role_name::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + if (auto literalName = result.GetLiteral(); literalName && !allowSystemRoles) { + static const THashSet<TStringBuf> systemRoles = { "current_role", "current_user", "session_user" }; + if (systemRoles.contains(to_lower(*literalName))) { + Ctx.Error() << "System role " << to_upper(*literalName) << " can not be used here"; + return false; + } + } + + return true; +} + +bool TSqlTranslation::RoleParameters(const TRule_create_user_option& node, TRoleParameters& result) { + // create_user_option: ENCRYPTED? 
PASSWORD expr; + result = TRoleParameters{}; + + TSqlExpression expr(Ctx, Mode); + TNodePtr password = expr.Build(node.GetRule_expr3()); + if (!password) { + return false; + } + + result.IsPasswordEncrypted = node.HasBlock1(); + if (!password->IsNull()) { + result.Password = MakeAtomFromExpression(Ctx.Pos(), Ctx, password); + } + + return true; +} + +bool TSqlTranslation::PermissionNameClause(const TRule_permission_id& node, TDeferredAtom& result) { + // permission_id: + // CONNECT + // | LIST + // | INSERT + // | MANAGE + // | DROP + // | GRANT + // | MODIFY (TABLES | ATTRIBUTES) + // | (UPDATE | ERASE) ROW + // | (REMOVE | DESCRIBE | ALTER) SCHEMA + // | SELECT (TABLES | ATTRIBUTES | ROW)? + // | (USE | FULL) LEGACY? + // | CREATE (DIRECTORY | TABLE | QUEUE)? + + auto handleOneIdentifier = [&result, this] (const auto& permissionNameKeyword) { + result = TDeferredAtom(Ctx.Pos(), GetIdentifier(*this, permissionNameKeyword).Name); + }; + + auto handleTwoIdentifiers = [&result, this] (const auto& permissionNameKeyword) { + const auto& token1 = permissionNameKeyword.GetToken1(); + const auto& token2 = permissionNameKeyword.GetToken2(); + TString identifierName = TIdentifier(TPosition(token1.GetColumn(), token1.GetLine()), Identifier(token1)).Name + + "_" + + TIdentifier(TPosition(token2.GetColumn(), token2.GetLine()), Identifier(token2)).Name; + result = TDeferredAtom(Ctx.Pos(), identifierName); + }; + + auto handleOneOrTwoIdentifiers = [&result, this] (const auto& permissionNameKeyword) { + TString identifierName = GetIdentifier(*this, permissionNameKeyword).Name; + if (permissionNameKeyword.HasBlock2()) { + identifierName += "_" + GetIdentifier(*this, permissionNameKeyword.GetBlock2()).Name; + } + result = TDeferredAtom(Ctx.Pos(), identifierName); + }; + + switch (node.GetAltCase()) { + case TRule_permission_id::kAltPermissionId1: + { + // CONNECT + handleOneIdentifier(node.GetAlt_permission_id1()); + break; + } + case TRule_permission_id::kAltPermissionId2: + { + 
// LIST + handleOneIdentifier(node.GetAlt_permission_id2()); + break; + } + case TRule_permission_id::kAltPermissionId3: + { + // INSERT + handleOneIdentifier(node.GetAlt_permission_id3()); + break; + } + case TRule_permission_id::kAltPermissionId4: + { + // MANAGE + handleOneIdentifier(node.GetAlt_permission_id4()); + break; + } + case TRule_permission_id::kAltPermissionId5: + { + // DROP + handleOneIdentifier(node.GetAlt_permission_id5()); + break; + } + case TRule_permission_id::kAltPermissionId6: + { + // GRANT + handleOneIdentifier(node.GetAlt_permission_id6()); + break; + } + case TRule_permission_id::kAltPermissionId7: + { + // MODIFY (TABLES | ATTRIBUTES) + handleTwoIdentifiers(node.GetAlt_permission_id7()); + break; + } + case TRule_permission_id::kAltPermissionId8: + { + // (UPDATE | ERASE) ROW + handleTwoIdentifiers(node.GetAlt_permission_id8()); + break; + } + case TRule_permission_id::kAltPermissionId9: + { + // (REMOVE | DESCRIBE | ALTER) SCHEMA + handleTwoIdentifiers(node.GetAlt_permission_id9()); + break; + } + case TRule_permission_id::kAltPermissionId10: + { + // SELECT (TABLES | ATTRIBUTES | ROW)? + handleOneOrTwoIdentifiers(node.GetAlt_permission_id10()); + break; + } + case TRule_permission_id::kAltPermissionId11: + { + // (USE | FULL) LEGACY? + handleOneOrTwoIdentifiers(node.GetAlt_permission_id11()); + break; + } + case TRule_permission_id::kAltPermissionId12: + { + // CREATE (DIRECTORY | TABLE | QUEUE)? 
+ handleOneOrTwoIdentifiers(node.GetAlt_permission_id12()); + break; + } + case TRule_permission_id::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; +} + +bool TSqlTranslation::PermissionNameClause(const TRule_permission_name& node, TDeferredAtom& result) { + // permission_name: permission_id | STRING_VALUE; + switch (node.Alt_case()) { + case TRule_permission_name::kAltPermissionName1: + { + return PermissionNameClause(node.GetAlt_permission_name1().GetRule_permission_id1(), result); + break; + } + case TRule_permission_name::kAltPermissionName2: + { + const TString stringValue(Ctx.Token(node.GetAlt_permission_name2().GetToken1())); + auto unescaped = StringContent(Ctx, Ctx.Pos(), stringValue); + if (!unescaped) { + return false; + } + result = TDeferredAtom(Ctx.Pos(), unescaped->Content); + break; + } + case TRule_permission_name::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; +} + +bool TSqlTranslation::PermissionNameClause(const TRule_permission_name_target& node, TVector<TDeferredAtom>& result, bool withGrantOption) { + // permission_name_target: permission_name (COMMA permission_name)* COMMA? 
| ALL PRIVILEGES?; + switch (node.Alt_case()) { + case TRule_permission_name_target::kAltPermissionNameTarget1: + { + const auto& permissionNameRule = node.GetAlt_permission_name_target1(); + result.emplace_back(); + if (!PermissionNameClause(permissionNameRule.GetRule_permission_name1(), result.back())) { + return false; + } + for (const auto& item : permissionNameRule.GetBlock2()) { + result.emplace_back(); + if (!PermissionNameClause(item.GetRule_permission_name2(), result.back())) { + return false; + } + } + break; + } + case TRule_permission_name_target::kAltPermissionNameTarget2: + { + result.emplace_back(Ctx.Pos(), "all_privileges"); + break; + } + case TRule_permission_name_target::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + if (withGrantOption) { + result.emplace_back(Ctx.Pos(), "grant"); + } + return true; +} + +bool TSqlTranslation::StoreStringSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) { + YQL_ENSURE(value); + + const TString key = to_lower(id.Name); + if (result.find(key) != result.end()) { + Ctx.Error() << to_upper(key) << " duplicate keys"; + return false; + } + + switch (value->Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue2: + return StoreString(*value, result[key], Ctx, to_upper(key)); + + default: + Ctx.Error() << to_upper(key) << " value should be a string literal"; + return false; + } + + return true; +} + +bool TSqlTranslation::StoreStringSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) { + const TIdentifier id = IdEx(entry.GetRule_an_id1(), *this); + return StoreStringSettingsEntry(id, &entry.GetRule_table_setting_value3(), result); +} + +bool TSqlTranslation::ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, const TRule_backup_collection_settings& settings) { + const auto& firstEntry = 
settings.GetRule_backup_collection_settings_entry1(); + if (!StoreStringSettingsEntry(IdEx(firstEntry.GetRule_an_id1(), *this), &firstEntry.GetRule_table_setting_value3(), result)) { + return false; + } + for (const auto& block : settings.GetBlock2()) { + const auto& entry = block.GetRule_backup_collection_settings_entry2(); + if (!StoreStringSettingsEntry(IdEx(entry.GetRule_an_id1(), *this), &entry.GetRule_table_setting_value3(), result)) { + return false; + } + } + return true; +} + + +bool TSqlTranslation::ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_backup_collection_actions& actions) { + auto parseAction = [&](auto& actionVariant) { + switch (actionVariant.Alt_case()) { + case TRule_alter_backup_collection_action::kAltAlterBackupCollectionAction1: { + const auto& action = actionVariant.GetAlt_alter_backup_collection_action1().GetRule_alter_table_set_table_setting_compat1(); + if (!StoreStringSettingsEntry(action.GetRule_alter_table_setting_entry3(), result)) { + return false; + } + for (const auto& entry : action.GetBlock4()) { + if (!StoreStringSettingsEntry(entry.GetRule_alter_table_setting_entry2(), result)) { + return false; + } + } + return true; + } + case TRule_alter_backup_collection_action::kAltAlterBackupCollectionAction2: { + const auto& action = actionVariant.GetAlt_alter_backup_collection_action2().GetRule_alter_table_reset_table_setting1(); + const TString firstKey = to_lower(IdEx(action.GetRule_an_id3(), *this).Name); + toReset.insert(firstKey); + for (const auto& key : action.GetBlock4()) { + toReset.insert(to_lower(IdEx(key.GetRule_an_id2(), *this).Name)); + } + return true; + } + case TRule_alter_backup_collection_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + }; + + const auto& firstAction = actions.GetRule_alter_backup_collection_action1(); + if (!parseAction(firstAction)) { + return false; + } + + for (const 
auto& action : actions.GetBlock2()) { + if (!parseAction(action.GetRule_alter_backup_collection_action2())) { + return false; + } + } + + + return true; +} + +bool TSqlTranslation::ParseBackupCollectionTables(TVector<TDeferredAtom>& result, const TRule_table_list& tables) { + const auto& firstEntry = tables.GetRule_an_id_table2(); + result.push_back(TDeferredAtom(Ctx.Pos(), Id(firstEntry, *this))); + for (const auto& block : tables.GetBlock3()) { + const auto& entry = block.GetRule_an_id_table3(); + result.push_back(TDeferredAtom(Ctx.Pos(), Id(entry, *this))); + } + return true; +} + +bool TSqlTranslation::ParseBackupCollectionEntry( + bool& addDatabase, + bool& removeDatabase, + TVector<TDeferredAtom>& addTables, + TVector<TDeferredAtom>& removeTables, + const TRule_alter_backup_collection_entry& entry) +{ + switch (entry.Alt_case()) { + case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry1: { + addDatabase = true; + return true; + } + case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry2: { + removeDatabase = true; + return true; + } + case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry3: { + auto table = entry.GetAlt_alter_backup_collection_entry3().GetRule_an_id_table3(); + addTables.push_back(TDeferredAtom(Ctx.Pos(), Id(table, *this))); + return true; + } + case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry4: { + auto table = entry.GetAlt_alter_backup_collection_entry4().GetRule_an_id_table3(); + removeTables.push_back(TDeferredAtom(Ctx.Pos(), Id(table, *this))); + return true; + } + case TRule_alter_backup_collection_entry::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; +} + +bool TSqlTranslation::ParseBackupCollectionEntries( + bool& addDatabase, + bool& removeDatabase, + TVector<TDeferredAtom>& addTables, + TVector<TDeferredAtom>& removeTables, + const TRule_alter_backup_collection_entries& entries) +{ + const auto& 
firstEntry = entries.GetRule_alter_backup_collection_entry1(); + if (!ParseBackupCollectionEntry(addDatabase, removeDatabase, addTables, removeTables, firstEntry)) { + return false; + } + for (const auto& block : entries.GetBlock2()) { + const auto& entry = block.GetRule_alter_backup_collection_entry2(); + if (!ParseBackupCollectionEntry(addDatabase, removeDatabase, addTables, removeTables, entry)) { + return false; + } + } + return true; +} + + +TString TSqlTranslation::FrameSettingsToString(EFrameSettings settings, bool isUnbounded) { + TString result; + switch (settings) { + case FramePreceding: + result = "PRECEDING"; break; + case FrameCurrentRow: + YQL_ENSURE(!isUnbounded); + result = "CURRENT ROW"; break; + case FrameFollowing: + result = "FOLLOWING"; break; + default: + Y_ABORT("Unexpected frame settings"); + } + + return (isUnbounded ? "UNBOUNDED " : "") + result; +} + +bool CheckFrameBoundLiteral(TContext& ctx, const TFrameBound& bound, TMaybe<i32>& boundValue) { + boundValue = {}; + auto node = bound.Bound; + if (node && node->IsLiteral()) { + auto type = node->GetLiteralType(); + if (type != "Int32") { + ctx.Error(node->GetPos()) << "Expecting Int32 as frame bound value, but got " << type << " literal"; + return false; + } + + i32 value = FromString<i32>(node->GetLiteralValue()); + if (value < 0) { + ctx.Error(node->GetPos()) << "Expecting non-negative value for frame bound, but got " << value; + return false; + } + + boundValue = value; + } + + return true; +} + +bool TSqlTranslation::IsValidFrameSettings(TContext& ctx, const TFrameSpecification& frameSpec, size_t sortSpecSize) { + const TFrameBound& begin = *frameSpec.FrameBegin; + const TFrameBound& end = *frameSpec.FrameEnd; + + YQL_ENSURE(begin.Settings != FrameUndefined); + YQL_ENSURE(end.Settings != FrameUndefined); + + const bool beginUnbounded = !begin.Bound && begin.Settings != FrameCurrentRow; + const bool endUnbounded = !end.Bound && end.Settings != FrameCurrentRow; + + if (beginUnbounded && 
begin.Settings == FrameFollowing) { + ctx.Error(begin.Pos) << "Frame cannot start from " << FrameSettingsToString(begin.Settings, beginUnbounded); + return false; + } + + if (endUnbounded && end.Settings == FramePreceding) { + ctx.Error(end.Pos) << "Frame cannot end with " << FrameSettingsToString(end.Settings, endUnbounded); + return false; + } + + if (begin.Settings > end.Settings) { + ctx.Error(begin.Pos) << "Frame cannot start from " << FrameSettingsToString(begin.Settings, beginUnbounded) + << " and end with " << FrameSettingsToString(end.Settings, endUnbounded); + return false; + } + + if (frameSpec.FrameType == FrameByRange && sortSpecSize != 1) { + TStringBuf msg = "RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause"; + if (begin.Bound) { + ctx.Error(begin.Bound->GetPos()) << msg; + return false; + } + if (end.Bound) { + ctx.Error(end.Bound->GetPos()) << msg; + return false; + } + } + + TMaybe<i32> beginValue; + TMaybe<i32> endValue; + + if (frameSpec.FrameType != EFrameType::FrameByRange) { + if (!CheckFrameBoundLiteral(ctx, begin, beginValue) || !CheckFrameBoundLiteral(ctx, end, endValue)) { + return false; + } + } + + if (beginValue.Defined() && endValue.Defined()) { + if (begin.Settings == FramePreceding) { + beginValue = 0 - *beginValue; + } + if (end.Settings == FramePreceding) { + endValue = 0 - *endValue; + } + + if (*beginValue > *endValue) { + YQL_ENSURE(begin.Bound); + ctx.Warning(begin.Bound->GetPos(), TIssuesIds::YQL_EMPTY_WINDOW_FRAME) << "Used frame specification implies empty window frame"; + } + } + + return true; +} + +bool TSqlTranslation::FrameBound(const TRule_window_frame_bound& rule, TFrameBoundPtr& bound) { + // window_frame_bound: + // CURRENT ROW + // | (expr | UNBOUNDED) (PRECEDING | FOLLOWING) + // ; + bound = new TFrameBound; + switch (rule.Alt_case()) { + case TRule_window_frame_bound::kAltWindowFrameBound1: + bound->Pos = GetPos(rule.GetAlt_window_frame_bound1().GetToken1()); + 
bound->Settings = FrameCurrentRow; + break; + case TRule_window_frame_bound::kAltWindowFrameBound2: { + auto block = rule.GetAlt_window_frame_bound2().GetBlock1(); + switch (block.Alt_case()) { + case TRule_window_frame_bound_TAlt2_TBlock1::kAlt1: { + TSqlExpression boundExpr(Ctx, Mode); + bound->Bound = boundExpr.Build(block.GetAlt1().GetRule_expr1()); + if (!bound->Bound) { + return false; + } + bound->Pos = bound->Bound->GetPos(); + break; + } + case TRule_window_frame_bound_TAlt2_TBlock1::kAlt2: + bound->Pos = GetPos(block.GetAlt2().GetToken1()); + break; + case TRule_window_frame_bound_TAlt2_TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + const TString settingToken = to_lower(Token(rule.GetAlt_window_frame_bound2().GetToken2())); + if (settingToken == "preceding") { + bound->Settings = FramePreceding; + } else if (settingToken == "following") { + bound->Settings = FrameFollowing; + } else { + Y_ABORT("You should change implementation according to grammar changes"); + } + break; + } + case TRule_window_frame_bound::ALT_NOT_SET: + Y_ABORT("FrameClause: frame bound not corresond to grammar changes"); + } + return true; +} + +bool TSqlTranslation::FrameClause(const TRule_window_frame_clause& rule, TFrameSpecificationPtr& frameSpec, size_t sortSpecSize) { + // window_frame_clause: window_frame_units window_frame_extent window_frame_exclusion?; + frameSpec = new TFrameSpecification; + const TString frameUnitStr = to_lower(Token(rule.GetRule_window_frame_units1().GetToken1())); + if (frameUnitStr == "rows") { + frameSpec->FrameType = EFrameType::FrameByRows; + } else if (frameUnitStr == "range") { + frameSpec->FrameType = EFrameType::FrameByRange; + } else { + YQL_ENSURE(frameUnitStr == "groups"); + frameSpec->FrameType = EFrameType::FrameByGroups; + } + + auto frameExtent = rule.GetRule_window_frame_extent2(); + // window_frame_extent: window_frame_bound | window_frame_between; + switch 
(frameExtent.Alt_case()) { + case TRule_window_frame_extent::kAltWindowFrameExtent1: { + auto start = frameExtent.GetAlt_window_frame_extent1().GetRule_window_frame_bound1(); + if (!FrameBound(start, frameSpec->FrameBegin)) { + return false; + } + + // frame end is CURRENT ROW + frameSpec->FrameEnd = new TFrameBound; + frameSpec->FrameEnd->Pos = frameSpec->FrameBegin->Pos; + frameSpec->FrameEnd->Settings = FrameCurrentRow; + break; + } + case TRule_window_frame_extent::kAltWindowFrameExtent2: { + // window_frame_between: BETWEEN window_frame_bound AND window_frame_bound; + auto between = frameExtent.GetAlt_window_frame_extent2().GetRule_window_frame_between1(); + if (!FrameBound(between.GetRule_window_frame_bound2(), frameSpec->FrameBegin)) { + return false; + } + if (!FrameBound(between.GetRule_window_frame_bound4(), frameSpec->FrameEnd)) { + return false; + } + break; + } + case TRule_window_frame_extent::ALT_NOT_SET: + Y_ABORT("FrameClause: frame extent not correspond to grammar changes"); + } + YQL_ENSURE(frameSpec->FrameBegin); + YQL_ENSURE(frameSpec->FrameEnd); + if (!IsValidFrameSettings(Ctx, *frameSpec, sortSpecSize)) { + return false; + } + + if (rule.HasBlock3()) { + // window_frame_exclusion: EXCLUDE CURRENT ROW | EXCLUDE GROUP | EXCLUDE TIES | EXCLUDE NO OTHERS; + switch (rule.GetBlock3().GetRule_window_frame_exclusion1().Alt_case()) { + case TRule_window_frame_exclusion::kAltWindowFrameExclusion1: + frameSpec->FrameExclusion = FrameExclCurRow; + break; + case TRule_window_frame_exclusion::kAltWindowFrameExclusion2: + frameSpec->FrameExclusion = FrameExclGroup; + break; + case TRule_window_frame_exclusion::kAltWindowFrameExclusion3: + frameSpec->FrameExclusion = FrameExclTies; + break; + case TRule_window_frame_exclusion::kAltWindowFrameExclusion4: + frameSpec->FrameExclusion = FrameExclNone; + break; + case TRule_window_frame_exclusion::ALT_NOT_SET: + Y_ABORT("FrameClause: frame exclusion not correspond to grammar changes"); + } + } + + if 
(frameSpec->FrameExclusion != FrameExclNone) { + Ctx.Error() << "Frame exclusion is not supported yet"; + return false; + } + + return true; +} + +TWindowSpecificationPtr TSqlTranslation::WindowSpecification(const TRule_window_specification_details& rule) { + /* + window_specification_details: + existing_window_name? + window_partition_clause? + window_order_clause? + window_frame_clause? + */ + TWindowSpecificationPtr winSpecPtr = new TWindowSpecification; + if (rule.HasBlock1()) { + Ctx.Error() << "Existing window name is not supported in window specification yet!"; + return {}; + } + if (rule.HasBlock2()) { + /* + window_partition_clause: PARTITION COMPACT? BY named_expr_list; + */ + auto& partitionClause = rule.GetBlock2().GetRule_window_partition_clause1(); + winSpecPtr->IsCompact = partitionClause.HasBlock2(); + if (!winSpecPtr->IsCompact) { + auto hints = Ctx.PullHintForToken(Ctx.TokenPosition(partitionClause.GetToken1())); + winSpecPtr->IsCompact = AnyOf(hints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; }); + } + TColumnRefScope scope(Ctx, EColumnRefState::Allow); + if (!NamedExprList(partitionClause.GetRule_named_expr_list4(), winSpecPtr->Partitions)) { + return {}; + } + // ignore empty unnamed tuples: + // "PARTITION BY (), foo(x) as y, (), (z)" is allowed and will work exactly the same as + // "PARTITION BY foo(x) as y, z" + auto removed = std::remove_if(winSpecPtr->Partitions.begin(), winSpecPtr->Partitions.end(), + [](const TNodePtr& partitionNode) { + return !partitionNode->GetLabel() && !partitionNode->GetColumnName() && + partitionNode->GetTupleNode() != nullptr && + partitionNode->GetTupleSize() == 0; + }); + winSpecPtr->Partitions.erase(removed, winSpecPtr->Partitions.end()); + + } + if (rule.HasBlock3()) { + if (!OrderByClause(rule.GetBlock3().GetRule_window_order_clause1().GetRule_order_by_clause1(), winSpecPtr->OrderBy)) { + return {}; + } + } + const bool ordered = !winSpecPtr->OrderBy.empty(); + if 
(rule.HasBlock4()) { + if (!FrameClause(rule.GetBlock4().GetRule_window_frame_clause1(), winSpecPtr->Frame, winSpecPtr->OrderBy.size())) { + return {}; + } + } else { + winSpecPtr->Frame = new TFrameSpecification; + winSpecPtr->Frame->FrameBegin = new TFrameBound; + winSpecPtr->Frame->FrameEnd = new TFrameBound; + winSpecPtr->Frame->FrameBegin->Pos = winSpecPtr->Frame->FrameEnd->Pos = Ctx.Pos(); + winSpecPtr->Frame->FrameExclusion = EFrameExclusions::FrameExclNone; + + winSpecPtr->Frame->FrameBegin->Settings = EFrameSettings::FramePreceding; + if (Ctx.AnsiCurrentRow) { + // RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + winSpecPtr->Frame->FrameType = EFrameType::FrameByRange; + winSpecPtr->Frame->FrameEnd->Settings = EFrameSettings::FrameCurrentRow; + } else if (ordered) { + // legacy behavior + // ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + winSpecPtr->Frame->FrameType = EFrameType::FrameByRows; + winSpecPtr->Frame->FrameEnd->Settings = EFrameSettings::FrameCurrentRow; + } else { + // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + winSpecPtr->Frame->FrameType = EFrameType::FrameByRows; + winSpecPtr->Frame->FrameEnd->Settings = EFrameSettings::FrameFollowing; + } + } + + // Normalize and simplify + auto replaceCurrentWith = [](TFrameBound& frame, bool preceding, TNodePtr value ) { + frame.Settings = preceding ? 
EFrameSettings::FramePreceding : EFrameSettings::FrameFollowing; + frame.Bound = value; + }; + + const auto frameSpec = winSpecPtr->Frame; + if (!ordered && frameSpec->FrameType != EFrameType::FrameByRows) { + // CURRENT ROW -> UNBOUNDED + if (frameSpec->FrameBegin->Settings == EFrameSettings::FrameCurrentRow) { + replaceCurrentWith(*frameSpec->FrameBegin, true, nullptr); + } + if (frameSpec->FrameEnd->Settings == EFrameSettings::FrameCurrentRow) { + replaceCurrentWith(*frameSpec->FrameBegin, false, nullptr); + } + } + + // RANGE/GROUPS UNBOUNDED -> ROWS UNBOUNDED + if (frameSpec->FrameBegin->Settings == EFrameSettings::FramePreceding && !frameSpec->FrameBegin->Bound && + frameSpec->FrameEnd->Settings == EFrameSettings::FrameFollowing && !frameSpec->FrameEnd->Bound) + { + frameSpec->FrameType = EFrameType::FrameByRows; + } + + if (frameSpec->FrameType != EFrameType::FrameByRange) { + // replace FrameCurrentRow for ROWS/GROUPS with 0 preceding/following + // FrameCurrentRow has special meaning ( = first/last peer row) + if (frameSpec->FrameBegin->Settings == EFrameSettings::FrameCurrentRow) { + TNodePtr zero = new TLiteralNumberNode<i32>(winSpecPtr->Frame->FrameBegin->Pos, "Int32", "0"); + replaceCurrentWith(*frameSpec->FrameBegin, true, zero); + } + + if (frameSpec->FrameEnd->Settings == EFrameSettings::FrameCurrentRow) { + TNodePtr zero = new TLiteralNumberNode<i32>(winSpecPtr->Frame->FrameEnd->Pos, "Int32", "0"); + replaceCurrentWith(*frameSpec->FrameEnd, false, zero); + } + } + + return winSpecPtr; +} + +TNodePtr TSqlTranslation::DoStatement(const TRule_do_stmt& stmt, bool makeLambda, const TVector<TString>& args) { + switch (stmt.GetBlock2().Alt_case()) { + case TRule_do_stmt_TBlock2::kAlt1: { + const auto& callAction = stmt.GetBlock2().GetAlt1().GetRule_call_action1(); + TNodePtr action; + switch (callAction.GetBlock1().GetAltCase()) { + case TRule_call_action_TBlock1::kAlt1: { + TString bindName; + if 
(!NamedNodeImpl(callAction.GetBlock1().GetAlt1().GetRule_bind_parameter1(), bindName, *this)) { + return nullptr; + } + action = GetNamedNode(bindName); + if (!action) { + return nullptr; + } + break; + } + case TRule_call_action_TBlock1::kAlt2: + action = BuildEmptyAction(Ctx.Pos()); + break; + case TRule_call_action_TBlock1::ALT_NOT_SET: + Ctx.IncrementMonCounter("sql_errors", "UnknownDoStmt"); + AltNotImplemented("do_stmt", callAction.GetBlock1()); + return nullptr; + } + + TVector<TNodePtr> values; + values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "Apply", TNodeFlags::Default)); + values.push_back(action); + values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "world", TNodeFlags::Default)); + + TSqlExpression sqlExpr(Ctx, Mode); + if (callAction.HasBlock3() && !ExprList(sqlExpr, values, callAction.GetBlock3().GetRule_expr_list1())) { + return nullptr; + } + + TNodePtr apply = new TAstListNodeImpl(Ctx.Pos(), std::move(values)); + if (!makeLambda) { + return BuildDoCall(Ctx.Pos(), apply); + } + + TNodePtr params = new TAstListNodeImpl(Ctx.Pos()); + params->Add("world"); + for (const auto& arg : args) { + params->Add(new TAstAtomNodeImpl(Ctx.Pos(), arg, TNodeFlags::ArbitraryContent)); + } + + return BuildDoCall(Ctx.Pos(), BuildLambda(Ctx.Pos(), params, apply)); + } + case TRule_do_stmt_TBlock2::kAlt2: { + const auto& inlineAction = stmt.GetBlock2().GetAlt2().GetRule_inline_action1(); + const auto& body = inlineAction.GetRule_define_action_or_subquery_body2(); + + auto saveScoped = Ctx.Scoped; + Ctx.Scoped = MakeIntrusive<TScopedState>(); + Ctx.AllScopes.push_back(Ctx.Scoped); + *Ctx.Scoped = *saveScoped; + Ctx.Scoped->Local = TScopedState::TLocal{}; + Ctx.ScopeLevel++; + TSqlQuery query(Ctx, Ctx.Settings.Mode, false); + TBlocks innerBlocks; + + const bool hasValidBody = DefineActionOrSubqueryBody(query, innerBlocks, body); + auto ret = hasValidBody ? 
BuildQuery(Ctx.Pos(), innerBlocks, false, Ctx.Scoped) : nullptr; + WarnUnusedNodes(); + Ctx.ScopeLevel--; + Ctx.Scoped = saveScoped; + + if (!ret) { + return {}; + } + + TNodePtr blockNode = new TAstListNodeImpl(Ctx.Pos()); + blockNode->Add("block"); + blockNode->Add(blockNode->Q(ret)); + if (!makeLambda) { + return blockNode; + } + + TNodePtr params = new TAstListNodeImpl(Ctx.Pos()); + params->Add("world"); + for (const auto& arg : args) { + params->Add(new TAstAtomNodeImpl(Ctx.Pos(), arg, TNodeFlags::ArbitraryContent)); + } + + return BuildLambda(Ctx.Pos(), params, blockNode); + } + case TRule_do_stmt_TBlock2::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +bool TSqlTranslation::DefineActionOrSubqueryBody(TSqlQuery& query, TBlocks& blocks, const TRule_define_action_or_subquery_body& body) { + if (body.HasBlock2()) { + Ctx.PushCurrentBlocks(&blocks); + Y_DEFER { + Ctx.PopCurrentBlocks(); + }; + if (!query.Statement(blocks, body.GetBlock2().GetRule_sql_stmt_core1())) { + return false; + } + + for (const auto& nestedStmtItem : body.GetBlock2().GetBlock2()) { + const auto& nestedStmt = nestedStmtItem.GetRule_sql_stmt_core2(); + if (!query.Statement(blocks, nestedStmt)) { + return false; + } + } + } + + return true; +} + +bool TSqlTranslation::DefineActionOrSubqueryStatement(const TRule_define_action_or_subquery_stmt& stmt, TSymbolNameWithPos& nameAndPos, TNodePtr& lambda) { + auto kind = Ctx.Token(stmt.GetToken2()); + const bool isSubquery = to_lower(kind) == "subquery"; + if (!isSubquery && Mode == NSQLTranslation::ESqlMode::SUBQUERY) { + Error() << "Definition of actions is not allowed in the subquery"; + return false; + } + + TString actionName; + if (!NamedNodeImpl(stmt.GetRule_bind_parameter3(), actionName, *this)) { + return false; + } + if (IsAnonymousName(actionName)) { + Error() << "Can not use anonymous name '" << actionName << "' as " << to_upper(kind) << " name"; + return false; + } + TPosition 
actionNamePos = Ctx.Pos(); + + TVector<TSymbolNameWithPos> argNames; + ui32 optionalArgumentsCount = 0; + if (stmt.HasBlock5() && !ActionOrSubqueryArgs(stmt.GetBlock5().GetRule_action_or_subquery_args1(), argNames, optionalArgumentsCount)) { + return false; + } + + auto saveScoped = Ctx.Scoped; + Ctx.Scoped = MakeIntrusive<TScopedState>(); + Ctx.AllScopes.push_back(Ctx.Scoped); + *Ctx.Scoped = *saveScoped; + Ctx.Scoped->Local = TScopedState::TLocal{}; + Ctx.ScopeLevel++; + + for (auto& arg : argNames) { + arg.Name = PushNamedAtom(arg.Pos, arg.Name); + } + + auto saveMode = Ctx.Settings.Mode; + if (isSubquery) { + Ctx.Settings.Mode = NSQLTranslation::ESqlMode::SUBQUERY; + } + + TSqlQuery query(Ctx, Ctx.Settings.Mode, false); + TBlocks innerBlocks; + const bool hasValidBody = DefineActionOrSubqueryBody(query, innerBlocks, stmt.GetRule_define_action_or_subquery_body8()); + + ui32 topLevelSelects = 0; + bool hasTailOps = false; + for (auto& block : innerBlocks) { + if (block->SubqueryAlias()) { + continue; + } + + if (block->HasSelectResult()) { + ++topLevelSelects; + } else if (topLevelSelects) { + hasTailOps = true; + } + } + + if (isSubquery && (topLevelSelects != 1 || hasTailOps)) { + Error() << "Strictly one select/process/reduce statement is expected at the end of subquery"; + return false; + } + + auto ret = hasValidBody ? 
BuildQuery(Ctx.Pos(), innerBlocks, false, Ctx.Scoped) : nullptr; + WarnUnusedNodes(); + Ctx.Scoped = saveScoped; + Ctx.ScopeLevel--; + Ctx.Settings.Mode = saveMode; + + if (!ret) { + return false; + } + + TNodePtr blockNode = new TAstListNodeImpl(Ctx.Pos()); + blockNode->Add("block"); + blockNode->Add(blockNode->Q(ret)); + + TNodePtr params = new TAstListNodeImpl(Ctx.Pos()); + params->Add("world"); + for (const auto& arg : argNames) { + params->Add(BuildAtom(arg.Pos, arg.Name)); + } + + lambda = BuildLambda(Ctx.Pos(), params, blockNode); + if (optionalArgumentsCount > 0) { + lambda = new TCallNodeImpl(Ctx.Pos(), "WithOptionalArgs", { + lambda, + BuildQuotedAtom(Ctx.Pos(), ToString(optionalArgumentsCount), TNodeFlags::Default) + }); + } + + nameAndPos.Name = actionName; + nameAndPos.Pos = actionNamePos; + return true; +} + +TNodePtr TSqlTranslation::IfStatement(const TRule_if_stmt& stmt) { + bool isEvaluate = stmt.HasBlock1(); + TSqlExpression expr(Ctx, Mode); + auto exprNode = expr.Build(stmt.GetRule_expr3()); + if (!exprNode) { + return {}; + } + + auto thenNode = DoStatement(stmt.GetRule_do_stmt4(), isEvaluate); + if (!thenNode) { + return {}; + } + + TNodePtr elseNode; + if (stmt.HasBlock5()) { + elseNode = DoStatement(stmt.GetBlock5().GetRule_do_stmt2(), isEvaluate); + if (!elseNode) { + return {}; + } + } + + return BuildWorldIfNode(Ctx.Pos(), exprNode, thenNode, elseNode, isEvaluate); +} + +TNodePtr TSqlTranslation::ForStatement(const TRule_for_stmt& stmt) { + bool isEvaluate = stmt.HasBlock1(); + bool isParallel = stmt.HasBlock2(); + TSqlExpression expr(Ctx, Mode); + TString itemArgName; + if (!NamedNodeImpl(stmt.GetRule_bind_parameter4(), itemArgName, *this)) { + return {}; + } + TPosition itemArgNamePos = Ctx.Pos(); + + auto exprNode = expr.Build(stmt.GetRule_expr6()); + if (!exprNode) { + return{}; + } + + itemArgName = PushNamedAtom(itemArgNamePos, itemArgName); + if (isParallel) { + ++Ctx.ParallelModeCount; + } + + auto bodyNode = 
DoStatement(stmt.GetRule_do_stmt7(), true, { itemArgName }); + if (isParallel) { + --Ctx.ParallelModeCount; + } + + PopNamedNode(itemArgName); + if (!bodyNode) { + return{}; + } + + TNodePtr elseNode; + if (stmt.HasBlock8()) { + elseNode = DoStatement(stmt.GetBlock8().GetRule_do_stmt2(), true); + if (!elseNode) { + return{}; + } + } + + return BuildWorldForNode(Ctx.Pos(), exprNode, bodyNode, elseNode, isEvaluate, isParallel); +} + +bool TSqlTranslation::BindParameterClause(const TRule_bind_parameter& node, TDeferredAtom& result) { + TString paramName; + if (!NamedNodeImpl(node, paramName, *this)) { + return false; + } + auto named = GetNamedNode(paramName); + if (!named) { + return false; + } + + result = MakeAtomFromExpression(Ctx.Pos(), Ctx, named); + return true; +} + +bool TSqlTranslation::ObjectFeatureValueClause(const TRule_object_feature_value& node, TDeferredAtom& result) { + // object_feature_value: id_or_type | bind_parameter | STRING_VALUE | bool_value; + switch (node.Alt_case()) { + case TRule_object_feature_value::kAltObjectFeatureValue1: + { + TString name = Id(node.GetAlt_object_feature_value1().GetRule_id_or_type1(), *this); + result = TDeferredAtom(Ctx.Pos(), name); + break; + } + case TRule_object_feature_value::kAltObjectFeatureValue2: + { + if (!BindParameterClause(node.GetAlt_object_feature_value2().GetRule_bind_parameter1(), result)) { + return false; + } + break; + } + case TRule_object_feature_value::kAltObjectFeatureValue3: + { + auto strValue = StringContent(Ctx, Ctx.Pos(), Ctx.Token(node.GetAlt_object_feature_value3().GetToken1())); + if (!strValue) { + Error() << "Cannot parse string correctly: " << Ctx.Token(node.GetAlt_object_feature_value3().GetToken1()); + return false; + } + result = TDeferredAtom(Ctx.Pos(), strValue->Content); + break; + } + case TRule_object_feature_value::kAltObjectFeatureValue4: + { + TString value = Ctx.Token(node.GetAlt_object_feature_value4().GetRule_bool_value1().GetToken1()); + result = 
TDeferredAtom(BuildLiteralBool(Ctx.Pos(), FromString<bool>(value)), Ctx); + break; + } + case TRule_object_feature_value::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; +} + +bool TSqlTranslation::AddObjectFeature(std::map<TString, TDeferredAtom>& result, const TRule_object_feature& feature) { + if (feature.has_alt_object_feature1()) { + auto& kv = feature.GetAlt_object_feature1().GetRule_object_feature_kv1(); + const TString& key = Id(kv.GetRule_an_id_or_type1(), *this); + auto& ruleValue = kv.GetRule_object_feature_value3(); + TDeferredAtom value; + if (!ObjectFeatureValueClause(ruleValue, value)) { + return false; + } + result[key] = value; + } else if (feature.has_alt_object_feature2()) { + result[Id(feature.GetAlt_object_feature2().GetRule_object_feature_flag1().GetRule_an_id_or_type1(), *this)] = TDeferredAtom(); + } + return true; +} + +bool TSqlTranslation::ParseObjectFeatures(std::map<TString, TDeferredAtom>& result, const TRule_object_features& features) { + if (features.has_alt_object_features1()) { + if (!AddObjectFeature(result, features.alt_object_features1().GetRule_object_feature1())) { + return false; + } + + } else if (features.has_alt_object_features2()) { + if (!AddObjectFeature(result, features.alt_object_features2().GetRule_object_feature2())) { + return false; + } + for (auto&& i : features.alt_object_features2().GetBlock3()) { + if (!AddObjectFeature(result, i.GetRule_object_feature2())) { + return false; + } + } + } else { + return false; + } + return true; +} + +bool TSqlTranslation::StoreDataSourceSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) { + YQL_ENSURE(value); + + const TString key = to_lower(id.Name); + if (result.find(key) != result.end()) { + Ctx.Error() << to_upper(key) << " duplicate keys"; + return false; + } + + if (!StoreString(*value, result[key], Ctx, to_upper(key))) { + return false; + } + + 
return true; +} + +/* Convenience overload: takes the identifier and value out of an ALTER setting entry and forwards them to the (id, value, result) overload. */ bool TSqlTranslation::StoreDataSourceSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) { + const TIdentifier id = IdEx(entry.GetRule_an_id1(), *this); + return StoreDataSourceSettingsEntry(id, &entry.GetRule_table_setting_value3(), result); +} + +/* WITH (...) form: stores the first entry and every following Block4 entry into `result`, then requires a "source_type" key and an auth configuration accepted by ValidateAuthMethod. Returns false as soon as one step fails. */ bool TSqlTranslation::ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settingsNode) { + const auto& firstEntry = settingsNode.GetRule_table_settings_entry3(); + if (!StoreDataSourceSettingsEntry(IdEx(firstEntry.GetRule_an_id1(), *this), &firstEntry.GetRule_table_setting_value3(), + result)) { + return false; + } + for (auto& block : settingsNode.GetBlock4()) { + const auto& entry = block.GetRule_table_settings_entry2(); + if (!StoreDataSourceSettingsEntry(IdEx(entry.GetRule_an_id1(), *this), &entry.GetRule_table_setting_value3(), result)) { + return false; + } + } + if (result.find("source_type") == result.end()) { + Ctx.Error() << "SOURCE_TYPE requires key"; + return false; + } + if (!ValidateAuthMethod(result)) { + return false; + } + return true; +} + +/* ALTER form: alternatives 1 and 2 store SET entries into `result`, alternative 3 collects lower-cased RESET keys into `toReset`. Unlike the WITH form, no source_type or auth validation is performed here. */ bool TSqlTranslation::ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_external_data_source_action& alterAction) { + switch (alterAction.Alt_case()) { + case TRule_alter_external_data_source_action::kAltAlterExternalDataSourceAction1: { + const auto& action = alterAction.GetAlt_alter_external_data_source_action1().GetRule_alter_table_set_table_setting_uncompat1(); + if (!StoreDataSourceSettingsEntry(IdEx(action.GetRule_an_id2(), *this), &action.GetRule_table_setting_value3(), result)) { + return false; + } + return true; + } + case TRule_alter_external_data_source_action::kAltAlterExternalDataSourceAction2: { + const auto& action = alterAction.GetAlt_alter_external_data_source_action2().GetRule_alter_table_set_table_setting_compat1(); + if 
(!StoreDataSourceSettingsEntry(action.GetRule_alter_table_setting_entry3(), result)) { + return false; + } + for (const auto& entry : action.GetBlock4()) { + if (!StoreDataSourceSettingsEntry(entry.GetRule_alter_table_setting_entry2(), result)) { + return false; + } + } + return true; + } + case TRule_alter_external_data_source_action::kAltAlterExternalDataSourceAction3: { + const auto& action = alterAction.GetAlt_alter_external_data_source_action3().GetRule_alter_table_reset_table_setting1(); + const TString key = to_lower(IdEx(action.GetRule_an_id3(), *this).Name); + toReset.insert(key); + for (const auto& keys : action.GetBlock4()) { + const TString key = to_lower(IdEx(keys.GetRule_an_id2(), *this).Name); + toReset.insert(key); + } + return true; + } + case TRule_alter_external_data_source_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +bool TSqlTranslation::ValidateAuthMethod(const std::map<TString, TDeferredAtom>& result) { + const static TSet<TStringBuf> allAuthFields{ + "service_account_id", + "service_account_secret_name", + "login", + "password_secret_name", + "aws_access_key_id_secret_name", + "aws_secret_access_key_secret_name", + "aws_region", + "token_secret_name" + }; + const static TMap<TStringBuf, TSet<TStringBuf>> authMethodFields{ + {"NONE", {}}, + {"SERVICE_ACCOUNT", {"service_account_id", "service_account_secret_name"}}, + {"BASIC", {"login", "password_secret_name"}}, + {"AWS", {"aws_access_key_id_secret_name", "aws_secret_access_key_secret_name", "aws_region"}}, + {"MDB_BASIC", {"service_account_id", "service_account_secret_name", "login", "password_secret_name"}}, + {"TOKEN", {"token_secret_name"}} + }; + auto authMethodIt = result.find("auth_method"); + if (authMethodIt == result.end() || authMethodIt->second.GetLiteral() == nullptr) { + Ctx.Error() << "AUTH_METHOD requires key"; + return false; + } + const auto& authMethod = *authMethodIt->second.GetLiteral(); + auto it = 
authMethodFields.find(authMethod); + if (it == authMethodFields.end()) { + Ctx.Error() << "Unknown AUTH_METHOD = " << authMethod; + return false; + } + const auto& currentAuthFields = it->second; + /* Each known auth field must be present exactly when the selected method lists it. */ for (const auto& authField: allAuthFields) { + if (currentAuthFields.contains(authField) && !result.contains(TString{authField})) { + Ctx.Error() << to_upper(TString{authField}) << " requires key"; + return false; + } + if (!currentAuthFields.contains(authField) && result.contains(TString{authField})) { + Ctx.Error() << to_upper(TString{authField}) << " key is not supported for AUTH_METHOD = " << authMethod; + return false; + } + } + return true; +} + +/* Validates CREATE parameters of an external table; any other table type passes unchecked. An external table must have DataSourcePath and Location settings and must not declare primary key columns. */ bool TSqlTranslation::ValidateExternalTable(const TCreateTableParameters& params) { + if (params.TableType != ETableType::ExternalTable) { + return true; + } + + if (!params.TableSettings.DataSourcePath) { + Ctx.Error() << "DATA_SOURCE requires key"; + return false; + } + + if (!params.TableSettings.Location) { + Ctx.Error() << "LOCATION requires key"; + return false; + } + + if (params.PkColumns) { + Ctx.Error() << "PRIMARY KEY is not supported for external table"; + return false; + } + + return true; +} + +/* Fills the view features: "query_text" gets the token text of the SELECT prefixed with statements that recreate the current compilation context, "query_ast" gets the node built by BuildViewSelect. Returns false when that node cannot be built. */ bool TSqlTranslation::ParseViewQuery( + std::map<TString, TDeferredAtom>& features, + const TRule_select_stmt& query +) { + TString queryText = CollectTokens(query); + TString contextRecreationQuery; + { + const auto& service = Ctx.Scoped->CurrService; + const auto& cluster = Ctx.Scoped->CurrCluster; + const auto effectivePathPrefix = Ctx.GetPrefixPath(service, cluster); + + // TO DO: capture all runtime pragmas in a similar fashion. + if (effectivePathPrefix != Ctx.Settings.PathPrefix) { + contextRecreationQuery = TStringBuilder() << "PRAGMA TablePathPrefix = \"" << effectivePathPrefix << "\";\n"; + } + + // TO DO: capture other compilation-affecting statements except USE. 
+ if (cluster.GetLiteral() && *cluster.GetLiteral() != Ctx.Settings.DefaultCluster) { + /* Append rather than assign: a plain assignment threw away the PRAGMA TablePathPrefix statement captured just before whenever both the path prefix and the cluster differ from the defaults. */ + contextRecreationQuery += TStringBuilder() << "USE " << *cluster.GetLiteral() << ";\n"; + } + } + features["query_text"] = { Ctx.Pos(), contextRecreationQuery + queryText }; + + // AST is needed for ready-made validation of CREATE VIEW statement. + // Query is stored as plain text, not AST. + const auto viewSelect = BuildViewSelect(query, Ctx, contextRecreationQuery); + if (!viewSelect) { + return false; + } + features["query_ast"] = {viewSelect, Ctx}; + + return true; +} + +/* AST node for a RETURNING clause. It records either a star or an ordered list of column names and, on DoInit, builds the quoted ("returning" (...)) option made of ReturningStar or ReturningListItem entries. */ +class TReturningListColumns : public INode { +public: + TReturningListColumns(TPosition pos) + : INode(pos) + { + } + + /* Switches to RETURNING *; column names collected so far are dropped. */ + void SetStar() { + ColumnNames.clear(); + Star = true; + } + + /* Appends one column name resolved from the an_id rule. */ + void AddColumn(const NSQLv1Generated::TRule_an_id & rule, TTranslation& ctx) { + ColumnNames.push_back(NSQLTranslationV1::Id(rule, ctx)); + } + + bool DoInit(TContext& ctx, ISource* source) override { + Node = Y(); + if (Star) { + Node->Add(Y("ReturningStar")); + } else { + for (auto&& column : ColumnNames) { + Node->Add(Y("ReturningListItem", Q(column))); + } + } + Node = Q(Y(Q("returning"), Q(Node))); + return Node->Init(ctx, source); + } + + /* The clone must carry the collected state: a clone built from the position alone would emit an empty returning list after DoInit. Node is not copied because DoInit rebuilds it. */ + TNodePtr DoClone() const override { + auto copy = MakeHolder<TReturningListColumns>(GetPos()); + copy->ColumnNames = ColumnNames; + copy->Star = Star; + return copy.Release(); + } + + TAstNode* Translate(TContext& ctx) const override { + return Node->Translate(ctx); + } + +private: + TNodePtr Node; + TVector<TString> ColumnNames; + bool Star = false; +}; + +/* Builds the RETURNING node: alternative 1 of Block2 is the star form, alternative 2 is an explicit column list (first column plus every entry of its nested Block2). */ +TNodePtr TSqlTranslation::ReturningList(const ::NSQLv1Generated::TRule_returning_columns_list& columns) { + auto result = MakeHolder<TReturningListColumns>(Ctx.Pos()); + + if (columns.GetBlock2().Alt_case() == TRule_returning_columns_list_TBlock2::AltCase::kAlt1) { + result->SetStar(); + } else if (columns.GetBlock2().Alt_case() == TRule_returning_columns_list_TBlock2::AltCase::kAlt2) { + result->AddColumn(columns.GetBlock2().alt2().GetRule_an_id1(), *this); + for (auto& block : columns.GetBlock2().alt2().GetBlock2()) { + 
result->AddColumn(block.GetRule_an_id2(), *this); + } + } + + return result.Release(); +} + +/* Stores one resource pool setting under its lower-cased name. `value` must be non-null (YQL_ENSURE); a key that is already present is rejected. Alternative 2 of the value rule is stored through StoreString, alternative 3 through StoreInt, anything else is an error. Every switch branch returns, so the trailing `return true` is never reached. */ bool TSqlTranslation::StoreResourcePoolSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) { + YQL_ENSURE(value); + + const TString key = to_lower(id.Name); + if (result.find(key) != result.end()) { + Ctx.Error() << to_upper(key) << " duplicate keys"; + return false; + } + + switch (value->Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue2: + return StoreString(*value, result[key], Ctx, to_upper(key)); + + case TRule_table_setting_value::kAltTableSettingValue3: + return StoreInt(*value, result[key], Ctx, to_upper(key)); + + default: + Ctx.Error() << to_upper(key) << " value should be a string literal or integer"; + return false; + } + + return true; +} + +/* Convenience overload: takes the identifier and value out of an ALTER setting entry and forwards them to the (id, value, result) overload. */ bool TSqlTranslation::StoreResourcePoolSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) { + const TIdentifier id = IdEx(entry.GetRule_an_id1(), *this); + return StoreResourcePoolSettingsEntry(id, &entry.GetRule_table_setting_value3(), result); +} + +/* WITH (...) form: stores the first entry and every following Block4 entry into `result`; stops with false at the first entry that cannot be stored. */ bool TSqlTranslation::ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settingsNode) { + const auto& firstEntry = settingsNode.GetRule_table_settings_entry3(); + if (!StoreResourcePoolSettingsEntry(IdEx(firstEntry.GetRule_an_id1(), *this), &firstEntry.GetRule_table_setting_value3(), result)) { + return false; + } + for (const auto& block : settingsNode.GetBlock4()) { + const auto& entry = block.GetRule_table_settings_entry2(); + if (!StoreResourcePoolSettingsEntry(IdEx(entry.GetRule_an_id1(), *this), &entry.GetRule_table_setting_value3(), result)) { + return false; + } + } + return true; +} + +/* ALTER form: alternative 1 stores SET entries into `result`, alternative 2 collects lower-cased RESET keys into `toReset`. */ bool TSqlTranslation::ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_action& alterAction) { + switch 
(alterAction.Alt_case()) { + case TRule_alter_resource_pool_action::kAltAlterResourcePoolAction1: { + /* SET: the first entry is in alter_table_setting_entry3, the rest in Block4. */ const auto& action = alterAction.GetAlt_alter_resource_pool_action1().GetRule_alter_table_set_table_setting_compat1(); + if (!StoreResourcePoolSettingsEntry(action.GetRule_alter_table_setting_entry3(), result)) { + return false; + } + for (const auto& entry : action.GetBlock4()) { + if (!StoreResourcePoolSettingsEntry(entry.GetRule_alter_table_setting_entry2(), result)) { + return false; + } + } + return true; + } + case TRule_alter_resource_pool_action::kAltAlterResourcePoolAction2: { + /* RESET: the first key is in an_id3, the rest in Block4; all are stored lower-cased. */ const auto& action = alterAction.GetAlt_alter_resource_pool_action2().GetRule_alter_table_reset_table_setting1(); + const TString firstKey = to_lower(IdEx(action.GetRule_an_id3(), *this).Name); + toReset.insert(firstKey); + for (const auto& key : action.GetBlock4()) { + toReset.insert(to_lower(IdEx(key.GetRule_an_id2(), *this).Name)); + } + return true; + } + case TRule_alter_resource_pool_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +/* Stores one resource pool classifier setting under its lower-cased name. The body mirrors StoreResourcePoolSettingsEntry: non-null `value` is required, an already present key is rejected, alternative 2 goes through StoreString, alternative 3 through StoreInt, anything else is an error. The trailing `return true` is never reached. */ bool TSqlTranslation::StoreResourcePoolClassifierSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) { + YQL_ENSURE(value); + + const TString key = to_lower(id.Name); + if (result.find(key) != result.end()) { + Ctx.Error() << to_upper(key) << " duplicate keys"; + return false; + } + + switch (value->Alt_case()) { + case TRule_table_setting_value::kAltTableSettingValue2: + return StoreString(*value, result[key], Ctx, to_upper(key)); + + case TRule_table_setting_value::kAltTableSettingValue3: + return StoreInt(*value, result[key], Ctx, to_upper(key)); + + default: + Ctx.Error() << to_upper(key) << " value should be a string literal or integer"; + return false; + } + + return true; +} + +/* Convenience overload: takes the identifier and value out of an ALTER setting entry and forwards them to the (id, value, result) overload. */ bool TSqlTranslation::StoreResourcePoolClassifierSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& 
result) { + const TIdentifier id = IdEx(entry.GetRule_an_id1(), *this); + return StoreResourcePoolClassifierSettingsEntry(id, &entry.GetRule_table_setting_value3(), result); +} + +/* WITH (...) form: stores the first entry and every following Block4 entry into `result`; stops with false at the first entry that cannot be stored. */ bool TSqlTranslation::ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settingsNode) { + const auto& firstEntry = settingsNode.GetRule_table_settings_entry3(); + if (!StoreResourcePoolClassifierSettingsEntry(IdEx(firstEntry.GetRule_an_id1(), *this), &firstEntry.GetRule_table_setting_value3(), result)) { + return false; + } + for (const auto& block : settingsNode.GetBlock4()) { + const auto& entry = block.GetRule_table_settings_entry2(); + if (!StoreResourcePoolClassifierSettingsEntry(IdEx(entry.GetRule_an_id1(), *this), &entry.GetRule_table_setting_value3(), result)) { + return false; + } + } + return true; +} + +/* ALTER form: alternative 1 stores SET entries into `result`, alternative 2 collects lower-cased RESET keys into `toReset`. */ bool TSqlTranslation::ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_classifier_action& alterAction) { + switch (alterAction.Alt_case()) { + case TRule_alter_resource_pool_classifier_action::kAltAlterResourcePoolClassifierAction1: { + /* SET: the first entry is in alter_table_setting_entry3, the rest in Block4. */ const auto& action = alterAction.GetAlt_alter_resource_pool_classifier_action1().GetRule_alter_table_set_table_setting_compat1(); + if (!StoreResourcePoolClassifierSettingsEntry(action.GetRule_alter_table_setting_entry3(), result)) { + return false; + } + for (const auto& entry : action.GetBlock4()) { + if (!StoreResourcePoolClassifierSettingsEntry(entry.GetRule_alter_table_setting_entry2(), result)) { + return false; + } + } + return true; + } + case TRule_alter_resource_pool_classifier_action::kAltAlterResourcePoolClassifierAction2: { + /* RESET: the first key is in an_id3, the rest in Block4; all are stored lower-cased. */ const auto& action = alterAction.GetAlt_alter_resource_pool_classifier_action2().GetRule_alter_table_reset_table_setting1(); + const TString firstKey = to_lower(IdEx(action.GetRule_an_id3(), *this).Name); + toReset.insert(firstKey); + for (const auto& key : 
action.GetBlock4()) { + toReset.insert(to_lower(IdEx(key.GetRule_an_id2(), *this).Name)); + } + return true; + } + case TRule_alter_resource_pool_classifier_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_translation.h b/yql/essentials/sql/v1/sql_translation.h new file mode 100644 index 00000000000..683647f16bf --- /dev/null +++ b/yql/essentials/sql/v1/sql_translation.h @@ -0,0 +1,342 @@ +#pragma once +#include "context.h" +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> +#include <library/cpp/charset/ci_string.h> + +namespace NSQLTranslationV1 { + +using namespace NYql; +using namespace NSQLv1Generated; + +/* Converts a parser token into a TPosition built from the token's column and line. */ +inline TPosition GetPos(const TToken& token) { + return TPosition(token.GetColumn(), token.GetLine()); +} + +/* Builds a TIdentifier from the first token of `node`: the position comes from the token's column and line, the name from ctx.Identifier(). The token is bound by const reference so that it is not copied on every call. */ +template <typename TToken> +TIdentifier GetIdentifier(TTranslation& ctx, const TToken& node) { + const auto& token = node.GetToken1(); + return TIdentifier(TPosition(token.GetColumn(), token.GetLine()), ctx.Identifier(token)); +} + +TIdentifier GetKeywordId(TTranslation& ctx, const TRule_keyword& node); + +/* Name of a keyword rule, as resolved by GetKeywordId. */ +inline TString GetKeyword(TTranslation& ctx, const TRule_keyword& node) { + return GetKeywordId(ctx, node).Name; +} + +/* Generic fallback for any other rule type: name of the identifier built by GetIdentifier. */ +template <typename TRule> +inline TString GetKeyword(TTranslation& ctx, const TRule& node) { + return GetIdentifier(ctx, node).Name; +} + +inline TString Id(const TRule_identifier& node, TTranslation& ctx) { + // identifier: ID_PLAIN | ID_QUOTED; + return ctx.Identifier(node.GetToken1()); +} + +TString Id(const TRule_id& node, TTranslation& ctx); + +TString Id(const TRule_id_or_type& node, TTranslation& ctx); + +TString Id(const TRule_id_as_compat& node, TTranslation& ctx); + +TString Id(const TRule_an_id_as_compat& node, TTranslation& ctx); + +TString Id(const TRule_id_schema& node, TTranslation& ctx); + +TString Id(const TRule_an_id_or_type& node, TTranslation& ctx); + 
+std::pair<bool, TString> Id(const TRule_id_or_at& node, TTranslation& ctx); + +TString Id(const TRule_id_table& node, TTranslation& ctx); + +TString Id(const TRule_an_id_table& node, TTranslation& ctx); + +TString Id(const TRule_id_table_or_type& node, TTranslation& ctx); + +TString Id(const TRule_id_expr& node, TTranslation& ctx); + +bool IsQuotedId(const TRule_id_expr& node, TTranslation& ctx); + +TString Id(const TRule_id_expr_in& node, TTranslation& ctx); + +TString Id(const TRule_id_window& node, TTranslation& ctx); + +TString Id(const TRule_id_without& node, TTranslation& ctx); + +TString Id(const TRule_id_hint& node, TTranslation& ctx); + +TString Id(const TRule_an_id& node, TTranslation& ctx); + +TString Id(const TRule_an_id_schema& node, TTranslation& ctx); + +TString Id(const TRule_an_id_expr& node, TTranslation& ctx); + +TString Id(const TRule_an_id_window& node, TTranslation& ctx); + +TString Id(const TRule_an_id_without& node, TTranslation& ctx); + +TString Id(const TRule_an_id_hint& node, TTranslation& ctx); + +TString Id(const TRule_an_id_pure& node, TTranslation& ctx); + +/* Resolves the name of `node` through the matching Id() overload and pairs it with the current position of the translation context (ctx.Context().Pos()), not with a position read from the node itself. */ template<typename TRule> +inline TIdentifier IdEx(const TRule& node, TTranslation& ctx) { + const TString name(Id(node, ctx)); + const TPosition pos(ctx.Context().Pos()); + return TIdentifier(pos, name); +} + +bool NamedNodeImpl(const TRule_bind_parameter& node, TString& name, TTranslation& ctx); + +TString OptIdPrefixAsStr(const TRule_opt_id_prefix& node, TTranslation& ctx, const TString& defaultStr = {}); + +TString OptIdPrefixAsStr(const TRule_opt_id_prefix_or_type& node, TTranslation& ctx, const TString& defaultStr = {}); + +void PureColumnListStr(const TRule_pure_column_list& node, TTranslation& ctx, TVector<TString>& outList); + +bool NamedNodeImpl(const TRule_opt_bind_parameter& node, TString& name, bool& isOptional, TTranslation& ctx); + +TDeferredAtom PureColumnOrNamed(const TRule_pure_column_or_named& node, TTranslation& ctx); + +bool PureColumnOrNamedListStr(const 
TRule_pure_column_or_named_list& node, TTranslation& ctx, TVector<TDeferredAtom>& outList); + +std::pair<TString, TViewDescription> TableKeyImpl(const std::pair<bool, TString>& nameWithAt, TViewDescription view, TTranslation& ctx); + +std::pair<TString, TViewDescription> TableKeyImpl(const TRule_table_key& node, TTranslation& ctx, bool hasAt); + +TMaybe<TColumnConstraints> ColumnConstraints(const TRule_column_schema& node, TTranslation& ctx); + +/// \return optional prefix +TString ColumnNameAsStr(TTranslation& ctx, const TRule_column_name& node, TString& id); + +TString ColumnNameAsSingleStr(TTranslation& ctx, const TRule_column_name& node); + +class TSqlQuery; + +/* A symbol name together with the position it is associated with. */ struct TSymbolNameWithPos { + TString Name; + TPosition Pos; +}; + +/* Translation base class that remembers the SQL mode it was created with. The constructor is protected and enforces that `mode` equals ctx.Settings.Mode. */ class TSqlTranslation: public TTranslation { +protected: + TSqlTranslation(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TTranslation(ctx) + , Mode(mode) + { + /// \todo remove NSQLTranslation::ESqlMode params + YQL_ENSURE(ctx.Settings.Mode == mode); + } + +protected: + /* Expression mode passed to NamedExpr / NamedExprList; Regular is the default. */ enum class EExpr { + Regular, + GroupBy, + SqlLambdaParams, + }; + TNodePtr NamedExpr(const TRule_named_expr& node, EExpr exprMode = EExpr::Regular); + bool NamedExprList(const TRule_named_expr_list& node, TVector<TNodePtr>& exprs, EExpr exprMode = EExpr::Regular); + bool BindList(const TRule_bind_parameter_list& node, TVector<TSymbolNameWithPos>& bindNames); + bool ActionOrSubqueryArgs(const TRule_action_or_subquery_args& node, TVector<TSymbolNameWithPos>& bindNames, ui32& optionalArgsCount); + bool ModulePath(const TRule_module_path& node, TVector<TString>& path); + bool NamedBindList(const TRule_named_bind_parameter_list& node, TVector<TSymbolNameWithPos>& names, + TVector<TSymbolNameWithPos>& aliases); + bool NamedBindParam(const TRule_named_bind_parameter& node, TSymbolNameWithPos& name, TSymbolNameWithPos& alias); + TNodePtr NamedNode(const TRule_named_nodes_stmt& rule, TVector<TSymbolNameWithPos>& names); + + bool ImportStatement(const TRule_import_stmt& stmt, 
TVector<TString>* namesPtr = nullptr); + TNodePtr DoStatement(const TRule_do_stmt& stmt, bool makeLambda, const TVector<TString>& args = {}); + bool DefineActionOrSubqueryStatement(const TRule_define_action_or_subquery_stmt& stmt, TSymbolNameWithPos& nameAndPos, TNodePtr& lambda); + bool DefineActionOrSubqueryBody(TSqlQuery& query, TBlocks& blocks, const TRule_define_action_or_subquery_body& body); + TNodePtr IfStatement(const TRule_if_stmt& stmt); + TNodePtr ForStatement(const TRule_for_stmt& stmt); + TMaybe<TTableArg> TableArgImpl(const TRule_table_arg& node); + bool TableRefImpl(const TRule_table_ref& node, TTableRef& result, bool unorderedSubquery); + TMaybe<TSourcePtr> AsTableImpl(const TRule_table_ref& node); + bool ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, TString& service, TDeferredAtom& cluster); + bool ClusterExprOrBinding(const TRule_cluster_expr& node, TString& service, TDeferredAtom& cluster, bool& isBinding); + bool ApplyTableBinding(const TString& binding, TTableRef& tr, TTableHints& hints); + + TMaybe<TColumnSchema> ColumnSchemaImpl(const TRule_column_schema& node); + bool CreateTableEntry(const TRule_create_table_entry& node, TCreateTableParameters& params, const bool isCreateTableAs); + + bool FillFamilySettingsEntry(const TRule_family_settings_entry& settingNode, TFamilyEntry& family); + bool FillFamilySettings(const TRule_family_settings& settingsNode, TFamilyEntry& family); + bool CreateTableSettings(const TRule_with_table_settings& settingsNode, TCreateTableParameters& params); + bool StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings, + ETableType tableType, bool alter, bool reset); + bool StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings, + bool alter, bool reset); + bool StoreExternalTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings, + bool 
alter, bool reset); + bool StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value& value, TTableSettings& settings, ETableType tableType, bool alter = false); + bool StoreDataSourceSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result); + bool StoreDataSourceSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result); + bool StoreResourcePoolSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result); + bool StoreResourcePoolSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result); + bool StoreResourcePoolClassifierSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result); + bool StoreResourcePoolClassifierSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result); + bool ResetTableSettingsEntry(const TIdentifier& id, TTableSettings& settings, ETableType tableType); + + bool CreateTableIndex(const TRule_table_index& node, TVector<TIndexDescription>& indexes); + bool CreateIndexSettings(const TRule_with_index_settings& settingsNode, TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings); + bool CreateIndexSettingEntry(const TIdentifier& id, const TRule_index_setting_value& value, TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings); + template<typename T> + std::tuple<bool, T, TString> GetIndexSettingValue(const TRule_index_setting_value& node); + + TIdentifier GetTopicConsumerId(const TRule_topic_consumer_ref& node); + bool CreateConsumerSettings(const TRule_topic_consumer_settings& settingsNode, TTopicConsumerSettings& settings); + bool CreateTopicSettings(const TRule_topic_settings& node, TTopicSettings& params); + bool CreateTopicConsumer(const 
TRule_topic_create_consumer_entry& node, + TVector<TTopicConsumerDescription>& consumers); + bool CreateTopicEntry(const TRule_create_topic_entry& node, TCreateTopicParameters& params); + + bool AlterTopicConsumer(const TRule_alter_topic_alter_consumer& node, + THashMap<TString, TTopicConsumerDescription>& alterConsumers); + + bool AlterTopicConsumerEntry(const TRule_alter_topic_alter_consumer_entry& node, + TTopicConsumerDescription& alterConsumer); + + + bool AlterTopicAction(const TRule_alter_topic_action& node, TAlterTopicParameters& params); + + + TNodePtr TypeSimple(const TRule_type_name_simple& node, bool onlyDataAllowed); + TNodePtr TypeDecimal(const TRule_type_name_decimal& node); + TNodePtr AddOptionals(const TNodePtr& node, size_t optionalCount); + TMaybe<std::pair<TVector<TNodePtr>, bool>> CallableArgList(const TRule_callable_arg_list& argList, bool namedArgsStarted); + + TNodePtr IntegerOrBind(const TRule_integer_or_bind& node); + TNodePtr TypeNameTag(const TRule_type_name_tag& node); + TNodePtr TypeNodeOrBind(const TRule_type_name_or_bind& node); + TNodePtr SerialTypeNode(const TRule_type_name_or_bind& node); + TNodePtr TypeNode(const TRule_type_name& node); + TNodePtr TypeNode(const TRule_type_name_composite& node); + TNodePtr ValueConstructorLiteral(const TRule_value_constructor_literal& node); + TNodePtr ValueConstructor(const TRule_value_constructor& node); + TNodePtr ListLiteral(const TRule_list_literal& node); + TNodePtr DictLiteral(const TRule_dict_literal& node); + TNodePtr StructLiteral(const TRule_struct_literal& node); + TMaybe<TTableHints> TableHintsImpl(const TRule_table_hints& node, const TString& provider, const TString& keyFunc = ""); + bool TableHintImpl(const TRule_table_hint& rule, TTableHints& hints, const TString& provider, const TString& keyFunc = ""); + bool SimpleTableRefImpl(const TRule_simple_table_ref& node, TTableRef& result); + bool TopicRefImpl(const TRule_topic_ref& node, TTopicRef& result); + TWindowSpecificationPtr 
WindowSpecification(const TRule_window_specification_details& rule); + bool OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy); + bool SortSpecificationList(const TRule_sort_specification_list& node, TVector<TSortSpecificationPtr>& sortSpecs); + + bool IsDistinctOptSet(const TRule_opt_set_quantifier& node) const; + bool IsDistinctOptSet(const TRule_opt_set_quantifier& node, TPosition& distinctPos) const; + + bool AddObjectFeature(std::map<TString, TDeferredAtom>& result, const TRule_object_feature& feature); + bool BindParameterClause(const TRule_bind_parameter& node, TDeferredAtom& result); + bool ObjectFeatureValueClause(const TRule_object_feature_value& node, TDeferredAtom& result); + bool ParseObjectFeatures(std::map<TString, TDeferredAtom>& result, const TRule_object_features& features); + bool ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settings); + bool ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_external_data_source_action& alterActions); + bool ParseViewOptions(std::map<TString, TDeferredAtom>& features, const TRule_with_table_settings& options); + bool ParseViewQuery(std::map<TString, TDeferredAtom>& features, const TRule_select_stmt& query); + bool ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settings); + bool ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_action& alterAction); + bool ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settings); + bool ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_classifier_action& alterAction); + bool RoleNameClause(const TRule_role_name& node, TDeferredAtom& 
result, bool allowSystemRoles); + bool RoleParameters(const TRule_create_user_option& node, TRoleParameters& result); + bool PermissionNameClause(const TRule_permission_name_target& node, TVector<TDeferredAtom>& result, bool withGrantOption); + bool PermissionNameClause(const TRule_permission_name& node, TDeferredAtom& result); + bool PermissionNameClause(const TRule_permission_id& node, TDeferredAtom& result); + bool StoreStringSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result); + bool StoreStringSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result); + bool ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, const TRule_backup_collection_settings& settings); + bool ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_backup_collection_actions& actions); + bool ParseBackupCollectionTables(TVector<TDeferredAtom>& result, const TRule_table_list& tables); + bool ParseBackupCollectionEntry( + bool& addDatabase, + bool& removeDatabase, + TVector<TDeferredAtom>& addTables, + TVector<TDeferredAtom>& removeTables, + const TRule_alter_backup_collection_entry& entry); + bool ParseBackupCollectionEntries( + bool& addDatabase, + bool& removeDatabase, + TVector<TDeferredAtom>& addTables, + TVector<TDeferredAtom>& removeTables, + const TRule_alter_backup_collection_entries& entries); + + bool ValidateAuthMethod(const std::map<TString, TDeferredAtom>& result); + bool ValidateExternalTable(const TCreateTableParameters& params); + + TNodePtr ReturningList(const ::NSQLv1Generated::TRule_returning_columns_list& columns); +private: + bool SimpleTableRefCoreImpl(const TRule_simple_table_ref_core& node, TTableRef& result); + static bool IsValidFrameSettings(TContext& ctx, const TFrameSpecification& frameSpec, size_t sortSpecSize); + static TString FrameSettingsToString(EFrameSettings 
settings, bool isUnbounded); + + bool FrameBound(const TRule_window_frame_bound& rule, TFrameBoundPtr& bound); + bool FrameClause(const TRule_window_frame_clause& node, TFrameSpecificationPtr& frameSpec, size_t sortSpecSize); + bool SortSpecification(const TRule_sort_specification& node, TVector<TSortSpecificationPtr>& sortSpecs); + + bool ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, bool allowBinding, TString& service, TDeferredAtom& cluster, bool& isBinding); + bool StructLiteralItem(TVector<TNodePtr>& labels, const TRule_expr& label, TVector<TNodePtr>& values, const TRule_expr& value); + bool ValidateTableSettings(const TTableSettings& settings); + +protected: + NSQLTranslation::ESqlMode Mode; +}; + +TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node); + +/* One piece of a pattern between two '%' wildcards. Prefix collects the plain characters seen before the first '_' of the piece, Suffix the plain characters seen after the last '_', and IsSimple stays true while the piece contains no '_' (in which case Prefix and Suffix hold the same text). */ template<typename TChar> +struct TPatternComponent { + TBasicString<TChar> Prefix; + TBasicString<TChar> Suffix; + bool IsSimple = true; + + void AppendPlain(TChar c) { + if (IsSimple) { + Prefix.push_back(c); + } + Suffix.push_back(c); + } + + void AppendAnyChar() { + IsSimple = false; + Suffix.clear(); + } +}; + +/* Splits `pattern` into components at every unescaped '%'. A run of consecutive '%' yields a single split, so no empty component is produced between them. An unescaped '_' marks the current component as non-simple. When `escape` is set, the character following it is taken literally. On return `inEscape` is true only if the pattern ended right after an escape character. The result always contains at least one component. */ template<typename TChar> +TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& pattern, TMaybe<char> escape, bool& inEscape) { + inEscape = false; + TVector<TPatternComponent<TChar>> result; + TPatternComponent<TChar> current; + bool prevIsPercentChar = false; + for (const TChar c : pattern) { + if (inEscape) { + current.AppendPlain(c); + inEscape = false; + prevIsPercentChar = false; + } else if (escape && c == static_cast<TChar>(*escape)) { + inEscape = true; + } else if (c == '%') { + if (!prevIsPercentChar) { + result.push_back(std::move(current)); + } + current = {}; + prevIsPercentChar = true; + } else if (c == '_') { + current.AppendAnyChar(); + prevIsPercentChar = false; + } else { + current.AppendPlain(c); + prevIsPercentChar = false; + } + } + result.push_back(std::move(current)); + return result; +} + +bool ParseNumbers(TContext& ctx, 
const TString& strOrig, ui64& value, TString& suffix); + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp new file mode 100644 index 00000000000..6663fe97657 --- /dev/null +++ b/yql/essentials/sql/v1/sql_ut.cpp @@ -0,0 +1,7462 @@ +#include "sql_ut.h" +#include "format/sql_format.h" +#include "lexer/lexer.h" + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/sql/sql.h> +#include <util/generic/map.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/split.h> + +#include <format> + +using namespace NSQLTranslation; + +namespace { + +TParsedTokenList Tokenize(const TString& query) { + auto lexer = NSQLTranslationV1::MakeLexer(true, false); + TParsedTokenList tokens; + NYql::TIssues issues; + UNIT_ASSERT_C(Tokenize(*lexer, query, "Query", tokens, issues, SQL_MAX_PARSER_ERRORS), + issues.ToString()); + + return tokens; +} + +TString ToString(const TParsedTokenList& tokens) { + TStringBuilder reconstructedQuery; + for (const auto& token : tokens) { + if (token.Name == "WS" || token.Name == "EOF") { + continue; + } + if (!reconstructedQuery.empty()) { + reconstructedQuery << ' '; + } + reconstructedQuery << token.Content; + } + return reconstructedQuery; +} + +} + +Y_UNIT_TEST_SUITE(AnsiMode) { + Y_UNIT_TEST(PragmaAnsi) { + UNIT_ASSERT(SqlToYql("PRAGMA ANSI 2016;").IsOk()); + } +} + +Y_UNIT_TEST_SUITE(SqlParsingOnly) { + ///This function is used in BACKWARD COMPATIBILITY tests below that LIMIT the sets of token that CAN NOT be used + ///as identifiers in different contexts in a SQL request + ///\return list of tokens that failed this check + TVector<TString> ValidateTokens(const THashSet<TString>& forbidden, const std::function<TString (const TString& )>& makeRequest) { + THashMap<TString, bool> allTokens; + for (const auto& t: NSQLFormat::GetKeywords()) { + allTokens[t] = !forbidden.contains((t)); + } + for (const auto& f: 
forbidden) { + UNIT_ASSERT(allTokens.contains(f)); //check that forbidden list contains tokens only(argument check) + } + TVector<TString> failed; + for (const auto& [token, allowed]: allTokens) { + if (SqlToYql(makeRequest(token)).IsOk() != allowed) + failed.push_back(token); + } + return failed; + } + + Y_UNIT_TEST(TokensAsColumnName) { //id_expr + auto failed = ValidateTokens({ + "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST", + "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL", + "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL", + "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP", + "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED", + "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT " << token << " FROM Plato.Input"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsWithoutColumnName) { //id_without + auto failed = ValidateTokens({ + "ALL", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST", + "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "DICT", "DISTINCT", "EMPTY_ACTION", "ENUM", "EXCEPT", "EXISTS", "FALSE", "FLOW", "FROM", "FULL", "GLOBAL", + "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL", + "NOT", "NULL", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP", + "SELECT", "SET", "STRUCT", "SYMMETRIC", "TAGGED", "TRUE", "TUPLE", "UNBOUNDED", + "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * WITHOUT " << token << " FROM 
Plato.Input"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsColumnNameInAddColumn) { //id_schema + auto failed = ValidateTokens({ + "ANY", "AUTOMAP", "CALLABLE", "COLUMN", "DICT", "ENUM", "ERASE", "FALSE", "FLOW", + "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE", + "SET", "STREAM", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "ALTER TABLE Plato.Input ADD COLUMN " << token << " Bool"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsColumnAlias) { + auto failed = ValidateTokens({ + "AUTOMAP", "FALSE", + "GLOBAL", "REPEATABLE", "TRUE" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT Col as " << token << " FROM Plato.Input"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsTableName) { //id_table_or_type + auto failed = ValidateTokens({ + "ANY", "AUTOMAP", "COLUMN", "ERASE", "FALSE", + "GLOBAL", "REPEATABLE", "STREAM", "TRUE" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato." 
<< token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsTableAlias) { //id_table + auto failed = ValidateTokens({ + "AUTOMAP", "CALLABLE", "DICT", "ENUM","FALSE", "FLOW", + "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE", + "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input AS " << token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsHints) { //id_hint + auto failed = ValidateTokens({ + "AUTOMAP", "CALLABLE", "COLUMNS", "DICT", "ENUM", "FALSE", "FLOW", + "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE", + "SCHEMA", "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input WITH " << token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsWindow) { //id_window + auto failed = ValidateTokens({ + "AUTOMAP", "CALLABLE", "DICT", "ENUM", "FALSE", "FLOW", "GLOBAL", "GROUPS", "LIST", "OPTIONAL", + "RANGE", "REPEATABLE", "RESOURCE", "ROWS", "SET", "STRUCT", "TAGGED" ,"TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input WINDOW " << token << " AS ()"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsIdExprIn) { //id_expr_in + auto failed = ValidateTokens({ + "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST", + "CALLABLE", "CASE", "CAST", "COMPACT", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL", + "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL", + "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", 
"RESOURCE", "RETURN", "RETURNING", "ROLLUP", + "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED", + "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input WHERE q IN " << token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TableHints) { + UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input WITH INFER_SCHEMA").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input WITH (INFER_SCHEMA)").IsOk()); + } + + Y_UNIT_TEST(InNoHints) { + TString query = "SELECT * FROM plato.Input WHERE key IN (1,2,3)"; + + VerifySqlInHints(query, { "'('('warnNoAnsi))" }, {}); + VerifySqlInHints(query, { "'()" }, false); + VerifySqlInHints(query, { "'('('ansi))" }, true); + } + + Y_UNIT_TEST(InHintCompact) { + // should parse COMPACT as hint + TString query = "SELECT * FROM plato.Input WHERE key IN COMPACT(1, 2, 3)"; + + VerifySqlInHints(query, { "'('isCompact)" }); + } + + Y_UNIT_TEST(InHintSubquery) { + // should parse tableSource as hint + TString query = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN $subq"; + + VerifySqlInHints(query, { "'('tableSource)" }); + } + + Y_UNIT_TEST(InHintCompactSubquery) { + TString query = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN COMPACT $subq"; + + VerifySqlInHints(query, { "'('isCompact)", "'('tableSource)" }); + } + + Y_UNIT_TEST(CompactKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT(1, 2, 3)").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT * FROM COMPACT").IsOk()); + } + + Y_UNIT_TEST(FamilyKeywordNotReservedForNames) { + // FIXME: check if we can get old behaviour + //UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE FAMILY (FAMILY Uint32, PRIMARY KEY (FAMILY));").IsOk()); + //UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM FAMILY").IsOk()); + 
UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM Input").IsOk()); + } + + Y_UNIT_TEST(ResetKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE RESET (RESET Uint32, PRIMARY KEY (RESET));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT RESET FROM RESET").IsOk()); + } + + Y_UNIT_TEST(SyncKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SYNC (SYNC Uint32, PRIMARY KEY (SYNC));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT SYNC FROM SYNC").IsOk()); + } + + Y_UNIT_TEST(AsyncKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE ASYNC (ASYNC Uint32, PRIMARY KEY (ASYNC));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT ASYNC FROM ASYNC").IsOk()); + } + + Y_UNIT_TEST(DisableKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE DISABLE (DISABLE Uint32, PRIMARY KEY (DISABLE));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT DISABLE FROM DISABLE").IsOk()); + } + + Y_UNIT_TEST(ChangefeedKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE CHANGEFEED (CHANGEFEED Uint32, PRIMARY KEY (CHANGEFEED));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT CHANGEFEED FROM CHANGEFEED").IsOk()); + } + + Y_UNIT_TEST(ReplicationKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE REPLICATION (REPLICATION Uint32, PRIMARY KEY (REPLICATION));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT REPLICATION FROM REPLICATION").IsOk()); + } + + Y_UNIT_TEST(SecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SECONDS (SECONDS Uint32, PRIMARY KEY (SECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT SECONDS FROM SECONDS").IsOk()); + } + + Y_UNIT_TEST(MillisecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MILLISECONDS (MILLISECONDS Uint32, PRIMARY KEY (MILLISECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT MILLISECONDS FROM 
MILLISECONDS").IsOk()); + } + + Y_UNIT_TEST(MicrosecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MICROSECONDS (MICROSECONDS Uint32, PRIMARY KEY (MICROSECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT MICROSECONDS FROM MICROSECONDS").IsOk()); + } + + Y_UNIT_TEST(NanosecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE NANOSECONDS (NANOSECONDS Uint32, PRIMARY KEY (NANOSECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT NANOSECONDS FROM NANOSECONDS").IsOk()); + } + + Y_UNIT_TEST(Jubilee) { + NYql::TAstParseResult res = SqlToYql("USE plato; INSERT INTO Arcadia (r2000000) VALUES (\"2M GET!!!\");"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(QualifiedAsteriskBefore) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA DisableSimpleColumns;" + "select interested_table.*, LENGTH(value) AS megahelpful_len from plato.Input as interested_table;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + static bool seenStar = false; + if (word == "FlattenMembers") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table.")); + } else if (word == "SqlProjectItem") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len"))); + UNIT_ASSERT_VALUES_EQUAL(seenStar, true); + } else if (word == "SqlProjectStarItem") { + seenStar = true; + } + }; + TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]); + } + + Y_UNIT_TEST(QualifiedAsteriskAfter) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA DisableSimpleColumns;" + "select LENGTH(value) AS megahelpful_len, interested_table.* from 
plato.Input as interested_table;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + static bool seenStar = false; + if (word == "FlattenMembers") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table.")); + } else if (word == "SqlProjectItem") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len"))); + UNIT_ASSERT_VALUES_EQUAL(seenStar, false); + } else if (word == "SqlProjectStarItem") { + seenStar = true; + } + }; + TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]); + } + + Y_UNIT_TEST(QualifiedMembers) { + NYql::TAstParseResult res = SqlToYql("select interested_table.key, interested_table.value from plato.Input as interested_table;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + const bool fieldKey = TString::npos != line.find(Quote("key")); + const bool fieldValue = TString::npos != line.find(Quote("value")); + const bool refOnTable = TString::npos != line.find("interested_table."); + if (word == "SqlProjectItem") { + UNIT_ASSERT(fieldKey || fieldValue); + UNIT_ASSERT(!refOnTable); + } else if (word == "Write!") { + UNIT_ASSERT(fieldKey && fieldValue && !refOnTable); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(JoinParseCorrect) { + 
NYql::TAstParseResult res = SqlToYql( + "PRAGMA DisableSimpleColumns;" + " SELECT table_bb.*, table_aa.key as megakey" + " FROM plato.Input AS table_aa" + " JOIN plato.Input AS table_bb" + " ON table_aa.value == table_bb.value;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SelectMembers") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa.")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table_bb.")); + } else if (word == "SqlProjectItem") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megakey"))); + } else if (word == "SqlColumn") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("table_aa"))); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key"))); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SelectMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]); + } + + Y_UNIT_TEST(Join3Table) { + NYql::TAstParseResult res = SqlToYql( + " PRAGMA DisableSimpleColumns;" + " SELECT table_bb.*, table_aa.key as gigakey, table_cc.* " + " FROM plato.Input AS table_aa" + " JOIN plato.Input AS table_bb ON table_aa.key == table_bb.key" + " JOIN plato.Input AS table_cc ON table_aa.subkey == table_cc.subkey;" + ); + Err2Str(res); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SelectMembers") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa.")); + UNIT_ASSERT(line.find("table_bb.") != TString::npos || line.find("table_cc.") != TString::npos); + } else if (word == "SqlProjectItem") { + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("gigakey"))); + } else if (word == "SqlColumn") { + const auto posTableAA = line.find(Quote("table_aa")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, posTableAA); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key"))); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa", posTableAA + 3)); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SelectMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]); + } + + Y_UNIT_TEST(DisabledJoinCartesianProduct) { + NYql::TAstParseResult res = SqlToYql("pragma DisableAnsiImplicitCrossJoin; use plato; select * from A,B,C"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:67: Error: Cartesian product of tables is disabled. 
Please use explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin\n"); + } + + Y_UNIT_TEST(JoinCartesianProduct) { + NYql::TAstParseResult res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from A,B,C"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "EquiJoin") { + auto pos = line.find("Cross"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, pos); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Cross", pos + 1)); + } + }; + TWordCountHive elementStat = {{TString("EquiJoin"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["EquiJoin"]); + } + + Y_UNIT_TEST(JoinWithoutConcreteColumns) { + NYql::TAstParseResult res = SqlToYql( + " use plato;" + " SELECT a.v, b.value" + " FROM `Input1` VIEW `ksv` AS a" + " JOIN `Input2` AS b" + " ON a.k == b.key;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SqlProjectItem") { + UNIT_ASSERT(line.find(Quote("a.v")) != TString::npos || line.find(Quote("b.value")) != TString::npos); + } else if (word == "SqlColumn") { + const auto posTableA = line.find(Quote("a")); + const auto posTableB = line.find(Quote("b")); + if (posTableA != TString::npos) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("v"))); + } else { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, posTableB); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("value"))); + } + } + }; + TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlColumn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlColumn"]); + } + + Y_UNIT_TEST(JoinWithSameValues) { + NYql::TAstParseResult res = SqlToYql("SELECT a.value, 
b.value FROM plato.Input AS a JOIN plato.Input as b ON a.key == b.key;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SqlProjectItem") { + const bool isValueFromA = TString::npos != line.find(Quote("a.value")); + const bool isValueFromB = TString::npos != line.find(Quote("b.value")); + UNIT_ASSERT(isValueFromA || isValueFromB); + } if (word == "Write!") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("a.a.")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("b.b.")); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {"Write!", 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(SameColumnsForDifferentTables) { + NYql::TAstParseResult res = SqlToYql("SELECT a.key, b.key FROM plato.Input as a JOIN plato.Input as b on a.key==b.key;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SameColumnsForDifferentTablesFullJoin) { + NYql::TAstParseResult res = SqlToYql("SELECT a.key, b.key, a.value, b.value FROM plato.Input AS a FULL JOIN plato.Input AS b USING(key);"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(JoinStreamLookupStrategyHint) { + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(res.Root); + } + //case insensitive + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ streamlookup() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(res.Root); + } + } + + Y_UNIT_TEST(JoinConflictingStrategyHint) { + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ Merge() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(!res.Root); + 
UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:91: Error: Conflicting join strategy hints\n"); + } + } + + Y_UNIT_TEST(JoinDuplicatingStrategyHint) { + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ StreamLookup() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:98: Error: Duplicate join strategy hint\n"); + } + } + + Y_UNIT_TEST(WarnCrossJoinStrategyHint) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a CROSS JOIN /*+ merge() */ plato.Input AS b;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:32: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n"); + } + + Y_UNIT_TEST(WarnCartesianProductStrategyHint) { + NYql::TAstParseResult res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; SELECT * FROM A, /*+ merge() */ B;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:74: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n"); + } + + Y_UNIT_TEST(WarnUnknownJoinStrategyHint) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ xmerge() */ plato.Input AS b USING (key);"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:41: Warning: Unsupported join hint: xmerge, code: 4534\n"); + } + + Y_UNIT_TEST(ReverseLabels) { + NYql::TAstParseResult res = SqlToYql("select in.key as subkey, subkey as key from plato.Input as in;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AutogenerationAliasWithoutCollisionConflict1) { + NYql::TAstParseResult res = SqlToYql("select LENGTH(Value), key as column1 from plato.Input;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AutogenerationAliasWithoutCollision2Conflict2) { + NYql::TAstParseResult res = SqlToYql("select key as column0, LENGTH(Value) from plato.Input;"); + 
UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(InputAliasForQualifiedAsterisk) { + NYql::TAstParseResult res = SqlToYql("use plato; select zyuzya.*, key from plato.Input as zyuzya;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectSupportsResultColumnsWithTrailingComma) { + NYql::TAstParseResult res = SqlToYql("select a, b, c, from plato.Input;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectOrderByLabeledColumn) { + NYql::TAstParseResult res = SqlToYql("pragma DisableOrderedColumns; select key as goal from plato.Input order by goal"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "DataSource") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("plato")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Input")); + + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("goal")); + } else if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("goal")); + + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("key")); + } + }; + TWordCountHive elementStat = {{TString("DataSource"), 0}, {TString("Sort"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["DataSource"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + } + + Y_UNIT_TEST(SelectOrderBySimpleExpr) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by a + a"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectOrderByDuplicateLabels) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by a, a"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectOrderByExpression) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input as i order by cast(key as uint32) + cast(subkey as uint32)"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"+MayWarn\"")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("key")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("subkey")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Bool 'true)")); + + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("i.key")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("i.subkey")); + } + }; + TWordCountHive elementStat = {{TString("Sort"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + } + + Y_UNIT_TEST(SelectOrderByExpressionDesc) { + NYql::TAstParseResult res = SqlToYql("pragma disablesimplecolumns; select i.*, key, subkey from plato.Input as i order by cast(i.key as uint32) - cast(i.subkey as uint32) desc"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"-MayWarn\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Bool 'false)")); + } else if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'columns")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("prefix")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"i.\"")); + } + }; + TWordCountHive elementStat = {{TString("Sort"), 0}, {TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(SelectOrderByExpressionAsc) { + NYql::TAstParseResult res = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) % cast(i.subkey as uint32) asc"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = 
[](const TString& word, const TString& line) { + if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"%MayWarn\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Bool 'true)")); + } else if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'columns")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("i.")); + } + }; + TWordCountHive elementStat = {{TString("Sort"), 0}, {TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(ReferenceToKeyInSubselect) { + NYql::TAstParseResult res = SqlToYql("select b.key from (select a.key from plato.Input as a) as b;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(OrderByCastValue) { + NYql::TAstParseResult res = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) desc;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByCastValue) { + NYql::TAstParseResult res = SqlToYql("select count(1) from plato.Input as i group by cast(key as uint8);"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(KeywordInSelectColumns) { + NYql::TAstParseResult res = SqlToYql("select in, s.check from (select 1 as in, \"test\" as check) as s;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectAllGroupBy) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input group by subkey;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(CreateObjectWithFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const 
TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(CreateObjectIfNotExists) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT IF NOT EXISTS secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectIfNotExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(CreateObjectWithFeaturesStrings) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=\"Value1\", K2='V2', K3=V3, K4='', K5=`aaa`, K6='a\\'aa');"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"K3\" '\"V3\") '('\"K4\" '\"\") '('\"K5\" '\"aaa\") '('\"K6\" '\"a'aa\") '('\"Key1\" '\"Value1\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("SECRET"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + } + + Y_UNIT_TEST(UpsertObjectWithFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; UPSERT OBJECT secretId (TYPE SECRET) 
WITH (Key1=Value1, K2=V2);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("upsertObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(CreateObjectWithFeaturesAndFlags) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2, RECURSE);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"Key1\" '\"Value1\") '('\"RECURSE\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(Select1Type) { + NYql::TAstParseResult res = SqlToYql("SELECT 1 type;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectTableType) { + NYql::TAstParseResult res = SqlToYql("USE plato; SELECT * from T type;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(CreateObjectNoFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + 
TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(AlterObjectWithFeatures) { + NYql::TAstParseResult res = SqlToYql( + "USE plato;\n" + "declare $path as String;\n" + "ALTER OBJECT secretId (TYPE SECRET) SET (Key1=$path, K2=V2);" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"Key1\" (EvaluateAtom \"$path\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"K2\" '\"V2\"")); + + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alterObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(AlterObjectNoFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER OBJECT secretId (TYPE SECRET);"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(DropObjectNoFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(DropObjectWithFeatures) { + 
NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH (A, B, C);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(DropObjectWithOneOption) { + NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH OVERRIDE;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"OVERRIDE\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(DropObjectIfExists) { + NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT IF EXISTS secretId (TYPE SECRET);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObjectIfExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + 
Y_UNIT_TEST(PrimaryKeyParseCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, Subkey Int64, Value String, PRIMARY KEY (Key, Subkey));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"Key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"Subkey\"")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("primarykey"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["primarykey"]); + } + + Y_UNIT_TEST(CreateTableNonNullableYqlTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32 not null);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '())))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNullableYqlTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNonNullablePgTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a pg_int4 not null);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (PgType '_int4) '('columnConstrains '('('not_null))) '())))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNullablePgTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a pg_int4);"); + UNIT_ASSERT(res.Root); + + res.Root->PrettyPrintTo(Cout, PRETTY_FLAGS); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (PgType '_int4)) '('columnConstrains '()) '()))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNullPkColumnsAreAllowed) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNotNullPkColumnsAreIdempotentAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '()))) '('primarykey '('"a"))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableWithIfNotExists) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE IF NOT EXISTS t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create_if_not_exists) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTempTable) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TEMP TABLE t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__"), line); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTemporaryTable) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TEMPORARY TABLE t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__"), line); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableWithoutTypes) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a));"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(CreateTableAsSelectWithTypes) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a)) AS SELECT * FROM ts;"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(CreateTableAsSelect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a, b, primary key(a)) AS SELECT * FROM ts;"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((let world (Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a") '('"b"))) '('primarykey '('"a"))))))__")); + } + if (word == "Read!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Read! world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}, {TString("Read!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]); + } + + Y_UNIT_TEST(CreateTableAsSelectOnlyPrimary) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (primary key(a)) AS SELECT * FROM ts;"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((let world (Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '()) '('primarykey '('"a"))))))__")); + } + if (word == "Read!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Read! 
world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}, {TString("Read!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]); + } + + Y_UNIT_TEST(CreateTableAsValuesFail) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a)) AS VALUES (1), (2);"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(CreateTableDuplicatedPkColumnsFail) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a, a));"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(DeleteFromTableByKey) { + NYql::TAstParseResult res = SqlToYql("delete from plato.Input where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DeleteFromTable) { + NYql::TAstParseResult res = SqlToYql("delete from plato.Input;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DeleteFromTableOnValues) { + NYql::TAstParseResult res = SqlToYql("delete from plato.Input on (key) values (1);", + 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") 
{ + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DeleteFromTableOnSelect) { + NYql::TAstParseResult res = SqlToYql( + "delete from plato.Input on select key from plato.Input where value > 0;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UpdateByValues) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set key = 777, value = 'cool' where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)")); + } else if (word == "AsStruct") { + const bool isKey = line.find("key") != TString::npos; + const bool isValue = line.find("value") != TString::npos; + UNIT_ASSERT(isKey || isValue); + if (isKey) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("777"))); + } else if (isValue) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("cool"))); + } + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AsStruct"]); + } + + Y_UNIT_TEST(UpdateByMultiValues) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set (key, value, subkey) = ('2','ddd',':') where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + 
TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)")); + } else if (word == "AsStruct") { + const bool isKey = line.find("key") != TString::npos; + const bool isSubkey = line.find("subkey") != TString::npos; + const bool isValue = line.find("value") != TString::npos; + UNIT_ASSERT(isKey || isSubkey || isValue); + if (isKey && !isSubkey) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("2"))); + } else if (isSubkey) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote(":"))); + } else if (isValue) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("ddd"))); + } + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AsStruct"]); + } + + Y_UNIT_TEST(UpdateBySelect) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set (key, value, subkey) = (select key, value, subkey from plato.Input where key = 911) where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + int lineIndex = 0; + int writeLineIndex = -1; + bool found = false; + + TVerifyLineFunc verifyLine = [&lineIndex, &writeLineIndex, &found](const TString& word, const TString& line) { + if (word == "Write") { + writeLineIndex = lineIndex; + found = line.find("('mode 'update)") != TString::npos; + } else if (word == "mode") { + found |= lineIndex == writeLineIndex + 1 && line.find("('mode 'update)") != TString::npos; + UNIT_ASSERT(found); + } + + ++lineIndex; + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("mode"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UpdateSelfModifyAll) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set subkey = subkey + 's';", 
10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)")); + } else if (word == "AsStruct") { + const bool isSubkey = line.find("subkey") != TString::npos; + UNIT_ASSERT(isSubkey); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("subkey"))); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("s"))); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AsStruct"]); + } + + Y_UNIT_TEST(UpdateOnValues) { + NYql::TAstParseResult res = SqlToYql("update plato.Input on (key, value) values (5, 'cool')", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UpdateOnSelect) { + NYql::TAstParseResult res = SqlToYql( + "update plato.Input on select key, value + 1 as value from plato.Input", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UnionAllTest) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input UNION ALL select subkey FROM plato.Input;"); + UNIT_ASSERT(res.Root); + + TWordCountHive 
elementStat = {{TString("UnionAll"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["UnionAll"]); + } + + Y_UNIT_TEST(UnionTest) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input UNION select subkey FROM plato.Input;"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("Union"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Union"]); + } + + Y_UNIT_TEST(UnionAggregationTest) { + NYql::TAstParseResult res = SqlToYql(R"( + SELECT 1 + UNION ALL + SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 + UNION + SELECT 1 UNION SELECT 1 UNION SELECT 1 UNION SELECT 1 + UNION ALL + SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1; + )"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("Union"), 0}, {TString("UnionAll"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["UnionAll"]); + UNIT_ASSERT_VALUES_EQUAL(3, elementStat["Union"]); + } + + Y_UNIT_TEST(DeclareDecimalParameter) { + NYql::TAstParseResult res = SqlToYql("declare $value as Decimal(22,9); select $value as cnt;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SimpleGroupBy) { + NYql::TAstParseResult res = SqlToYql("select count(1),z from plato.Input group by key as z order by z;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(EmptyColumnName0) { + /// Now it's parsed well and error occur on validate step like "4:31:Empty struct member name is not allowed" in "4:31:Function: AddMember" + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (``, list1) values (0, AsList(0, 1, 2));"); + /// Verify that parsed well without crash + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(KikimrRollback) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from Input; rollback;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("rollback"), 0}}; + VerifyProgram(res, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, 
elementStat["rollback"]); + } + + Y_UNIT_TEST(PragmaFile) { + NYql::TAstParseResult res = SqlToYql(R"(pragma file("HW", "sbr:181041334");)"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString(R"((let world (Configure! world (DataSource '"config") '"AddFileByUrl" '"HW" '"sbr:181041334")))"), 0}}; + VerifyProgram(res, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat.cbegin()->second); + } + + Y_UNIT_TEST(DoNotCrashOnNamedInFilter) { + NYql::TAstParseResult res = SqlToYql("USE plato; $all = ($table_name) -> { return true; }; SELECT * FROM FILTER(Input, $all)"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(PragmasFileAndUdfOrder) { + NYql::TAstParseResult res = SqlToYql(R"( + PRAGMA file("libvideoplayers_udf.so", "https://proxy.sandbox.yandex-team.ru/235185290"); + PRAGMA udf("libvideoplayers_udf.so"); + )"); + UNIT_ASSERT(res.Root); + + const auto programm = GetPrettyPrint(res); + const auto file = programm.find("AddFileByUrl"); + const auto udfs = programm.find("ImportUdfs"); + UNIT_ASSERT(file < udfs); + } + + Y_UNIT_TEST(ProcessUserType) { + NYql::TAstParseResult res = SqlToYql("process plato.Input using Kikimr::PushData(TableRows());", 1, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Kikimr.PushData") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TupleType")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TypeOf")); + } + }; + + TWordCountHive elementStat = {{TString("Kikimr.PushData"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Kikimr.PushData"]); + } + + Y_UNIT_TEST(ProcessUserTypeAuth) { + NYql::TAstParseResult res = SqlToYql("process plato.Input using YDB::PushData(TableRows(), AsTuple('oauth', SecureParam('api:oauth')));", 1, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& 
line) { + if (word == "YDB.PushData") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TupleType")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TypeOf")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("api:oauth")); + } + }; + + TWordCountHive elementStat = {{TString("YDB.PushData"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["YDB.PushData"]); + } + + Y_UNIT_TEST(SelectStreamRtmr) { + NYql::TAstParseResult res = SqlToYql( + "USE plato; INSERT INTO Output SELECT STREAM key FROM Input;", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + + res = SqlToYql( + "USE plato; INSERT INTO Output SELECT key FROM Input;", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectStreamRtmrJoinWithYt) { + NYql::TAstParseResult res = SqlToYql( + "USE plato; INSERT INTO Output SELECT STREAM key FROM Input LEFT JOIN hahn.ttt as t ON Input.key = t.Name;", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectStreamNonRtmr) { + NYql::TAstParseResult res = SqlToYql( + "USE plato; INSERT INTO Output SELECT STREAM key FROM Input;", + 10); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: SELECT STREAM is unsupported for non-streaming sources\n"); + } + + Y_UNIT_TEST(GroupByHopRtmr) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; INSERT INTO Output SELECT key, SUM(value) AS value FROM Input + GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S"); + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByHopRtmrSubquery) { + // 'use plato' intentially avoided + NYql::TAstParseResult res = SqlToYql(R"( + SELECT COUNT(*) AS value FROM (SELECT * FROM plato.Input) + GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S") + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByHopRtmrSubqueryBinding) { + 
NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + $q = SELECT * FROM Input; + INSERT INTO Output SELECT STREAM * FROM ( + SELECT COUNT(*) AS value FROM $q + GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S") + ); + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByNoHopRtmr) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; INSERT INTO Output SELECT STREAM key, SUM(value) AS value FROM Input + GROUP BY key; + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:22: Error: Streaming group by query must have a hopping window specification.\n"); + } + + Y_UNIT_TEST(KikimrInserts) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + INSERT INTO Output SELECT key, value FROM Input; + INSERT OR ABORT INTO Output SELECT key, value FROM Input; + INSERT OR IGNORE INTO Output SELECT key, value FROM Input; + INSERT OR REVERT INTO Output SELECT key, value FROM Input; + )", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(WarnMissingIsBeforeNotNull) { + NYql::TAstParseResult res = SqlToYql("select 1 NOT NULL"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: Missing IS keyword before NOT NULL, code: 4507\n"); + } + + Y_UNIT_TEST(Subqueries) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + $sq1 = (SELECT * FROM plato.Input); + + $sq2 = SELECT * FROM plato.Input; + + $squ1 = ( + SELECT * FROM plato.Input + UNION ALL + SELECT * FROM plato.Input + ); + + $squ2 = + SELECT * FROM plato.Input + UNION ALL + SELECT * FROM plato.Input; + + $squ3 = ( + (SELECT * FROM plato.Input) + UNION ALL + (SELECT * FROM plato.Input) + ); + + SELECT * FROM $sq1; + SELECT * FROM $sq2; + SELECT * FROM $squ1; + SELECT * FROM $squ2; + SELECT * FROM $squ3; + )"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SubqueriesJoin) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + $left = SELECT * FROM 
plato.Input1 WHERE value != "BadValue"; + $right = SELECT * FROM plato.Input2; + + SELECT * FROM $left AS l + JOIN $right AS r + ON l.key == r.key; + )"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AnyInBackticksAsTableName) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from `any`;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AnyJoinForTableAndSubQuery) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + $r = SELECT * FROM plato.Input2; + + SELECT * FROM ANY plato.Input1 AS l + LEFT JOIN ANY $r AS r + USING (key); + )"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "EquiJoin") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('left 'any)")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('right 'any)")); + } + }; + + TWordCountHive elementStat = {{TString("left"), 0}, {TString("right"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["left"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["right"]); + } + + Y_UNIT_TEST(AnyJoinForTableAndTableSource) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + $r = AsList( + AsStruct("aaa" as key, "bbb" as subkey, "ccc" as value) + ); + + SELECT * FROM ANY plato.Input1 AS l + LEFT JOIN ANY AS_TABLE($r) AS r + USING (key); + )"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "EquiJoin") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('left 'any)")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('right 'any)")); + } + }; + + TWordCountHive elementStat = {{TString("left"), 0}, {TString("right"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["left"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["right"]); + } + + Y_UNIT_TEST(AnyJoinNested) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + FROM ANY 
Input1 as a + JOIN Input2 as b ON a.key = b.key + LEFT JOIN ANY Input3 as c ON a.key = c.key + RIGHT JOIN ANY Input4 as d ON d.key = b.key + CROSS JOIN Input5 + SELECT *; + )"); + + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("left"), 0}, {TString("right"), 0}}; + VerifyProgram(res, elementStat); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["left"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["right"]); + } + + Y_UNIT_TEST(InlineAction) { + NYql::TAstParseResult res = SqlToYql( + "do begin\n" + " select 1\n" + "; end do\n"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), ""); + } + + Y_UNIT_TEST(FlattenByCorrelationName) { + UNIT_ASSERT(SqlToYql("select * from plato.Input as t flatten by t.x").IsOk()); + UNIT_ASSERT(SqlToYql("select * from plato.Input as t flatten by t -- same as flatten by t.t").IsOk()); + } + + Y_UNIT_TEST(DiscoveryMode) { + UNIT_ASSERT(SqlToYqlWithMode("insert into plato.Output select * from plato.Input", NSQLTranslation::ESqlMode::DISCOVERY).IsOk()); + UNIT_ASSERT(SqlToYqlWithMode("select * from plato.concat(Input1, Input2)", NSQLTranslation::ESqlMode::DISCOVERY).IsOk()); + UNIT_ASSERT(SqlToYqlWithMode("select * from plato.each(AsList(\"Input1\", \"Input2\"))", NSQLTranslation::ESqlMode::DISCOVERY).IsOk()); + } + + Y_UNIT_TEST(CubeWithAutoGeneratedLikeColumnName) { + UNIT_ASSERT(SqlToYql("select key,subkey,group from plato.Input group by cube(key,subkey,group)").IsOk()); + } + + Y_UNIT_TEST(CubeWithAutoGeneratedLikeAlias) { + UNIT_ASSERT(SqlToYql("select key,subkey,group from plato.Input group by cube(key,subkey,value as group)").IsOk()); + } + + Y_UNIT_TEST(FilterCanBeUsedAsColumnIdOrBind) { + UNIT_ASSERT(SqlToYql("select filter from plato.Input").IsOk()); + UNIT_ASSERT(SqlToYql("select 1 as filter").IsOk()); + UNIT_ASSERT(SqlToYql("$filter = 1; select $filter").IsOk()); + } + + Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenTopLevelStatements) { + UNIT_ASSERT(SqlToYql(";;select 1; ; select 
2;/*comment*/;select 3;;--comment\n;select 4;;").IsOk()); + } + + Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenActionStatements) { + TString req = + "define action $action($b,$c) as\n" + " ;;$d = $b + $c;\n" + " select $b;\n" + " select $c;;\n" + " select $d,\n" + "end define;\n" + "\n" + "do $action(1,2);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenInlineActionStatements) { + TString req = + "do begin\n" + " ;select 1,\n" + "end do;\n" + "evaluate for $i in AsList(1,2,3) do begin\n" + " select $i;;\n" + " select $i + $i;;\n" + "end do;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenLambdaStatements) { + TString req = + "$x=1;\n" + "$foo = ($a, $b)->{\n" + " ;;$v = $a + $b;\n" + " $bar = ($c) -> {; return $c << $x};;\n" + " return $bar($v);;\n" + "};\n" + "select $foo(1,2);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(StringLiteralWithEscapedBackslash) { + NYql::TAstParseResult res1 = SqlToYql(R"foo(SELECT 'a\\';)foo"); + NYql::TAstParseResult res2 = SqlToYql(R"foo(SELECT "a\\";)foo"); + UNIT_ASSERT(res1.Root); + UNIT_ASSERT(res2.Root); + + TWordCountHive elementStat = {{TString("a\\"), 0}}; + + VerifyProgram(res1, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["a\\"]); + + VerifyProgram(res2, elementStat); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["a\\"]); + } + + Y_UNIT_TEST(StringMultiLineLiteralWithEscapes) { + UNIT_ASSERT(SqlToYql("SELECT @@@foo@@@@bar@@@").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT @@@@@@@@@").IsOk()); + } + + Y_UNIT_TEST(StringMultiLineLiteralConsequitiveAt) { + UNIT_ASSERT(!SqlToYql("SELECT @").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT @@").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT @@@").IsOk()); + UNIT_ASSERT( SqlToYql("SELECT @@@@").IsOk()); + UNIT_ASSERT( SqlToYql("SELECT @@@@@").IsOk()); + + UNIT_ASSERT(!SqlToYql("SELECT @@@@@@").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT 
@@@@@@@").IsOk()); + + UNIT_ASSERT( SqlToYql("SELECT @@@@@@@@").IsOk()); + UNIT_ASSERT( SqlToYql("SELECT @@@@@@@@@").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT @@@@@@@@@@").IsOk()); + } + + Y_UNIT_TEST(ConstnessForListDictSetCreate) { + auto req = "$foo = ($x, $y) -> (\"aaaa\");\n" + "\n" + "select\n" + " $foo(sum(key), ListCreate(String)),\n" + " $foo(sum(key), DictCreate(String, String)),\n" + " $foo(sum(key), SetCreate(String)),\n" + "from (select 1 as key);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(CanUseEmptyTupleInWindowPartitionBy) { + auto req = "select sum(key) over w\n" + "from plato.Input\n" + "window w as (partition compact by (), (subkey), (), value || value as dvalue);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(DenyAnsiOrderByLimitLegacyMode) { + auto req = "pragma DisableAnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input order by key limit 10\n" + "union all\n" + "select * from Input order by key limit 1;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: DisableAnsiOrderByLimitInUnionAll pragma is deprecated and no longer supported\n"); + } + + Y_UNIT_TEST(ReduceUsingUdfWithShortcutsWorks) { + auto req = "use plato;\n" + "\n" + "$arg = 'foo';\n" + "$func = XXX::YYY($arg);\n" + "\n" + "REDUCE Input ON key using $func(subkey);\n" + "REDUCE Input ON key using $func(UUU::VVV(TableRow()));\n"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + req = "use plato;\n" + "\n" + "$arg = 'foo';\n" + "$func = XXX::YYY($arg);\n" + "\n" + "REDUCE Input ON key using all $func(subkey);\n" + "REDUCE Input ON key using all $func(UUU::VVV(TableRow()));"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(YsonDisableStrict) { + UNIT_ASSERT(SqlToYql("pragma yson.DisableStrict = \"false\";").IsOk()); + UNIT_ASSERT(SqlToYql("pragma yson.DisableStrict;").IsOk()); + } + + Y_UNIT_TEST(YsonStrict) { + UNIT_ASSERT(SqlToYql("pragma yson.Strict = 
\"false\";").IsOk()); + UNIT_ASSERT(SqlToYql("pragma yson.Strict;").IsOk()); + } + + Y_UNIT_TEST(JoinByTuple) { + auto req = "use plato;\n" + "\n" + "select * from T1 as a\n" + "join T2 as b\n" + "on AsTuple(a.key, a.subkey) = AsTuple(b.key, b.subkey);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(JoinByStruct) { + auto req = "use plato;\n" + "\n" + "select * from T1 as a\n" + "join T2 as b\n" + "on AsStruct(a.key as k, a.subkey as sk) = AsStruct(b.key as k, b.subkey as sk);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(JoinByUdf) { + auto req = "use plato;\n" + "\n" + "select a.align\n" + "from T1 as a\n" + "join T2 as b\n" + "on Yson::SerializeJsonEncodeUtf8(a.align)=b.align;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(EscapedIdentifierAsLambdaArg) { + auto req = "$f = ($`foo bar`, $x) -> { return $`foo bar` + $x; };\n" + "\n" + "select $f(1, 2);"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(lambda '(\"$foo bar\" \"$x\")"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOnlyCallable) { + auto req = "SELECT Udf(DateTime::FromString)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType)))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarTypeNoRun) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf 
(AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\")"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarRunNoType) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, Void() as RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarFullTest) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, Void() As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, '55' As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (String '\"55\"))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + 
Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs2) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, AsTuple(32, 'no', AsStruct(1e-9 As SomeFloat)) As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" '((Int32 '\"32\") (String '\"no\") (AsStruct '('\"SomeFloat\" (Double '\"1e-9\")))))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOptional) { + auto req = "SELECT Udf(DateTime::FromString, String?, Int32??, Tuple<Int32, Float>, \"foo\" as TypeConfig, Void() As RunConfig)(\"2022-01-01\");"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (OptionalType (DataType 'String)) (OptionalType (OptionalType (DataType 'Int32))) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(CompactionPolicyParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( COMPACTION_POLICY = "SomeCompactionPreset" );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("compactionPolicy")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SomeCompactionPreset")); + } + }; + + TWordCountHive elementStat = { 
{TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AutoPartitioningBySizeParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( AUTO_PARTITIONING_BY_SIZE = ENABLED );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("autoPartitioningBySize")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UniformPartitionsParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( UNIFORM_PARTITIONS = 16 );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("uniformPartitions")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("16")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DateTimeTtlParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(IntTtlParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt AS SECONDS);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(TieringParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( TIERING = 'my_tiering' );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiering")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("my_tiering")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(StoreExternalBlobsParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( STORE_EXTERNAL_BLOBS = ENABLED 
);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storeExternalBlobs")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DefaultValueColumn2) { + auto res = SqlToYql(R"( use plato; + $lambda = () -> { + RETURN CAST(RandomUuid(2) as String) + }; + + CREATE TABLE tableName ( + Key Uint32 DEFAULT RandomNumber(1), + Value String DEFAULT $lambda, + PRIMARY KEY (Key) + ); + )"); + + UNIT_ASSERT_C(res.Root, Err2Str(res)); + + const auto program = GetPrettyPrint(res); + + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomNumber")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomUuid")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("columnConstrains")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("columnConstrains")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("Write")); + +#if 0 + Cerr << program << Endl; +#endif + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DefaultValueColumn3) { + auto res = SqlToYql(R"( use plato; + + CREATE TABLE tableName ( + database_id Utf8, + cloud_id Utf8, + global_id Utf8 DEFAULT database_id || "=====", + PRIMARY KEY (database_id) + ); + )"); + + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:6:40: Error: Column reference \"database_id\" is not allowed in current scope\n"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(DefaultValueColumn) { + auto res = SqlToYql(R"( use plato; + CREATE TABLE tableName ( + Key Uint32 FAMILY cold DEFAULT 5, + Value String FAMILY default DEFAULT "empty", + PRIMARY KEY (Key), + 
FAMILY default ( + DATA = "test", + COMPRESSION = "lz4" + ), + FAMILY cold ( + DATA = "test", + COMPRESSION = "off" + ) + ); + )"); + + UNIT_ASSERT_C(res.Root, Err2Str(res)); + +#if 0 + const auto program = GetPrettyPrint(res); + Cerr << program << Endl; +#endif + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("default")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnConstrains")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnFamilies")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(ChangefeedParseCorrect) { + auto res = SqlToYql(R"( USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH ( + MODE = 'KEYS_ONLY', + FORMAT = 'json', + INITIAL_SCAN = TRUE, + VIRTUAL_TIMESTAMPS = FALSE, + RESOLVED_TIMESTAMPS = Interval("PT1S"), + RETENTION_PERIOD = Interval("P1D"), + TOPIC_MIN_ACTIVE_PARTITIONS = 10, + AWS_REGION = 'aws:region' + ) + ); + )"); + UNIT_ASSERT_C(res.Root, Err2Str(res)); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("changefeed")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("mode")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("KEYS_ONLY")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("format")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("json")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("initial_scan")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("true")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("virtual_timestamps")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("false")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, 
line.find("resolved_timestamps")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("retention_period")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("topic_min_active_partitions")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("aws_region")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("aws:region")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CloneForAsTableWorksWithCube) { + UNIT_ASSERT(SqlToYql("SELECT * FROM AS_TABLE([<|k1:1, k2:1|>]) GROUP BY CUBE(k1, k2);").IsOk()); + } + + Y_UNIT_TEST(WindowPartitionByColumnProperlyEscaped) { + NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input WINDOW w AS (PARTITION BY `column with space`);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "CalcOverWindow") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"column with space\"")); + } + }; + + TWordCountHive elementStat = { {TString("CalcOverWindow"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]); + } + + Y_UNIT_TEST(WindowPartitionByExpressionWithoutAliasesAreAllowed) { + NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input as i WINDOW w AS (PARTITION BY ii.subkey);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "AddMember") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("AddMember row 'group_w_0 (SqlAccess 'struct (Member row '\"ii\")")); + } + if (word == "CalcOverWindow") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("CalcOverWindow core '('\"group_w_0\")")); + } + }; + + TWordCountHive elementStat = { {TString("CalcOverWindow"), 0}, {TString("AddMember"), 0} }; + VerifyProgram(res, elementStat, 
verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AddMember"]); + } + + Y_UNIT_TEST(PqReadByAfterUse) { + ExpectFailWithError("use plato; pragma PqReadBy='plato2';", + "<main>:1:28: Error: Cluster in PqReadPqBy pragma differs from cluster specified in USE statement: plato2 != plato\n"); + + UNIT_ASSERT(SqlToYql("pragma PqReadBy='plato2';").IsOk()); + UNIT_ASSERT(SqlToYql("pragma PqReadBy='plato2'; use plato;").IsOk()); + UNIT_ASSERT(SqlToYql("$x='plato'; use rtmr:$x; pragma PqReadBy='plato2';").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; pragma PqReadBy='dq';").IsOk()); + } + + Y_UNIT_TEST(MrObject) { + NYql::TAstParseResult res = SqlToYql( + "declare $path as String;\n" + "select * from plato.object($path, `format`, \"comp\" || \"ression\" as compression, 1 as bar) with schema (Int32 as y, String as x)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "MrObject") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((MrObject (EvaluateAtom "$path") '"format" '('('"compression" (Concat (String '"comp") (String '"ression"))) '('"bar" (Int32 '"1")))))__")); + } else if (word == "userschema") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__('('('"userschema" (StructType '('"y" (DataType 'Int32)) '('"x" (DataType 'String))) '('"y" '"x"))))__")); + } + }; + + TWordCountHive elementStat = {{TString("MrObject"), 0}, {TString("userschema"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrObject"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]); + } + + Y_UNIT_TEST(TableBindings) { + NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo"); + NYql::TAstParseResult res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const 
TString& line) { + if (word == "MrObject") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((MrTableConcat (Key '('table (String '"path")))) (Void) '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__")); + } + }; + + TWordCountHive elementStat = {{TString("MrTableConcat"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrTableConcat"]); + + settings.DefaultCluster = "plato"; + settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED; + res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n"); + UNIT_ASSERT(!res.Root); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP; + res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) { + if (word == "MrTableConcat") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((MrTableConcat (Key '('table (String '"foo")))) (Void) '())))__")); + } + }; + + TWordCountHive elementStat2 = {{TString("MrTableConcat"), 0}}; + VerifyProgram(res, elementStat2, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["MrTableConcat"]); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING; + res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Warning: Please remove 'bindings.' 
from your query, the support for this syntax will be dropped soon, code: 4538\n"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat3 = {{TString("MrTableConcat"), 0}}; + VerifyProgram(res, elementStat3, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["MrTableConcat"]); + } + + Y_UNIT_TEST(TableBindingsWithInsert) { + NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo"); + NYql::TAstParseResult res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('table (String '"path"))) values '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + + settings.DefaultCluster = "plato"; + settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED; + res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:13: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n"); + UNIT_ASSERT(!res.Root); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP; + res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), ""); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) { + if (word == "Write!") { + //UNIT_ASSERT_VALUES_EQUAL(line, ""); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('table (String '"foo"))) values '('('mode 'renew)))__")); + } + }; + + TWordCountHive elementStat2 = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat2, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["Write!"]); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING; + res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:13: Warning: Please remove 'bindings.' from your query, the support for this syntax will be dropped soon, code: 4538\n"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat3 = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat3, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["Write!"]); + } + + Y_UNIT_TEST(TrailingCommaInWithout) { + UNIT_ASSERT(SqlToYql("SELECT * WITHOUT stream, FROM plato.Input").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT a.* WITHOUT a.intersect, FROM plato.Input AS a").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT a.* WITHOUT col1, col2, a.col3, FROM plato.Input AS a").IsOk()); + } + + Y_UNIT_TEST(NoStackOverflowOnBigCaseStatement) { + TStringBuilder req; + req << "select case 1 + 123"; + for (size_t i = 0; i < 20000; ++i) { + req << " when " << i << " then " << i + 1; + } + req << " else 100500 end;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(CollectPreaggregatedInListLiteral) { + UNIT_ASSERT(SqlToYql("SELECT [COUNT(DISTINCT a+b)] FROM plato.Input").IsOk()); + } + + Y_UNIT_TEST(SmartParenInGroupByClause) { + UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input GROUP BY (k, v)").IsOk()); + } + + Y_UNIT_TEST(AlterTableRenameToIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table RENAME TO moved").IsOk()); + } + + Y_UNIT_TEST(AlterTableAddDropColumnIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table ADD COLUMN addc uint64, DROP COLUMN dropc, ADD addagain uint64").IsOk()); + } + + 
Y_UNIT_TEST(AlterTableSetTTLIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column)").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column AS SECONDS)").IsOk()); + } + + Y_UNIT_TEST(AlterTableSetTieringIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TIERING = 'my_tiering')").IsOk()); + } + + Y_UNIT_TEST(AlterTableAddChangefeedIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table ADD CHANGEFEED feed WITH (MODE = 'UPDATES', FORMAT = 'json')").IsOk()); + } + + Y_UNIT_TEST(AlterTableAlterChangefeedIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table ALTER CHANGEFEED feed DISABLE").IsOk()); + ExpectFailWithError("USE plato; ALTER TABLE table ALTER CHANGEFEED feed SET (FORMAT = 'proto');", + "<main>:1:57: Error: FORMAT alter is not supported\n"); + } + + Y_UNIT_TEST(AlterTableDropChangefeedIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table DROP CHANGEFEED feed").IsOk()); + } + + Y_UNIT_TEST(AlterTableSetPartitioningIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (AUTO_PARTITIONING_BY_SIZE = DISABLED)").IsOk()); + } + + Y_UNIT_TEST(AlterTableAddIndexWithIsNotSupported) { + ExpectFailWithFuzzyError("USE plato; ALTER TABLE table ADD INDEX idx GLOBAL ON (col) WITH (a=b)", + "<main>:1:40: Error: with: alternative is not implemented yet: \\d+:\\d+: global_index\\n"); + } + + Y_UNIT_TEST(AlterTableAddIndexLocalIsNotSupported) { + ExpectFailWithFuzzyError("USE plato; ALTER TABLE table ADD INDEX idx LOCAL ON (col)", + "<main>:1:40: Error: local: alternative is not implemented yet: \\d+:\\d+: local_index\\n"); + } + + Y_UNIT_TEST(CreateTableAddIndexVector) { + const auto result = SqlToYql(R"(USE plato; + CREATE TABLE table ( + pk INT32 NOT NULL, + col String, + INDEX idx GLOBAL USING vector_kmeans_tree + ON (col) COVER (col) + WITH (distance=cosine, vector_type=float, vector_dimension=1024,), + 
PRIMARY KEY (pk)) + )"); + UNIT_ASSERT_C(result.IsOk(), result.Issues.ToString()); + } + + Y_UNIT_TEST(AlterTableAddIndexVector) { + const auto result = SqlToYql(R"(USE plato; + ALTER TABLE table ADD INDEX idx + GLOBAL USING vector_kmeans_tree + ON (col) COVER (col) + WITH (distance=cosine, vector_type="float", vector_dimension=1024) + )"); + UNIT_ASSERT_C(result.IsOk(), result.Issues.ToString()); + } + + Y_UNIT_TEST(AlterTableAddIndexUnknownSubtype) { + ExpectFailWithError("USE plato; ALTER TABLE table ADD INDEX idx GLOBAL USING unknown ON (col)", + "<main>:1:57: Error: UNKNOWN index subtype is not supported\n"); + } + + Y_UNIT_TEST(AlterTableAddIndexMissedParameter) { + ExpectFailWithError(R"(USE plato; + ALTER TABLE table ADD INDEX idx + GLOBAL USING vector_kmeans_tree + ON (col) + WITH (distance=cosine, vector_type=float) + )", + "<main>:5:52: Error: vector_dimension should be set\n"); + } + + Y_UNIT_TEST(AlterTableAlterIndexSetPartitioningIsCorrect) { + const auto result = SqlToYql("USE plato; ALTER TABLE table ALTER INDEX index SET AUTO_PARTITIONING_MIN_PARTITIONS_COUNT 10"); + UNIT_ASSERT_C(result.IsOk(), result.Issues.ToString()); + } + + Y_UNIT_TEST(AlterTableAlterIndexSetMultiplePartitioningSettings) { + const auto result = SqlToYql("USE plato; ALTER TABLE table ALTER INDEX index SET " + "(AUTO_PARTITIONING_BY_LOAD = ENABLED, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10)" + ); + UNIT_ASSERT_C(result.IsOk(), result.Issues.ToString()); + } + + Y_UNIT_TEST(AlterTableAlterIndexResetPartitioningIsNotSupported) { + ExpectFailWithError("USE plato; ALTER TABLE table ALTER INDEX index RESET (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT)", + "<main>:1:55: Error: AUTO_PARTITIONING_MIN_PARTITIONS_COUNT reset is not supported\n" + ); + } + + Y_UNIT_TEST(AlterTableAlterColumnDropNotNullAstCorrect) { + auto reqSetNull = SqlToYql(R"( + USE plato; + CREATE TABLE tableName ( + id Uint32, + val Uint32 NOT NULL, + PRIMARY KEY (id) + ); + + COMMIT; + ALTER TABLE tableName ALTER 
COLUMN val DROP NOT NULL; + )"); + + UNIT_ASSERT(reqSetNull.IsOk()); + UNIT_ASSERT(reqSetNull.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + Y_UNUSED(word); + + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find( + R"(let world (Write! world sink (Key '('tablescheme (String '"tableName"))) (Void) '('('mode 'alter) '('actions '('('alterColumns '('('"val" '('changeColumnConstraints '('('drop_not_null)))))))))))" + )); + }; + + TWordCountHive elementStat({TString("\'mode \'alter")}); + VerifyProgram(reqSetNull, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["\'mode \'alter"]); + } + + Y_UNIT_TEST(OptionalAliases) { + UNIT_ASSERT(SqlToYql("USE plato; SELECT foo FROM (SELECT key foo FROM Input);").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT a.x FROM Input1 a JOIN Input2 b ON a.key = b.key;").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT a.x FROM (VALUES (1,2), (3,4)) a(x,key) JOIN Input b ON a.key = b.key;").IsOk()); + } + + Y_UNIT_TEST(TableNameConstness) { + UNIT_ASSERT(SqlToYql("USE plato; $path = 'foo'; SELECT TableName($path), count(*) FROM Input;").IsOk()); + UNIT_ASSERT(SqlToYql("$path = 'foo'; SELECT TableName($path, 'yt'), count(*) FROM plato.Input;").IsOk()); + ExpectFailWithError("USE plato; SELECT TableName(), count(*) FROM plato.Input;", + "<main>:1:19: Error: Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery\n"); + } + + Y_UNIT_TEST(UseShouldWorkAsColumnName) { + UNIT_ASSERT(SqlToYql("select use from (select 1 as use);").IsOk()); + } + + Y_UNIT_TEST(TrueFalseWorkAfterDollar) { + UNIT_ASSERT(SqlToYql("$ true = false; SELECT $ true or false;").IsOk()); + UNIT_ASSERT(SqlToYql("$False = 0; SELECT $False;").IsOk()); + } + + Y_UNIT_TEST(WithSchemaEquals) { + UNIT_ASSERT(SqlToYql("select * from plato.T with schema Struct<a:Int32, b:String>;").IsOk()); + UNIT_ASSERT(SqlToYql("select * from plato.T with columns = 
Struct<a:Int32, b:String>;").IsOk()); + } + + Y_UNIT_TEST(WithNonStructSchemaS3) { + NSQLTranslation::TTranslationSettings settings; + settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName; + UNIT_ASSERT(SqlToYql("select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);", settings).IsOk()); + } + + Y_UNIT_TEST(AllowNestedTuplesInGroupBy) { + NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by 1 + (x, y, z);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"group0\")")); + }; + + TWordCountHive elementStat({"Aggregate"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["Aggregate"] == 1); + } + + Y_UNIT_TEST(AllowGroupByWithParens) { + NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by (x, y as alias1, z);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"x\" '\"alias1\" '\"z\")")); + }; + + TWordCountHive elementStat({"Aggregate"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["Aggregate"] == 1); + } + + Y_UNIT_TEST(CreateAsyncReplicationParseCorrect) { + auto req = R"( + USE plato; + CREATE ASYNC REPLICATION MyReplication + FOR table1 AS table2, table3 AS table4 + WITH ( + CONNECTION_STRING = "grpc://localhost:2135/?database=/MyDatabase", + ENDPOINT = "localhost:2135", + DATABASE = "/MyDatabase" + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("create")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table1")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table2")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table3")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table4")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("connection_string")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("grpc://localhost:2135/?database=/MyDatabase")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("endpoint")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("localhost:2135")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("database")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("/MyDatabase")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateAsyncReplicationUnsupportedSettings) { + auto reqTpl = R"( + USE plato; + CREATE ASYNC REPLICATION MyReplication + FOR table1 AS table2, table3 AS table4 + WITH ( + %s = "%s" + ) + )"; + + auto settings = THashMap<TString, TString>{ + {"STATE", "DONE"}, + {"FAILOVER_MODE", "FORCE"}, + }; + + for (const auto& [k, v] : settings) { + auto req = Sprintf(reqTpl, k.c_str(), v.c_str()); + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), Sprintf("<main>:6:%zu: Error: %s is not supported in CREATE\n", 20 + k.size(), k.c_str())); + } + } + + Y_UNIT_TEST(AlterAsyncReplicationParseCorrect) { + auto req = R"( + USE plato; + ALTER ASYNC REPLICATION MyReplication + SET ( + STATE = "DONE", + FAILOVER_MODE = "FORCE" + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("state")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DONE")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("failover_mode")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("FORCE")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterAsyncReplicationUnsupportedSettings) { + auto reqTpl = R"( + USE plato; + ALTER ASYNC REPLICATION MyReplication + SET ( + %s = "%s" + ) + )"; + + auto settings = THashMap<TString, TString>{ + {"connection_string", "grpc://localhost:2135/?database=/MyDatabase"}, + {"endpoint", "localhost:2135"}, + {"database", "/MyDatabase"}, + {"token", "foo"}, + {"token_secret_name", "foo_secret_name"}, + {"user", "user"}, + {"password", "bar"}, + {"password_secret_name", "bar_secret_name"}, + }; + + for (const auto& setting : settings) { + auto& key = setting.first; + auto& value = setting.second; + auto req = Sprintf(reqTpl, key.c_str(), value.c_str()); + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&key, &value](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(key)); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(value)); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + } + + Y_UNIT_TEST(AsyncReplicationInvalidSettings) { + auto req = R"( + USE plato; + ALTER ASYNC REPLICATION MyReplication SET (FOO = "BAR"); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:62: Error: Unknown replication 
setting: FOO\n"); + } + + Y_UNIT_TEST(DropAsyncReplicationParseCorrect) { + auto req = R"( + USE plato; + DROP ASYNC REPLICATION MyReplication; + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropAsyncReplicationCascade) { + auto req = R"( + USE plato; + DROP ASYNC REPLICATION MyReplication CASCADE; + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropCascade")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(PragmaCompactGroupBy) { + auto req = "PRAGMA CompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP BY key;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Aggregate") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('compact)")); + } + }; + + TWordCountHive elementStat = { {TString("Aggregate"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]); + } + + Y_UNIT_TEST(PragmaDisableCompactGroupBy) { + auto req = "PRAGMA DisableCompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP /*+ compact() */ BY key;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if 
(word == "Aggregate") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'('compact)")); + } + }; + + TWordCountHive elementStat = { {TString("Aggregate"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]); + } + + Y_UNIT_TEST(AutoSampleWorksWithNamedSubquery) { + UNIT_ASSERT(SqlToYql("$src = select * from plato.Input; select * from $src sample 0.2").IsOk()); + } + + Y_UNIT_TEST(AutoSampleWorksWithSubquery) { + UNIT_ASSERT(SqlToYql("select * from (select * from plato.Input) sample 0.2").IsOk()); + } + + Y_UNIT_TEST(CreateTableTrailingComma) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, PRIMARY KEY (Key),);").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32,);").IsOk()); + } + + Y_UNIT_TEST(BetweenSymmetric) { + UNIT_ASSERT(SqlToYql("select 3 between symmetric 5 and 4;").IsOk()); + UNIT_ASSERT(SqlToYql("select 3 between asymmetric 5 and 4;").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; select key between symmetric and and and from Input;").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; select key between and and and from Input;").IsOk()); + } +} + +Y_UNIT_TEST_SUITE(ExternalFunction) { + Y_UNIT_TEST(ValidUseFunctions) { + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|a: 123, b: a + 641|>)" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " CONCURRENCY=3, OPTIMIZE_FOR='CALLS'").IsOk()); + + // use CALLS without quotes, as keyword + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " OPTIMIZE_FOR=CALLS").IsOk()); + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', TableRow())" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " CONCURRENCY=3").IsOk()); + + UNIT_ASSERT(SqlToYql( + "PROCESS 
plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con'," + " INIT=[0, 900]").IsOk()); + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'bar', TableRow())" + " WITH UNKNOWN_PARAM_1='837747712', UNKNOWN_PARAM_2=Tuple<Uint16, Utf8>," + " INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>").IsOk()); + } + + + Y_UNIT_TEST(InValidUseFunctions) { + ExpectFailWithError("PROCESS plato.Input USING some::udf(*) WITH INPUT_TYPE=Struct<a:Int32>", + "<main>:1:33: Error: PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block\n"); + + ExpectFailWithError("PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'jhhjfh88134d')" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>" + " ASSUME ORDER BY key", + "<main>:1:129: Error: PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block\n"); + + ExpectFailWithError("PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', 'bar', 'baz')", + "<main>:1:15: Error: EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: 4\n"); + + ExpectFailWithError("PROCESS plato.Input\n" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|field_1: a1, field_b: b1|>)\n" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,\n" + " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con',\n" + " CONCURRENCY=5, INPUT_TYPE=Struct<b:Bool>,\n" + " INIT=[0, 900]\n", + "<main>:5:2: Error: WITH \"CONCURRENCY\" clause should be specified only once\n" + "<main>:5:17: Error: WITH \"INPUT_TYPE\" clause should be specified only once\n"); + } +} + +Y_UNIT_TEST_SUITE(SqlToYQLErrors) { + Y_UNIT_TEST(UdfSyntaxSugarMissingCall) { + auto req = "SELECT Udf(DateTime::FromString, \"foo\" as RunConfig);"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Abstract Udf Node 
can't be used as a part of expression.\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(UdfSyntaxSugarIsNotCallable) { + auto req = "SELECT Udf(123, \"foo\" as RunConfig);"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Udf: first argument must be a callable, like Foo::Bar\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(UdfSyntaxSugarNoArgs) { + auto req = "SELECT Udf()();"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Udf: expected at least one argument\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(StrayUTF8) { + /// 'c' in plato is russian here + NYql::TAstParseResult res = SqlToYql("select * from сedar.Input"); + UNIT_ASSERT(!res.Root); + + TString a1 = Err2Str(res); + TString a2(R"foo(<main>:1:14: Error: Unexpected character 'с' (Unicode character <1089>) : cannot match to any predicted input... + +<main>:1:15: Error: Unexpected character : cannot match to any predicted input... 
+ +)foo"); + + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(IvalidStringLiteralWithEscapedBackslash) { + NYql::TAstParseResult res1 = SqlToYql(R"foo($bar = 'a\\'b';)foo"); + NYql::TAstParseResult res2 = SqlToYql(R"foo($bar = "a\\"b";)foo"); + UNIT_ASSERT(!res1.Root); + UNIT_ASSERT(!res2.Root); + + UNIT_ASSERT_NO_DIFF(Err2Str(res1), "<main>:1:15: Error: Unexpected character : syntax error...\n\n"); + UNIT_ASSERT_NO_DIFF(Err2Str(res2), "<main>:1:15: Error: Unexpected character : syntax error...\n\n"); + } + + Y_UNIT_TEST(InvalidHexInStringLiteral) { + NYql::TAstParseResult res = SqlToYql("select \"foo\\x1\\xfe\""); + UNIT_ASSERT(!res.Root); + TString a1 = Err2Str(res); + TString a2 = "<main>:1:15: Error: Failed to parse string literal: Invalid hexadecimal value\n"; + + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(InvalidOctalInMultilineStringLiteral) { + NYql::TAstParseResult res = SqlToYql("select \"foo\n" + "bar\n" + "\\01\""); + UNIT_ASSERT(!res.Root); + TString a1 = Err2Str(res); + TString a2 = "<main>:3:4: Error: Failed to parse string literal: Invalid octal value\n"; + + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(InvalidDoubleAtString) { + NYql::TAstParseResult res = SqlToYql("select @@@@@@"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:13: Error: Unexpected character : syntax error...\n\n"); + } + + Y_UNIT_TEST(InvalidDoubleAtStringWhichWasAcceptedEarlier) { + NYql::TAstParseResult res = SqlToYql("SELECT @@foo@@ @ @@bar@@"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:7: Error: Unexpected token '@@foo@@' : cannot match to any predicted input...\n\n"); + } + + Y_UNIT_TEST(InvalidStringFromTable) { + NYql::TAstParseResult res = SqlToYql("select \"FOO\"\"BAR from plato.foo"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: Unexpected character : syntax error...\n\n"); + } + + Y_UNIT_TEST(InvalidDoubleAtStringFromTable) { + NYql::TAstParseResult res = 
SqlToYql("select @@@@@@ from plato.foo"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Unexpected character : syntax error...\n\n"); + } + + Y_UNIT_TEST(SelectInvalidSyntax) { + NYql::TAstParseResult res = SqlToYql("select 1 form Wat"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:14: Error: Unexpected token 'Wat' : cannot match to any predicted input...\n\n"); + } + + Y_UNIT_TEST(SelectNoCluster) { + NYql::TAstParseResult res = SqlToYql("select foo from bar"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: No cluster name given and no default cluster is selected\n"); + } + + Y_UNIT_TEST(SelectDuplicateColumns) { + NYql::TAstParseResult res = SqlToYql("select a, a from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:11: Error: Unable to use duplicate column names. Collision in name: a\n"); + } + + Y_UNIT_TEST(SelectDuplicateLabels) { + NYql::TAstParseResult res = SqlToYql("select a as foo, b as foo from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unable to use duplicate column names. 
Collision in name: foo\n"); + } + + Y_UNIT_TEST(SelectCaseWithoutThen) { + NYql::TAstParseResult res = SqlToYql("select case when true 1;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:1:22: Error: Unexpected token absence : Missing THEN \n\n" + "<main>:1:23: Error: Unexpected token absence : Missing END \n\n" + ); + } + + Y_UNIT_TEST(SelectComplexCaseWithoutThen) { + NYql::TAstParseResult res = SqlToYql( + "SELECT *\n" + "FROM plato.Input AS a\n" + "WHERE CASE WHEN a.key = \"foo\" a.subkey ELSE a.value END\n" + ); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:30: Error: Unexpected token absence : Missing THEN \n\n"); + } + + Y_UNIT_TEST(SelectCaseWithoutEnd) { + NYql::TAstParseResult res = SqlToYql("select case a when b then c end from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: ELSE is required\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregationNoInput) { + NYql::TAstParseResult res = SqlToYql("select a, Min(b), c"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:8: Error: Column reference 'a'\n" + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:15: Error: Column reference 'b'\n" + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:19: Error: Column reference 'c'\n" + ); + } + + Y_UNIT_TEST(SelectWithBadAggregation) { + ExpectFailWithError("select count(*), 1 + key from plato.Input", + "<main>:1:22: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregatedTerms) { + ExpectFailWithError("select key, 2 * subkey from plato.Input group by key", + "<main>:1:17: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectDistinctWithBadAggregation) { + 
ExpectFailWithError("select distinct count(*), 1 + key from plato.Input", + "<main>:1:31: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + ExpectFailWithError("select distinct key, 2 * subkey from plato.Input group by key", + "<main>:1:26: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregationInHaving) { + ExpectFailWithError("select key from plato.Input group by key\n" + "having \"f\" || value == \"foo\"", + "<main>:2:15: Error: Column `value` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(JoinWithNonAggregatedColumnInProjection) { + ExpectFailWithError("select a.key, 1 + b.subkey\n" + "from plato.Input1 as a join plato.Input2 as b using(key)\n" + "group by a.key;", + "<main>:1:19: Error: Column `b.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + + ExpectFailWithError("select a.key, 1 + b.subkey.x\n" + "from plato.Input1 as a join plato.Input2 as b using(key)\n" + "group by a.key;", + "<main>:1:19: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregatedTermsWithSources) { + ExpectFailWithError("select key, 1 + a.subkey\n" + "from plato.Input1 as a\n" + "group by a.key;", + "<main>:1:17: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + ExpectFailWithError("select key, 1 + a.subkey.x\n" + "from plato.Input1 as a\n" + "group by a.key;", + "<main>:1:17: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(WarnForAggregationBySelectAlias) { + NYql::TAstParseResult res = SqlToYql("select c + 1 as c from plato.Input\n" + "group by c"); + UNIT_ASSERT(res.Root); + 
UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:2:11: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"); + + res = SqlToYql("select c + 1 as c from plato.Input\n" + "group by Math::Floor(c + 2) as c;"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"); + } + + Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenAggrFunctionsAreUsedInAlias) { + NYql::TAstParseResult res = SqlToYql("select\n" + " cast(avg(val) as int) as value,\n" + " value as key\n" + "from\n" + " plato.Input\n" + "group by value"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + + res = SqlToYql("select\n" + " cast(avg(val) over w as int) as value,\n" + " value as key\n" + "from\n" + " plato.Input\n" + "group by value\n" + "window w as ()"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenQualifiedNameIsUsed) { + NYql::TAstParseResult res = SqlToYql("select\n" + " Unwrap(a.key) as key\n" + "from plato.Input as a\n" + "join plato.Input2 as b using(k)\n" + "group by a.key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + + res = SqlToYql("select Unwrap(a.key) as key\n" + "from plato.Input as a\n" + "group by a.key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenTrivialRenamingIsUsed) { + NYql::TAstParseResult res = SqlToYql("select a.key as key\n" + "from plato.Input as a\n" + "group by key;"); 
+ UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + + res = SqlToYql("select key as key\n" + "from plato.Input\n" + "group by key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(ErrorByAggregatingByExpressionWithSameExpressionInSelect) { + ExpectFailWithError("select k * 2 from plato.Input group by k * 2", + "<main>:1:8: Error: Column `k` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(ErrorForAggregationBySelectAlias) { + ExpectFailWithError("select key, Math::Floor(1.1 + a.subkey) as foo\n" + "from plato.Input as a\n" + "group by a.key, foo;", + "<main>:3:17: Warning: GROUP BY will aggregate by column `foo` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:19: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n" + "<main>:1:31: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + + ExpectFailWithError("select c + 1 as c from plato.Input\n" + "group by Math::Floor(c + 2);", + "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. 
Please consult documentation for more details, code: 4532\n" + "<main>:1:8: Error: Column `c` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithDuplicateGroupingColumns) { + NYql::TAstParseResult res = SqlToYql("select c from plato.Input group by c, c"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Duplicate grouping column: c\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregationInGrouping) { + NYql::TAstParseResult res = SqlToYql("select a, Min(b), c group by c"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:30: Error: Column reference 'c'\n"); + } + + Y_UNIT_TEST(SelectWithOpOnBadAggregation) { + ExpectFailWithError("select 1 + a + Min(b) from plato.Input", + "<main>:1:12: Error: Column `a` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectOrderByConstantNum) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:36: Error: Unable to ORDER BY constant expression\n"); + } + + Y_UNIT_TEST(SelectOrderByConstantExpr) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by 1 * 42"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:38: Error: Unable to ORDER BY constant expression\n"); + } + + Y_UNIT_TEST(SelectOrderByConstantString) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by \"nest\""); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:36: Error: Unable to ORDER BY constant expression\n"); + } + + Y_UNIT_TEST(SelectOrderByAggregated) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by min(a)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:36: Error: Unable to 
ORDER BY aggregated values\n"); + } + + Y_UNIT_TEST(ErrorInOrderByExpresison) { + NYql::TAstParseResult res = SqlToYql("select key, value from plato.Input order by (key as zey)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:45: Error: You should use in ORDER BY column name, qualified field, callable function or expression\n"); + } + + Y_UNIT_TEST(ErrorsInOrderByWhenColumnIsMissingInProjection) { + ExpectFailWithError("select subkey from (select 1 as subkey) order by key", "<main>:1:50: Error: Column key is not in source column set\n"); + ExpectFailWithError("select subkey from plato.Input as a order by x.key", "<main>:1:46: Error: Unknown correlation name: x\n"); + ExpectFailWithError("select distinct a, b from plato.Input order by c", "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n"); + ExpectFailWithError("select count(*) as a from plato.Input order by c", "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n"); + ExpectFailWithError("select count(*) as a, b, from plato.Input group by b order by c", "<main>:1:63: Error: Column c is not in source column set. 
Did you mean a?\n"); + UNIT_ASSERT(SqlToYql("select a, b from plato.Input order by c").IsOk()); + } + + Y_UNIT_TEST(SelectAggregatedWhere) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input where count(key)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:33: Error: Can not use aggregated values in filtering\n"); + } + + Y_UNIT_TEST(DoubleFrom) { + NYql::TAstParseResult res = SqlToYql("from plato.Input select * from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:27: Error: Only one FROM clause is allowed\n"); + } + + Y_UNIT_TEST(SelectJoinMissingCorrName) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input1 as a join plato.Input2 as b on a.key == key"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:65: Error: JOIN: column requires correlation name\n"); + } + + Y_UNIT_TEST(SelectJoinMissingCorrName1) { + NYql::TAstParseResult res = SqlToYql( + "use plato;\n" + "$foo = select * from Input1;\n" + "select * from Input2 join $foo USING(key);\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:27: Error: JOIN: missing correlation name for source\n"); + } + + Y_UNIT_TEST(SelectJoinMissingCorrName2) { + NYql::TAstParseResult res = SqlToYql( + "use plato;\n" + "$foo = select * from Input1;\n" + "select * from Input2 cross join $foo;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:33: Error: JOIN: missing correlation name for source\n"); + } + + Y_UNIT_TEST(SelectJoinEmptyCorrNames) { + NYql::TAstParseResult res = SqlToYql( + "$left = (SELECT * FROM plato.Input1 LIMIT 2);\n" + "$right = (SELECT * FROM plato.Input2 LIMIT 2);\n" + "SELECT * FROM $left FULL JOIN $right USING (key);\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:45: Error: At least one correlation name is required in join\n"); + } + + Y_UNIT_TEST(SelectJoinSameCorrNames) { + NYql::TAstParseResult 
res = SqlToYql("SELECT Input.key FROM plato.Input JOIN plato.Input1 ON Input.key == Input.subkey\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:66: Error: JOIN: different correlation names are required for joined tables\n"); + } + + Y_UNIT_TEST(SelectJoinConstPredicateArg) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey == \"wtf\"\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:87: Error: JOIN: each equality predicate argument must depend on exactly one JOIN input\n"); + } + + Y_UNIT_TEST(SelectJoinNonEqualityPredicate) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey > B.subkey\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:87: Error: JOIN ON expression must be a conjunction of equality predicates\n"); + } + + Y_UNIT_TEST(SelectEquiJoinCorrNameOutOfScope) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA equijoin;\n" + "SELECT * FROM plato.A JOIN plato.B ON A.key == C.key JOIN plato.C ON A.subkey == C.subkey;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:45: Error: JOIN: can not use source: C in equality predicate, it is out of current join scope\n"); + } + + Y_UNIT_TEST(SelectEquiJoinNoRightSource) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA equijoin;\n" + "SELECT * FROM plato.A JOIN plato.B ON A.key == B.key JOIN plato.C ON A.subkey == B.subkey;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:79: Error: JOIN ON equality predicate must have one of its arguments from the rightmost source\n"); + } + + Y_UNIT_TEST(SelectEquiJoinOuterWithoutType) { + NYql::TAstParseResult res = SqlToYql( + "SELECT * FROM plato.A Outer JOIN plato.B ON A.key == B.key;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:23: Error: Invalid 
join type: OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n"); + } + + Y_UNIT_TEST(SelectEquiJoinOuterWithWrongType) { + NYql::TAstParseResult res = SqlToYql( + "SELECT * FROM plato.A LEFT semi OUTER JOIN plato.B ON A.key == B.key;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:33: Error: Invalid join type: LEFT SEMI OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n"); + } + + Y_UNIT_TEST(InsertNoCluster) { + NYql::TAstParseResult res = SqlToYql("insert into Output (foo) values (1)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: No cluster name given and no default cluster is selected\n"); + } + + Y_UNIT_TEST(InsertValuesNoLabels) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output values (1)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: INSERT INTO ... VALUES requires specification of table columns\n"); + } + + Y_UNIT_TEST(UpsertValuesNoLabelsKikimr) { + NYql::TAstParseResult res = SqlToYql("upsert into plato.Output values (1)", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: UPSERT INTO ... VALUES requires specification of table columns\n"); + } + + Y_UNIT_TEST(ReplaceValuesNoLabelsKikimr) { + NYql::TAstParseResult res = SqlToYql("replace into plato.Output values (1)", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:20: Error: REPLACE INTO ... 
VALUES requires specification of table columns\n"); + } + + Y_UNIT_TEST(InsertValuesInvalidLabels) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (foo) values (1, 2)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:27: Error: VALUES have 2 columns, INSERT INTO expects: 1\n"); + } + + Y_UNIT_TEST(BuiltinFileOpNoArgs) { + NYql::TAstParseResult res = SqlToYql("select FilePath()"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: FilePath() requires exactly 1 arguments, given: 0\n"); + } + + Y_UNIT_TEST(ProcessWithHaving) { + NYql::TAstParseResult res = SqlToYql("process plato.Input using some::udf(value) having value == 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: PROCESS does not allow HAVING yet! You may request it on yql@ maillist.\n"); + } + + Y_UNIT_TEST(ReduceNoBy) { + NYql::TAstParseResult res = SqlToYql("reduce plato.Input using some::udf(value)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unexpected token absence : Missing ON \n\n<main>:1:25: Error: Unexpected token absence : Missing USING \n\n"); + } + + Y_UNIT_TEST(ReduceDistinct) { + NYql::TAstParseResult res = SqlToYql("reduce plato.Input on key using some::udf(distinct value)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:43: Error: DISTINCT can not be used in PROCESS/REDUCE\n"); + } + + Y_UNIT_TEST(CreateTableWithView) { + NYql::TAstParseResult res = SqlToYql("CREATE TABLE plato.foo:bar (key INT);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:22: Error: Unexpected token ':' : syntax error...\n\n"); + } + + Y_UNIT_TEST(AsteriskWithSomethingAfter) { + NYql::TAstParseResult res = SqlToYql("select *, LENGTH(value) from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Unable to use plain '*' with other projection items. 
Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n"); + } + + Y_UNIT_TEST(AsteriskWithSomethingBefore) { + NYql::TAstParseResult res = SqlToYql("select LENGTH(value), * from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:23: Error: Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n"); + } + + Y_UNIT_TEST(DuplicatedQualifiedAsterisk) { + NYql::TAstParseResult res = SqlToYql("select in.*, key, in.* from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unable to use twice same quialified asterisk. Invalid source: in\n"); + } + + Y_UNIT_TEST(BrokenLabel) { + NYql::TAstParseResult res = SqlToYql("select in.*, key as `funny.label` from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:14: Error: Unable to use '.' in column name. Invalid column name: funny.label\n"); + } + + Y_UNIT_TEST(KeyConflictDetect0) { + NYql::TAstParseResult res = SqlToYql("select key, in.key as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:13: Error: Unable to use duplicate column names. Collision in name: key\n"); + } + + Y_UNIT_TEST(KeyConflictDetect1) { + NYql::TAstParseResult res = SqlToYql("select length(key) as key, key from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Unable to use duplicate column names. 
Collision in name: key\n"); + } + + Y_UNIT_TEST(KeyConflictDetect2) { + NYql::TAstParseResult res = SqlToYql("select key, in.key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict1) { + UNIT_ASSERT(SqlToYql("select LENGTH(Value), key as column0 from plato.Input;").IsOk()); + } + + Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict2) { + UNIT_ASSERT(SqlToYql("select key as column1, LENGTH(Value) from plato.Input;").IsOk()); + } + + Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnSimpleSelect) { + NYql::TAstParseResult res = SqlToYql("use plato; select Intop.*, Input.key from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unknown correlation name: Intop\n"); + } + + Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnJoin) { + NYql::TAstParseResult res = SqlToYql("use plato; select tmissed.*, t2.*, t1.key from plato.Input as t1 join plato.Input as t2 on t1.key==t2.key;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unknown correlation name for asterisk: tmissed\n"); + } + + Y_UNIT_TEST(UnableToReferenceOnNotExistSubcolumn) { + NYql::TAstParseResult res = SqlToYql("select b.subkey from (select key from plato.Input as a) as b;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Column subkey is not in source column set\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify0) { + NYql::TAstParseResult res = SqlToYql("select in.key, in.key as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify1) { + NYql::TAstParseResult res = SqlToYql("select in.key, length(key) as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), 
"<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify2) { + NYql::TAstParseResult res = SqlToYql("select key, in.key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify3) { + NYql::TAstParseResult res = SqlToYql("select in.key, subkey as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(SelectFlattenBySameColumns) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key, key as kk)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:46: Error: Duplicate column name found: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenBySameAliases) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as kk);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Duplicate alias found: kk in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByExprSameAliases) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as kk);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Collision between alias and column name: kk in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias0) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key, subkey as key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:46: Error: Collision between alias and column name: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias1) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as key);"); + UNIT_ASSERT(!res.Root); + 
UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Collision between alias and column name: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByExprConflictNameAndAlias1) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Duplicate column name found: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByUnnamedExpr) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key, ListSkip(key, 1))"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:46: Error: Unnamed expression after FLATTEN BY is not allowed\n"); + } + + Y_UNIT_TEST(UseInOnStrings) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input where \"foo\" in \"foovalue\";"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:42: Error: Unable to use IN predicate with string argument, it won't search substring - " + "expecting tuple, list, dict or single column table source\n"); + } + + Y_UNIT_TEST(UseSubqueryInScalarContextInsideIn) { + NYql::TAstParseResult res = SqlToYql("$q = (select key from plato.Input); select * from plato.Input where subkey in ($q);"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Warning: Using subrequest in scalar context after IN, " + "perhaps you should remove parenthesis here, code: 4501\n"); + } + + Y_UNIT_TEST(InHintsWithKeywordClash) { + NYql::TAstParseResult res = SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT `COMPACT`(1,2,3)"); + UNIT_ASSERT(!res.Root); + // should try to parse last compact as call expression + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:58: Error: Unknown builtin: COMPACT\n"); + } + + Y_UNIT_TEST(ErrorColumnPosition) { + NYql::TAstParseResult res = SqlToYql( + "USE plato;\n" + "SELECT \n" + "value FROM (\n" + "select key from Input\n" + ");\n" + 
); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:1: Error: Column value is not in source column set\n"); + } + + Y_UNIT_TEST(PrimaryViewAbortMapReduce) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input VIEW PRIMARY KEY"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: primary view is not supported for yt tables\n"); + } + + Y_UNIT_TEST(InsertAbortMapReduce) { + NYql::TAstParseResult res = SqlToYql("INSERT OR ABORT INTO plato.Output SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: INSERT OR ABORT INTO is not supported for yt tables\n"); + } + + Y_UNIT_TEST(ReplaceIntoMapReduce) { + NYql::TAstParseResult res = SqlToYql("REPLACE INTO plato.Output SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... 
for yt\n"); + } + + Y_UNIT_TEST(UpsertIntoMapReduce) { + NYql::TAstParseResult res = SqlToYql("UPSERT INTO plato.Output SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: UPSERT INTO is not supported for yt tables\n"); + } + + Y_UNIT_TEST(UpdateMapReduce) { + NYql::TAstParseResult res = SqlToYql("UPDATE plato.Output SET value = value + 1 WHERE key < 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: UPDATE is unsupported for yt\n"); + } + + Y_UNIT_TEST(DeleteMapReduce) { + NYql::TAstParseResult res = SqlToYql("DELETE FROM plato.Output WHERE key < 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: DELETE is unsupported for yt\n"); + } + + Y_UNIT_TEST(ReplaceIntoWithTruncate) { + NYql::TAstParseResult res = SqlToYql("REPLACE INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:32: Error: Unable REPLACE INTO with truncate mode\n"); + } + + Y_UNIT_TEST(UpsertIntoWithTruncate) { + NYql::TAstParseResult res = SqlToYql("UPSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: Unable UPSERT INTO with truncate mode\n"); + } + + Y_UNIT_TEST(InsertIntoWithTruncateKikimr) { + NYql::TAstParseResult res = SqlToYql("INSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: INSERT INTO WITH TRUNCATE is not supported for kikimr tables\n"); + } + + Y_UNIT_TEST(InsertIntoWithWrongArgumentCount) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output with truncate (key, value, subkey) values (5, '1', '2', '3');"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:53: Error: VALUES have 4 columns, INSERT INTO ... 
WITH TRUNCATE expects: 3\n"); + } + + Y_UNIT_TEST(UpsertWithWrongArgumentCount) { + NYql::TAstParseResult res = SqlToYql("upsert into plato.Output (key, value, subkey) values (2, '3');", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:39: Error: VALUES have 2 columns, UPSERT INTO expects: 3\n"); + } + + Y_UNIT_TEST(GroupingSetByExprWithoutAlias) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY GROUPING SETS (cast(key as uint32), subkey);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:53: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(GroupingSetByExprWithoutAlias2) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY subkey || subkey, GROUPING SETS (\n" + "cast(key as uint32), subkey);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:1: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(CubeByExprWithoutAlias) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY CUBE (key, subkey / key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:56: Error: Unnamed expressions are not supported in CUBE. Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(RollupByExprWithoutAlias) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY ROLLUP (subkey / key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:53: Error: Unnamed expressions are not supported in ROLLUP. 
Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(GroupByHugeCubeDeniedNoPragma) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub, key + val as keyval);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:119: Error: GROUP BY CUBE is allowed only for 5 columns, but you use 6\n"); + } + + Y_UNIT_TEST(GroupByInvalidPragma) { + NYql::TAstParseResult res = SqlToYql("PRAGMA GroupByCubeLimit = '-4';"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:27: Error: Expected unsigned integer literal as a single argument for: GroupByCubeLimit\n"); + } + + Y_UNIT_TEST(GroupByHugeCubeDeniedPragme) { + NYql::TAstParseResult res = SqlToYql("PRAGMA GroupByCubeLimit = '4'; SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:132: Error: GROUP BY CUBE is allowed only for 4 columns, but you use 5\n"); + } + + Y_UNIT_TEST(GroupByFewBigCubes) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY CUBE(key, subkey, key + subkey as sum), CUBE(value, value + key + subkey as total);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Unable to GROUP BY more than 64 groups, you try use 80 groups\n"); + } + + Y_UNIT_TEST(GroupByFewBigCubesWithPragmaLimit) { + NYql::TAstParseResult res = SqlToYql("PRAGMA GroupByLimit = '16'; SELECT key FROM plato.Input GROUP BY GROUPING SETS(key, subkey, key + subkey as sum), ROLLUP(value, value + key + subkey as total);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:29: Error: Unable to GROUP BY more than 16 groups, you try use 18 groups\n"); + } + + Y_UNIT_TEST(NoGroupingColumn0) { + NYql::TAstParseResult res = SqlToYql( + "select count(1), key_first, val_first, grouping(key_first, val_first, nomind) as 
group\n" + "from plato.Input group by grouping sets (cast(key as uint32) /100 as key_first, Substring(value, 1, 1) as val_first);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:71: Error: Column 'nomind' is not a grouping column\n"); + } + + Y_UNIT_TEST(NoGroupingColumn1) { + NYql::TAstParseResult res = SqlToYql("select count(1), grouping(key, value) as group_duo from plato.Input group by cube (key, subkey);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:32: Error: Column 'value' is not a grouping column\n"); + } + + Y_UNIT_TEST(EmptyAccess0) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(``));"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:73: Error: Column reference \"\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(EmptyAccess1) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (list0, list1) values (AsList(0, 1, 2), ``);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:66: Error: Column reference \"\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(UseUnknownColumnInInsert) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(`test`));"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:73: Error: Column reference \"test\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(GroupByEmptyColumn) { + NYql::TAstParseResult res = SqlToYql("select count(1) from plato.Input group by ``;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:43: Error: Column name can not be empty\n"); + } + + Y_UNIT_TEST(ConvertNumberOutOfBase) { + NYql::TAstParseResult res = SqlToYql("select 0o80l;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse number from string: 0o80l, char: '8' is out of base: 8\n"); + } + + 
Y_UNIT_TEST(ConvertNumberOutOfRangeForInt64ButFitsInUint64) { + NYql::TAstParseResult res = SqlToYql("select 0xc000000000000000l;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse 13835058055282163712 as integer literal of Int64 type: value out of range for Int64\n"); + } + + Y_UNIT_TEST(ConvertNumberOutOfRangeUint64) { + NYql::TAstParseResult res = SqlToYql("select 0xc0000000000000000l;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse number from string: 0xc0000000000000000l, number limit overflow\n"); + + res = SqlToYql("select 1234234543563435151456;\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse number from string: 1234234543563435151456, number limit overflow\n"); + } + + Y_UNIT_TEST(ConvertNumberNegativeOutOfRange) { + NYql::TAstParseResult res = SqlToYql("select -9223372036854775808;\n" + "select -9223372036854775809;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:8: Error: Failed to parse negative integer: -9223372036854775809, number limit overflow\n"); + } + + Y_UNIT_TEST(InvaildUsageReal0) { + NYql::TAstParseResult res = SqlToYql("select .0;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:7: Error: Unexpected token '.' : cannot match to any predicted input...\n\n"); + } + + Y_UNIT_TEST(InvaildUsageReal1) { + NYql::TAstParseResult res = SqlToYql("select .0f;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:7: Error: Unexpected token '.' 
: cannot match to any predicted input...\n\n"); + } + + Y_UNIT_TEST(InvaildUsageWinFunctionWithoutWindow) { + NYql::TAstParseResult res = SqlToYql("select lead(key, 2) from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to use window function Lead without window specification\n"); + } + + Y_UNIT_TEST(DropTableWithIfExists) { + NYql::TAstParseResult res = SqlToYql("DROP TABLE IF EXISTS plato.foo;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(TooManyErrors) { + const char* q = R"( + USE plato; + select A, B, C, D, E, F, G, H, I, J, K, L, M, N from (select b from `abc`); +)"; + + NYql::TAstParseResult res = SqlToYql(q, 10); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + R"(<main>:3:16: Error: Column A is not in source column set. Did you mean b? +<main>:3:19: Error: Column B is not in source column set. Did you mean b? +<main>:3:22: Error: Column C is not in source column set. Did you mean b? +<main>:3:25: Error: Column D is not in source column set. Did you mean b? +<main>:3:28: Error: Column E is not in source column set. Did you mean b? +<main>:3:31: Error: Column F is not in source column set. Did you mean b? +<main>:3:34: Error: Column G is not in source column set. Did you mean b? +<main>:3:37: Error: Column H is not in source column set. Did you mean b? +<main>:3:40: Error: Column I is not in source column set. Did you mean b? 
+<main>: Error: Too many issues, code: 1 +)"); + }; + + Y_UNIT_TEST(ShouldCloneBindingForNamedParameter) { + NYql::TAstParseResult res = SqlToYql(R"($f = () -> { + $value_type = TypeOf(1); + $pair_type = StructType( + TypeOf("2") AS key, + $value_type AS value + ); + + RETURN TupleType( + ListType($value_type), + $pair_type); +}; + +select FormatType($f()); +)"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(BlockedInvalidFrameBounds) { + auto check = [](const TString& frame, const TString& err) { + const TString prefix = "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n"; + NYql::TAstParseResult res = SqlToYql(prefix + frame + ")"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), err); + }; + + check("ROWS UNBOUNDED FOLLOWING", "<main>:2:5: Error: Frame cannot start from UNBOUNDED FOLLOWING\n"); + check("ROWS BETWEEN 5 PRECEDING AND UNBOUNDED PRECEDING", "<main>:2:29: Error: Frame cannot end with UNBOUNDED PRECEDING\n"); + check("ROWS BETWEEN CURRENT ROW AND 5 PRECEDING", "<main>:2:13: Error: Frame cannot start from CURRENT ROW and end with PRECEDING\n"); + check("ROWS BETWEEN 5 FOLLOWING AND CURRENT ROW", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with CURRENT ROW\n"); + check("ROWS BETWEEN 5 FOLLOWING AND 5 PRECEDING", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with PRECEDING\n"); + } + + Y_UNIT_TEST(BlockedRangeValueWithoutSingleOrderBy) { + UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM plato.Input").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) FROM plato.Input").IsOk()); + + auto res = SqlToYql("SELECT COUNT(*) OVER (RANGE 5 PRECEDING) FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:29: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n"); + + res = 
SqlToYql("SELECT COUNT(*) OVER (ORDER BY key, value RANGE 5 PRECEDING) FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n"); + } + + Y_UNIT_TEST(NoColumnsInFrameBounds) { + NYql::TAstParseResult res = SqlToYql( + "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (ROWS BETWEEN\n" + " 1 + key PRECEDING AND 2 + key FOLLOWING);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:6: Error: Column reference \"key\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(WarnOnEmptyFrameBounds) { + NYql::TAstParseResult res = SqlToYql( + "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n" + "ROWS BETWEEN 10 FOLLOWING AND 5 FOLLOWING)"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:14: Warning: Used frame specification implies empty window frame, code: 4520\n"); + } + + Y_UNIT_TEST(WarnOnRankWithUnorderedWindow) { + NYql::TAstParseResult res = SqlToYql("SELECT RANK() OVER w FROM plato.Input WINDOW w AS ()"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: Rank() is used with unordered window - all rows will be considered equal to each other, code: 4521\n"); + } + + Y_UNIT_TEST(WarnOnRankExprWithUnorderedWindow) { + NYql::TAstParseResult res = SqlToYql("SELECT RANK(key) OVER w FROM plato.Input WINDOW w AS ()"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: Rank(<expression>) is used with unordered window - the result is likely to be undefined, code: 4521\n"); + } + + Y_UNIT_TEST(AnyAsTableName) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from any;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Unexpected token ';' : syntax error...\n\n"); + } + + Y_UNIT_TEST(IncorrectOrderOfLambdaOptionalArgs) { + 
NYql::TAstParseResult res = SqlToYql("$f = ($x?, $y)->($x + $y); select $f(1);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Non-optional argument can not follow optional one\n"); + } + + Y_UNIT_TEST(IncorrectOrderOfActionOptionalArgs) { + NYql::TAstParseResult res = SqlToYql("define action $f($x?, $y) as select $x,$y; end define; do $f(1);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:23: Error: Non-optional argument can not follow optional one\n"); + } + + Y_UNIT_TEST(NotAllowedQuestionOnNamedNode) { + NYql::TAstParseResult res = SqlToYql("$f = 1; select $f?;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unexpected token '?' at the end of expression\n"); + } + + Y_UNIT_TEST(AnyAndCrossJoin) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from any Input1 cross join Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:26: Error: ANY should not be used with Cross JOIN\n"); + + res = SqlToYql("use plato; select * from Input1 cross join any Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:44: Error: ANY should not be used with Cross JOIN\n"); + } + + Y_UNIT_TEST(AnyWithCartesianProduct) { + NYql::TAstParseResult res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from any Input1, Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:56: Error: ANY should not be used with Cross JOIN\n"); + + res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from Input1, any Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:64: Error: ANY should not be used with Cross JOIN\n"); + } + + Y_UNIT_TEST(ErrorPlainEndAsInlineActionTerminator) { + NYql::TAstParseResult res = SqlToYql( + "do begin\n" + " select 1\n" + "; end\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:0: Error: Unexpected 
token absence : Missing DO \n\n"); + } + + Y_UNIT_TEST(ErrorMultiWayJoinWithUsing) { + NYql::TAstParseResult res = SqlToYql( + "USE plato;\n" + "PRAGMA DisableSimpleColumns;\n" + "SELECT *\n" + "FROM Input1 AS a\n" + "JOIN Input2 AS b USING(key)\n" + "JOIN Input3 AS c ON a.key = c.key;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:5:24: Error: Multi-way JOINs should be connected with ON clause instead of USING clause\n" + ); + } + + Y_UNIT_TEST(RequireLabelInFlattenByWithDot) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input flatten by x.y"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:1:40: Error: Unnamed expression after FLATTEN BY is not allowed\n" + ); + } + + Y_UNIT_TEST(WarnUnnamedColumns) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA WarnUnnamedColumns;\n" + "\n" + "SELECT key, subkey, key || subkey FROM plato.Input ORDER BY subkey;\n"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:28: Warning: Autogenerated column name column2 will be used for expression, code: 4516\n"); + } + + Y_UNIT_TEST(WarnSourceColumnMismatch) { + NYql::TAstParseResult res = SqlToYql( + "insert into plato.Output (key, subkey, new_value, one_more_value) select key as Key, subkey, value, \"x\" from plato.Input;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:51: Warning: Column names in SELECT don't match column specification in parenthesis. \"key\" doesn't match \"Key\". 
\"new_value\" doesn't match \"value\", code: 4517\n"); + } + + Y_UNIT_TEST(YtCaseInsensitive) { + NYql::TAstParseResult res = SqlToYql("select * from PlatO.foo;"); + UNIT_ASSERT(res.Root); + + res = SqlToYql("use PlatO; select * from foo;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(KikimrCaseSensitive) { + NYql::TAstParseResult res = SqlToYql("select * from PlatO.foo;", 10, "kikimr"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: Unknown cluster: PlatO\n"); + + res = SqlToYql("use PlatO; select * from foo;", 10, "kikimr"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:5: Error: Unknown cluster: PlatO\n"); + } + + Y_UNIT_TEST(DiscoveryModeForbidden) { + NYql::TAstParseResult res = SqlToYqlWithMode("insert into plato.Output select * from plato.range(\"\", Input1, Input4)", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: range is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("insert into plato.Output select * from plato.like(\"\", \"Input%\")", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: like is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("insert into plato.Output select * from plato.regexp(\"\", \"Input.\")", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: regexp is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("insert into plato.Output select * from plato.filter(\"\", ($name) -> { return find($name, \"Input\") is not null; })", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: filter is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("select Path from plato.folder(\"\") where Type == \"table\"", 
NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: folder is not allowed in Discovery mode, code: 4600\n"); + } + + Y_UNIT_TEST(YsonFuncWithoutArgs) { + UNIT_ASSERT(SqlToYql("SELECT Yson::SerializeText(Yson::From());").IsOk()); + } + + Y_UNIT_TEST(CanNotUseOrderByInNonLastSelectInUnionAllChain) { + auto req = "pragma AnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input order by key\n" + "union all\n" + "select * from Input order by key limit 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(CanNotUseLimitInNonLastSelectInUnionAllChain) { + auto req = "pragma AnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input limit 1\n" + "union all\n" + "select * from Input order by key limit 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(CanNotUseDiscardInNonFirstSelectInUnionAllChain) { + auto req = "pragma AnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input\n" + "union all\n" + "discard select * from Input;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:6:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n"); + } + + Y_UNIT_TEST(CanNotUseIntoResultInNonLastSelectInUnionAllChain) { + auto req = "use plato;\n" + "pragma AnsiOrderByLimitInUnionAll;\n" + "\n" + "select * from Input\n" + "union all\n" + "discard select * from Input;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:6:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n"); + } + + Y_UNIT_TEST(YsonStrictInvalidPragma) { + 
auto res = SqlToYql("pragma yson.Strict = \"wrong\";"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:22: Error: Expected 'true', 'false' or no parameter for: Strict\n"); + } + + /* TableName(): accepted when a cluster was selected with USE or a service name is passed as second argument; an error when neither is present; warning 4525 (with an AST root still produced) when used together with JOIN or with aggregation. */ Y_UNIT_TEST(WarnTableNameInSomeContexts) { + UNIT_ASSERT(SqlToYql("use plato; select TableName() from Input;").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; select TableName(\"aaaa\");").IsOk()); + UNIT_ASSERT(SqlToYql("select TableName(\"aaaa\", \"yt\");").IsOk()); + + auto res = SqlToYql("select TableName() from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: TableName requires either service name as second argument or current cluster name\n"); + + res = SqlToYql("use plato;\n" + "select TableName() from Input1 as a join Input2 as b using(key);"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:8: Warning: TableName() may produce empty result when used in ambiguous context (with JOIN), code: 4525\n"); + + res = SqlToYql("use plato;\n" + "select SOME(TableName()), key from Input group by key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:13: Warning: TableName() will produce empty result when used with aggregation.\n" + "Please consult documentation for possible workaround, code: 4525\n"); + } + + /* SELECT DISTINCT with HAVING and no aggregation still translates, but must raise warning 4526. NOTE(review): Distinc in the test name looks like a typo for Distinct; renaming would change the test id, so it is left as is. */ Y_UNIT_TEST(WarnOnDistincWithHavingWithoutAggregations) { + auto res = SqlToYql("select distinct key from plato.Input having key != '0';"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Warning: The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. 
Please use WHERE instead., code: 4526\n"); + } + + /* An unknown named node used inside a FLATTEN BY expression must be reported as an unknown name at its position on line 5. */ Y_UNIT_TEST(FlattenByExprWithNestedNull) { + auto res = SqlToYql("USE plato;\n" + "\n" + "SELECT * FROM (SELECT 1 AS region_id)\n" + "FLATTEN BY (\n" + " CAST($unknown(region_id) AS List<String>) AS region\n" + ")"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:10: Error: Unknown name: $unknown\n"); + } + + /* A named node with an empty back-quoted name must be rejected. */ Y_UNIT_TEST(EmptySymbolNameIsForbidden) { + auto req = " $`` = 1; select $``;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:5: Error: Empty symbol name is not allowed\n"); + } + + /* Comparing with != NULL must raise warning 4529, whereas 1 or null must translate with an empty issue list. */ Y_UNIT_TEST(WarnOnBinaryOpWithNullArg) { + auto req = "select * from plato.Input where cast(key as Int32) != NULL"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Warning: Binary operation != will return NULL here, code: 4529\n"); + + req = "select 1 or null"; + res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), ""); + } + + /* A column reference inside the TABLESAMPLE argument must be rejected. */ Y_UNIT_TEST(ErrorIfTableSampleArgUsesColumns) { + auto req = "SELECT key FROM plato.Input TABLESAMPLE BERNOULLI(MIN_OF(100.0, CAST(subkey as Int32)));"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:70: Error: Column reference \"subkey\" is not allowed in current scope\n"); + } + + /* A derived column list after a table alias (as t(x,y,z)) must be rejected for a non-VALUES source. */ Y_UNIT_TEST(DerivedColumnListForSelectIsNotSupportedYet) { + auto req = "SELECT a,b,c FROM plato.Input as t(x,y,z);"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:35: Error: Derived column list is only supported for VALUES\n"); + } + + /* VALUES rows of different widths must be rejected; the error names the expected and the actual size. */ Y_UNIT_TEST(ErrorIfValuesHasDifferentCountOfColumns) { + auto req = "VALUES (1,2,3), (4,5);"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: All VALUES items should have same size: expecting 3, got 2\n"); + } + + 
Y_UNIT_TEST(ErrorIfDerivedColumnSizeExceedValuesColumnCount) { + /* Three derived column names for two-column VALUES rows must be an error. */ auto req = "SELECT * FROM(VALUES (1,2), (3,4)) as t(x,y,z);"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: Derived column list size exceeds column count in VALUES\n"); + } + + /* With PRAGMA WarnUnnamedColumns, naming only two of four VALUES columns must raise warning 4516 about the autogenerated names. NOTE(review): Warno in the test name looks like a typo for Warn; renaming would change the test id, so it is left as is. */ Y_UNIT_TEST(WarnoOnAutogeneratedNamesForValues) { + auto req = "PRAGMA WarnUnnamedColumns;\n" + "SELECT * FROM (VALUES (1,2,3,4), (5,6,7,8)) as t(x,y);"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:16: Warning: Autogenerated column names column2...column3 will be used here, code: 4516\n"); + } + + /* The ErrUnionAll*WithoutExplicitLegacyMode tests that follow issue no pragma at all and expect a misplaced ORDER BY, LIMIT or INTO RESULT on a non-last UNION ALL branch to be an error. */ Y_UNIT_TEST(ErrUnionAllWithOrderByWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input order by key\n" + "union all\n" + "select * from Input order by key;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(ErrUnionAllWithLimitWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input limit 10\n" + "union all\n" + "select * from Input limit 1;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(ErrUnionAllWithIntoResultWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input into result aaa\n" + "union all\n" + "select * from Input;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:21: Error: INTO RESULT within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(ErrUnionAllWithDiscardWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input\n" + "union all\n" + "discard select * from Input;"; + + auto res = SqlToYql(req); + 
UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n"); + } + + /* The ORDER BY ignored warning (4504) must still be present in the issue text when translation later fails on the unknown correlation name. */ Y_UNIT_TEST(ErrUnionAllKeepsIgnoredOrderByWarning) { + auto req = "use plato;\n" + "\n" + "SELECT * FROM (\n" + " SELECT * FROM Input\n" + " UNION ALL\n" + " SELECT t.* FROM Input AS t ORDER BY t.key\n" + ");"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:3: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n" + "<main>:6:39: Error: Unknown correlation name: t\n"); + } + + /* ORDER BY without LIMIT in a subquery is ignored with warning 4504. For the inline SELECT source the unknown column x is nevertheless an error; for plato.Input only the warning is expected and an AST root is produced. */ Y_UNIT_TEST(ErrOrderByIgnoredButCheckedForMissingColumns) { + auto req = "$src = SELECT key FROM (SELECT 1 as key, 2 as subkey) ORDER BY x; SELECT * FROM $src;"; + ExpectFailWithError(req, "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n" + "<main>:1:64: Error: Column x is not in source column set\n"); + + req = "$src = SELECT key FROM plato.Input ORDER BY x; SELECT * FROM $src;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n"); + } + + /* A non-Interval literal as the TTL value must be rejected; two errors are expected at the same position. */ Y_UNIT_TEST(InvalidTtlInterval) { + auto req = R"( + USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key)) + WITH (TTL = 1 On CreatedAt); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:25: Error: Literal of Interval type is expected for TTL\n" + "<main>:4:25: Error: Invalid TTL settings\n"); + } + + /* An unsupported TTL unit keyword must be rejected as an unexpected token; only a substring of the issue text is checked here. */ Y_UNIT_TEST(InvalidTtlUnit) { + auto req = R"( + USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt AS PICOSECONDS); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "<main>:4:56: Error: Unexpected token 'PICOSECONDS'"); + } + + 
Y_UNIT_TEST(InvalidChangefeedSink) { + /* CHANGEFEED option validation: this test and the InvalidChangefeed* tests after it each pass one bad setting in the WITH (...) list and expect exactly one error naming that setting. Here: an unknown SINK_TYPE value. */ auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (SINK_TYPE = "S3", MODE = "KEYS_ONLY", FORMAT = "json") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:55: Error: Unknown changefeed sink type: S3\n"); + } + + /* Unknown setting name FOO. */ Y_UNIT_TEST(InvalidChangefeedSettings) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (SINK_TYPE = "local", FOO = "bar") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:64: Error: Unknown changefeed setting: FOO\n"); + } + + /* INITIAL_SCAN given a string where a Bool literal is expected. */ Y_UNIT_TEST(InvalidChangefeedInitialScan) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", INITIAL_SCAN = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:95: Error: Literal of Bool type is expected for INITIAL_SCAN\n"); + } + + /* VIRTUAL_TIMESTAMPS given a string where a Bool literal is expected. */ Y_UNIT_TEST(InvalidChangefeedVirtualTimestamps) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", VIRTUAL_TIMESTAMPS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:101: Error: Literal of Bool type is expected for VIRTUAL_TIMESTAMPS\n"); + } + + /* RESOLVED_TIMESTAMPS given a string where an Interval literal is expected. */ Y_UNIT_TEST(InvalidChangefeedResolvedTimestamps) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RESOLVED_TIMESTAMPS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:102: Error: Literal of Interval type is expected for RESOLVED_TIMESTAMPS\n"); + } + 
+ /* RETENTION_PERIOD given a string where an Interval literal is expected. */ Y_UNIT_TEST(InvalidChangefeedRetentionPeriod) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RETENTION_PERIOD = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:99: Error: Literal of Interval type is expected for RETENTION_PERIOD\n"); + } + + /* TOPIC_MIN_ACTIVE_PARTITIONS given a string where an integer literal is expected. */ Y_UNIT_TEST(InvalidChangefeedTopicPartitions) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", TOPIC_MIN_ACTIVE_PARTITIONS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:110: Error: Literal of integer type is expected for TOPIC_MIN_ACTIVE_PARTITIONS\n"); + } + + /* AWS_REGION given a Bool where a String literal is expected. */ Y_UNIT_TEST(InvalidChangefeedAwsRegion) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", AWS_REGION = true) + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:93: Error: Literal of String type is expected for AWS_REGION\n"); + } + + /* Over a JOIN, a grouping-set key written without a correlation name (subkey on line 8) must be rejected. */ Y_UNIT_TEST(ErrJoinWithGroupingSetsWithoutCorrelationName) { + auto req = "USE plato;\n" + "\n" + "SELECT k1, k2, subkey\n" + "FROM T1 AS a JOIN T2 AS b USING (key)\n" + "GROUP BY GROUPING SETS(\n" + " (a.key as k1, b.subkey as k2),\n" + " (k1),\n" + " (subkey)\n" + ");"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:8:4: Error: Columns in grouping sets should have correlation name, error in key: subkey\n"); + } + + /* Same rule for a plain GROUP BY key (value) over a JOIN. */ Y_UNIT_TEST(ErrJoinWithGroupByWithoutCorrelationName) { + auto req = "USE plato;\n" + "\n" + "SELECT k1, k2,\n" + " value\n" + "FROM T1 AS a JOIN T2 AS b USING (key)\n" + "GROUP BY a.key as k1, b.subkey as k2,\n" + " value;"; + ExpectFailWithError(req, + 
"<main>:7:5: Error: Columns in GROUP BY should have correlation name, error in key: value\n"); + } + + /* SELECT without FROM: a WHERE filter, an aggregation, plain column references and a GROUP BY key must each be rejected. Every offending column reference yields a pair of errors (the general message at 1:1 plus the specific reference). */ Y_UNIT_TEST(ErrWithMissingFrom) { + auto req = "select 1 as key where 1 > 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:25: Error: Filtering is not allowed without FROM\n"); + + req = "select 1 + count(*);"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Aggregation is not allowed without FROM\n"); + + req = "select 1 as key, subkey + value;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:18: Error: Column reference 'subkey'\n" + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:27: Error: Column reference 'value'\n"); + + req = "select count(1) group by key;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:26: Error: Column reference 'key'\n"); + } + + /* Aggregation (count) and window (lead) functions over a named window w must be rejected both inside a lambda body and in a SELECT without FROM; each case expects two errors. */ Y_UNIT_TEST(ErrWithMissingFromForWindow) { + auto req = "$c = () -> (1 + count(1) over w);\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:9: Error: Window and aggregation functions are not allowed in this context\n" + "<main>:1:17: Error: Failed to use aggregation function Count without window specification or in wrong place\n"); + + req = "$c = () -> (1 + lead(1) over w);\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:17: Error: Window functions are not allowed in this context\n" + "<main>:1:17: Error: Failed to use window function Lead without window specification or in wrong place\n"); + + req = "select 1 + count(1) over w window w as ();"; + ExpectFailWithError(req, + "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use aggregation 
function Count without window specification or in wrong place\n"); + + req = "select 1 + lead(1) over w window w as ();"; + ExpectFailWithError(req, + "<main>:1:12: Error: Window functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n"); + } + + /* Same checks as for named windows, but with the window specification written inline after OVER. Inside a lambda a single error is expected; in a SELECT without FROM two errors are expected. */ Y_UNIT_TEST(ErrWithMissingFromForInplaceWindow) { + auto req = "$c = () -> (1 + count(1) over ());\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:26: Error: Window and aggregation functions are not allowed in this context\n"); + + req = "$c = () -> (1 + lead(1) over (rows between unbounded preceding and current row));\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:25: Error: Window and aggregation functions are not allowed in this context\n"); + + req = "select 1 + count(1) over ();"; + ExpectFailWithError(req, + "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use aggregation function Count without window specification or in wrong place\n"); + + req = "select 1 + lead(1) over (rows between current row and unbounded following);"; + ExpectFailWithError(req, + "<main>:1:12: Error: Window functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n"); + } + + /* DISTINCT in the argument list of something that is not an aggregation function must be rejected. Four call shapes are covered: Some::Udf(...), a call applied to the result of sum(key), len(...), and a lambda call. */ Y_UNIT_TEST(ErrDistinctInWrongPlace) { + auto req = "select Some::Udf(distinct key) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:18: Error: DISTINCT can only be used in aggregation functions\n"); + req = "select sum(key)(distinct foo) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:17: Error: DISTINCT can only be used in aggregation functions\n"); + + req = "select len(distinct foo) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:8: Error: DISTINCT can only be used in aggregation functions\n"); + + req = "$foo = ($x) -> ($x); select $foo(distinct key) from 
plato.Input;"; + ExpectFailWithError(req, + "<main>:1:34: Error: DISTINCT can only be used in aggregation functions\n"); + } + + /* Inline YQL (YQL::...) must parse to an AST root with exactly one child: empty or whitespace-only text gives 0 children, two top-level lambdas give 2; all three are errors. */ Y_UNIT_TEST(ErrForNotSingleChildInInlineAST) { + ExpectFailWithError("select YQL::\"\"", + "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n"); + ExpectFailWithError("select YQL::@@ \t@@", + "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n"); + auto req = "$lambda = YQL::@@(lambda '(x)(+ x x)) (lambda '(y)(+ y y))@@;\n" + "select ListMap([1, 2, 3], $lambda);"; + ExpectFailWithError(req, + "<main>:1:11: Error: Failed to parse YQL: expecting AST root node with single child, but got 2\n"); + } + + /* Empty column name after WITHOUT. Through ExpectFailWithError the double-quoted form is reported as a misplaced string literal, while through ExpectFailWithErrorForAnsiLexer it is reported as an empty column name; the back-quoted form is an empty column name through both helpers. */ Y_UNIT_TEST(ErrEmptyColumnName) { + ExpectFailWithError("select * without \"\" from plato.Input", + "<main>:1:18: Error: String literal can not be used here\n"); + + ExpectFailWithError("select * without `` from plato.Input;", + "<main>:1:18: Error: Empty column name is not allowed\n"); + + ExpectFailWithErrorForAnsiLexer("select * without \"\" from plato.Input", + "<main>:1:18: Error: Empty column name is not allowed\n"); + + ExpectFailWithErrorForAnsiLexer("select * without `` from plato.Input;", + "<main>:1:18: Error: Empty column name is not allowed\n"); + } + + /* TableRows takes no arguments; passing one must be an error. */ Y_UNIT_TEST(ErrOnNonZeroArgumentsForTableRows) { + ExpectFailWithError("$udf=\"\";process plato.Input using $udf(TableRows(k))", + "<main>:1:40: Error: TableRows requires exactly 0 arguments\n"); + } + + /* An aggregation call used as a GROUP BY key must be rejected. */ Y_UNIT_TEST(ErrGroupByWithAggregationFunctionAndDistinctExpr) { + ExpectFailWithError("select * from plato.Input group by count(distinct key|key)", + "<main>:1:36: Error: Unable to GROUP BY aggregated values\n"); + } + + // FIXME: check if we can get old behaviour +#if 0 + Y_UNIT_TEST(ErrWithSchemaWithColumnsWithoutType) { + ExpectFailWithError("select * from plato.Input with COLUMNs", + "<main>:1:32: Error: Expected type after COLUMNS\n" + "<main>:1:32: Error: Failed to parse table hints\n"); + 
+ ExpectFailWithError("select * from plato.Input with scheMa", + "<main>:1:32: Error: Expected type after SCHEMA\n" + "<main>:1:32: Error: Failed to parse table hints\n"); + } +#endif + + /* An aggregation with DISTINCT inside a list literal in a SELECT without FROM must fail with the column-reference errors; the second error names the generated column _yql_preagg_Variance0. */ Y_UNIT_TEST(ErrCollectPreaggregatedInListLiteralWithoutFrom) { + ExpectFailWithError("SELECT([VARIANCE(DISTINCT[])])", + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:9: Error: Column reference '_yql_preagg_Variance0'\n"); + } + + /* A trailing comma inside a parenthesised GROUP BY list must be rejected. */ Y_UNIT_TEST(ErrGroupBySmartParenAsTuple) { + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (k, v,)", + "<main>:1:41: Error: Unexpected trailing comma in grouping elements list\n"); + } + + /* A nested parenthesised expression without column references must be rejected as a constant GROUP BY key. */ Y_UNIT_TEST(HandleNestedSmartParensInGroupBy) { + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (+() as k)", + "<main>:1:37: Error: Unable to GROUP BY constant expression\n"); + } + + /* ALTER TABLE: RENAME TO followed by another action must be rejected. */ Y_UNIT_TEST(ErrRenameWithAddColumn) { + ExpectFailWithError("USE plato; ALTER TABLE table RENAME TO moved, ADD COLUMN addc uint64", + "<main>:1:40: Error: RENAME TO can not be used together with another table action\n"); + } + + /* Same rule with the actions in the opposite order; the FIXME below records that the reported position is known to be imprecise. */ Y_UNIT_TEST(ErrAddColumnAndRename) { + // FIXME: fix positions in ALTER TABLE + ExpectFailWithError("USE plato; ALTER TABLE table ADD COLUMN addc uint64, RENAME TO moved", + "<main>:1:46: Error: RENAME TO can not be used together with another table action\n"); + } + + Y_UNIT_TEST(InvalidUuidValue) { + ExpectFailWithError("SELECT Uuid('123e4567ae89ba12d3aa456a426614174ab0')", + "<main>:1:8: Error: Invalid value \"123e4567ae89ba12d3aa456a426614174ab0\" for type Uuid\n"); + ExpectFailWithError("SELECT Uuid('123e4567ae89b-12d3-a456-426614174000')", + "<main>:1:8: Error: Invalid value \"123e4567ae89b-12d3-a456-426614174000\" for type Uuid\n"); + } + + Y_UNIT_TEST(WindowFunctionWithoutOver) { + ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input", + "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n"); + ExpectFailWithError("SELECT 
LAST_VALUE(foo) FROM plato.Input GROUP BY key", + "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n"); + } + + /* CREATE USER and ALTER USER without a preceding USE must fail because no default cluster is selected. */ Y_UNIT_TEST(CreateAlterUserWithoutCluster) { + ExpectFailWithError("\n CREATE USER user ENCRYPTED PASSWORD 'foobar';", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n"); + ExpectFailWithError("ALTER USER CURRENT_USER RENAME TO $foo;", "<main>:1:1: Error: USE statement is missing - no default cluster is selected\n"); + } + + /* GRANT and REVOKE without a preceding USE must fail the same way. */ Y_UNIT_TEST(ModifyPermissionsWithoutCluster) { + ExpectFailWithError("\n GRANT CONNECT ON `/Root` TO user;", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n"); + ExpectFailWithError("\n REVOKE MANAGE ON `/Root` FROM user;", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n"); + } + + /* The role names current_User and Current_role are reported as the system roles CURRENT_USER and CURRENT_ROLE regardless of letter case and are rejected in CREATE USER and in RENAME TO, whereas DROP GROUP IF EXISTS accepts current_User in its list. */ Y_UNIT_TEST(ReservedRoleNames) { + ExpectFailWithError("USE plato; CREATE USER current_User;", "<main>:1:24: Error: System role CURRENT_USER can not be used here\n"); + ExpectFailWithError("USE plato; ALTER USER current_User RENAME TO Current_role", "<main>:1:46: Error: System role CURRENT_ROLE can not be used here\n"); + UNIT_ASSERT(SqlToYql("USE plato; DROP GROUP IF EXISTS a, b, c, current_User;").IsOk()); + } + + /* With pragma ClassicDivision = 'false', an unknown name used as the dividend must still be reported as an ordinary unknown-name error. */ Y_UNIT_TEST(DisableClassicDivisionWithError) { + ExpectFailWithError("pragma ClassicDivision = 'false'; select $foo / 30;", "<main>:1:42: Error: Unknown name: $foo\n"); + } + + /* Nesting sum(distinct ...) inside sum must be rejected. NOTE(review): Agrregated in the test name looks like a typo for Aggregated; renaming would change the test id, so it is left as is. */ Y_UNIT_TEST(AggregationOfAgrregatedDistinctExpr) { + ExpectFailWithError("select sum(sum(distinct x + 1)) from plato.Input", "<main>:1:12: Error: Aggregation of aggregated values is forbidden\n"); + } + + /* Two SQL hints are present in the query text; only the foo hint in the second statement is expected to produce the unused-hint warning (4534). */ Y_UNIT_TEST(WarnForUnusedSqlHint) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input1 as a join /*+ merge() */ plato.Input2 as b using(key);\n" + "select --+ foo(bar)\n" + " 1;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:23: Warning: Hint foo will not be used, code: 
4534\n"); + } + + /* Maps the cluster s3bucket to NYql::S3ProviderName in the translation settings, then checks that the positional schema syntax (type AS column) still translates but raises deprecation warning 4535. Only a substring of the issue text is checked. */ Y_UNIT_TEST(WarnForDeprecatedSchema) { + NSQLTranslation::TTranslationSettings settings; + settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName; + NYql::TAstParseResult res = SqlToYql("select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);", settings); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRING_CONTAINS(res.Issues.ToString(), "Warning: Deprecated syntax for positional schema: please use 'column type' instead of 'type AS column', code: 4535\n"); + } + + /* A column reference (subkey) passed as an AggregationFactory argument must be rejected with two errors. */ Y_UNIT_TEST(ErrorOnColumnNameInMaxByLimit) { + ExpectFailWithError( + "SELECT AGGREGATE_BY(AsTuple(value, key), AggregationFactory(\"MAX_BY\", subkey)) FROM plato.Input;", + "<main>:1:42: Error: Source does not allow column references\n" + "<main>:1:71: Error: Column reference 'subkey'\n"); + } + + /* LIBRARY mode: a top-level named subquery that nothing references is accepted, but one referenced from inside an exported subquery ($count used by $foo) must be an error. */ Y_UNIT_TEST(ErrorInLibraryWithTopLevelNamedSubquery) { + TString withUnusedSubq = "$unused = select max(key) from plato.Input;\n" + "\n" + "define subquery $foo() as\n" + " $count = select count(*) from plato.Input;\n" + " select * from plato.Input limit $count / 2;\n" + "end define;\n" + "export $foo;\n"; + UNIT_ASSERT(SqlToYqlWithMode(withUnusedSubq, NSQLTranslation::ESqlMode::LIBRARY).IsOk()); + + TString withTopLevelSubq = "$count = select count(*) from plato.Input;\n" + "\n" + "define subquery $foo() as\n" + " select * from plato.Input limit $count / 2;\n" + "end define;\n" + "export $foo;\n"; + auto res = SqlToYqlWithMode(withTopLevelSubq, NSQLTranslation::ESqlMode::LIBRARY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Named subquery can not be used as a top level statement in libraries\n"); + } + + Y_UNIT_TEST(SessionStartAndSessionStateShouldSurviveSessionWindowArgsError){ + TString query = R"( + $init = ($_row) -> (min(1, 2)); -- error: aggregation func min() can not be used here + $calculate = ($_row, $_state) -> (1); + $update = ($_row, $_state) -> (2); + SELECT + SessionStart() over w as session_start, + 
SessionState() over w as session_state, + FROM plato.Input as t + WINDOW w AS ( + PARTITION BY user, SessionWindow(ts + 1, $init, $update, $calculate) + ) + )"; + ExpectFailWithError(query, "<main>:2:33: Error: Aggregation function Min requires exactly 1 argument(s), given: 2\n"); + } +} + +/* Test helper: translates req with SqlToYql, asserts that an AST root was produced, and asserts that the whole issue text is exactly one warning of the form "<main>:row:col: Warning: Symbol <symbol> is not used, code: 4527". row and col are the expected position of the unused symbol in req. */ void CheckUnused(const TString& req, const TString& symbol, unsigned row, unsigned col) { + auto res = SqlToYql(req); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), TStringBuilder() << "<main>:" << row << ":" << col << ": Warning: Symbol " << symbol << " is not used, code: 4527\n"); +} + +/* Unused-symbol warning (code 4527): each test declares a symbol in one syntactic position, leaves it unused, and pins the reported row and column through CheckUnused. */ Y_UNIT_TEST_SUITE(WarnUnused) { + /* The same body is prefixed with define action and with define subquery; the unused name $a is expected at the same position in both. */ Y_UNIT_TEST(ActionOrSubquery) { + TString req = " $a()\n" + "as select 1;\n" + "end define;\n" + "\n" + "select 1;"; + CheckUnused("define action\n" + req, "$a", 2, 3); + CheckUnused("define subquery\n" + req, "$a", 2, 3); + } + + /* For an aliased import the warning is expected for the alias ($sq) at the alias position, not for the original name. */ Y_UNIT_TEST(Import) { + TString req = "import lib1 symbols\n" + " $sqr;\n" + "select 1;"; + CheckUnused(req, "$sqr", 2, 3); + + req = "import lib1 symbols\n" + " $sqr as\n" + " $sq;\n" + "select 1;"; + CheckUnused(req, "$sq", 3, 5); + } + + /* When a name is bound twice and only the later binding is read, the warning is expected at the position of the first binding. */ Y_UNIT_TEST(NamedNodeStatement) { + TString req = " $a, $a = AsTuple(1, 2);\n" + "select $a;"; + CheckUnused(req, "$a", 1, 2); + req = "$a, $b = AsTuple(1, 2);\n" + "select $a;"; + CheckUnused(req, "$b", 1, 6); + CheckUnused(" $a = 1; $a = 2; select $a;", "$a", 1, 2); + } + + Y_UNIT_TEST(Declare) { + CheckUnused("declare $a as String;select 1;", "$a", 1, 9); + } + + Y_UNIT_TEST(ActionParams) { + TString req = "define action $a($x, $y) as\n" + " select $x;\n" + "end define;\n" + "\n" + "do $a(1,2);"; + CheckUnused(req, "$y", 1, 22); + } + + Y_UNIT_TEST(SubqueryParams) { + TString req = "use plato;\n" + "define subquery $q($name, $x) as\n" + " select * from $name;\n" + "end define;\n" + "\n" + "select * from $q(\"Input\", 1);"; + CheckUnused(req, "$x", 2, 27); + } + + Y_UNIT_TEST(For) { + TString req = "define action $a() as\n" + " select 1;\n" + "end define;\n" + "\n" + "for $i in 
ListFromRange(1, 10)\n" + "do $a();"; + CheckUnused(req, "$i", 5, 5); + } + + /* An unused lambda parameter ($y) must be reported at its position in the parameter list. */ Y_UNIT_TEST(LambdaParams) { + TString req = "$lambda = ($x, $y) -> ($x);\n" + "select $lambda(1, 2);"; + CheckUnused(req, "$y", 1, 16); + } + + /* Inside a lambda body: a never-read $x, and a $x that is rebound before being read, are both reported at the position of the first binding. */ Y_UNIT_TEST(InsideLambdaBody) { + TString req = "$lambda = () -> {\n" + " $x = 1; return 1;\n" + "};\n" + "select $lambda();"; + CheckUnused(req, "$x", 2, 3); + req = "$lambda = () -> {\n" + " $x = 1; $x = 2; return $x;\n" + "};\n" + "select $lambda();"; + CheckUnused(req, "$x", 2, 3); + } + + /* The same two cases inside an action body. */ Y_UNIT_TEST(InsideAction) { + TString req = "define action $a() as\n" + " $x = 1; select 1;\n" + "end define;\n" + "\n" + "do $a();"; + CheckUnused(req, "$x", 2, 3); + req = "define action $a() as\n" + " $x = 1; $x = 2; select $x;\n" + "end define;\n" + "\n" + "do $a();"; + CheckUnused(req, "$x", 2, 3); + } + + /* Negative case: the query starts with pragma warning(error, 4527), presumably turning the unused-symbol warning into an error, so IsOk() is taken to show that no such warning was raised for $b, which is only read inside the nested action. */ Y_UNIT_TEST(NoWarnOnNestedActions) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "define action $action($b) as\n" + " define action $aaa() as\n" + " select $b;\n" + " end define;\n" + " do $aaa();\n" + "end define;\n" + "\n" + "do $action(1);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + /* Negative case: $a is bound before a subquery definition and read only after it; no warning is expected. */ Y_UNIT_TEST(NoWarnForUsageAfterSubquery) { + auto req = "use plato;\n" + "pragma warning(\"error\", \"4527\");\n" + "\n" + "$a = 1;\n" + "\n" + "define subquery $q($table) as\n" + " select * from $table;\n" + "end define;\n" + "\n" + "select * from $q(\"Input\");\n" + "select $a;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } +} + +/* Tests for the anonymous name $_: it may not be referenced, declared, used as an action or subquery name, imported or exported. */ Y_UNIT_TEST_SUITE(AnonymousNames) { + /* Both spellings, $_ and the back-quoted form, must be rejected when read; the error text names $_ in both cases. */ Y_UNIT_TEST(ReferenceAnonymousVariableIsForbidden) { + auto req = "$_ = 1; select $_;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Unable to reference anonymous name $_\n"); + + req = "$`_` = 1; select $`_`;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unable to reference anonymous name $_\n"); + } + + Y_UNIT_TEST(Declare) { + auto req = "declare $_ as String;"; + 
auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:9: Error: Can not use anonymous name '$_' in DECLARE statement\n"); + } + + Y_UNIT_TEST(ActionSubquery) { + auto req = "define action $_() as select 1; end define;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: Can not use anonymous name '$_' as ACTION name\n"); + + req = "define subquery $_() as select 1; end define;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Can not use anonymous name '$_' as SUBQUERY name\n"); + } + + Y_UNIT_TEST(Import) { + auto req = "import lib symbols $sqr as $_;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Can not import anonymous name $_\n"); + } + + Y_UNIT_TEST(Export) { + auto req = "export $_;"; + auto res = SqlToYqlWithMode(req, NSQLTranslation::ESqlMode::LIBRARY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Can not export anonymous name $_\n"); + } + + Y_UNIT_TEST(AnonymousInActionArgs) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "define action $a($_, $y, $_) as\n" + " select $y;\n" + "end define;\n" + "\n" + "do $a(1,2,3);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(AnonymousInSubqueryArgs) { + auto req = "use plato;\n" + "pragma warning(\"error\", \"4527\");\n" + "define subquery $q($_, $y, $_) as\n" + " select * from $y;\n" + "end define;\n" + "\n" + "select * from $q(1,\"Input\",3);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(AnonymousInLambdaArgs) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "$lambda = ($_, $x, $_) -> ($x);\n" + "select $lambda(1,2,3);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(AnonymousInFor) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "evaluate for $_ in ListFromRange(1, 10) do begin select 1; end 
do;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(Assignment) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "$_ = 1;\n" + "$_, $x, $_ = AsTuple(1,2,3);\n" + "select $x;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } +} + +Y_UNIT_TEST_SUITE(JsonValue) { + Y_UNIT_TEST(JsonValueArgumentCount) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json));"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: Unexpected token ')' : syntax error...\n\n"); + } + + Y_UNIT_TEST(JsonValueJsonPathMustBeLiteralString) { + NYql::TAstParseResult res = SqlToYql("$jsonPath = \"strict $.key\"; select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), $jsonPath);"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Error: Unexpected token absence : Missing STRING_VALUE \n\n"); + } + + Y_UNIT_TEST(JsonValueTranslation) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\");"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["JsonValue"]); + } + + Y_UNIT_TEST(JsonValueReturningSection) { + for (const auto& typeName : {"Bool", "Int64", "Double", "String"}) { + NYql::TAstParseResult res = SqlToYql( + TStringBuilder() << "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" RETURNING " << typeName << ");" + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, 
line.find("'\"strict $.key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(TStringBuilder() << "DataType '" << typeName)); + }; + + TWordCountHive elementStat({typeName}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat[typeName] > 0); + } + } + + Y_UNIT_TEST(JsonValueInvalidReturningType) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{'key': 1238}@@ as Json), 'strict $.key' RETURNING invalid);"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Unknown simple type 'invalid'\n"); + } + + Y_UNIT_TEST(JsonValueAndReturningInExpressions) { + NYql::TAstParseResult res = SqlToYql( + "USE plato\n;" + "$json_value = \"some string\";\n" + "SELECT $json_value;\n" + "SELECT 1 as json_value;\n" + "SELECT $json_value as json_value;\n" + "$returning = \"another string\";\n" + "SELECT $returning;\n" + "SELECT 1 as returning;\n" + "SELECT $returning as returning;\n" + ); + + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(JsonValueValidCaseHandlers) { + const TVector<std::pair<TString, TString>> testCases = { + {"", "'DefaultValue (Null)"}, + {"NULL", "'DefaultValue (Null)"}, + {"ERROR", "'Error (Null)"}, + {"DEFAULT 123", "'DefaultValue (Int32 '\"123\")"}, + }; + + for (const auto& onEmpty : testCases) { + for (const auto& onError : testCases) { + TStringBuilder query; + query << "$json = CAST(@@{\"key\": 1238}@@ as Json);\n" + << "SELECT JSON_VALUE($json, \"strict $.key\""; + if (!onEmpty.first.empty()) { + query << " " << onEmpty.first << " ON EMPTY"; + } + if (!onError.first.empty()) { + query << " " << onError.first << " ON ERROR"; + } + query << ");\n"; + + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(onEmpty.second + " " + onError.second)); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonValue"] > 0); + } + } + } + + Y_UNIT_TEST(JsonValueTooManyCaseHandlers) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON ERROR NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON EMPTY and/or 1 ON ERROR clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueTooManyOnEmpty) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON EMPTY clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueTooManyOnError) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON ERROR);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON ERROR clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueOnEmptyAfterOnError) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: ON EMPTY clause must be before ON ERROR clause\n" + ); + } + + Y_UNIT_TEST(JsonValueNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_VALUE(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + 
TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonValue"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonExists) { + Y_UNIT_TEST(JsonExistsValidHandlers) { + const TVector<std::pair<TString, TString>> testCases = { + {"", "(Just (Bool '\"false\"))"}, + {"TRUE ON ERROR", "(Just (Bool '\"true\"))"}, + {"FALSE ON ERROR", "(Just (Bool '\"false\"))"}, + {"UNKNOWN ON ERROR", "(Nothing (OptionalType (DataType 'Bool)))"}, + // NOTE: in this case we expect arguments of JsonExists callable to end immediately + // after variables. This parenthesis at the end of the expression is left on purpose + {"ERROR ON ERROR", "(Utf8 '\"strict $.key\") (JsonVariables))"}, + }; + + for (const auto& item : testCases) { + NYql::TAstParseResult res = SqlToYql( + TStringBuilder() << R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT JSON_EXISTS($json, "strict $.key" )" << item.first << ");\n" + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(item.second)); + }; + + TWordCountHive elementStat({"JsonExists"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonExists"] > 0); + } + } + + Y_UNIT_TEST(JsonExistsInvalidHandler) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + $default = false; + SELECT JSON_EXISTS($json, "strict $.key" $default ON ERROR); + )"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:53: Error: Unexpected token absence : Missing RPAREN \n\n"); + } + + Y_UNIT_TEST(JsonExistsNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_EXISTS(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 
'Json)))")); + }; + + TWordCountHive elementStat({"JsonExists"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonExists"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonQuery) { + Y_UNIT_TEST(JsonQueryValidHandlers) { + using TTestSuite = const TVector<std::pair<TString, TString>>; + TTestSuite wrapCases = { + {"", "'NoWrap"}, + {"WITHOUT WRAPPER", "'NoWrap"}, + {"WITHOUT ARRAY WRAPPER", "'NoWrap"}, + {"WITH WRAPPER", "'Wrap"}, + {"WITH ARRAY WRAPPER", "'Wrap"}, + {"WITH UNCONDITIONAL WRAPPER", "'Wrap"}, + {"WITH UNCONDITIONAL ARRAY WRAPPER", "'Wrap"}, + {"WITH CONDITIONAL WRAPPER", "'ConditionalWrap"}, + {"WITH CONDITIONAL ARRAY WRAPPER", "'ConditionalWrap"}, + }; + TTestSuite handlerCases = { + {"", "'Null"}, + {"ERROR", "'Error"}, + {"NULL", "'Null"}, + {"EMPTY ARRAY", "'EmptyArray"}, + {"EMPTY OBJECT", "'EmptyObject"}, + }; + + for (const auto& wrap : wrapCases) { + for (const auto& onError : handlerCases) { + for (const auto& onEmpty : handlerCases) { + TStringBuilder query; + query << R"($json = CAST(@@{"key": [123]}@@ as Json); + SELECT JSON_QUERY($json, "strict $.key" )" << wrap.first; + if (!onEmpty.first.empty()) { + if (wrap.first.StartsWith("WITH ")) { + continue; + } + query << " " << onEmpty.first << " ON EMPTY"; + } + if (!onError.first.empty()) { + query << " " << onError.first << " ON ERROR"; + } + query << ");\n"; + + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + const TString args = TStringBuilder() << wrap.second << " " << onEmpty.second << " " << onError.second; + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(args)); + }; + + Cout << wrap.first << " " << onEmpty.first << " " << onError.first << Endl; + + TWordCountHive elementStat({"JsonQuery"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonQuery"] > 0); + } + } + } + } + + 
Y_UNIT_TEST(JsonQueryOnEmptyWithWrapper) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT JSON_QUERY($json, "strict $" WITH ARRAY WRAPPER EMPTY ARRAY ON EMPTY); + )"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:38: Error: ON EMPTY is prohibited because WRAPPER clause is specified\n"); + } + + Y_UNIT_TEST(JsonQueryNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_QUERY(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + TWordCountHive elementStat({"JsonQuery"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonQuery"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonPassing) { + Y_UNIT_TEST(SupportedVariableTypes) { + const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; + + for (const auto& function : functions) { + const auto query = Sprintf(R"( + pragma CompactNamedExprs; + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT %s( + $json, + "strict $.key" + PASSING + "string" as var1, + 1.234 as var2, + CAST(1 as Int64) as var3, + true as var4, + $json as var5 + ))", + function.data() + ); + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"string")))"), "Cannot find `var1`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var2" (Double '"1.234")))"), "Cannot find `var2`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (SafeCast (Int32 '"1") (DataType 'Int64))))"), "Cannot find `var3`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var4" (Bool '"true")))"), "Cannot find 
`var4`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var5" namedexprnode0))"), "Cannot find `var5`"); + }; + + TWordCountHive elementStat({"JsonVariables"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonVariables"] > 0); + } + } + + Y_UNIT_TEST(ValidVariableNames) { + const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; + + for (const auto& function : functions) { + const auto query = Sprintf(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT %s( + $json, + "strict $.key" + PASSING + "one" as var1, + "two" as "VaR2", + "three" as `var3`, + "four" as VaR4 + ))", + function.data() + ); + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"one")))"), "Cannot find `var1`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR2" (String '"two")))"), "Cannot find `VaR2`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (String '"three")))"), "Cannot find `var3`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR4" (String '"four")))"), "Cannot find `VaR4`"); + }; + + TWordCountHive elementStat({"JsonVariables"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonVariables"] > 0); + } + } +} + +Y_UNIT_TEST_SUITE(MigrationToJsonApi) { + Y_UNIT_TEST(WarningOnDeprecatedJsonUdf) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1234}@@ as Json); + SELECT Json::Parse($json); + )"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:26: Warning: Json UDF is deprecated. 
Please use JSON API instead, code: 4506\n"); + } +} + +Y_UNIT_TEST_SUITE(AnsiIdentsNegative) { + Y_UNIT_TEST(EnableAnsiLexerFromRequestSpecialComments) { + auto req = "\n" + "\t --!ansi_lexer \n" + "-- Some comment\n" + "-- another comment\n" + "pragma SimpleColumns;\n" + "\n" + "select 1, '''' as empty;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(AnsiLexerShouldNotBeEnabledHere) { + auto req = "$str = '\n" + "--!ansi_lexer\n" + "--!syntax_v1\n" + "';\n" + "\n" + "select 1, $str, \"\" as empty;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(DoubleQuotesInDictsTuplesOrLists) { + auto req = "$d = { 'a': 1, \"b\": 2, 'c': 3,};"; + + auto res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Column reference \"b\" is not allowed in current scope\n"); + + req = "$t = (1, 2, \"a\");"; + + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:13: Error: Column reference \"a\" is not allowed in current scope\n"); + + req = "$l = ['a', 'b', \"c\"];"; + + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Column reference \"c\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(MultilineComments) { + auto req = "/*/**/ select 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Unexpected character : syntax error...\n\n"); + + req = "/*\n" + "--/*\n" + "*/ select 1;"; + res = SqlToYql(req); + UNIT_ASSERT(res.Root); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:12: Error: Unexpected character : syntax error...\n\n"); + + req = "/*\n" + "/*\n" + "--*/\n" + "*/ select 
1;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:0: Error: Unexpected token '*' : cannot match to any predicted input...\n\n"); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.Root); + } +} + +Y_UNIT_TEST_SUITE(AnsiOptionalAs) { + Y_UNIT_TEST(OptionalAsInProjection) { + UNIT_ASSERT(SqlToYql("PRAGMA AnsiOptionalAs; SELECT a b, c FROM plato.Input;").IsOk()); + ExpectFailWithError("PRAGMA DisableAnsiOptionalAs;\n" + "SELECT a b, c FROM plato.Input;", + "<main>:2:10: Error: Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility\n"); + } + + Y_UNIT_TEST(OptionalAsWithKeywords) { + UNIT_ASSERT(SqlToYql("PRAGMA AnsiOptionalAs; SELECT a type, b data, c source FROM plato.Input;").IsOk()); + } +} + +Y_UNIT_TEST_SUITE(SessionWindowNegative) { + Y_UNIT_TEST(SessionWindowWithoutSource) { + ExpectFailWithError("SELECT 1 + SessionWindow(ts, 32);", + "<main>:1:12: Error: SessionWindow requires data source\n"); + } + + Y_UNIT_TEST(SessionWindowInProjection) { + ExpectFailWithError("SELECT 1 + SessionWindow(ts, 32) from plato.Input;", + "<main>:1:12: Error: SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression\n"); + } + + Y_UNIT_TEST(SessionWindowWithNonConstSecondArg) { + ExpectFailWithError( + "SELECT key, session_start FROM plato.Input\n" + "GROUP BY SessionWindow(ts, 32 + subkey) as session_start, key;", + + "<main>:2:10: Error: Source does not allow column references\n" + "<main>:2:33: Error: Column reference 'subkey'\n"); + } + + Y_UNIT_TEST(SessionWindowWithWrongNumberOfArgs) { + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY SessionWindow()", + "<main>:1:36: Error: SessionWindow requires either two or four arguments\n"); + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY SessionWindow(key, subkey, 100)", + "<main>:1:36: Error: SessionWindow requires either two or four arguments\n"); + } + + Y_UNIT_TEST(DuplicateSessionWindow) 
{ + ExpectFailWithError( + "SELECT\n" + " *\n" + "FROM plato.Input\n" + "GROUP BY\n" + " SessionWindow(ts, 10),\n" + " user,\n" + " SessionWindow(ts, 20)\n" + ";", + + "<main>:7:5: Error: Duplicate session window specification:\n" + "<main>:5:5: Error: Previous session window is declared here\n"); + + ExpectFailWithError( + "SELECT\n" + " MIN(key) over w\n" + "FROM plato.Input\n" + "WINDOW w AS (\n" + " PARTITION BY SessionWindow(ts, 10), user,\n" + " SessionWindow(ts, 20)\n" + ");", + + "<main>:6:5: Error: Duplicate session window specification:\n" + "<main>:5:18: Error: Previous session window is declared here\n"); + } + + Y_UNIT_TEST(SessionStartStateWithoutSource) { + ExpectFailWithError("SELECT 1 + SessionStart();", + "<main>:1:12: Error: SessionStart requires data source\n"); + ExpectFailWithError("SELECT 1 + SessionState();", + "<main>:1:12: Error: SessionState requires data source\n"); + } + + Y_UNIT_TEST(SessionStartStateWithoutGroupByOrWindow) { + ExpectFailWithError("SELECT 1 + SessionStart() from plato.Input;", + "<main>:1:12: Error: SessionStart can not be used without aggregation by SessionWindow\n"); + ExpectFailWithError("SELECT 1 + SessionState() from plato.Input;", + "<main>:1:12: Error: SessionState can not be used without aggregation by SessionWindow\n"); + } + + Y_UNIT_TEST(SessionStartStateWithGroupByWithoutSession) { + ExpectFailWithError("SELECT 1 + SessionStart() from plato.Input group by user;", + "<main>:1:12: Error: SessionStart can not be used here: SessionWindow specification is missing in GROUP BY\n"); + ExpectFailWithError("SELECT 1 + SessionState() from plato.Input group by user;", + "<main>:1:12: Error: SessionState can not be used here: SessionWindow specification is missing in GROUP BY\n"); + } + + Y_UNIT_TEST(SessionStartStateWithoutOverWithWindowWithoutSession) { + ExpectFailWithError("SELECT 1 + SessionStart(), MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionStart can not be used without 
aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n"); + ExpectFailWithError("SELECT 1 + SessionState(), MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionState can not be used without aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n"); + } + + Y_UNIT_TEST(SessionStartStateWithWindowWithoutSession) { + ExpectFailWithError("SELECT 1 + SessionStart() over w, MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionStart can not be used with window w: SessionWindow specification is missing in PARTITION BY\n"); + ExpectFailWithError("SELECT 1 + SessionState() over w, MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionState can not be used with window w: SessionWindow specification is missing in PARTITION BY\n"); + } + + Y_UNIT_TEST(SessionStartStateWithSessionedWindow) { + ExpectFailWithError("SELECT 1 + SessionStart(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ", + "<main>:1:12: Error: SessionStart can not be used here: SessionWindow specification is missing in GROUP BY. Maybe you forgot to add OVER `window_name`?\n"); + ExpectFailWithError("SELECT 1 + SessionState(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ", + "<main>:1:12: Error: SessionState can not be used here: SessionWindow specification is missing in GROUP BY. 
Maybe you forgot to add OVER `window_name`?\n"); + } + + Y_UNIT_TEST(AggregationBySessionStateIsNotSupportedYet) { + ExpectFailWithError("SELECT SOME(1 + SessionState()), key from plato.Input group by key, SessionWindow(ts, 1);", + "<main>:1:17: Error: SessionState with GROUP BY is not supported yet\n"); + } + + Y_UNIT_TEST(SessionWindowInRtmr) { + NYql::TAstParseResult res = SqlToYql( + "SELECT * FROM plato.Input GROUP BY SessionWindow(ts, 10);", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:54: Error: Streaming group by query must have a hopping window specification.\n"); + + res = SqlToYql(R"( + SELECT key, SUM(value) AS value FROM plato.Input + GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S"), SessionWindow(ts, 10); + )", 10, TString(NYql::RtmrProviderName)); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:13: Error: SessionWindow is unsupported for streaming sources\n"); + } +} + +Y_UNIT_TEST_SUITE(LibraSqlSugar) { + auto makeResult = [](TStringBuf settings) { + return SqlToYql( + TStringBuilder() + << settings + << "\n$udf1 = MyLibra::MakeLibraPreprocessor($settings);" + << "\n$udf2 = CustomLibra::MakeLibraPreprocessor($settings);" + << "\nPROCESS plato.Input USING $udf1(TableRow())" + << "\nUNION ALL" + << "\nPROCESS plato.Input USING $udf2(TableRow());" + ); + }; + + Y_UNIT_TEST(EmptySettings) { + auto res = makeResult(R"( + $settings = AsStruct(); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(OnlyEntities) { + auto res = makeResult(R"( + $settings = AsStruct( + AsList("A", "B", "C") AS Entities + ); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(EntitiesWithStrategy) { + auto res = makeResult(R"( + $settings = AsStruct( + AsList("A", "B", "C") AS Entities, + "blacklist" AS EntitiesStrategy + ); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(AllSettings) { + auto res = makeResult(R"( + $settings = AsStruct( + AsList("A", "B", "C") AS 
Entities, + "whitelist" AS EntitiesStrategy, + "path" AS BlockstatDict, + false AS ParseWithFat, + "map" AS Mode + ); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(BadStrategy) { + auto res = makeResult(R"( + $settings = AsStruct("bad" AS EntitiesStrategy); + )"); + UNIT_ASSERT_STRING_CONTAINS( + Err2Str(res), + "Error: MakeLibraPreprocessor got invalid entities strategy: expected 'whitelist' or 'blacklist'" + ); + } + + Y_UNIT_TEST(BadEntities) { + auto res = makeResult(R"( + $settings = AsStruct(AsList("A", 1) AS Entities); + )"); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "Error: MakeLibraPreprocessor entity must be string literal"); + } +} + +Y_UNIT_TEST_SUITE(TrailingQuestionsNegative) { + Y_UNIT_TEST(Basic) { + ExpectFailWithError("SELECT 1?;", "<main>:1:9: Error: Unexpected token '?' at the end of expression\n"); + ExpectFailWithError("SELECT 1? + 1;", "<main>:1:10: Error: Unexpected token '+' : cannot match to any predicted input...\n\n"); + ExpectFailWithError("SELECT 1 + 1??? < 2", "<main>:1:13: Error: Unexpected token '?' at the end of expression\n"); + ExpectFailWithError("SELECT 1? > 2? > 3?", + "<main>:1:11: Error: Unexpected token '?' at the end of expression\n" + "<main>:1:16: Error: Unexpected token '?' at the end of expression\n" + "<main>:1:21: Error: Unexpected token '?' at the end of expression\n"); + } + + Y_UNIT_TEST(SmartParen) { + ExpectFailWithError("$x = 1; SELECT (Int32?, $x?)", "<main>:1:27: Error: Unexpected token '?' at the end of expression\n"); + ExpectFailWithError("SELECT (Int32, foo?)", "<main>:1:19: Error: Unexpected token '?' at the end of expression\n"); + } + + Y_UNIT_TEST(LambdaOptArgs) { + ExpectFailWithError("$l = ($x, $y?, $z??, $t?) -> ($x);", "<main>:1:18: Error: Expecting at most one '?' 
token here (for optional lambda parameters), but got 2\n"); + } +} + +Y_UNIT_TEST_SUITE(FlexibleTypes) { + Y_UNIT_TEST(AssumeOrderByType) { + UNIT_ASSERT(SqlToYql("PRAGMA FlexibleTypes; SELECT 1 AS int32 ASSUME ORDER BY int32").IsOk()); + } + + Y_UNIT_TEST(GroupingSets) { + UNIT_ASSERT(SqlToYql("PRAGMA FlexibleTypes; SELECT COUNT(*) AS cnt, text, uuid FROM plato.Input GROUP BY GROUPING SETS((uuid), (uuid, text));").IsOk()); + } + + Y_UNIT_TEST(WeakField) { + UNIT_ASSERT(SqlToYql("PRAGMA FlexibleTypes; SELECT WeakField(text, string) as text FROM plato.Input").IsOk()); + } + + Y_UNIT_TEST(Aggregation1) { + TString q = + "PRAGMA FlexibleTypes;\n" + "$foo = ($x, $const, $type) -> ($x || $const || FormatType($type));\n" + "SELECT $foo(SOME(x), 'aaa', String) FROM plato.Input GROUP BY y;"; + UNIT_ASSERT(SqlToYql(q).IsOk()); + } + + Y_UNIT_TEST(Aggregation2) { + TString q = + "PRAGMA FlexibleTypes;\n" + "SELECT 1 + String + MAX(key) FROM plato.Input;"; + UNIT_ASSERT(SqlToYql(q).IsOk()); + } +} + +Y_UNIT_TEST_SUITE(ExternalDeclares) { + Y_UNIT_TEST(BasicUsage) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "String"; + auto res = SqlToYqlWithSettings("select $foo;", settings); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "declare") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'String)))__")); + } + }; + + TWordCountHive elementStat = {{TString("declare"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]); + } + + Y_UNIT_TEST(DeclareOverrides) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "String"; + auto res = SqlToYqlWithSettings("declare $foo as Int32; select $foo;", settings); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + TVerifyLineFunc verifyLine 
= [](const TString& word, const TString& line) { + if (word == "declare") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'Int32)))__")); + } + }; + + TWordCountHive elementStat = {{TString("declare"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]); + } + + Y_UNIT_TEST(UnusedDeclareDoesNotProduceWarning) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "String"; + auto res = SqlToYqlWithSettings("select 1;", settings); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "declare") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'String)))__")); + } + }; + + TWordCountHive elementStat = {{TString("declare"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]); + } + + Y_UNIT_TEST(DeclaresWithInvalidTypesFails) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "List<BadType>"; + auto res = SqlToYqlWithSettings("select 1;", settings); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:0:5: Error: Unknown type: 'BadType'\n" + "<main>: Error: Failed to parse type for externally declared name 'foo'\n"); + } +} + +Y_UNIT_TEST_SUITE(ExternalDataSource) { + Y_UNIT_TEST(CreateExternalDataSourceWithAuthNone) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#"); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithAuthServiceAccount) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"SERVICE_ACCOUNT") '('"location" '"my-bucket") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"ObjectStorage"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithBasic) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="BASIC", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"source_type" '"PostgreSQL"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} 
}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithMdbBasic) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"MDB_BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"PostgreSQL"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithAws) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name", + AWS_REGION="ru-central-1" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"AWS") '('"aws_access_key_id_secret_name" '"secred_id_name") '('"aws_region" '"ru-central-1") '('"aws_secret_access_key_secret_name" '"secret_key_name") '('"location" '"protocol://host:port/") '('"source_type" '"PostgreSQL"))#"); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithToken) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="YT", + LOCATION="protocol://host:port/", + AUTH_METHOD="TOKEN", + TOKEN_SECRET_NAME="token_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"TOKEN") '('"location" '"protocol://host:port/") '('"source_type" '"YT") '('"token_secret_name" '"token_name"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceIfNotExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE IF NOT EXISTS MyDataSource WITH ( + 
SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectIfNotExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterExternalDataSource) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER EXTERNAL DATA SOURCE MyDataSource + SET (SOURCE_TYPE = "ObjectStorage", Login = "Admin"), + SET Location "bucket", + RESET (Auth_Method, Service_Account_Id, Service_Account_Secret_Name); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"location" '"bucket") '('"login" '"Admin") '('"source_type" '"ObjectStorage"))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"auth_method" '"service_account_id" '"service_account_secret_name")))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceOrReplace) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + CREATE OR REPLACE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + 
UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectOrReplace")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateOrReplaceForUnsupportedTableTypesShouldFail) { + ExpectFailWithError(R"sql( + USE plato; + CREATE OR REPLACE TABLE t (a int32 not null, primary key(a, a)); + )sql" , "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE OR REPLACE TABLE t ( + Key Uint64, + Value1 String, + PRIMARY KEY (Key) + ) + WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + )sql" , "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n"); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource; + )sql" , "<main>:3:56: Error: Unexpected token ';' : syntax error...\n\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql" , "<main>:5:33: Error: SOURCE_TYPE requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket" + ); + )sql" , "<main>:5:30: Error: AUTH_METHOD requires key\n"); + + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE1" + ); + )sql" , "<main>:6:33: Error: Unknown AUTH_METHOD = NONE1\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + 
SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT" + ); + )sql" , "<main>:6:33: Error: SERVICE_ACCOUNT_ID requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_ID="s1" + ); + )sql" , "<main>:7:40: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_SECRET_NAME="s1" + ); + )sql" , "<main>:7:49: Error: SERVICE_ACCOUNT_ID requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="BASIC", + LOGIN="admin" + ); + )sql" , "<main>:7:27: Error: PASSWORD_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="BASIC", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:7:42: Error: LOGIN requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_ID requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n"); + + 
ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:9:42: Error: LOGIN requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + LOGIN="admin" + ); + )sql" , "<main>:9:27: Error: PASSWORD_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name", + AWS_REGION="ru-central-1" + ); + )sql" , "<main>:8:32: Error: AWS_ACCESS_KEY_ID_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name", + AWS_REGION="ru-central-1" + ); + )sql" , "<main>:8:32: Error: AWS_SECRET_ACCESS_KEY_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name", + AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name" + ); + )sql" , "<main>:8:51: Error: AWS_REGION requires key\n"); + } + + Y_UNIT_TEST(DropExternalDataSourceWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL DATA SOURCE MyDataSource; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, 
const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalDataSource) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + DROP EXTERNAL DATA SOURCE MyDataSource; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource"); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalDataSourceIfExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL DATA SOURCE IF EXISTS MyDataSource; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "MyDataSource"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObjectIfExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(ExternalTable) { + Y_UNIT_TEST(CreateExternalTable) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc 
verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="mydatasource", + LOCATION="/folder1/*" + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mydatasource"); + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mytable"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableObjectStorage) { + auto res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int, + year Int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*", + FORMAT="json_as_string", + `projection.enabled`="true", + `projection.year.type`="integer", + `projection.year.min`="2010", + `projection.year.max`="2022", + `projection.year.interval`="1", + `projection.month.type`="integer", + `projection.month.min`="1", + `projection.month.max`="12", + `projection.month.interval`="1", + `projection.month.digits`="2", + `storage.location.template`="${year}/${month}", + PARTITONED_BY = "[year, month]" + ); + )sql"); + UNIT_ASSERT_C(res.IsOk(), 
res.Issues.ToString()); + } + + Y_UNIT_TEST(CreateExternalTableIfNotExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL TABLE IF NOT EXISTS mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, "create_if_not_exists"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableOrReplace) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + CREATE OR REPLACE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, "create_or_replace"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterExternalTableAddColumn) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER EXTERNAL TABLE mytable + ADD COLUMN my_column int32, + RESET (LOCATION); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == 
"Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('addColumns '('('"my_column" (AsOptionalType (DataType 'Int32))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location)))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterExternalTableDropColumn) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER EXTERNAL TABLE mytable + DROP COLUMN my_column, + SET (Location = "abc", Other_Prop = "42"), + SET x 'y'; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('dropColumns '('"my_column")#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('Other_Prop (String '"42")) '('x (String '"y")))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable; + )sql" , "<main>:3:45: Error: Unexpected token ';' : syntax error...\n\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ); + )sql" , "<main>:4:23: Error: DATA_SOURCE requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource" + ); + )sql" , "<main>:6:33: Error: LOCATION requires 
key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + LOCATION="/folder1/*" + ); + )sql" , "<main>:6:30: Error: DATA_SOURCE requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int, + PRIMARY KEY(a) + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )sql" , "<main>:8:30: Error: PRIMARY KEY is not supported for external table\n"); + } + + Y_UNIT_TEST(DropExternalTable) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL TABLE MyExternalTable; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalTableWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + DROP EXTERNAL TABLE MyExternalTable; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyExternalTable"); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalTableIfExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL TABLE IF EXISTS MyExternalTable; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("tablescheme")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(TopicsDDL) { + void TestQuery(const TString& query, bool expectOk = true) { + TStringBuilder finalQuery; + + finalQuery << "use plato;" << Endl << query; + auto res = SqlToYql(finalQuery, 10, "kikimr"); + if (expectOk) { + UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + } else { + UNIT_ASSERT(!res.IsOk()); + } + } + + Y_UNIT_TEST(CreateTopicSimple) { + TestQuery(R"( + CREATE TOPIC topic1; + )"); + TestQuery(R"( + CREATE TOPIC `cluster1.topic1`; + )"); + TestQuery(R"( + CREATE TOPIC topic1 WITH (metering_mode = "str_value", partition_count_limit = 123, retention_period = Interval('PT1H')); + )"); + } + + Y_UNIT_TEST(CreateTopicConsumer) { + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1); + )"); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)); + )"); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)) WITH (supported_codecs = "1,2,3"); + )"); + } + + Y_UNIT_TEST(AlterTopicSimple) { + TestQuery(R"( + ALTER TOPIC topic1 SET (retention_period = Interval('PT1H')); + )"); + TestQuery(R"( + ALTER TOPIC topic1 SET (retention_storage_mb = 3, partition_count_limit = 50); + )"); + TestQuery(R"( + ALTER TOPIC topic1 RESET (supported_codecs, retention_period); + )"); + TestQuery(R"( + ALTER TOPIC topic1 RESET (partition_write_speed_bytes_per_second), + SET (partition_write_burst_bytes = 11111, min_active_partitions = 1); + )"); + } + Y_UNIT_TEST(AlterTopicConsumer) { + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER consumer1, + ADD CONSUMER consumer2 WITH (important = false, supported_codecs = "RAW"), + ALTER CONSUMER consumer3 SET (important = false, read_from = 1), + ALTER CONSUMER consumer3 RESET 
(supported_codecs), + DROP CONSUMER consumer4, + SET (partition_count_limit = 11, retention_period = Interval('PT1H')), + RESET(metering_mode) + )"); + } + Y_UNIT_TEST(DropTopic) { + TestQuery(R"( + DROP TOPIC topic1; + )"); + } + + Y_UNIT_TEST(TopicBadRequests) { + TestQuery(R"( + CREATE TOPIC topic1(); + )", false); + TestQuery(R"( + CREATE TOPIC topic1 SET setting1 = value1; + )", false); + TestQuery(R"( + ALTER TOPIC topic1 SET setting1 value1; + )", false); + TestQuery(R"( + ALTER TOPIC topic1 RESET setting1; + )", false); + + TestQuery(R"( + ALTER TOPIC topic1 DROP CONSUMER consumer4 WITH (k1 = v1); + )", false); + + TestQuery(R"( + CREATE TOPIC topic1 WITH (retention_period = 123); + )", false); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons1 WITH (important = false)); + )", false); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1 WITH (bad_option = false)); + )", false); + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER cons1, ALTER CONSUMER cons1 RESET (important); + )", false); + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER consumer1, + ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1), + ALTER CONSUMER consumer3 RESET (supported_codecs); + )", false); + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER consumer1, + ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1), + ALTER CONSUMER consumer3 SET (read_from = 2); + )", false); + } + + Y_UNIT_TEST(TopicWithPrefix) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + PRAGMA TablePathPrefix = '/database/path/to/tables'; + ALTER TOPIC `my_table/my_feed` ADD CONSUMER `my_consumer`; + )"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("/database/path/to/tables/my_table/my_feed"), 0}, {"topic", 0}}; + VerifyProgram(res, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["topic"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["/database/path/to/tables/my_table/my_feed"]); + } +} + 
// Tests for `pragma BlockEngine`: accepted values and the error for an unknown one.
Y_UNIT_TEST_SUITE(BlockEnginePragma) {
    // Each accepted value must translate; "auto" and "force" are expected to
    // produce exactly one line mentioning BlockEngine, "disable" none.
    Y_UNIT_TEST(Basic) {
        const TVector<TString> values = {"auto", "force", "disable"};
        for (const auto& value : values) {
            const auto query = TStringBuilder() << "pragma Blockengine='" << value << "'; select 1;";
            NYql::TAstParseResult res = SqlToYql(query);
            UNIT_ASSERT(res.Root);

            TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
                Y_UNUSED(word);
                UNIT_ASSERT_STRING_CONTAINS(line, TStringBuilder() << R"(Configure! world (DataSource '"config") '"BlockEngine" '")" << value << "\"");
            };

            TWordCountHive elementStat({"BlockEngine"});
            VerifyProgram(res, elementStat, verifyLine);

            // VALUES_EQUAL (rather than a bare boolean assert) prints both counts on failure.
            const int expectedCount = (value == "disable") ? 0 : 1;
            UNIT_ASSERT_VALUES_EQUAL(expectedCount, elementStat["BlockEngine"]);
        }
    }

    // An unsupported pragma value must be rejected with a positioned error.
    Y_UNIT_TEST(UnknownSetting) {
        ExpectFailWithError("use plato; pragma BlockEngine='foo';",
            "<main>:1:31: Error: Expected `disable|auto|force' argument for: BlockEngine\n");
    }
}

// Tests for CREATE VIEW / DROP VIEW translation and related view syntax.
Y_UNIT_TEST_SUITE(TViewSyntaxTest) {
    // The simplest CREATE VIEW statement must translate.
    Y_UNIT_TEST(CreateViewSimple) {
        NYql::TAstParseResult res = SqlToYql(R"(
                USE plato;
                CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
            )"
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
    }

    // CREATE VIEW over a table: the Write! line must carry the view path and createObject.
    Y_UNIT_TEST(CreateViewFromTable) {
        constexpr const char* path = "/PathPrefix/TheView";
        constexpr const char* query = R"(
            SELECT * FROM SomeTable
        )";

        NYql::TAstParseResult res = SqlToYql(std::format(R"(
                USE plato;
                CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
            )",
            path,
            query
        )
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, path);
                UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
            }
        };
        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // The view's query text is compared against the re-tokenized original query.
    Y_UNIT_TEST(CheckReconstructedQuery) {
        constexpr const char* path = "/PathPrefix/TheView";
        constexpr const char* query = R"(
            SELECT * FROM FirstTable JOIN SecondTable ON FirstTable.key == SecondTable.key
        )";

        NYql::TAstParseResult res = SqlToYql(std::format(R"(
                USE plato;
                CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
            )",
            path,
            query
        )
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TString reconstructedQuery = ToString(Tokenize(query));
        // NOTE(review): the hive below tracks only "Write!", while this callback
        // reacts to "query_text". If VerifyProgram invokes the callback only for
        // tracked words (as the sibling tests suggest), this check never runs --
        // confirm, and track "query_text" as well if so.
        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
            if (word == "query_text") {
                UNIT_ASSERT_STRING_CONTAINS(line, reconstructedQuery);
            }
        };
        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // DROP VIEW: the Write! line must carry the view path and dropObject.
    Y_UNIT_TEST(DropView) {
        constexpr const char* path = "/PathPrefix/TheView";
        NYql::TAstParseResult res = SqlToYql(std::format(R"(
                USE plato;
                DROP VIEW `{}`;
            )",
            path
        )
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, path);
                UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
            }
        };
        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // CREATE VIEW with TablePathPrefix: the prefix must be applied to the view name.
    Y_UNIT_TEST(CreateViewWithTablePrefix) {
        NYql::TAstParseResult res = SqlToYql(R"(
                USE plato;
                PRAGMA TablePathPrefix='/PathPrefix';
                CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
            )"
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
                UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
            }
        };

        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // DROP VIEW with TablePathPrefix: the prefix must be applied to the view name.
    Y_UNIT_TEST(DropViewWithTablePrefix) {
        NYql::TAstParseResult res = SqlToYql(R"(
                USE plato;
                PRAGMA TablePathPrefix='/PathPrefix';
                DROP VIEW TheView;
            )"
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            // Must match the word tracked by the hive below ("Write!"); the
            // previous comparison against "Write" left these checks unreachable.
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
                UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
            }
        };

        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // `WITH schema(...)` must produce a userschema struct; a column without
    // NOT NULL becomes optional.
    Y_UNIT_TEST(YtAlternativeSchemaSyntax) {
        NYql::TAstParseResult res = SqlToYql(R"(
            SELECT * FROM plato.Input WITH schema(y Int32, x String not null);
        )");
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "userschema") {
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
                    line.find(R"__('('('"userschema" (StructType '('"y" (AsOptionalType (DataType 'Int32))) '('"x" (DataType 'String))))))__"));
            }
        };

        TWordCountHive elementStat = {{TString("userschema"), 0}};
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]);
    }

    // A fully qualified column over `Input VIEW ...`: expects one SqlProjectItem,
    // one Read! and no SqlAccess in the output.
    Y_UNIT_TEST(UseViewAndFullColumnId) {
        NYql::TAstParseResult res = SqlToYql("USE plato; SELECT Input.x FROM Input VIEW uitzicht;");
        UNIT_ASSERT(res.Root);

        TWordCountHive elementStat = {{TString("SqlAccess"), 0}, {"SqlProjectItem", 0}, {"Read!", 0}};
        VerifyProgram(res, elementStat);
        UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlAccess"]);
        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]);
    }
}

// Tests for `pragma CompactNamedExprs` and the (Disable)ValidateUnusedExprs pragmas.
// The expected error positions depend on the exact indentation inside the raw
// strings below; keep it intact when editing.
Y_UNIT_TEST_SUITE(CompactNamedExprs) {
    // Source-dependent callables bound to a named expression outside of any
    // source must fail with a positioned error.
    Y_UNIT_TEST(SourceCallablesInWrongContext) {
        const TString query = R"(
            pragma CompactNamedExprs;
            $foo = %s();
            select $foo from plato.Input;
        )";

        // callable name -> expected error text
        const THashMap<TString, TString> errs = {
            {"TableRow", "<main>:3:20: Error: TableRow requires data source\n"},
            {"JoinTableRow", "<main>:3:20: Error: JoinTableRow requires data source\n"},
            {"TableRecordIndex", "<main>:3:20: Error: Unable to use function: TableRecord without source\n"},
            {"TablePath", "<main>:3:20: Error: Unable to use function: TablePath without source\n"},
            {"SystemMetadata", "<main>:3:20: Error: Unable to use function: SystemMetadata without source\n"},
        };

        // Iterate the expectation table directly: a single list of callables, no
        // per-iteration string copy. The cases are independent, so order is irrelevant.
        for (const auto& [callable, expectedError] : errs) {
            const auto req = Sprintf(query.c_str(), callable.c_str());
            ExpectFailWithError(req, expectedError);
        }
    }

    // With ValidateUnusedExprs, errors inside unused named expressions and
    // unused subqueries are still reported.
    Y_UNIT_TEST(ValidateUnusedExprs) {
        TString query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma ValidateUnusedExprs;

            $foo = count(1);
            select 1;
        )";
        ExpectFailWithError(query, "<main>:6:20: Error: Aggregation is not allowed in this context\n");
        query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma ValidateUnusedExprs;

            define subquery $x() as
                select count(1, 2);
            end define;
            select 1;
        )";
        ExpectFailWithError(query, "<main>:7:24: Error: Aggregation function Count requires exactly 1 argument(s), given: 2\n");
    }

    // With DisableValidateUnusedExprs, the same queries are expected to translate.
    // The translation result is asserted explicitly; previously it was computed
    // and discarded, so this test could never fail.
    Y_UNIT_TEST(DisableValidateUnusedExprs) {
        TString query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma DisableValidateUnusedExprs;

            $foo = count(1);
            select 1;
        )";
        {
            const auto res = SqlToYql(query);
            UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
        }
        query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma DisableValidateUnusedExprs;

            define subquery $x() as
                select count(1, 2);
            end define;
            select 1;
        )";
        {
            const auto res = SqlToYql(query);
            UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
        }
    }
}

// Tests for CREATE / ALTER RESOURCE POOL translation.
// As above, expected error positions depend on the raw-string indentation.
Y_UNIT_TEST_SUITE(ResourcePool) {
    // CREATE RESOURCE POOL: settings are emitted as lower-cased features,
    // in the order asserted below.
    Y_UNIT_TEST(CreateResourcePool) {
        NYql::TAstParseResult res = SqlToYql(R"sql(
                USE plato;
                CREATE RESOURCE POOL MyResourcePool WITH (
                    CONCURRENT_QUERY_LIMIT=20,
                    QUERY_CANCEL_AFTER_SECONDS=86400,
                    QUEUE_TYPE="FIFO"
                );
            )sql");
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write") {
                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"concurrent_query_limit" (Int32 '"20")) '('"query_cancel_after_seconds" (Int32 '"86400")) '('"queue_type" '"FIFO"))#");
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
            }
        };

        TWordCountHive elementStat = { {TString("Write"), 0} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
    }

    // A missing WITH clause and duplicated settings must both be rejected.
    Y_UNIT_TEST(CreateResourcePoolWithBadArguments) {
        ExpectFailWithError(R"sql(
                USE plato;
                CREATE RESOURCE POOL MyResourcePool;
            )sql" , "<main>:3:51: Error: Unexpected token ';' : syntax error...\n\n");

        ExpectFailWithError(R"sql(
                USE plato;
                CREATE RESOURCE POOL MyResourcePool WITH (
                    DUPLICATE_SETTING="first_value",
                    DUPLICATE_SETTING="second_value"
                );
            )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n");
    }

    // ALTER RESOURCE POOL: SET entries become features, RESET entries become resetFeatures.
    Y_UNIT_TEST(AlterResourcePool) {
        NYql::TAstParseResult res = SqlToYql(R"sql(
                USE plato;
                ALTER RESOURCE POOL MyResourcePool
                    SET (CONCURRENT_QUERY_LIMIT = 30, Weight = 5, QUEUE_TYPE = "UNORDERED"),
                    RESET (Query_Cancel_After_Seconds, Query_Count_Limit);
            )sql");
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write") {
                UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"concurrent_query_limit" (Int32 '"30")) '('"queue_type" '"UNORDERED") '('"weight" (Int32 '"5")))))#");
                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"query_cancel_after_seconds" '"query_count_limit")))#");
            }
        };

        TWordCountHive elementStat = { {TString("Write"), 0} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
    }

+ Y_UNIT_TEST(DropResourcePool) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP RESOURCE POOL MyResourcePool; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(BackupCollection) { + Y_UNIT_TEST(CreateBackupCollection) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection WITH ( + STORAGE="local", + TAG="test" -- for testing purposes, not a real thing + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateBackupCollectionWithDatabase) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection DATABASE WITH ( + STORAGE="local", + TAG="test" -- for testing purposes, not a real thing + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" 
'"test"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('type 'database)")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateBackupCollectionWithTables) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection ( + TABLE someTable, + TABLE `prefix/anotherTable` + ) WITH ( + STORAGE="local", + TAG="test" -- for testing purposes, not a real thing + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('('('type 'table) '('path '"someTable")))#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('('('type 'table) '('path '"prefix/anotherTable")))#")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateBackupCollectionWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection; + )sql" , "<main>:3:55: Error: Unexpected token ';' : syntax error...\n\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TABLE TestCollection; + )sql" , "<main>:3:47: Error: Unexpected token 'TestCollection' : syntax error...\n\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION DATABASE `test` TestCollection; + )sql" , "<main>:3:50: Error: Unexpected token '`test`' : syntax 
error...\n\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection WITH ( + DUPLICATE_SETTING="first_value", + DUPLICATE_SETTING="second_value" + ); + )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection WITH ( + INT_SETTING=1 + ); + )sql" , "<main>:4:21: Error: INT_SETTING value should be a string literal\n"); + } + + Y_UNIT_TEST(AlterBackupCollection) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER BACKUP COLLECTION TestCollection + SET (STORAGE="remote"), -- also just for test + SET (TAG1 = "123"), + RESET (TAG2, TAG3); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('settings '('('"storage" '"remote") '('"tag1" '"123"))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetSettings '('"tag2" '"tag3")))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterBackupCollectionEntries) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER BACKUP COLLECTION TestCollection + DROP TABLE `test`, + ADD DATABASE; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('alterEntries)#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'table) '('path '"test") '('action 
'drop)))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'database) '('action 'add)))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropBackupCollection) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP BACKUP COLLECTION TestCollection; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(ResourcePoolClassifier) { + Y_UNIT_TEST(CreateResourcePoolClassifier) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL='wgUserQueries', + MEMBER_NAME='yandex_query@abc' + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"member_name" '"yandex_query@abc") '('"rank" (Int32 '"20")) '('"resource_pool" '"wgUserQueries"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateResourcePoolClassifierWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier; + )sql" , "<main>:3:72: Error: Unexpected token ';' : syntax error...\n\n"); + + 
ExpectFailWithError(R"sql( + USE plato; + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + DUPLICATE_SETTING="first_value", + DUPLICATE_SETTING="second_value" + ); + )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); + } + + Y_UNIT_TEST(AlterResourcePoolClassifier) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (RANK = 30, Weight = 5, MEMBER_NAME = "test@user"), + RESET (Resource_Pool); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"member_name" '"test@user") '('"rank" (Int32 '"30")) '('"weight" (Int32 '"5")))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"resource_pool")))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropResourcePoolClassifier) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(BacktickMatching) { + auto req = "select\n" + " 1 as `Schema has \\`RealCost\\``\n" + " -- foo`bar"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + res = 
SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + req = "select 1 as `a``b`, 2 as ````, 3 as `\\x60a\\x60`, 4 as ```b```, 5 as `\\`c\\``"; + res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + } +} + +Y_UNIT_TEST_SUITE(OlapPartitionCount) { + Y_UNIT_TEST(CorrectUsage) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE TABLE `mytable` (id Uint32, PRIMARY KEY (id)) + PARTITION BY HASH(id) + WITH (STORE = COLUMN, PARTITION_COUNT = 8); + )sql"); + + UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + } + + Y_UNIT_TEST(UseWithoutColumnStore) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE TABLE `mytable` (id Uint32, PRIMARY KEY (id)) + WITH (PARTITION_COUNT = 8); + )sql"); + + UNIT_ASSERT(!res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 1); + UNIT_ASSERT_STRING_CONTAINS(res.Issues.ToString(), "PARTITION_COUNT can be used only with STORE=COLUMN"); + } +} + +Y_UNIT_TEST_SUITE(Backup) { + Y_UNIT_TEST(Simple) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + BACKUP TestCollection; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'incremental")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'backup")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(Incremental) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + BACKUP TestCollection INCREMENTAL; + )sql"); 
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'incremental")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'backup")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(Restore) { + Y_UNIT_TEST(Simple) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + RESTORE TestCollection; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'restore")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AtPoint) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + RESTORE TestCollection AT '2024-06-16_20-14-02'; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('at '"2024-06-16_20-14-02")#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'restore")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(ColumnFamily) { + Y_UNIT_TEST(CompressionLevel) { + NYql::TAstParseResult res = 
SqlToYql(R"( use plato; + CREATE TABLE tableName ( + Key Uint32 FAMILY default, + Value String FAMILY family1, + PRIMARY KEY (Key), + FAMILY default ( + DATA = "test", + COMPRESSION = "lz4", + COMPRESSION_LEVEL = 5 + ), + FAMILY family1 ( + DATA = "test", + COMPRESSION = "lz4", + COMPRESSION_LEVEL = 3 + ) + ); + )"); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("compression_level")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("5")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("3")); + } + }; + + TWordCountHive elementStat = { { TString("Write"), 0 }, { TString("compression_level"), 0 } }; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["compression_level"]); + } +} diff --git a/yql/essentials/sql/v1/sql_ut.h b/yql/essentials/sql/v1/sql_ut.h new file mode 100644 index 00000000000..7e9c3df8e8e --- /dev/null +++ b/yql/essentials/sql/v1/sql_ut.h @@ -0,0 +1,235 @@ + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/sql/sql.h> +#include <util/generic/map.h> + +#include <library/cpp/regex/pcre/pcre.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/split.h> +#include <deque> +#include <unordered_set> +using namespace NSQLTranslation; + +enum class EDebugOutput { + None, + ToCerr, +}; + +const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote | + NYql::TAstPrintFlags::AdaptArbitraryContent; + +inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) { + TStringStream s; + res.Issues.PrintTo(s); + + if (debug == EDebugOutput::ToCerr) { + Cerr << s.Str() << Endl; + } + return s.Str(); +} + +inline NYql::TAstParseResult SqlToYqlWithMode(const 
TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {}, + EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {}) +{ + /* Test entry point for translation: fills the by-value settings copy with the test cluster mapping and parser options below, then calls the settings-based SqlToYql overload. */ google::protobuf::Arena arena; + const auto service = provider ? provider : TString(NYql::YtProviderName); + const TString cluster = "plato"; + /* "plato" maps to the requested provider (NYql::YtProviderName when none is given); "hahn" and "mon" are always overwritten with the YT and Solomon provider names. */ settings.ClusterMapping[cluster] = service; + settings.ClusterMapping["hahn"] = NYql::YtProviderName; + settings.ClusterMapping["mon"] = NYql::SolomonProviderName; + settings.MaxErrors = maxErrors; + settings.Mode = mode; + settings.Arena = &arena; + settings.AnsiLexer = ansiLexer; + /* This helper always turns the ANTLR4 parser off and pins syntax version 1, whatever the caller put into settings. */ settings.Antlr4Parser = false; + settings.SyntaxVersion = 1; + auto res = SqlToYql(query, settings); + /* Err2Str is called only for its side effect of echoing the issues to Cerr; its return value is dropped. */ if (debug == EDebugOutput::ToCerr) { + Err2Str(res, debug); + } + return res; +} + /* QUERY-mode shortcut for SqlToYqlWithMode with its default lexer flag (ansiLexer = false) and default settings. */ +inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug); +} + /* QUERY-mode translation starting from caller-supplied settings (SqlToYqlWithMode takes them by value, so the caller's object is not modified). maxErrors is 10, provider is defaulted, debug output is off and ansiLexer is false. */ +inline NYql::TAstParseResult +SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) { + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, 10, {}, EDebugOutput::None, false, settings); +} + /* Asserts that translating query yields no AST root and that the printed issues equal error (compared with UNIT_ASSERT_NO_DIFF). */ +inline void ExpectFailWithError(const TString& query, const TString& error) { + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), error); +} + /* Like ExpectFailWithError, but the printed issues are matched against the PCRE pattern errorRegex instead of being compared for equality. */ +inline void ExpectFailWithFuzzyError(const TString& query, const TString& errorRegex) { + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT(NPcre::TPcre<char>(errorRegex.c_str()).Matches(Err2Str(res))); +} + /* Same as SqlToYql, but passes ansiLexer = true to SqlToYqlWithMode. */ +inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) 
{ + bool ansiLexer = true; + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug, ansiLexer); +} + +inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) { + NYql::TAstParseResult res = SqlToYqlWithAnsiLexer(query); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), error); +} + +inline TString GetPrettyPrint(const NYql::TAstParseResult& res) { + TStringStream yqlProgram; + res.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote); + return yqlProgram.Str(); +} + +inline TString Quote(const char* str) { + return TStringBuilder() << "'\"" << str << "\""; +} + +class TWordCountHive: public TMap<TString, unsigned> { +public: + TWordCountHive(std::initializer_list<TString> strings) { + for (auto& str: strings) { + emplace(str, 0); + } + } + + TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list) + : TMap(list) + { + } +}; + +typedef std::function<void (const TString& word, const TString& line)> TVerifyLineFunc; + +inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) { + const auto programm = GetPrettyPrint(res); + TVector<TString> yqlProgram; + Split(programm, "\n", yqlProgram); + for (const auto& line: yqlProgram) { + for (auto& counterIter: wordCounter) { + const auto& word = counterIter.first; + auto pos = line.find(word); + while (pos != TString::npos) { + ++counterIter.second; + if (verifyLine) { + verifyLine(word, line); + } + pos = line.find(word, pos + word.length()); + } + } + } + return programm; +} + +inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) { + TString pragma; + if (ansi.Defined()) { + pragma = *ansi ? 
"PRAGMA AnsiInForEmptyOrNullableItemsCollections;" : + "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;"; + } + + NYql::TAstParseResult res = SqlToYql(pragma + query); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + if (!ansi.Defined()) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)")); + } else if (*ansi) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)")); + } + for (auto& hint : expectedHints) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(hint)); + } + }; + TWordCountHive elementStat = {{TString("SqlIn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); +} + +inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) { + VerifySqlInHints(query, expectedHints, false); + VerifySqlInHints(query, expectedHints, true); +} + +inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) { + NSQLTranslation::TTranslationSettings settings; + NSQLTranslation::TTableBindingSettings bindSettings; + bindSettings.ClusterType = "s3"; + bindSettings.Settings["cluster"] = "cluster"; + bindSettings.Settings["path"] = "path"; + bindSettings.Settings["format"] = "format"; + bindSettings.Settings["compression"] = "ccompression"; + bindSettings.Settings["bar"] = "1"; + // schema is not validated in this test but should be valid YSON text + bindSettings.Settings["schema"] = R"__("[ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ]])__"; + bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]"; + settings.Bindings[name] = bindSettings; + return settings; +} + +inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) { + std::deque<NYql::TAstNode const*> wishList{ root }; + 
std::unordered_set<NYql::TAstNode const*> visited; + while(!wishList.empty()){ + auto v = wishList.front(); + wishList.pop_front(); + if (!visitor(v)) + return; + visited.insert(v); + if (v->IsList()) { + for (ui32 i = 0; i != v->GetChildrenCount(); ++i) { + auto child = v->GetChild(i); + if (visited.find(child) == visited.cend()) { + wishList.push_back(child); + } + } + } + } +} + +inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){ + const NYql::TAstNode* result = nullptr; + AstBfs(root, [&result, childIndex, name](auto v) { + if (v->IsList() && v->GetChildrenCount() > childIndex && + v->GetChild(childIndex)->IsAtom() && v->GetChild(childIndex)->GetContent() == name) { + result = v; + return false; + } + return true; }); + return result; +} diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp new file mode 100644 index 00000000000..7f11822ccaf --- /dev/null +++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp @@ -0,0 +1,7434 @@ +#include "sql_ut_antlr4.h" +#include "format/sql_format.h" +#include "lexer/lexer.h" + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/sql/sql.h> +#include <util/generic/map.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/split.h> + +#include <format> + +using namespace NSQLTranslation; + +namespace { + +TParsedTokenList Tokenize(const TString& query) { + auto lexer = NSQLTranslationV1::MakeLexer(true, true); + TParsedTokenList tokens; + NYql::TIssues issues; + UNIT_ASSERT_C(Tokenize(*lexer, query, "Query", tokens, issues, SQL_MAX_PARSER_ERRORS), + issues.ToString()); + + return tokens; +} + +TString ToString(const TParsedTokenList& tokens) { + TStringBuilder reconstructedQuery; + for (const auto& token : tokens) { + if (token.Name == "WS" || token.Name == "EOF") { + continue; + } + if (!reconstructedQuery.empty()) { + reconstructedQuery << ' '; + } 
+ reconstructedQuery << token.Content; + } + return reconstructedQuery; +} + +} + +Y_UNIT_TEST_SUITE(AnsiMode) { + Y_UNIT_TEST(PragmaAnsi) { + UNIT_ASSERT(SqlToYql("PRAGMA ANSI 2016;").IsOk()); + } +} + +Y_UNIT_TEST_SUITE(SqlParsingOnly) { + ///This function is used in BACKWARD COMPATIBILITY tests below that LIMIT the sets of token that CAN NOT be used + ///as identifiers in different contexts in a SQL request + ///\return list of tokens that failed this check + TVector<TString> ValidateTokens(const THashSet<TString>& forbidden, const std::function<TString (const TString& )>& makeRequest) { + THashMap<TString, bool> allTokens; + for (const auto& t: NSQLFormat::GetKeywords()) { + allTokens[t] = !forbidden.contains((t)); + } + for (const auto& f: forbidden) { + UNIT_ASSERT(allTokens.contains(f)); //check that forbidden list contains tokens only(argument check) + } + TVector<TString> failed; + for (const auto& [token, allowed]: allTokens) { + if (SqlToYql(makeRequest(token)).IsOk() != allowed) + failed.push_back(token); + } + return failed; + } + + Y_UNIT_TEST(TokensAsColumnName) { //id_expr + auto failed = ValidateTokens({ + "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST", + "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL", + "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL", + "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP", + "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED", + "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT " << token << " FROM Plato.Input"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsWithoutColumnName) { //id_without + 
auto failed = ValidateTokens({ + "ALL", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST", + "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "DICT", "DISTINCT", "EMPTY_ACTION", "ENUM", "EXCEPT", "EXISTS", "FALSE", "FLOW", "FROM", "FULL", "GLOBAL", + "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL", + "NOT", "NULL", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP", + "SELECT", "SET", "STRUCT", "SYMMETRIC", "TAGGED", "TRUE", "TUPLE", "UNBOUNDED", + "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * WITHOUT " << token << " FROM Plato.Input"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsColumnNameInAddColumn) { //id_schema + auto failed = ValidateTokens({ + "ANY", "AUTOMAP", "CALLABLE", "COLUMN", "DICT", "ENUM", "ERASE", "FALSE", "FLOW", + "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE", + "SET", "STREAM", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "ALTER TABLE Plato.Input ADD COLUMN " << token << " Bool"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsColumnAlias) { + auto failed = ValidateTokens({ + "AUTOMAP", "FALSE", + "GLOBAL", "REPEATABLE", "TRUE" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT Col as " << token << " FROM Plato.Input"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsTableName) { //id_table_or_type + auto failed = ValidateTokens({ + "ANY", "AUTOMAP", "COLUMN", "ERASE", "FALSE", + "GLOBAL", "REPEATABLE", "STREAM", "TRUE" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato." 
<< token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsTableAlias) { //id_table + auto failed = ValidateTokens({ + "AUTOMAP", "CALLABLE", "DICT", "ENUM","FALSE", "FLOW", + "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE", + "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input AS " << token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsHints) { //id_hint + auto failed = ValidateTokens({ + "AUTOMAP", "CALLABLE", "COLUMNS", "DICT", "ENUM", "FALSE", "FLOW", + "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE", + "SCHEMA", "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input WITH " << token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsWindow) { //id_window + auto failed = ValidateTokens({ + "AUTOMAP", "CALLABLE", "DICT", "ENUM", "FALSE", "FLOW", "GLOBAL", "GROUPS", "LIST", "OPTIONAL", + "RANGE", "REPEATABLE", "RESOURCE", "ROWS", "SET", "STRUCT", "TAGGED" ,"TRUE", "TUPLE", "VARIANT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input WINDOW " << token << " AS ()"; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TokensAsIdExprIn) { //id_expr_in + auto failed = ValidateTokens({ + "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST", + "CALLABLE", "CASE", "CAST", "COMPACT", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", + "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL", + "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL", + "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", 
"RESOURCE", "RETURN", "RETURNING", "ROLLUP", + "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED", + "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT" + }, + [](const TString& token){ + TStringBuilder req; + req << "SELECT * FROM Plato.Input WHERE q IN " << token; + return req; + } + ); + UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{}); + } + + Y_UNIT_TEST(TableHints) { + UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input WITH INFER_SCHEMA").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input WITH (INFER_SCHEMA)").IsOk()); + } + + Y_UNIT_TEST(InNoHints) { + TString query = "SELECT * FROM plato.Input WHERE key IN (1,2,3)"; + + VerifySqlInHints(query, { "'('('warnNoAnsi))" }, {}); + VerifySqlInHints(query, { "'()" }, false); + VerifySqlInHints(query, { "'('('ansi))" }, true); + } + + Y_UNIT_TEST(InHintCompact) { + // should parse COMPACT as hint + TString query = "SELECT * FROM plato.Input WHERE key IN COMPACT(1, 2, 3)"; + + VerifySqlInHints(query, { "'('isCompact)" }); + } + + Y_UNIT_TEST(InHintSubquery) { + // should parse tableSource as hint + TString query = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN $subq"; + + VerifySqlInHints(query, { "'('tableSource)" }); + } + + Y_UNIT_TEST(InHintCompactSubquery) { + TString query = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN COMPACT $subq"; + + VerifySqlInHints(query, { "'('isCompact)", "'('tableSource)" }); + } + + Y_UNIT_TEST(CompactKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT(1, 2, 3)").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT * FROM COMPACT").IsOk()); + } + + Y_UNIT_TEST(FamilyKeywordNotReservedForNames) { + // FIXME: check if we can get old behaviour + //UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE FAMILY (FAMILY Uint32, PRIMARY KEY (FAMILY));").IsOk()); + //UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM FAMILY").IsOk()); + 
UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM Input").IsOk()); + } + + Y_UNIT_TEST(ResetKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE RESET (RESET Uint32, PRIMARY KEY (RESET));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT RESET FROM RESET").IsOk()); + } + + Y_UNIT_TEST(SyncKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SYNC (SYNC Uint32, PRIMARY KEY (SYNC));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT SYNC FROM SYNC").IsOk()); + } + + Y_UNIT_TEST(AsyncKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE ASYNC (ASYNC Uint32, PRIMARY KEY (ASYNC));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT ASYNC FROM ASYNC").IsOk()); + } + + Y_UNIT_TEST(DisableKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE DISABLE (DISABLE Uint32, PRIMARY KEY (DISABLE));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT DISABLE FROM DISABLE").IsOk()); + } + + Y_UNIT_TEST(ChangefeedKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE CHANGEFEED (CHANGEFEED Uint32, PRIMARY KEY (CHANGEFEED));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT CHANGEFEED FROM CHANGEFEED").IsOk()); + } + + Y_UNIT_TEST(ReplicationKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE REPLICATION (REPLICATION Uint32, PRIMARY KEY (REPLICATION));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT REPLICATION FROM REPLICATION").IsOk()); + } + + Y_UNIT_TEST(SecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SECONDS (SECONDS Uint32, PRIMARY KEY (SECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT SECONDS FROM SECONDS").IsOk()); + } + + Y_UNIT_TEST(MillisecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MILLISECONDS (MILLISECONDS Uint32, PRIMARY KEY (MILLISECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT MILLISECONDS FROM 
MILLISECONDS").IsOk()); + } + + Y_UNIT_TEST(MicrosecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MICROSECONDS (MICROSECONDS Uint32, PRIMARY KEY (MICROSECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT MICROSECONDS FROM MICROSECONDS").IsOk()); + } + + Y_UNIT_TEST(NanosecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE NANOSECONDS (NANOSECONDS Uint32, PRIMARY KEY (NANOSECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT NANOSECONDS FROM NANOSECONDS").IsOk()); + } + + Y_UNIT_TEST(Jubilee) { + NYql::TAstParseResult res = SqlToYql("USE plato; INSERT INTO Arcadia (r2000000) VALUES (\"2M GET!!!\");"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(QualifiedAsteriskBefore) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA DisableSimpleColumns;" + "select interested_table.*, LENGTH(value) AS megahelpful_len from plato.Input as interested_table;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + static bool seenStar = false; + if (word == "FlattenMembers") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table.")); + } else if (word == "SqlProjectItem") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len"))); + UNIT_ASSERT_VALUES_EQUAL(seenStar, true); + } else if (word == "SqlProjectStarItem") { + seenStar = true; + } + }; + TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]); + } + + Y_UNIT_TEST(QualifiedAsteriskAfter) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA DisableSimpleColumns;" + "select LENGTH(value) AS megahelpful_len, interested_table.* from 
plato.Input as interested_table;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + static bool seenStar = false; + if (word == "FlattenMembers") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table.")); + } else if (word == "SqlProjectItem") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len"))); + UNIT_ASSERT_VALUES_EQUAL(seenStar, false); + } else if (word == "SqlProjectStarItem") { + seenStar = true; + } + }; + TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]); + } + + Y_UNIT_TEST(QualifiedMembers) { + NYql::TAstParseResult res = SqlToYql("select interested_table.key, interested_table.value from plato.Input as interested_table;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + const bool fieldKey = TString::npos != line.find(Quote("key")); + const bool fieldValue = TString::npos != line.find(Quote("value")); + const bool refOnTable = TString::npos != line.find("interested_table."); + if (word == "SqlProjectItem") { + UNIT_ASSERT(fieldKey || fieldValue); + UNIT_ASSERT(!refOnTable); + } else if (word == "Write!") { + UNIT_ASSERT(fieldKey && fieldValue && !refOnTable); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(JoinParseCorrect) { + 
NYql::TAstParseResult res = SqlToYql( + "PRAGMA DisableSimpleColumns;" + " SELECT table_bb.*, table_aa.key as megakey" + " FROM plato.Input AS table_aa" + " JOIN plato.Input AS table_bb" + " ON table_aa.value == table_bb.value;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SelectMembers") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa.")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table_bb.")); + } else if (word == "SqlProjectItem") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megakey"))); + } else if (word == "SqlColumn") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("table_aa"))); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key"))); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SelectMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]); + } + + Y_UNIT_TEST(Join3Table) { + NYql::TAstParseResult res = SqlToYql( + " PRAGMA DisableSimpleColumns;" + " SELECT table_bb.*, table_aa.key as gigakey, table_cc.* " + " FROM plato.Input AS table_aa" + " JOIN plato.Input AS table_bb ON table_aa.key == table_bb.key" + " JOIN plato.Input AS table_cc ON table_aa.subkey == table_cc.subkey;" + ); + Err2Str(res); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SelectMembers") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa.")); + UNIT_ASSERT(line.find("table_bb.") != TString::npos || line.find("table_cc.") != TString::npos); + } else if (word == "SqlProjectItem") { + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("gigakey"))); + } else if (word == "SqlColumn") { + const auto posTableAA = line.find(Quote("table_aa")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, posTableAA); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key"))); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa", posTableAA + 3)); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SelectMembers"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]); + } + + Y_UNIT_TEST(DisabledJoinCartesianProduct) { + NYql::TAstParseResult res = SqlToYql("pragma DisableAnsiImplicitCrossJoin; use plato; select * from A,B,C"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:67: Error: Cartesian product of tables is disabled. 
Please use explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin\n"); + } + + Y_UNIT_TEST(JoinCartesianProduct) { + NYql::TAstParseResult res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from A,B,C"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "EquiJoin") { + auto pos = line.find("Cross"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, pos); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Cross", pos + 1)); + } + }; + TWordCountHive elementStat = {{TString("EquiJoin"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["EquiJoin"]); + } + + Y_UNIT_TEST(JoinWithoutConcreteColumns) { + NYql::TAstParseResult res = SqlToYql( + " use plato;" + " SELECT a.v, b.value" + " FROM `Input1` VIEW `ksv` AS a" + " JOIN `Input2` AS b" + " ON a.k == b.key;" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SqlProjectItem") { + UNIT_ASSERT(line.find(Quote("a.v")) != TString::npos || line.find(Quote("b.value")) != TString::npos); + } else if (word == "SqlColumn") { + const auto posTableA = line.find(Quote("a")); + const auto posTableB = line.find(Quote("b")); + if (posTableA != TString::npos) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("v"))); + } else { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, posTableB); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("value"))); + } + } + }; + TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlColumn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlColumn"]); + } + + Y_UNIT_TEST(JoinWithSameValues) { + NYql::TAstParseResult res = SqlToYql("SELECT a.value, 
b.value FROM plato.Input AS a JOIN plato.Input as b ON a.key == b.key;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "SqlProjectItem") { + const bool isValueFromA = TString::npos != line.find(Quote("a.value")); + const bool isValueFromB = TString::npos != line.find(Quote("b.value")); + UNIT_ASSERT(isValueFromA || isValueFromB); + } if (word == "Write!") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("a.a.")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("b.b.")); + } + }; + TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {"Write!", 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(SameColumnsForDifferentTables) { + NYql::TAstParseResult res = SqlToYql("SELECT a.key, b.key FROM plato.Input as a JOIN plato.Input as b on a.key==b.key;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SameColumnsForDifferentTablesFullJoin) { + NYql::TAstParseResult res = SqlToYql("SELECT a.key, b.key, a.value, b.value FROM plato.Input AS a FULL JOIN plato.Input AS b USING(key);"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(JoinStreamLookupStrategyHint) { + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(res.Root); + } + //case insensitive + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ streamlookup() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(res.Root); + } + } + + Y_UNIT_TEST(JoinConflictingStrategyHint) { + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ Merge() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(!res.Root); + 
UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:91: Error: Conflicting join strategy hints\n"); + } + } + + Y_UNIT_TEST(JoinDuplicatingStrategyHint) { + { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ StreamLookup() */ plato.Input AS b USING(key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:98: Error: Duplicate join strategy hint\n"); + } + } + + Y_UNIT_TEST(WarnCrossJoinStrategyHint) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a CROSS JOIN /*+ merge() */ plato.Input AS b;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:32: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n"); + } + + Y_UNIT_TEST(WarnCartesianProductStrategyHint) { + NYql::TAstParseResult res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; SELECT * FROM A, /*+ merge() */ B;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:74: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n"); + } + + Y_UNIT_TEST(WarnUnknownJoinStrategyHint) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ xmerge() */ plato.Input AS b USING (key);"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRINGS_EQUAL(res.Issues.ToString(), "<main>:1:41: Warning: Unsupported join hint: xmerge, code: 4534\n"); + } + + Y_UNIT_TEST(ReverseLabels) { + NYql::TAstParseResult res = SqlToYql("select in.key as subkey, subkey as key from plato.Input as in;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AutogenerationAliasWithoutCollisionConflict1) { + NYql::TAstParseResult res = SqlToYql("select LENGTH(Value), key as column1 from plato.Input;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AutogenerationAliasWithoutCollision2Conflict2) { + NYql::TAstParseResult res = SqlToYql("select key as column0, LENGTH(Value) from plato.Input;"); + 
UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(InputAliasForQualifiedAsterisk) { + NYql::TAstParseResult res = SqlToYql("use plato; select zyuzya.*, key from plato.Input as zyuzya;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectSupportsResultColumnsWithTrailingComma) { + NYql::TAstParseResult res = SqlToYql("select a, b, c, from plato.Input;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectOrderByLabeledColumn) { + NYql::TAstParseResult res = SqlToYql("pragma DisableOrderedColumns; select key as goal from plato.Input order by goal"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "DataSource") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("plato")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Input")); + + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("goal")); + } else if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("goal")); + + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("key")); + } + }; + TWordCountHive elementStat = {{TString("DataSource"), 0}, {TString("Sort"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["DataSource"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + } + + Y_UNIT_TEST(SelectOrderBySimpleExpr) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by a + a"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectOrderByDuplicateLabels) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by a, a"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectOrderByExpression) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input as i order by cast(key as uint32) + cast(subkey as uint32)"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"+MayWarn\"")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("key")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("subkey")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Bool 'true)")); + + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("i.key")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("i.subkey")); + } + }; + TWordCountHive elementStat = {{TString("Sort"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + } + + Y_UNIT_TEST(SelectOrderByExpressionDesc) { + NYql::TAstParseResult res = SqlToYql("pragma disablesimplecolumns; select i.*, key, subkey from plato.Input as i order by cast(i.key as uint32) - cast(i.subkey as uint32) desc"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"-MayWarn\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Bool 'false)")); + } else if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'columns")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("prefix")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"i.\"")); + } + }; + TWordCountHive elementStat = {{TString("Sort"), 0}, {TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(SelectOrderByExpressionAsc) { + NYql::TAstParseResult res = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) % cast(i.subkey as uint32) asc"); + UNIT_ASSERT(res.Root); + TVerifyLineFunc verifyLine = 
[](const TString& word, const TString& line) { + if (word == "Sort") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"%MayWarn\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Bool 'true)")); + } else if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'columns")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"subkey\"")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("i.")); + } + }; + TWordCountHive elementStat = {{TString("Sort"), 0}, {TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Sort"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(ReferenceToKeyInSubselect) { + NYql::TAstParseResult res = SqlToYql("select b.key from (select a.key from plato.Input as a) as b;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(OrderByCastValue) { + NYql::TAstParseResult res = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) desc;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByCastValue) { + NYql::TAstParseResult res = SqlToYql("select count(1) from plato.Input as i group by cast(key as uint8);"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(KeywordInSelectColumns) { + NYql::TAstParseResult res = SqlToYql("select in, s.check from (select 1 as in, \"test\" as check) as s;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectAllGroupBy) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input group by subkey;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(CreateObjectWithFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const 
TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(CreateObjectIfNotExists) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT IF NOT EXISTS secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectIfNotExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(CreateObjectWithFeaturesStrings) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=\"Value1\", K2='V2', K3=V3, K4='', K5=`aaa`, K6='a\\'aa');"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"K3\" '\"V3\") '('\"K4\" '\"\") '('\"K5\" '\"aaa\") '('\"K6\" '\"a'aa\") '('\"Key1\" '\"Value1\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("SECRET"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + } + + Y_UNIT_TEST(UpsertObjectWithFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; UPSERT OBJECT secretId (TYPE SECRET) 
WITH (Key1=Value1, K2=V2);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("upsertObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(CreateObjectWithFeaturesAndFlags) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2, RECURSE);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"Key1\" '\"Value1\") '('\"RECURSE\")")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(Select1Type) { + NYql::TAstParseResult res = SqlToYql("SELECT 1 type;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectTableType) { + NYql::TAstParseResult res = SqlToYql("USE plato; SELECT * from T type;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(CreateObjectNoFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + 
TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(AlterObjectWithFeatures) { + NYql::TAstParseResult res = SqlToYql( + "USE plato;\n" + "declare $path as String;\n" + "ALTER OBJECT secretId (TYPE SECRET) SET (Key1=$path, K2=V2);" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"Key1\" (EvaluateAtom \"$path\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"K2\" '\"V2\"")); + + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alterObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(AlterObjectNoFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER OBJECT secretId (TYPE SECRET);"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(DropObjectNoFeatures) { + NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(DropObjectWithFeatures) { + 
NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH (A, B, C);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(DropObjectWithOneOption) { + NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH OVERRIDE;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"OVERRIDE\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + Y_UNIT_TEST(DropObjectIfExists) { + NYql::TAstParseResult res = SqlToYql("USE plato; DROP OBJECT IF EXISTS secretId (TYPE SECRET);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObjectIfExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}, {TString("SECRET"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]); + } + + 
Y_UNIT_TEST(PrimaryKeyParseCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, Subkey Int64, Value String, PRIMARY KEY (Key, Subkey));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"Key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"Subkey\"")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("primarykey"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["primarykey"]); + } + + Y_UNIT_TEST(CreateTableNonNullableYqlTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32 not null);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '())))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNullableYqlTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNonNullablePgTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a pg_int4 not null);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (PgType '_int4) '('columnConstrains '('('not_null))) '())))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNullablePgTypeAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a pg_int4);"); + UNIT_ASSERT(res.Root); + + res.Root->PrettyPrintTo(Cout, PRETTY_FLAGS); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (PgType '_int4)) '('columnConstrains '()) '()))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNullPkColumnsAreAllowed) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableNotNullPkColumnsAreIdempotentAstCorrect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '()))) '('primarykey '('"a"))))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableWithIfNotExists) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE IF NOT EXISTS t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create_if_not_exists) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTempTable) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TEMP TABLE t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, + line.find(R"__((Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__"), line); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTemporaryTable) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TEMPORARY TABLE t (a int32, primary key(a));"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, + line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__"), line); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + } + + Y_UNIT_TEST(CreateTableWithoutTypes) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a));"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(CreateTableAsSelectWithTypes) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a)) AS SELECT * FROM ts;"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(CreateTableAsSelect) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a, b, primary key(a)) AS SELECT * FROM ts;"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((let world (Write! 
world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a") '('"b"))) '('primarykey '('"a"))))))__")); + } + if (word == "Read!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Read! world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}, {TString("Read!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]); + } + + Y_UNIT_TEST(CreateTableAsSelectOnlyPrimary) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (primary key(a)) AS SELECT * FROM ts;"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((let world (Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '()) '('primarykey '('"a"))))))__")); + } + if (word == "Read!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Read! 
world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}, {TString("Read!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]); + } + + Y_UNIT_TEST(CreateTableAsValuesFail) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a)) AS VALUES (1), (2);"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(CreateTableDuplicatedPkColumnsFail) { + NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a, a));"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(DeleteFromTableByKey) { + NYql::TAstParseResult res = SqlToYql("delete from plato.Input where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DeleteFromTable) { + NYql::TAstParseResult res = SqlToYql("delete from plato.Input;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DeleteFromTableOnValues) { + NYql::TAstParseResult res = SqlToYql("delete from plato.Input on (key) values (1);", + 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") 
{ + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DeleteFromTableOnSelect) { + NYql::TAstParseResult res = SqlToYql( + "delete from plato.Input on select key from plato.Input where value > 0;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'delete_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UpdateByValues) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set key = 777, value = 'cool' where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)")); + } else if (word == "AsStruct") { + const bool isKey = line.find("key") != TString::npos; + const bool isValue = line.find("value") != TString::npos; + UNIT_ASSERT(isKey || isValue); + if (isKey) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("777"))); + } else if (isValue) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("cool"))); + } + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AsStruct"]); + } + + Y_UNIT_TEST(UpdateByMultiValues) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set (key, value, subkey) = ('2','ddd',':') where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + 
TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)")); + } else if (word == "AsStruct") { + const bool isKey = line.find("key") != TString::npos; + const bool isSubkey = line.find("subkey") != TString::npos; + const bool isValue = line.find("value") != TString::npos; + UNIT_ASSERT(isKey || isSubkey || isValue); + if (isKey && !isSubkey) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("2"))); + } else if (isSubkey) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote(":"))); + } else if (isValue) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("ddd"))); + } + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AsStruct"]); + } + + Y_UNIT_TEST(UpdateBySelect) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set (key, value, subkey) = (select key, value, subkey from plato.Input where key = 911) where key = 200;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + int lineIndex = 0; + int writeLineIndex = -1; + bool found = false; + + TVerifyLineFunc verifyLine = [&lineIndex, &writeLineIndex, &found](const TString& word, const TString& line) { + if (word == "Write") { + writeLineIndex = lineIndex; + found = line.find("('mode 'update)") != TString::npos; + } else if (word == "mode") { + found |= lineIndex == writeLineIndex + 1 && line.find("('mode 'update)") != TString::npos; + UNIT_ASSERT(found); + } + + ++lineIndex; + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("mode"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UpdateSelfModifyAll) { + NYql::TAstParseResult res = SqlToYql("update plato.Input set subkey = subkey + 's';", 
10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)")); + } else if (word == "AsStruct") { + const bool isSubkey = line.find("subkey") != TString::npos; + UNIT_ASSERT(isSubkey); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("subkey"))); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("s"))); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AsStruct"]); + } + + Y_UNIT_TEST(UpdateOnValues) { + NYql::TAstParseResult res = SqlToYql("update plato.Input on (key, value) values (5, 'cool')", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UpdateOnSelect) { + NYql::TAstParseResult res = SqlToYql( + "update plato.Input on select key, value + 1 as value from plato.Input", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update_on)")); + } + }; + + TWordCountHive elementStat = {{TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UnionAllTest) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input UNION ALL select subkey FROM plato.Input;"); + UNIT_ASSERT(res.Root); + + TWordCountHive 
elementStat = {{TString("UnionAll"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["UnionAll"]); + } + + Y_UNIT_TEST(UnionTest) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input UNION select subkey FROM plato.Input;"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("Union"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Union"]); + } + + Y_UNIT_TEST(UnionAggregationTest) { + NYql::TAstParseResult res = SqlToYql(R"( + SELECT 1 + UNION ALL + SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 + UNION + SELECT 1 UNION SELECT 1 UNION SELECT 1 UNION SELECT 1 + UNION ALL + SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1; + )"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("Union"), 0}, {TString("UnionAll"), 0}}; + VerifyProgram(res, elementStat, {}); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["UnionAll"]); + UNIT_ASSERT_VALUES_EQUAL(3, elementStat["Union"]); + } + + Y_UNIT_TEST(DeclareDecimalParameter) { + NYql::TAstParseResult res = SqlToYql("declare $value as Decimal(22,9); select $value as cnt;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SimpleGroupBy) { + NYql::TAstParseResult res = SqlToYql("select count(1),z from plato.Input group by key as z order by z;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(EmptyColumnName0) { + /// Now it's parsed well and error occur on validate step like "4:31:Empty struct member name is not allowed" in "4:31:Function: AddMember" + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (``, list1) values (0, AsList(0, 1, 2));"); + /// Verify that parsed well without crash + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(KikimrRollback) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from Input; rollback;", 10, "kikimr"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("rollback"), 0}}; + VerifyProgram(res, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, 
elementStat["rollback"]); + } + + Y_UNIT_TEST(PragmaFile) { + NYql::TAstParseResult res = SqlToYql(R"(pragma file("HW", "sbr:181041334");)"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString(R"((let world (Configure! world (DataSource '"config") '"AddFileByUrl" '"HW" '"sbr:181041334")))"), 0}}; + VerifyProgram(res, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat.cbegin()->second); + } + + Y_UNIT_TEST(DoNotCrashOnNamedInFilter) { + NYql::TAstParseResult res = SqlToYql("USE plato; $all = ($table_name) -> { return true; }; SELECT * FROM FILTER(Input, $all)"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(PragmasFileAndUdfOrder) { + NYql::TAstParseResult res = SqlToYql(R"( + PRAGMA file("libvideoplayers_udf.so", "https://proxy.sandbox.yandex-team.ru/235185290"); + PRAGMA udf("libvideoplayers_udf.so"); + )"); + UNIT_ASSERT(res.Root); + + const auto programm = GetPrettyPrint(res); + const auto file = programm.find("AddFileByUrl"); + const auto udfs = programm.find("ImportUdfs"); + UNIT_ASSERT(file < udfs); + } + + Y_UNIT_TEST(ProcessUserType) { + NYql::TAstParseResult res = SqlToYql("process plato.Input using Kikimr::PushData(TableRows());", 1, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Kikimr.PushData") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TupleType")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TypeOf")); + } + }; + + TWordCountHive elementStat = {{TString("Kikimr.PushData"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Kikimr.PushData"]); + } + + Y_UNIT_TEST(ProcessUserTypeAuth) { + NYql::TAstParseResult res = SqlToYql("process plato.Input using YDB::PushData(TableRows(), AsTuple('oauth', SecureParam('api:oauth')));", 1, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& 
line) { + if (word == "YDB.PushData") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TupleType")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TypeOf")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("api:oauth")); + } + }; + + TWordCountHive elementStat = {{TString("YDB.PushData"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["YDB.PushData"]); + } + + Y_UNIT_TEST(SelectStreamRtmr) { + NYql::TAstParseResult res = SqlToYql( + "USE plato; INSERT INTO Output SELECT STREAM key FROM Input;", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + + res = SqlToYql( + "USE plato; INSERT INTO Output SELECT key FROM Input;", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectStreamRtmrJoinWithYt) { + NYql::TAstParseResult res = SqlToYql( + "USE plato; INSERT INTO Output SELECT STREAM key FROM Input LEFT JOIN hahn.ttt as t ON Input.key = t.Name;", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SelectStreamNonRtmr) { + NYql::TAstParseResult res = SqlToYql( + "USE plato; INSERT INTO Output SELECT STREAM key FROM Input;", + 10); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: SELECT STREAM is unsupported for non-streaming sources\n"); + } + + Y_UNIT_TEST(GroupByHopRtmr) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; INSERT INTO Output SELECT key, SUM(value) AS value FROM Input + GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S"); + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByHopRtmrSubquery) { + // 'use plato' intentially avoided + NYql::TAstParseResult res = SqlToYql(R"( + SELECT COUNT(*) AS value FROM (SELECT * FROM plato.Input) + GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S") + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByHopRtmrSubqueryBinding) { + 
NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + $q = SELECT * FROM Input; + INSERT INTO Output SELECT STREAM * FROM ( + SELECT COUNT(*) AS value FROM $q + GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S") + ); + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(GroupByNoHopRtmr) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; INSERT INTO Output SELECT STREAM key, SUM(value) AS value FROM Input + GROUP BY key; + )", 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:22: Error: Streaming group by query must have a hopping window specification.\n"); + } + + Y_UNIT_TEST(KikimrInserts) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + INSERT INTO Output SELECT key, value FROM Input; + INSERT OR ABORT INTO Output SELECT key, value FROM Input; + INSERT OR IGNORE INTO Output SELECT key, value FROM Input; + INSERT OR REVERT INTO Output SELECT key, value FROM Input; + )", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(WarnMissingIsBeforeNotNull) { + NYql::TAstParseResult res = SqlToYql("select 1 NOT NULL"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: Missing IS keyword before NOT NULL, code: 4507\n"); + } + + Y_UNIT_TEST(Subqueries) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + $sq1 = (SELECT * FROM plato.Input); + + $sq2 = SELECT * FROM plato.Input; + + $squ1 = ( + SELECT * FROM plato.Input + UNION ALL + SELECT * FROM plato.Input + ); + + $squ2 = + SELECT * FROM plato.Input + UNION ALL + SELECT * FROM plato.Input; + + $squ3 = ( + (SELECT * FROM plato.Input) + UNION ALL + (SELECT * FROM plato.Input) + ); + + SELECT * FROM $sq1; + SELECT * FROM $sq2; + SELECT * FROM $squ1; + SELECT * FROM $squ2; + SELECT * FROM $squ3; + )"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(SubqueriesJoin) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + $left = SELECT * FROM 
plato.Input1 WHERE value != "BadValue"; + $right = SELECT * FROM plato.Input2; + + SELECT * FROM $left AS l + JOIN $right AS r + ON l.key == r.key; + )"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AnyInBackticksAsTableName) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from `any`;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(AnyJoinForTableAndSubQuery) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + $r = SELECT * FROM plato.Input2; + + SELECT * FROM ANY plato.Input1 AS l + LEFT JOIN ANY $r AS r + USING (key); + )"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "EquiJoin") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('left 'any)")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('right 'any)")); + } + }; + + TWordCountHive elementStat = {{TString("left"), 0}, {TString("right"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["left"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["right"]); + } + + Y_UNIT_TEST(AnyJoinForTableAndTableSource) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + $r = AsList( + AsStruct("aaa" as key, "bbb" as subkey, "ccc" as value) + ); + + SELECT * FROM ANY plato.Input1 AS l + LEFT JOIN ANY AS_TABLE($r) AS r + USING (key); + )"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "EquiJoin") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('left 'any)")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('right 'any)")); + } + }; + + TWordCountHive elementStat = {{TString("left"), 0}, {TString("right"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["left"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["right"]); + } + + Y_UNIT_TEST(AnyJoinNested) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + + FROM ANY 
Input1 as a + JOIN Input2 as b ON a.key = b.key + LEFT JOIN ANY Input3 as c ON a.key = c.key + RIGHT JOIN ANY Input4 as d ON d.key = b.key + CROSS JOIN Input5 + SELECT *; + )"); + + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("left"), 0}, {TString("right"), 0}}; + VerifyProgram(res, elementStat); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["left"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["right"]); + } + + Y_UNIT_TEST(InlineAction) { + NYql::TAstParseResult res = SqlToYql( + "do begin\n" + " select 1\n" + "; end do\n"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), ""); + } + + Y_UNIT_TEST(FlattenByCorrelationName) { + UNIT_ASSERT(SqlToYql("select * from plato.Input as t flatten by t.x").IsOk()); + UNIT_ASSERT(SqlToYql("select * from plato.Input as t flatten by t -- same as flatten by t.t").IsOk()); + } + + Y_UNIT_TEST(DiscoveryMode) { + UNIT_ASSERT(SqlToYqlWithMode("insert into plato.Output select * from plato.Input", NSQLTranslation::ESqlMode::DISCOVERY).IsOk()); + UNIT_ASSERT(SqlToYqlWithMode("select * from plato.concat(Input1, Input2)", NSQLTranslation::ESqlMode::DISCOVERY).IsOk()); + UNIT_ASSERT(SqlToYqlWithMode("select * from plato.each(AsList(\"Input1\", \"Input2\"))", NSQLTranslation::ESqlMode::DISCOVERY).IsOk()); + } + + Y_UNIT_TEST(CubeWithAutoGeneratedLikeColumnName) { + UNIT_ASSERT(SqlToYql("select key,subkey,group from plato.Input group by cube(key,subkey,group)").IsOk()); + } + + Y_UNIT_TEST(CubeWithAutoGeneratedLikeAlias) { + UNIT_ASSERT(SqlToYql("select key,subkey,group from plato.Input group by cube(key,subkey,value as group)").IsOk()); + } + + Y_UNIT_TEST(FilterCanBeUsedAsColumnIdOrBind) { + UNIT_ASSERT(SqlToYql("select filter from plato.Input").IsOk()); + UNIT_ASSERT(SqlToYql("select 1 as filter").IsOk()); + UNIT_ASSERT(SqlToYql("$filter = 1; select $filter").IsOk()); + } + + Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenTopLevelStatements) { + UNIT_ASSERT(SqlToYql(";;select 1; ; select 
2;/*comment*/;select 3;;--comment\n;select 4;;").IsOk()); + } + + Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenActionStatements) { + TString req = + "define action $action($b,$c) as\n" + " ;;$d = $b + $c;\n" + " select $b;\n" + " select $c;;\n" + " select $d,\n" + "end define;\n" + "\n" + "do $action(1,2);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenInlineActionStatements) { + TString req = + "do begin\n" + " ;select 1,\n" + "end do;\n" + "evaluate for $i in AsList(1,2,3) do begin\n" + " select $i;;\n" + " select $i + $i;;\n" + "end do;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenLambdaStatements) { + TString req = + "$x=1;\n" + "$foo = ($a, $b)->{\n" + " ;;$v = $a + $b;\n" + " $bar = ($c) -> {; return $c << $x};;\n" + " return $bar($v);;\n" + "};\n" + "select $foo(1,2);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(StringLiteralWithEscapedBackslash) { + NYql::TAstParseResult res1 = SqlToYql(R"foo(SELECT 'a\\';)foo"); + NYql::TAstParseResult res2 = SqlToYql(R"foo(SELECT "a\\";)foo"); + UNIT_ASSERT(res1.Root); + UNIT_ASSERT(res2.Root); + + TWordCountHive elementStat = {{TString("a\\"), 0}}; + + VerifyProgram(res1, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["a\\"]); + + VerifyProgram(res2, elementStat); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["a\\"]); + } + + Y_UNIT_TEST(StringMultiLineLiteralWithEscapes) { + UNIT_ASSERT(SqlToYql("SELECT @@@foo@@@@bar@@@").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT @@@@@@@@@").IsOk()); + } + + Y_UNIT_TEST(StringMultiLineLiteralConsequitiveAt) { + UNIT_ASSERT(!SqlToYql("SELECT @").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT @@").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT @@@").IsOk()); + UNIT_ASSERT( SqlToYql("SELECT @@@@").IsOk()); + UNIT_ASSERT( SqlToYql("SELECT @@@@@").IsOk()); + + UNIT_ASSERT(!SqlToYql("SELECT @@@@@@").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT 
@@@@@@@").IsOk()); + + UNIT_ASSERT( SqlToYql("SELECT @@@@@@@@").IsOk()); + UNIT_ASSERT( SqlToYql("SELECT @@@@@@@@@").IsOk()); + UNIT_ASSERT(!SqlToYql("SELECT @@@@@@@@@@").IsOk()); + } + + Y_UNIT_TEST(ConstnessForListDictSetCreate) { + auto req = "$foo = ($x, $y) -> (\"aaaa\");\n" + "\n" + "select\n" + " $foo(sum(key), ListCreate(String)),\n" + " $foo(sum(key), DictCreate(String, String)),\n" + " $foo(sum(key), SetCreate(String)),\n" + "from (select 1 as key);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(CanUseEmptyTupleInWindowPartitionBy) { + auto req = "select sum(key) over w\n" + "from plato.Input\n" + "window w as (partition compact by (), (subkey), (), value || value as dvalue);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(DenyAnsiOrderByLimitLegacyMode) { + auto req = "pragma DisableAnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input order by key limit 10\n" + "union all\n" + "select * from Input order by key limit 1;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: DisableAnsiOrderByLimitInUnionAll pragma is deprecated and no longer supported\n"); + } + + Y_UNIT_TEST(ReduceUsingUdfWithShortcutsWorks) { + auto req = "use plato;\n" + "\n" + "$arg = 'foo';\n" + "$func = XXX::YYY($arg);\n" + "\n" + "REDUCE Input ON key using $func(subkey);\n" + "REDUCE Input ON key using $func(UUU::VVV(TableRow()));\n"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + req = "use plato;\n" + "\n" + "$arg = 'foo';\n" + "$func = XXX::YYY($arg);\n" + "\n" + "REDUCE Input ON key using all $func(subkey);\n" + "REDUCE Input ON key using all $func(UUU::VVV(TableRow()));"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(YsonDisableStrict) { + UNIT_ASSERT(SqlToYql("pragma yson.DisableStrict = \"false\";").IsOk()); + UNIT_ASSERT(SqlToYql("pragma yson.DisableStrict;").IsOk()); + } + + Y_UNIT_TEST(YsonStrict) { + UNIT_ASSERT(SqlToYql("pragma yson.Strict = 
\"false\";").IsOk()); + UNIT_ASSERT(SqlToYql("pragma yson.Strict;").IsOk()); + } + + Y_UNIT_TEST(JoinByTuple) { + auto req = "use plato;\n" + "\n" + "select * from T1 as a\n" + "join T2 as b\n" + "on AsTuple(a.key, a.subkey) = AsTuple(b.key, b.subkey);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(JoinByStruct) { + auto req = "use plato;\n" + "\n" + "select * from T1 as a\n" + "join T2 as b\n" + "on AsStruct(a.key as k, a.subkey as sk) = AsStruct(b.key as k, b.subkey as sk);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(JoinByUdf) { + auto req = "use plato;\n" + "\n" + "select a.align\n" + "from T1 as a\n" + "join T2 as b\n" + "on Yson::SerializeJsonEncodeUtf8(a.align)=b.align;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(EscapedIdentifierAsLambdaArg) { + auto req = "$f = ($`foo bar`, $x) -> { return $`foo bar` + $x; };\n" + "\n" + "select $f(1, 2);"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(lambda '(\"$foo bar\" \"$x\")"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOnlyCallable) { + auto req = "SELECT Udf(DateTime::FromString)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType)))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarTypeNoRun) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf 
(AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\")"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarRunNoType) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, Void() as RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarFullTest) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, Void() As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, '55' As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (String '\"55\"))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + 
Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs2) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, AsTuple(32, 'no', AsStruct(1e-9 As SomeFloat)) As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" '((Int32 '\"32\") (String '\"no\") (AsStruct '('\"SomeFloat\" (Double '\"1e-9\")))))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOptional) { + auto req = "SELECT Udf(DateTime::FromString, String?, Int32??, Tuple<Int32, Float>, \"foo\" as TypeConfig, Void() As RunConfig)(\"2022-01-01\");"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (OptionalType (DataType 'String)) (OptionalType (OptionalType (DataType 'Int32))) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(CompactionPolicyParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( COMPACTION_POLICY = "SomeCompactionPreset" );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("compactionPolicy")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SomeCompactionPreset")); + } + }; + + TWordCountHive elementStat = { 
{TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AutoPartitioningBySizeParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( AUTO_PARTITIONING_BY_SIZE = ENABLED );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("autoPartitioningBySize")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(UniformPartitionsParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( UNIFORM_PARTITIONS = 16 );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("uniformPartitions")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("16")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DateTimeTtlParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(IntTtlParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt AS SECONDS);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(TieringParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( TIERING = 'my_tiering' );)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiering")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("my_tiering")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(StoreExternalBlobsParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key)) + WITH ( STORE_EXTERNAL_BLOBS = ENABLED 
);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storeExternalBlobs")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DefaultValueColumn2) { + auto res = SqlToYql(R"( use plato; + $lambda = () -> { + RETURN CAST(RandomUuid(2) as String) + }; + + CREATE TABLE tableName ( + Key Uint32 DEFAULT RandomNumber(1), + Value String DEFAULT $lambda, + PRIMARY KEY (Key) + ); + )"); + + UNIT_ASSERT_C(res.Root, Err2Str(res)); + + const auto program = GetPrettyPrint(res); + + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomNumber")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomUuid")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("columnConstrains")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("columnConstrains")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("Write")); + +#if 0 + Cerr << program << Endl; +#endif + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DefaultValueColumn3) { + auto res = SqlToYql(R"( use plato; + + CREATE TABLE tableName ( + database_id Utf8, + cloud_id Utf8, + global_id Utf8 DEFAULT database_id || "=====", + PRIMARY KEY (database_id) + ); + )"); + + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:6:40: Error: Column reference \"database_id\" is not allowed in current scope\n"); + UNIT_ASSERT(!res.Root); + } + + Y_UNIT_TEST(DefaultValueColumn) { + auto res = SqlToYql(R"( use plato; + CREATE TABLE tableName ( + Key Uint32 FAMILY cold DEFAULT 5, + Value String FAMILY default DEFAULT "empty", + PRIMARY KEY (Key), + 
FAMILY default ( + DATA = "test", + COMPRESSION = "lz4" + ), + FAMILY cold ( + DATA = "test", + COMPRESSION = "off" + ) + ); + )"); + + UNIT_ASSERT_C(res.Root, Err2Str(res)); + +#if 0 + const auto program = GetPrettyPrint(res); + Cerr << program << Endl; +#endif + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("default")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnConstrains")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnFamilies")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(ChangefeedParseCorrect) { + auto res = SqlToYql(R"( USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH ( + MODE = 'KEYS_ONLY', + FORMAT = 'json', + INITIAL_SCAN = TRUE, + VIRTUAL_TIMESTAMPS = FALSE, + RESOLVED_TIMESTAMPS = Interval("PT1S"), + RETENTION_PERIOD = Interval("P1D"), + TOPIC_MIN_ACTIVE_PARTITIONS = 10, + AWS_REGION = 'aws:region' + ) + ); + )"); + UNIT_ASSERT_C(res.Root, Err2Str(res)); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("changefeed")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("mode")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("KEYS_ONLY")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("format")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("json")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("initial_scan")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("true")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("virtual_timestamps")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("false")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, 
line.find("resolved_timestamps")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("retention_period")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("topic_min_active_partitions")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("aws_region")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("aws:region")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CloneForAsTableWorksWithCube) { + UNIT_ASSERT(SqlToYql("SELECT * FROM AS_TABLE([<|k1:1, k2:1|>]) GROUP BY CUBE(k1, k2);").IsOk()); + } + + Y_UNIT_TEST(WindowPartitionByColumnProperlyEscaped) { + NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input WINDOW w AS (PARTITION BY `column with space`);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "CalcOverWindow") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"column with space\"")); + } + }; + + TWordCountHive elementStat = { {TString("CalcOverWindow"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]); + } + + Y_UNIT_TEST(WindowPartitionByExpressionWithoutAliasesAreAllowed) { + NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input as i WINDOW w AS (PARTITION BY ii.subkey);"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "AddMember") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("AddMember row 'group_w_0 (SqlAccess 'struct (Member row '\"ii\")")); + } + if (word == "CalcOverWindow") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("CalcOverWindow core '('\"group_w_0\")")); + } + }; + + TWordCountHive elementStat = { {TString("CalcOverWindow"), 0}, {TString("AddMember"), 0} }; + VerifyProgram(res, elementStat, 
verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AddMember"]); + } + + Y_UNIT_TEST(PqReadByAfterUse) { + ExpectFailWithError("use plato; pragma PqReadBy='plato2';", + "<main>:1:28: Error: Cluster in PqReadPqBy pragma differs from cluster specified in USE statement: plato2 != plato\n"); + + UNIT_ASSERT(SqlToYql("pragma PqReadBy='plato2';").IsOk()); + UNIT_ASSERT(SqlToYql("pragma PqReadBy='plato2'; use plato;").IsOk()); + UNIT_ASSERT(SqlToYql("$x='plato'; use rtmr:$x; pragma PqReadBy='plato2';").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; pragma PqReadBy='dq';").IsOk()); + } + + Y_UNIT_TEST(MrObject) { + NYql::TAstParseResult res = SqlToYql( + "declare $path as String;\n" + "select * from plato.object($path, `format`, \"comp\" || \"ression\" as compression, 1 as bar) with schema (Int32 as y, String as x)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "MrObject") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((MrObject (EvaluateAtom "$path") '"format" '('('"compression" (Concat (String '"comp") (String '"ression"))) '('"bar" (Int32 '"1")))))__")); + } else if (word == "userschema") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__('('('"userschema" (StructType '('"y" (DataType 'Int32)) '('"x" (DataType 'String))) '('"y" '"x"))))__")); + } + }; + + TWordCountHive elementStat = {{TString("MrObject"), 0}, {TString("userschema"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrObject"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]); + } + + Y_UNIT_TEST(TableBindings) { + NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo"); + NYql::TAstParseResult res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const 
TString& line) { + if (word == "MrObject") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((MrTableConcat (Key '('table (String '"path")))) (Void) '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__")); + } + }; + + TWordCountHive elementStat = {{TString("MrTableConcat"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrTableConcat"]); + + settings.DefaultCluster = "plato"; + settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED; + res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n"); + UNIT_ASSERT(!res.Root); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP; + res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) { + if (word == "MrTableConcat") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((MrTableConcat (Key '('table (String '"foo")))) (Void) '())))__")); + } + }; + + TWordCountHive elementStat2 = {{TString("MrTableConcat"), 0}}; + VerifyProgram(res, elementStat2, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["MrTableConcat"]); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING; + res = SqlToYqlWithSettings( + "select * from bindings.foo", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Warning: Please remove 'bindings.' 
from your query, the support for this syntax will be dropped soon, code: 4538\n"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat3 = {{TString("MrTableConcat"), 0}}; + VerifyProgram(res, elementStat3, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["MrTableConcat"]); + } + + Y_UNIT_TEST(TableBindingsWithInsert) { + NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo"); + NYql::TAstParseResult res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write!") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! world sink (Key '('table (String '"path"))) values '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__")); + } + }; + + TWordCountHive elementStat = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]); + + settings.DefaultCluster = "plato"; + settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED; + res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:13: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n"); + UNIT_ASSERT(!res.Root); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP; + res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), ""); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) { + if (word == "Write!") { + //UNIT_ASSERT_VALUES_EQUAL(line, ""); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, + line.find(R"__((Write! 
world sink (Key '('table (String '"foo"))) values '('('mode 'renew)))__")); + } + }; + + TWordCountHive elementStat2 = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat2, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["Write!"]); + + settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING; + res = SqlToYqlWithSettings( + "insert into bindings.foo with truncate (x, y) values (1, 2);", + settings + ); + UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:13: Warning: Please remove 'bindings.' from your query, the support for this syntax will be dropped soon, code: 4538\n"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat3 = {{TString("Write!"), 0}}; + VerifyProgram(res, elementStat3, verifyLine2); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["Write!"]); + } + + Y_UNIT_TEST(TrailingCommaInWithout) { + UNIT_ASSERT(SqlToYql("SELECT * WITHOUT stream, FROM plato.Input").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT a.* WITHOUT a.intersect, FROM plato.Input AS a").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT a.* WITHOUT col1, col2, a.col3, FROM plato.Input AS a").IsOk()); + } + + Y_UNIT_TEST(NoStackOverflowOnBigCaseStatement) { + TStringBuilder req; + req << "select case 1 + 123"; + for (size_t i = 0; i < 20000; ++i) { + req << " when " << i << " then " << i + 1; + } + req << " else 100500 end;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + Y_UNIT_TEST(CollectPreaggregatedInListLiteral) { + UNIT_ASSERT(SqlToYql("SELECT [COUNT(DISTINCT a+b)] FROM plato.Input").IsOk()); + } + + Y_UNIT_TEST(SmartParenInGroupByClause) { + UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input GROUP BY (k, v)").IsOk()); + } + + Y_UNIT_TEST(AlterTableRenameToIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table RENAME TO moved").IsOk()); + } + + Y_UNIT_TEST(AlterTableAddDropColumnIsCorrect) { + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table ADD COLUMN addc uint64, DROP COLUMN dropc, ADD addagain uint64").IsOk()); + } + + 
// ALTER TABLE ... SET (TTL = ...) is accepted both with and without an explicit column unit.
Y_UNIT_TEST(AlterTableSetTTLIsCorrect) {
    const auto withoutUnit = SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column)");
    UNIT_ASSERT(withoutUnit.IsOk());

    const auto withSeconds = SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column AS SECONDS)");
    UNIT_ASSERT(withSeconds.IsOk());
}

// ALTER TABLE ... SET (TIERING = ...) translates.
Y_UNIT_TEST(AlterTableSetTieringIsCorrect) {
    const auto parsed = SqlToYql("USE plato; ALTER TABLE table SET (TIERING = 'my_tiering')");
    UNIT_ASSERT(parsed.IsOk());
}

// ALTER TABLE ... ADD CHANGEFEED with mode and format translates.
Y_UNIT_TEST(AlterTableAddChangefeedIsCorrect) {
    const auto parsed = SqlToYql("USE plato; ALTER TABLE table ADD CHANGEFEED feed WITH (MODE = 'UPDATES', FORMAT = 'json')");
    UNIT_ASSERT(parsed.IsOk());
}

// ALTER CHANGEFEED: DISABLE is accepted, changing FORMAT is rejected with a positioned error.
Y_UNIT_TEST(AlterTableAlterChangefeedIsCorrect) {
    const auto disabled = SqlToYql("USE plato; ALTER TABLE table ALTER CHANGEFEED feed DISABLE");
    UNIT_ASSERT(disabled.IsOk());

    ExpectFailWithError(
        "USE plato; ALTER TABLE table ALTER CHANGEFEED feed SET (FORMAT = 'proto');",
        "<main>:1:57: Error: FORMAT alter is not supported\n");
}

// ALTER TABLE ... DROP CHANGEFEED translates.
Y_UNIT_TEST(AlterTableDropChangefeedIsCorrect) {
    const auto parsed = SqlToYql("USE plato; ALTER TABLE table DROP CHANGEFEED feed");
    UNIT_ASSERT(parsed.IsOk());
}

// ALTER TABLE ... SET (AUTO_PARTITIONING_BY_SIZE = DISABLED) translates.
Y_UNIT_TEST(AlterTableSetPartitioningIsCorrect) {
    const auto parsed = SqlToYql("USE plato; ALTER TABLE table SET (AUTO_PARTITIONING_BY_SIZE = DISABLED)");
    UNIT_ASSERT(parsed.IsOk());
}

// ADD INDEX ... WITH (...) on a plain global index is reported as not implemented.
Y_UNIT_TEST(AlterTableAddIndexWithIsNotSupported) {
    ExpectFailWithError(
        "USE plato; ALTER TABLE table ADD INDEX idx GLOBAL ON (col) WITH (a=b)",
        "<main>:1:40: Error: with: alternative is not implemented yet: \n");
}

// ADD INDEX ... LOCAL is reported as not implemented.
Y_UNIT_TEST(AlterTableAddIndexLocalIsNotSupported) {
    ExpectFailWithError(
        "USE plato; ALTER TABLE table ADD INDEX idx LOCAL ON (col)",
        "<main>:1:40: Error: local: alternative is not implemented yet: \n");
}

// CREATE TABLE with an inline vector_kmeans_tree index (note the trailing comma in WITH) translates.
Y_UNIT_TEST(CreateTableAddIndexVector) {
    const auto parsed = SqlToYql(R"(USE plato;
        CREATE TABLE table (
            pk INT32 NOT NULL,
            col String,
            INDEX idx GLOBAL USING vector_kmeans_tree
                ON (col) COVER (col)
                WITH (distance=cosine, vector_type=float, vector_dimension=1024,),
            PRIMARY KEY (pk))
        )");
    UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
}

// ALTER TABLE ... ADD INDEX with a vector_kmeans_tree index translates.
Y_UNIT_TEST(AlterTableAddIndexVector) {
    const auto parsed = SqlToYql(R"(USE plato;
        ALTER TABLE table ADD INDEX idx
            GLOBAL USING vector_kmeans_tree
            ON (col) COVER (col)
            WITH (distance=cosine, vector_type="float", vector_dimension=1024)
        )");
    UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
}

// An unrecognised index subtype after USING is rejected with a positioned error.
Y_UNIT_TEST(AlterTableAddIndexUnknownSubtype) {
    ExpectFailWithError(
        "USE plato; ALTER TABLE table ADD INDEX idx GLOBAL USING unknown ON (col)",
        "<main>:1:57: Error: UNKNOWN index subtype is not supported\n");
}

// A vector index without vector_dimension is rejected.
// NOTE: whitespace inside the raw string is significant - the expected error position (5:52) is a
// line/column counted from the start of the query text.
Y_UNIT_TEST(AlterTableAddIndexMissedParameter) {
    ExpectFailWithError(R"(USE plato;
            ALTER TABLE table ADD INDEX idx
                GLOBAL USING vector_kmeans_tree
                ON (col)
                WITH (distance=cosine, vector_type=float)
            )",
        "<main>:5:52: Error: vector_dimension should be set\n");
}

// ALTER INDEX ... SET <single partitioning setting> translates.
Y_UNIT_TEST(AlterTableAlterIndexSetPartitioningIsCorrect) {
    const auto parsed = SqlToYql("USE plato; ALTER TABLE table ALTER INDEX index SET AUTO_PARTITIONING_MIN_PARTITIONS_COUNT 10");
    UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
}

// ALTER INDEX ... SET (<several partitioning settings>) translates.
Y_UNIT_TEST(AlterTableAlterIndexSetMultiplePartitioningSettings) {
    const auto parsed = SqlToYql(
        "USE plato; ALTER TABLE table ALTER INDEX index SET "
        "(AUTO_PARTITIONING_BY_LOAD = ENABLED, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10)");
    UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
}

// ALTER INDEX ... RESET of a partitioning setting is rejected with a positioned error.
Y_UNIT_TEST(AlterTableAlterIndexResetPartitioningIsNotSupported) {
    ExpectFailWithError(
        "USE plato; ALTER TABLE table ALTER INDEX index RESET (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT)",
        "<main>:1:55: Error: AUTO_PARTITIONING_MIN_PARTITIONS_COUNT reset is not supported\n");
}

// ALTER COLUMN ... DROP NOT NULL: every program line containing `'mode 'alter` must be the exact
// alter-Write statement below, and there must be exactly one such line.
Y_UNIT_TEST(AlterTableAlterColumnDropNotNullAstCorrect) {
    auto parsed = SqlToYql(R"(
        USE plato;
        CREATE TABLE tableName (
            id Uint32,
            val Uint32 NOT NULL,
            PRIMARY KEY (id)
        );

        COMMIT;
        ALTER TABLE tableName ALTER COLUMN val DROP NOT NULL;
    )");

    UNIT_ASSERT(parsed.IsOk());
    UNIT_ASSERT(parsed.Root);

    TVerifyLineFunc checkAlterLine = [](const TString& word, const TString& line) {
        // Only one word is counted, so the matched word itself carries no information.
        Y_UNUSED(word);

        const auto hit = line.find(
            R"(let world (Write! world sink (Key '('tablescheme (String '"tableName"))) (Void) '('('mode 'alter) '('actions '('('alterColumns '('('"val" '('changeColumnConstraints '('('drop_not_null)))))))))))"
        );
        UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, hit);
    };

    TWordCountHive counters({TString("\'mode \'alter")});
    VerifyProgram(parsed, counters, checkAlterLine);
    UNIT_ASSERT_VALUES_EQUAL(1, counters["\'mode \'alter"]);
}

// Column and table aliases may be written without the AS keyword.
Y_UNIT_TEST(OptionalAliases) {
    const auto columnAlias = SqlToYql("USE plato; SELECT foo FROM (SELECT key foo FROM Input);");
    UNIT_ASSERT(columnAlias.IsOk());

    const auto tableAliases = SqlToYql("USE plato; SELECT a.x FROM Input1 a JOIN Input2 b ON a.key = b.key;");
    UNIT_ASSERT(tableAliases.IsOk());

    const auto valuesAlias = SqlToYql("USE plato; SELECT a.x FROM (VALUES (1,2), (3,4)) a(x,key) JOIN Input b ON a.key = b.key;");
    UNIT_ASSERT(valuesAlias.IsOk());
}

// TableName($path[, system]) may be selected next to an aggregate; TableName() without
// arguments may not.
Y_UNIT_TEST(TableNameConstness) {
    const auto onePathArg = SqlToYql("USE plato; $path = 'foo'; SELECT TableName($path), count(*) FROM Input;");
    UNIT_ASSERT(onePathArg.IsOk());

    const auto pathAndSystem = SqlToYql("$path = 'foo'; SELECT TableName($path, 'yt'), count(*) FROM plato.Input;");
    UNIT_ASSERT(pathAndSystem.IsOk());

    ExpectFailWithError(
        "USE plato; SELECT TableName(), count(*) FROM plato.Input;",
        "<main>:1:19: Error: Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery\n");
}

// The keyword `use` is usable as a column name.
Y_UNIT_TEST(UseShouldWorkAsColumnName) {
    const auto parsed = SqlToYql("select use from (select 1 as use);");
    UNIT_ASSERT(parsed.IsOk());
}

// `true` / `False` are usable as names of `$` bindings.
Y_UNIT_TEST(TrueFalseWorkAfterDollar) {
    const auto spacedTrue = SqlToYql("$ true = false; SELECT $ true or false;");
    UNIT_ASSERT(spacedTrue.IsOk());

    const auto capitalFalse = SqlToYql("$False = 0; SELECT $False;");
    UNIT_ASSERT(capitalFalse.IsOk());
}

// `with schema <type>` and `with columns = <type>` are both accepted.
Y_UNIT_TEST(WithSchemaEquals) {
    const auto schemaForm = SqlToYql("select * from plato.T with schema Struct<a:Int32, b:String>;");
    UNIT_ASSERT(schemaForm.IsOk());

    const auto columnsForm = SqlToYql("select * from plato.T with columns = Struct<a:Int32, b:String>;");
    UNIT_ASSERT(columnsForm.IsOk());
}

Y_UNIT_TEST(WithNonStructSchemaS3) {
    // A parenthesised column list after WITH SCHEMA is expected to translate
    // when the cluster is mapped to the S3 provider.
    NSQLTranslation::TTranslationSettings settings;
    settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName;
    UNIT_ASSERT(SqlToYql("select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);", settings).IsOk());
}

Y_UNIT_TEST(AllowNestedTuplesInGroupBy) {
    // GROUP BY over an expression that contains a tuple: exactly one Aggregate
    // is expected, keyed by the generated column name "group0".
    NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by 1 + (x, y, z);");
    UNIT_ASSERT(res.Root);

    TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
        Y_UNUSED(word);
        UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"group0\")"));
    };

    TWordCountHive elementStat({"Aggregate"});
    VerifyProgram(res, elementStat, verifyLine);
    // VALUES_EQUAL (as in the sibling tests) reports the actual count on failure.
    UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
}

Y_UNIT_TEST(AllowGroupByWithParens) {
    // A parenthesised GROUP BY list is expected to be flattened into plain keys,
    // with `y as alias1` contributing the key "alias1".
    NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by (x, y as alias1, z);");
    UNIT_ASSERT(res.Root);

    TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
        Y_UNUSED(word);
        UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"x\" '\"alias1\" '\"z\")"));
    };

    TWordCountHive elementStat({"Aggregate"});
    VerifyProgram(res, elementStat, verifyLine);
    // VALUES_EQUAL (as in the sibling tests) reports the actual count on failure.
    UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
}

Y_UNIT_TEST(CreateAsyncReplicationParseCorrect) {
    // CREATE ASYNC REPLICATION with connection settings: the single Write node
    // must mention the replication name, the mode, every table and every setting.
    auto req = R"(
            USE plato;
            CREATE ASYNC REPLICATION MyReplication
            FOR table1 AS table2, table3 AS table4
            WITH (
                CONNECTION_STRING = "grpc://localhost:2135/?database=/MyDatabase",
                ENDPOINT = "localhost:2135",
                DATABASE = "/MyDatabase"
            );
        )";
    auto res = SqlToYql(req);
    UNIT_ASSERT(res.Root);

    TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
        if (word == "Write") {
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("create"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table1"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table2"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table3"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table4"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("connection_string"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("grpc://localhost:2135/?database=/MyDatabase"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("endpoint"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("localhost:2135"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("database"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("/MyDatabase"));
        }
    };

    TWordCountHive elementStat = { {TString("Write"), 0}};
    VerifyProgram(res, elementStat, verifyLine);

    UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
}

Y_UNIT_TEST(CreateAsyncReplicationUnsupportedSettings) {
    // STATE and FAILOVER_MODE must be rejected by CREATE.
    // NOTE: the asserted error column (20 + key length on line 6) depends on the
    // 16-space indentation of the `%s = "%s"` line inside the raw string.
    auto reqTpl = R"(
            USE plato;
            CREATE ASYNC REPLICATION MyReplication
            FOR table1 AS table2, table3 AS table4
            WITH (
                %s = "%s"
            )
        )";

    auto settings = THashMap<TString, TString>{
        {"STATE", "DONE"},
        {"FAILOVER_MODE", "FORCE"},
    };

    for (const auto& [k, v] : settings) {
        auto req = Sprintf(reqTpl, k.c_str(), v.c_str());
        auto res = SqlToYql(req);
        UNIT_ASSERT(!res.Root);
        UNIT_ASSERT_NO_DIFF(Err2Str(res), Sprintf("<main>:6:%zu: Error: %s is not supported in CREATE\n", 20 + k.size(), k.c_str()));
    }
}

Y_UNIT_TEST(AlterAsyncReplicationParseCorrect) {
    // ALTER ... SET (STATE, FAILOVER_MODE): the single Write node must carry
    // the replication name, the `alter` mode and both settings with their values.
    auto req = R"(
            USE plato;
            ALTER ASYNC REPLICATION MyReplication
            SET (
                STATE = "DONE",
                FAILOVER_MODE = "FORCE"
            );
        )";
    auto res = SqlToYql(req);
    UNIT_ASSERT(res.Root);

    TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
        if (word == "Write") {
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("state"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DONE"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("failover_mode"));
            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("FORCE"));
        }
    };

    TWordCountHive elementStat = { {TString("Write"), 0}};
    VerifyProgram(res, elementStat, verifyLine);

    UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
}

Y_UNIT_TEST(AlterAsyncReplicationUnsupportedSettings) {
    // NOTE(review): despite the name, this test asserts that every setting below
    // is ACCEPTED by ALTER (res.Root is non-null) and that the key and value
    // appear in the Write node. The name is kept so existing test filters keep working.
    auto reqTpl = R"(
            USE plato;
            ALTER ASYNC REPLICATION MyReplication
            SET (
                %s = "%s"
            )
        )";

    auto settings = THashMap<TString, TString>{
        {"connection_string", "grpc://localhost:2135/?database=/MyDatabase"},
        {"endpoint", "localhost:2135"},
        {"database", "/MyDatabase"},
        {"token", "foo"},
        {"token_secret_name", "foo_secret_name"},
        {"user", "user"},
        {"password", "bar"},
        {"password_secret_name", "bar_secret_name"},
    };

    for (const auto& setting : settings) {
        // Plain references rather than structured bindings — presumably because
        // structured bindings cannot be captured by a lambda before C++20.
        auto& key = setting.first;
        auto& value = setting.second;
        auto req = Sprintf(reqTpl, key.c_str(), value.c_str());
        auto res = SqlToYql(req);
        UNIT_ASSERT(res.Root);

        TVerifyLineFunc verifyLine = [&key, &value](const TString& word, const TString& line) {
            if (word == "Write") {
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter"));
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(key));
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(value));
            }
        };

        TWordCountHive elementStat = { {TString("Write"), 0}};
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
    }
}

Y_UNIT_TEST(AsyncReplicationInvalidSettings) {
    // An unknown setting name must be reported at the position of its value.
    // NOTE: column 62 depends on the 12-space indentation inside the raw string.
    auto req = R"(
            USE plato;
            ALTER ASYNC REPLICATION MyReplication SET (FOO = "BAR");
        )";
    auto res = SqlToYql(req);
    UNIT_ASSERT(!res.Root);
    UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:62: Error: Unknown replication setting: FOO\n");
}

Y_UNIT_TEST(DropAsyncReplicationParseCorrect) { + auto req = R"( + USE plato; + DROP ASYNC REPLICATION MyReplication; + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropAsyncReplicationCascade) { + auto req = R"( + USE plato; + DROP ASYNC REPLICATION MyReplication CASCADE; + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropCascade")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(PragmaCompactGroupBy) { + auto req = "PRAGMA CompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP BY key;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Aggregate") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('compact)")); + } + }; + + TWordCountHive elementStat = { {TString("Aggregate"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]); + } + + Y_UNIT_TEST(PragmaDisableCompactGroupBy) { + auto req = "PRAGMA DisableCompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP /*+ compact() */ BY key;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Aggregate") { + 
UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'('compact)")); + } + }; + + TWordCountHive elementStat = { {TString("Aggregate"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]); + } + + Y_UNIT_TEST(AutoSampleWorksWithNamedSubquery) { + UNIT_ASSERT(SqlToYql("$src = select * from plato.Input; select * from $src sample 0.2").IsOk()); + } + + Y_UNIT_TEST(AutoSampleWorksWithSubquery) { + UNIT_ASSERT(SqlToYql("select * from (select * from plato.Input) sample 0.2").IsOk()); + } + + Y_UNIT_TEST(CreateTableTrailingComma) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, PRIMARY KEY (Key),);").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32,);").IsOk()); + } + + Y_UNIT_TEST(BetweenSymmetric) { + UNIT_ASSERT(SqlToYql("select 3 between symmetric 5 and 4;").IsOk()); + UNIT_ASSERT(SqlToYql("select 3 between asymmetric 5 and 4;").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; select key between symmetric and and and from Input;").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; select key between and and and from Input;").IsOk()); + } +} + +Y_UNIT_TEST_SUITE(ExternalFunction) { + Y_UNIT_TEST(ValidUseFunctions) { + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|a: 123, b: a + 641|>)" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " CONCURRENCY=3, OPTIMIZE_FOR='CALLS'").IsOk()); + + // use CALLS without quotes, as keyword + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " OPTIMIZE_FOR=CALLS").IsOk()); + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', TableRow())" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " CONCURRENCY=3").IsOk()); + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL 
FUNCTION('YANDEX-CLOUD', 'foo')" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>," + " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con'," + " INIT=[0, 900]").IsOk()); + + UNIT_ASSERT(SqlToYql( + "PROCESS plato.Input" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'bar', TableRow())" + " WITH UNKNOWN_PARAM_1='837747712', UNKNOWN_PARAM_2=Tuple<Uint16, Utf8>," + " INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>").IsOk()); + } + + + Y_UNIT_TEST(InValidUseFunctions) { + ExpectFailWithError("PROCESS plato.Input USING some::udf(*) WITH INPUT_TYPE=Struct<a:Int32>", + "<main>:1:33: Error: PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block\n"); + + ExpectFailWithError("PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'jhhjfh88134d')" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>" + " ASSUME ORDER BY key", + "<main>:1:129: Error: PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block\n"); + + ExpectFailWithError("PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', 'bar', 'baz')", + "<main>:1:15: Error: EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: 4\n"); + + ExpectFailWithError("PROCESS plato.Input\n" + " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|field_1: a1, field_b: b1|>)\n" + " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,\n" + " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con',\n" + " CONCURRENCY=5, INPUT_TYPE=Struct<b:Bool>,\n" + " INIT=[0, 900]\n", + "<main>:5:2: Error: WITH \"CONCURRENCY\" clause should be specified only once\n" + "<main>:5:17: Error: WITH \"INPUT_TYPE\" clause should be specified only once\n"); + } +} + +Y_UNIT_TEST_SUITE(SqlToYQLErrors) { + Y_UNIT_TEST(UdfSyntaxSugarMissingCall) { + auto req = "SELECT Udf(DateTime::FromString, \"foo\" as RunConfig);"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Abstract Udf Node can't be used as a part of 
expression.\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(UdfSyntaxSugarIsNotCallable) { + auto req = "SELECT Udf(123, \"foo\" as RunConfig);"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Udf: first argument must be a callable, like Foo::Bar\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(UdfSyntaxSugarNoArgs) { + auto req = "SELECT Udf()();"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Udf: expected at least one argument\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(StrayUTF8) { + /// 'c' in plato is russian here + NYql::TAstParseResult res = SqlToYql("select * from сedar.Input"); + UNIT_ASSERT(!res.Root); + + TString a1 = Err2Str(res); + TString a2(R"foo(<main>:1:16: Error: Unknown cluster: edar +)foo"); + + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(IvalidStringLiteralWithEscapedBackslash) { + NYql::TAstParseResult res1 = SqlToYql(R"foo($bar = 'a\\'b';)foo"); + NYql::TAstParseResult res2 = SqlToYql(R"foo($bar = "a\\"b";)foo"); + UNIT_ASSERT(!res1.Root); + UNIT_ASSERT(!res2.Root); + + UNIT_ASSERT_NO_DIFF(Err2Str(res1), "<main>:1:12: Error: mismatched input 'b' expecting {<EOF>, ';'}\n"); + UNIT_ASSERT_NO_DIFF(Err2Str(res2), "<main>:1:12: Error: mismatched input 'b' expecting {<EOF>, ';'}\n"); + } + + Y_UNIT_TEST(InvalidHexInStringLiteral) { + NYql::TAstParseResult res = SqlToYql("select \"foo\\x1\\xfe\""); + UNIT_ASSERT(!res.Root); + TString a1 = Err2Str(res); + TString a2 = "<main>:1:15: Error: Failed to parse string literal: Invalid hexadecimal value\n"; + + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(InvalidOctalInMultilineStringLiteral) { + NYql::TAstParseResult res = SqlToYql("select \"foo\n" + "bar\n" + "\\01\""); + UNIT_ASSERT(!res.Root); + TString a1 = Err2Str(res); + TString a2 = "<main>:3:4: Error: Failed to parse string literal: Invalid octal value\n"; + + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + 
Y_UNIT_TEST(InvalidDoubleAtString) { + NYql::TAstParseResult res = SqlToYql("select @@@@@@"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: extraneous input '@' expecting {<EOF>, ';'}\n"); + } + + Y_UNIT_TEST(InvalidDoubleAtStringWhichWasAcceptedEarlier) { + NYql::TAstParseResult res = SqlToYql("SELECT @@foo@@ @ @@bar@@"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: mismatched input '@' expecting {<EOF>, ';'}\n"); + } + + Y_UNIT_TEST(InvalidStringFromTable) { + NYql::TAstParseResult res = SqlToYql("select \"FOO\"\"BAR from plato.foo"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: mismatched input '\"' expecting {<EOF>, ';'}\n"); + } + + Y_UNIT_TEST(InvalidDoubleAtStringFromTable) { + NYql::TAstParseResult res = SqlToYql("select @@@@@@ from plato.foo"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: mismatched input '@' expecting {<EOF>, ';'}\n"); + } + + Y_UNIT_TEST(SelectInvalidSyntax) { + NYql::TAstParseResult res = SqlToYql("select 1 form Wat"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:14: Error: extraneous input 'Wat' expecting {<EOF>, ';'}\n"); + } + + Y_UNIT_TEST(SelectNoCluster) { + NYql::TAstParseResult res = SqlToYql("select foo from bar"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: No cluster name given and no default cluster is selected\n"); + } + + Y_UNIT_TEST(SelectDuplicateColumns) { + NYql::TAstParseResult res = SqlToYql("select a, a from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:11: Error: Unable to use duplicate column names. 
Collision in name: a\n"); + } + + Y_UNIT_TEST(SelectDuplicateLabels) { + NYql::TAstParseResult res = SqlToYql("select a as foo, b as foo from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unable to use duplicate column names. Collision in name: foo\n"); + } + + Y_UNIT_TEST(SelectCaseWithoutThen) { + NYql::TAstParseResult res = SqlToYql("select case when true 1;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:1:22: Error: missing THEN at \'1\'\n" + "<main>:1:23: Error: extraneous input \';\' expecting {ELSE, END, WHEN}\n" + ); + } + + Y_UNIT_TEST(SelectComplexCaseWithoutThen) { + NYql::TAstParseResult res = SqlToYql( + "SELECT *\n" + "FROM plato.Input AS a\n" + "WHERE CASE WHEN a.key = \"foo\" a.subkey ELSE a.value END\n" + ); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:30: Error: missing THEN at 'a'\n"); + } + + Y_UNIT_TEST(SelectCaseWithoutEnd) { + NYql::TAstParseResult res = SqlToYql("select case a when b then c end from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: ELSE is required\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregationNoInput) { + NYql::TAstParseResult res = SqlToYql("select a, Min(b), c"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:8: Error: Column reference 'a'\n" + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:15: Error: Column reference 'b'\n" + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:19: Error: Column reference 'c'\n" + ); + } + + Y_UNIT_TEST(SelectWithBadAggregation) { + ExpectFailWithError("select count(*), 1 + key from plato.Input", + "<main>:1:22: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregatedTerms) { + 
ExpectFailWithError("select key, 2 * subkey from plato.Input group by key", + "<main>:1:17: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectDistinctWithBadAggregation) { + ExpectFailWithError("select distinct count(*), 1 + key from plato.Input", + "<main>:1:31: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + ExpectFailWithError("select distinct key, 2 * subkey from plato.Input group by key", + "<main>:1:26: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregationInHaving) { + ExpectFailWithError("select key from plato.Input group by key\n" + "having \"f\" || value == \"foo\"", + "<main>:2:15: Error: Column `value` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(JoinWithNonAggregatedColumnInProjection) { + ExpectFailWithError("select a.key, 1 + b.subkey\n" + "from plato.Input1 as a join plato.Input2 as b using(key)\n" + "group by a.key;", + "<main>:1:19: Error: Column `b.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + + ExpectFailWithError("select a.key, 1 + b.subkey.x\n" + "from plato.Input1 as a join plato.Input2 as b using(key)\n" + "group by a.key;", + "<main>:1:19: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregatedTermsWithSources) { + ExpectFailWithError("select key, 1 + a.subkey\n" + "from plato.Input1 as a\n" + "group by a.key;", + "<main>:1:17: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + ExpectFailWithError("select key, 1 + a.subkey.x\n" + "from plato.Input1 as a\n" + "group by a.key;", + "<main>:1:17: Error: Column must either 
be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(WarnForAggregationBySelectAlias) { + NYql::TAstParseResult res = SqlToYql("select c + 1 as c from plato.Input\n" + "group by c"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:2:11: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"); + + res = SqlToYql("select c + 1 as c from plato.Input\n" + "group by Math::Floor(c + 2) as c;"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"); + } + + Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenAggrFunctionsAreUsedInAlias) { + NYql::TAstParseResult res = SqlToYql("select\n" + " cast(avg(val) as int) as value,\n" + " value as key\n" + "from\n" + " plato.Input\n" + "group by value"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + + res = SqlToYql("select\n" + " cast(avg(val) over w as int) as value,\n" + " value as key\n" + "from\n" + " plato.Input\n" + "group by value\n" + "window w as ()"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenQualifiedNameIsUsed) { + NYql::TAstParseResult res = SqlToYql("select\n" + " Unwrap(a.key) as key\n" + "from plato.Input as a\n" + "join plato.Input2 as b using(k)\n" + "group by a.key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + + res = SqlToYql("select Unwrap(a.key) as key\n" + "from plato.Input as a\n" + "group by a.key;"); + 
UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenTrivialRenamingIsUsed) { + NYql::TAstParseResult res = SqlToYql("select a.key as key\n" + "from plato.Input as a\n" + "group by key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + + res = SqlToYql("select key as key\n" + "from plato.Input\n" + "group by key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(ErrorByAggregatingByExpressionWithSameExpressionInSelect) { + ExpectFailWithError("select k * 2 from plato.Input group by k * 2", + "<main>:1:8: Error: Column `k` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(ErrorForAggregationBySelectAlias) { + ExpectFailWithError("select key, Math::Floor(1.1 + a.subkey) as foo\n" + "from plato.Input as a\n" + "group by a.key, foo;", + "<main>:3:17: Warning: GROUP BY will aggregate by column `foo` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:19: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n" + "<main>:1:31: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + + ExpectFailWithError("select c + 1 as c from plato.Input\n" + "group by Math::Floor(c + 2);", + "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n" + "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. 
Please consult documentation for more details, code: 4532\n" + "<main>:1:8: Error: Column `c` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectWithDuplicateGroupingColumns) { + NYql::TAstParseResult res = SqlToYql("select c from plato.Input group by c, c"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Duplicate grouping column: c\n"); + } + + Y_UNIT_TEST(SelectWithBadAggregationInGrouping) { + NYql::TAstParseResult res = SqlToYql("select a, Min(b), c group by c"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:30: Error: Column reference 'c'\n"); + } + + Y_UNIT_TEST(SelectWithOpOnBadAggregation) { + ExpectFailWithError("select 1 + a + Min(b) from plato.Input", + "<main>:1:12: Error: Column `a` must either be a key column in GROUP BY or it should be used in aggregation function\n"); + } + + Y_UNIT_TEST(SelectOrderByConstantNum) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:36: Error: Unable to ORDER BY constant expression\n"); + } + + Y_UNIT_TEST(SelectOrderByConstantExpr) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by 1 * 42"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:38: Error: Unable to ORDER BY constant expression\n"); + } + + Y_UNIT_TEST(SelectOrderByConstantString) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by \"nest\""); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:36: Error: Unable to ORDER BY constant expression\n"); + } + + Y_UNIT_TEST(SelectOrderByAggregated) { + NYql::TAstParseResult res = SqlToYql("select a from plato.Input order by min(a)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:36: Error: Unable to 
ORDER BY aggregated values\n"); + } + + Y_UNIT_TEST(ErrorInOrderByExpresison) { + NYql::TAstParseResult res = SqlToYql("select key, value from plato.Input order by (key as zey)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:45: Error: You should use in ORDER BY column name, qualified field, callable function or expression\n"); + } + + Y_UNIT_TEST(ErrorsInOrderByWhenColumnIsMissingInProjection) { + ExpectFailWithError("select subkey from (select 1 as subkey) order by key", "<main>:1:50: Error: Column key is not in source column set\n"); + ExpectFailWithError("select subkey from plato.Input as a order by x.key", "<main>:1:46: Error: Unknown correlation name: x\n"); + ExpectFailWithError("select distinct a, b from plato.Input order by c", "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n"); + ExpectFailWithError("select count(*) as a from plato.Input order by c", "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n"); + ExpectFailWithError("select count(*) as a, b, from plato.Input group by b order by c", "<main>:1:63: Error: Column c is not in source column set. 
Did you mean a?\n"); + UNIT_ASSERT(SqlToYql("select a, b from plato.Input order by c").IsOk()); + } + + Y_UNIT_TEST(SelectAggregatedWhere) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input where count(key)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:33: Error: Can not use aggregated values in filtering\n"); + } + + Y_UNIT_TEST(DoubleFrom) { + NYql::TAstParseResult res = SqlToYql("from plato.Input select * from plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:27: Error: Only one FROM clause is allowed\n"); + } + + Y_UNIT_TEST(SelectJoinMissingCorrName) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input1 as a join plato.Input2 as b on a.key == key"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:65: Error: JOIN: column requires correlation name\n"); + } + + Y_UNIT_TEST(SelectJoinMissingCorrName1) { + NYql::TAstParseResult res = SqlToYql( + "use plato;\n" + "$foo = select * from Input1;\n" + "select * from Input2 join $foo USING(key);\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:27: Error: JOIN: missing correlation name for source\n"); + } + + Y_UNIT_TEST(SelectJoinMissingCorrName2) { + NYql::TAstParseResult res = SqlToYql( + "use plato;\n" + "$foo = select * from Input1;\n" + "select * from Input2 cross join $foo;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:33: Error: JOIN: missing correlation name for source\n"); + } + + Y_UNIT_TEST(SelectJoinEmptyCorrNames) { + NYql::TAstParseResult res = SqlToYql( + "$left = (SELECT * FROM plato.Input1 LIMIT 2);\n" + "$right = (SELECT * FROM plato.Input2 LIMIT 2);\n" + "SELECT * FROM $left FULL JOIN $right USING (key);\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:45: Error: At least one correlation name is required in join\n"); + } + + Y_UNIT_TEST(SelectJoinSameCorrNames) { + NYql::TAstParseResult 
res = SqlToYql("SELECT Input.key FROM plato.Input JOIN plato.Input1 ON Input.key == Input.subkey\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:66: Error: JOIN: different correlation names are required for joined tables\n"); + } + + Y_UNIT_TEST(SelectJoinConstPredicateArg) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey == \"wtf\"\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:87: Error: JOIN: each equality predicate argument must depend on exactly one JOIN input\n"); + } + + Y_UNIT_TEST(SelectJoinNonEqualityPredicate) { + NYql::TAstParseResult res = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey > B.subkey\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:87: Error: JOIN ON expression must be a conjunction of equality predicates\n"); + } + + Y_UNIT_TEST(SelectEquiJoinCorrNameOutOfScope) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA equijoin;\n" + "SELECT * FROM plato.A JOIN plato.B ON A.key == C.key JOIN plato.C ON A.subkey == C.subkey;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:45: Error: JOIN: can not use source: C in equality predicate, it is out of current join scope\n"); + } + + Y_UNIT_TEST(SelectEquiJoinNoRightSource) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA equijoin;\n" + "SELECT * FROM plato.A JOIN plato.B ON A.key == B.key JOIN plato.C ON A.subkey == B.subkey;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:79: Error: JOIN ON equality predicate must have one of its arguments from the rightmost source\n"); + } + + Y_UNIT_TEST(SelectEquiJoinOuterWithoutType) { + NYql::TAstParseResult res = SqlToYql( + "SELECT * FROM plato.A Outer JOIN plato.B ON A.key == B.key;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:23: Error: Invalid 
join type: OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n"); + } + + Y_UNIT_TEST(SelectEquiJoinOuterWithWrongType) { + NYql::TAstParseResult res = SqlToYql( + "SELECT * FROM plato.A LEFT semi OUTER JOIN plato.B ON A.key == B.key;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:33: Error: Invalid join type: LEFT SEMI OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n"); + } + + Y_UNIT_TEST(InsertNoCluster) { + NYql::TAstParseResult res = SqlToYql("insert into Output (foo) values (1)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: No cluster name given and no default cluster is selected\n"); + } + + Y_UNIT_TEST(InsertValuesNoLabels) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output values (1)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: INSERT INTO ... VALUES requires specification of table columns\n"); + } + + Y_UNIT_TEST(UpsertValuesNoLabelsKikimr) { + NYql::TAstParseResult res = SqlToYql("upsert into plato.Output values (1)", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: UPSERT INTO ... VALUES requires specification of table columns\n"); + } + + Y_UNIT_TEST(ReplaceValuesNoLabelsKikimr) { + NYql::TAstParseResult res = SqlToYql("replace into plato.Output values (1)", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:20: Error: REPLACE INTO ... 
VALUES requires specification of table columns\n"); + } + + Y_UNIT_TEST(InsertValuesInvalidLabels) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (foo) values (1, 2)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:27: Error: VALUES have 2 columns, INSERT INTO expects: 1\n"); + } + + Y_UNIT_TEST(BuiltinFileOpNoArgs) { + NYql::TAstParseResult res = SqlToYql("select FilePath()"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: FilePath() requires exactly 1 arguments, given: 0\n"); + } + + Y_UNIT_TEST(ProcessWithHaving) { + NYql::TAstParseResult res = SqlToYql("process plato.Input using some::udf(value) having value == 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: PROCESS does not allow HAVING yet! You may request it on yql@ maillist.\n"); + } + + Y_UNIT_TEST(ReduceNoBy) { + NYql::TAstParseResult res = SqlToYql("reduce plato.Input using some::udf(value)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: mismatched input 'using' expecting {',', ON, PRESORT}\n"); + } + + Y_UNIT_TEST(ReduceDistinct) { + NYql::TAstParseResult res = SqlToYql("reduce plato.Input on key using some::udf(distinct value)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:43: Error: DISTINCT can not be used in PROCESS/REDUCE\n"); + } + + Y_UNIT_TEST(CreateTableWithView) { + NYql::TAstParseResult res = SqlToYql("CREATE TABLE plato.foo:bar (key INT);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:22: Error: mismatched input ':' expecting '('\n"); + } + + Y_UNIT_TEST(AsteriskWithSomethingAfter) { + NYql::TAstParseResult res = SqlToYql("select *, LENGTH(value) from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Unable to use plain '*' with other projection items. 
Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n"); + } + + Y_UNIT_TEST(AsteriskWithSomethingBefore) { + NYql::TAstParseResult res = SqlToYql("select LENGTH(value), * from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:23: Error: Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n"); + } + + Y_UNIT_TEST(DuplicatedQualifiedAsterisk) { + NYql::TAstParseResult res = SqlToYql("select in.*, key, in.* from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unable to use twice same quialified asterisk. Invalid source: in\n"); + } + + Y_UNIT_TEST(BrokenLabel) { + NYql::TAstParseResult res = SqlToYql("select in.*, key as `funny.label` from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:14: Error: Unable to use '.' in column name. Invalid column name: funny.label\n"); + } + + Y_UNIT_TEST(KeyConflictDetect0) { + NYql::TAstParseResult res = SqlToYql("select key, in.key as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:13: Error: Unable to use duplicate column names. Collision in name: key\n"); + } + + Y_UNIT_TEST(KeyConflictDetect1) { + NYql::TAstParseResult res = SqlToYql("select length(key) as key, key from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Unable to use duplicate column names. 
Collision in name: key\n"); + } + + Y_UNIT_TEST(KeyConflictDetect2) { + NYql::TAstParseResult res = SqlToYql("select key, in.key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict1) { + UNIT_ASSERT(SqlToYql("select LENGTH(Value), key as column0 from plato.Input;").IsOk()); + } + + Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict2) { + UNIT_ASSERT(SqlToYql("select key as column1, LENGTH(Value) from plato.Input;").IsOk()); + } + + Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnSimpleSelect) { + NYql::TAstParseResult res = SqlToYql("use plato; select Intop.*, Input.key from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unknown correlation name: Intop\n"); + } + + Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnJoin) { + NYql::TAstParseResult res = SqlToYql("use plato; select tmissed.*, t2.*, t1.key from plato.Input as t1 join plato.Input as t2 on t1.key==t2.key;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:19: Error: Unknown correlation name for asterisk: tmissed\n"); + } + + Y_UNIT_TEST(UnableToReferenceOnNotExistSubcolumn) { + NYql::TAstParseResult res = SqlToYql("select b.subkey from (select key from plato.Input as a) as b;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Column subkey is not in source column set\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify0) { + NYql::TAstParseResult res = SqlToYql("select in.key, in.key as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify1) { + NYql::TAstParseResult res = SqlToYql("select in.key, length(key) as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), 
"<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify2) { + NYql::TAstParseResult res = SqlToYql("select key, in.key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(ConflictOnSameNameWithQualify3) { + NYql::TAstParseResult res = SqlToYql("select in.key, subkey as key from plato.Input as in;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Duplicate column: key\n"); + } + + Y_UNIT_TEST(SelectFlattenBySameColumns) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key, key as kk)"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:46: Error: Duplicate column name found: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenBySameAliases) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as kk);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Duplicate alias found: kk in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByExprSameAliases) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as kk);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Collision between alias and column name: kk in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias0) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key, subkey as key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:46: Error: Collision between alias and column name: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias1) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as key);"); + UNIT_ASSERT(!res.Root); + 
UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Collision between alias and column name: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByExprConflictNameAndAlias1) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Error: Duplicate column name found: key in FlattenBy section\n"); + } + + Y_UNIT_TEST(SelectFlattenByUnnamedExpr) { + NYql::TAstParseResult res = SqlToYql("select key from plato.Input flatten by (key, ListSkip(key, 1))"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:46: Error: Unnamed expression after FLATTEN BY is not allowed\n"); + } + + Y_UNIT_TEST(UseInOnStrings) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input where \"foo\" in \"foovalue\";"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:42: Error: Unable to use IN predicate with string argument, it won't search substring - " + "expecting tuple, list, dict or single column table source\n"); + } + + Y_UNIT_TEST(UseSubqueryInScalarContextInsideIn) { + NYql::TAstParseResult res = SqlToYql("$q = (select key from plato.Input); select * from plato.Input where subkey in ($q);"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Warning: Using subrequest in scalar context after IN, " + "perhaps you should remove parenthesis here, code: 4501\n"); + } + + Y_UNIT_TEST(InHintsWithKeywordClash) { + NYql::TAstParseResult res = SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT `COMPACT`(1,2,3)"); + UNIT_ASSERT(!res.Root); + // should try to parse last compact as call expression + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:58: Error: Unknown builtin: COMPACT\n"); + } + + Y_UNIT_TEST(ErrorColumnPosition) { + NYql::TAstParseResult res = SqlToYql( + "USE plato;\n" + "SELECT \n" + "value FROM (\n" + "select key from Input\n" + ");\n" + 
); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:1: Error: Column value is not in source column set\n"); + } + + Y_UNIT_TEST(PrimaryViewAbortMapReduce) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input VIEW PRIMARY KEY"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: primary view is not supported for yt tables\n"); + } + + Y_UNIT_TEST(InsertAbortMapReduce) { + NYql::TAstParseResult res = SqlToYql("INSERT OR ABORT INTO plato.Output SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: INSERT OR ABORT INTO is not supported for yt tables\n"); + } + + Y_UNIT_TEST(ReplaceIntoMapReduce) { + NYql::TAstParseResult res = SqlToYql("REPLACE INTO plato.Output SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... 
for yt\n"); + } + + Y_UNIT_TEST(UpsertIntoMapReduce) { + NYql::TAstParseResult res = SqlToYql("UPSERT INTO plato.Output SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: UPSERT INTO is not supported for yt tables\n"); + } + + Y_UNIT_TEST(UpdateMapReduce) { + NYql::TAstParseResult res = SqlToYql("UPDATE plato.Output SET value = value + 1 WHERE key < 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: UPDATE is unsupported for yt\n"); + } + + Y_UNIT_TEST(DeleteMapReduce) { + NYql::TAstParseResult res = SqlToYql("DELETE FROM plato.Output WHERE key < 1"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: DELETE is unsupported for yt\n"); + } + + Y_UNIT_TEST(ReplaceIntoWithTruncate) { + NYql::TAstParseResult res = SqlToYql("REPLACE INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:32: Error: Unable REPLACE INTO with truncate mode\n"); + } + + Y_UNIT_TEST(UpsertIntoWithTruncate) { + NYql::TAstParseResult res = SqlToYql("UPSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: Unable UPSERT INTO with truncate mode\n"); + } + + Y_UNIT_TEST(InsertIntoWithTruncateKikimr) { + NYql::TAstParseResult res = SqlToYql("INSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:0: Error: INSERT INTO WITH TRUNCATE is not supported for kikimr tables\n"); + } + + Y_UNIT_TEST(InsertIntoWithWrongArgumentCount) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output with truncate (key, value, subkey) values (5, '1', '2', '3');"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:53: Error: VALUES have 4 columns, INSERT INTO ... 
WITH TRUNCATE expects: 3\n"); + } + + Y_UNIT_TEST(UpsertWithWrongArgumentCount) { + NYql::TAstParseResult res = SqlToYql("upsert into plato.Output (key, value, subkey) values (2, '3');", 10, TString(NYql::KikimrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:39: Error: VALUES have 2 columns, UPSERT INTO expects: 3\n"); + } + + Y_UNIT_TEST(GroupingSetByExprWithoutAlias) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY GROUPING SETS (cast(key as uint32), subkey);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:53: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(GroupingSetByExprWithoutAlias2) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY subkey || subkey, GROUPING SETS (\n" + "cast(key as uint32), subkey);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:1: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(CubeByExprWithoutAlias) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY CUBE (key, subkey / key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:56: Error: Unnamed expressions are not supported in CUBE. Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(RollupByExprWithoutAlias) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY ROLLUP (subkey / key);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:53: Error: Unnamed expressions are not supported in ROLLUP. 
Please use '<expr> AS <name>'.\n"); + } + + Y_UNIT_TEST(GroupByHugeCubeDeniedNoPragma) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub, key + val as keyval);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:119: Error: GROUP BY CUBE is allowed only for 5 columns, but you use 6\n"); + } + + Y_UNIT_TEST(GroupByInvalidPragma) { + NYql::TAstParseResult res = SqlToYql("PRAGMA GroupByCubeLimit = '-4';"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:27: Error: Expected unsigned integer literal as a single argument for: GroupByCubeLimit\n"); + } + + Y_UNIT_TEST(GroupByHugeCubeDeniedPragme) { + NYql::TAstParseResult res = SqlToYql("PRAGMA GroupByCubeLimit = '4'; SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:132: Error: GROUP BY CUBE is allowed only for 4 columns, but you use 5\n"); + } + + Y_UNIT_TEST(GroupByFewBigCubes) { + NYql::TAstParseResult res = SqlToYql("SELECT key FROM plato.Input GROUP BY CUBE(key, subkey, key + subkey as sum), CUBE(value, value + key + subkey as total);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Unable to GROUP BY more than 64 groups, you try use 80 groups\n"); + } + + Y_UNIT_TEST(GroupByFewBigCubesWithPragmaLimit) { + NYql::TAstParseResult res = SqlToYql("PRAGMA GroupByLimit = '16'; SELECT key FROM plato.Input GROUP BY GROUPING SETS(key, subkey, key + subkey as sum), ROLLUP(value, value + key + subkey as total);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:29: Error: Unable to GROUP BY more than 16 groups, you try use 18 groups\n"); + } + + Y_UNIT_TEST(NoGroupingColumn0) { + NYql::TAstParseResult res = SqlToYql( + "select count(1), key_first, val_first, grouping(key_first, val_first, nomind) as 
group\n" + "from plato.Input group by grouping sets (cast(key as uint32) /100 as key_first, Substring(value, 1, 1) as val_first);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:71: Error: Column 'nomind' is not a grouping column\n"); + } + + Y_UNIT_TEST(NoGroupingColumn1) { + NYql::TAstParseResult res = SqlToYql("select count(1), grouping(key, value) as group_duo from plato.Input group by cube (key, subkey);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:32: Error: Column 'value' is not a grouping column\n"); + } + + Y_UNIT_TEST(EmptyAccess0) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(``));"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:73: Error: Column reference \"\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(EmptyAccess1) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (list0, list1) values (AsList(0, 1, 2), ``);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:66: Error: Column reference \"\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(UseUnknownColumnInInsert) { + NYql::TAstParseResult res = SqlToYql("insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(`test`));"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:73: Error: Column reference \"test\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(GroupByEmptyColumn) { + NYql::TAstParseResult res = SqlToYql("select count(1) from plato.Input group by ``;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:43: Error: Column name can not be empty\n"); + } + + Y_UNIT_TEST(ConvertNumberOutOfBase) { + NYql::TAstParseResult res = SqlToYql("select 0o80l;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse number from string: 0o80l, char: '8' is out of base: 8\n"); + } + + 
Y_UNIT_TEST(ConvertNumberOutOfRangeForInt64ButFitsInUint64) { + NYql::TAstParseResult res = SqlToYql("select 0xc000000000000000l;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse 13835058055282163712 as integer literal of Int64 type: value out of range for Int64\n"); + } + + Y_UNIT_TEST(ConvertNumberOutOfRangeUint64) { + NYql::TAstParseResult res = SqlToYql("select 0xc0000000000000000l;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse number from string: 0xc0000000000000000l, number limit overflow\n"); + + res = SqlToYql("select 1234234543563435151456;\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to parse number from string: 1234234543563435151456, number limit overflow\n"); + } + + Y_UNIT_TEST(ConvertNumberNegativeOutOfRange) { + NYql::TAstParseResult res = SqlToYql("select -9223372036854775808;\n" + "select -9223372036854775809;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:8: Error: Failed to parse negative integer: -9223372036854775809, number limit overflow\n"); + } + + Y_UNIT_TEST(InvaildUsageReal0) { + NYql::TAstParseResult res = SqlToYql("select .0;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "<main>:1:7: Error: extraneous input '.' expecting {"); + } + + Y_UNIT_TEST(InvaildUsageReal1) { + NYql::TAstParseResult res = SqlToYql("select .0f;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "<main>:1:7: Error: extraneous input '.' 
expecting {"); + } + + Y_UNIT_TEST(InvaildUsageWinFunctionWithoutWindow) { + NYql::TAstParseResult res = SqlToYql("select lead(key, 2) from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Failed to use window function Lead without window specification\n"); + } + + Y_UNIT_TEST(DropTableWithIfExists) { + NYql::TAstParseResult res = SqlToYql("DROP TABLE IF EXISTS plato.foo;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(TooManyErrors) { + const char* q = R"( + USE plato; + select A, B, C, D, E, F, G, H, I, J, K, L, M, N from (select b from `abc`); +)"; + + NYql::TAstParseResult res = SqlToYql(q, 10); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + R"(<main>:3:16: Error: Column A is not in source column set. Did you mean b? +<main>:3:19: Error: Column B is not in source column set. Did you mean b? +<main>:3:22: Error: Column C is not in source column set. Did you mean b? +<main>:3:25: Error: Column D is not in source column set. Did you mean b? +<main>:3:28: Error: Column E is not in source column set. Did you mean b? +<main>:3:31: Error: Column F is not in source column set. Did you mean b? +<main>:3:34: Error: Column G is not in source column set. Did you mean b? +<main>:3:37: Error: Column H is not in source column set. Did you mean b? +<main>:3:40: Error: Column I is not in source column set. Did you mean b? 
+<main>: Error: Too many issues, code: 1 +)"); + }; + + Y_UNIT_TEST(ShouldCloneBindingForNamedParameter) { + NYql::TAstParseResult res = SqlToYql(R"($f = () -> { + $value_type = TypeOf(1); + $pair_type = StructType( + TypeOf("2") AS key, + $value_type AS value + ); + + RETURN TupleType( + ListType($value_type), + $pair_type); +}; + +select FormatType($f()); +)"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(BlockedInvalidFrameBounds) { + auto check = [](const TString& frame, const TString& err) { + const TString prefix = "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n"; + NYql::TAstParseResult res = SqlToYql(prefix + frame + ")"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), err); + }; + + check("ROWS UNBOUNDED FOLLOWING", "<main>:2:5: Error: Frame cannot start from UNBOUNDED FOLLOWING\n"); + check("ROWS BETWEEN 5 PRECEDING AND UNBOUNDED PRECEDING", "<main>:2:29: Error: Frame cannot end with UNBOUNDED PRECEDING\n"); + check("ROWS BETWEEN CURRENT ROW AND 5 PRECEDING", "<main>:2:13: Error: Frame cannot start from CURRENT ROW and end with PRECEDING\n"); + check("ROWS BETWEEN 5 FOLLOWING AND CURRENT ROW", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with CURRENT ROW\n"); + check("ROWS BETWEEN 5 FOLLOWING AND 5 PRECEDING", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with PRECEDING\n"); + } + + Y_UNIT_TEST(BlockedRangeValueWithoutSingleOrderBy) { + UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM plato.Input").IsOk()); + UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) FROM plato.Input").IsOk()); + + auto res = SqlToYql("SELECT COUNT(*) OVER (RANGE 5 PRECEDING) FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:29: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n"); + + res = 
SqlToYql("SELECT COUNT(*) OVER (ORDER BY key, value RANGE 5 PRECEDING) FROM plato.Input"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n"); + } + + Y_UNIT_TEST(NoColumnsInFrameBounds) { + NYql::TAstParseResult res = SqlToYql( + "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (ROWS BETWEEN\n" + " 1 + key PRECEDING AND 2 + key FOLLOWING);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:6: Error: Column reference \"key\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(WarnOnEmptyFrameBounds) { + NYql::TAstParseResult res = SqlToYql( + "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n" + "ROWS BETWEEN 10 FOLLOWING AND 5 FOLLOWING)"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:14: Warning: Used frame specification implies empty window frame, code: 4520\n"); + } + + Y_UNIT_TEST(WarnOnRankWithUnorderedWindow) { + NYql::TAstParseResult res = SqlToYql("SELECT RANK() OVER w FROM plato.Input WINDOW w AS ()"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: Rank() is used with unordered window - all rows will be considered equal to each other, code: 4521\n"); + } + + Y_UNIT_TEST(WarnOnRankExprWithUnorderedWindow) { + NYql::TAstParseResult res = SqlToYql("SELECT RANK(key) OVER w FROM plato.Input WINDOW w AS ()"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: Rank(<expression>) is used with unordered window - the result is likely to be undefined, code: 4521\n"); + } + + Y_UNIT_TEST(AnyAsTableName) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from any;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: no viable alternative at input 'any;'\n"); + } + + Y_UNIT_TEST(IncorrectOrderOfLambdaOptionalArgs) { + 
NYql::TAstParseResult res = SqlToYql("$f = ($x?, $y)->($x + $y); select $f(1);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Non-optional argument can not follow optional one\n"); + } + + Y_UNIT_TEST(IncorrectOrderOfActionOptionalArgs) { + NYql::TAstParseResult res = SqlToYql("define action $f($x?, $y) as select $x,$y; end define; do $f(1);"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:23: Error: Non-optional argument can not follow optional one\n"); + } + + Y_UNIT_TEST(NotAllowedQuestionOnNamedNode) { + NYql::TAstParseResult res = SqlToYql("$f = 1; select $f?;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unexpected token '?' at the end of expression\n"); + } + + Y_UNIT_TEST(AnyAndCrossJoin) { + NYql::TAstParseResult res = SqlToYql("use plato; select * from any Input1 cross join Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:26: Error: ANY should not be used with Cross JOIN\n"); + + res = SqlToYql("use plato; select * from Input1 cross join any Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:44: Error: ANY should not be used with Cross JOIN\n"); + } + + Y_UNIT_TEST(AnyWithCartesianProduct) { + NYql::TAstParseResult res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from any Input1, Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:56: Error: ANY should not be used with Cross JOIN\n"); + + res = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from Input1, any Input2"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:64: Error: ANY should not be used with Cross JOIN\n"); + } + + Y_UNIT_TEST(ErrorPlainEndAsInlineActionTerminator) { + NYql::TAstParseResult res = SqlToYql( + "do begin\n" + " select 1\n" + "; end\n"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:0: Error: missing DO at 
'<EOF>'\n"); + } + + Y_UNIT_TEST(ErrorMultiWayJoinWithUsing) { + NYql::TAstParseResult res = SqlToYql( + "USE plato;\n" + "PRAGMA DisableSimpleColumns;\n" + "SELECT *\n" + "FROM Input1 AS a\n" + "JOIN Input2 AS b USING(key)\n" + "JOIN Input3 AS c ON a.key = c.key;\n" + ); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:5:24: Error: Multi-way JOINs should be connected with ON clause instead of USING clause\n" + ); + } + + Y_UNIT_TEST(RequireLabelInFlattenByWithDot) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input flatten by x.y"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:1:40: Error: Unnamed expression after FLATTEN BY is not allowed\n" + ); + } + + Y_UNIT_TEST(WarnUnnamedColumns) { + NYql::TAstParseResult res = SqlToYql( + "PRAGMA WarnUnnamedColumns;\n" + "\n" + "SELECT key, subkey, key || subkey FROM plato.Input ORDER BY subkey;\n"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:28: Warning: Autogenerated column name column2 will be used for expression, code: 4516\n"); + } + + Y_UNIT_TEST(WarnSourceColumnMismatch) { + NYql::TAstParseResult res = SqlToYql( + "insert into plato.Output (key, subkey, new_value, one_more_value) select key as Key, subkey, value, \"x\" from plato.Input;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:51: Warning: Column names in SELECT don't match column specification in parenthesis. \"key\" doesn't match \"Key\". 
\"new_value\" doesn't match \"value\", code: 4517\n"); + } + + Y_UNIT_TEST(YtCaseInsensitive) { + NYql::TAstParseResult res = SqlToYql("select * from PlatO.foo;"); + UNIT_ASSERT(res.Root); + + res = SqlToYql("use PlatO; select * from foo;"); + UNIT_ASSERT(res.Root); + } + + Y_UNIT_TEST(KikimrCaseSensitive) { + NYql::TAstParseResult res = SqlToYql("select * from PlatO.foo;", 10, "kikimr"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: Unknown cluster: PlatO\n"); + + res = SqlToYql("use PlatO; select * from foo;", 10, "kikimr"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:5: Error: Unknown cluster: PlatO\n"); + } + + Y_UNIT_TEST(DiscoveryModeForbidden) { + NYql::TAstParseResult res = SqlToYqlWithMode("insert into plato.Output select * from plato.range(\"\", Input1, Input4)", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: range is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("insert into plato.Output select * from plato.like(\"\", \"Input%\")", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: like is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("insert into plato.Output select * from plato.regexp(\"\", \"Input.\")", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: regexp is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("insert into plato.Output select * from plato.filter(\"\", ($name) -> { return find($name, \"Input\") is not null; })", NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: filter is not allowed in Discovery mode, code: 4600\n"); + + res = SqlToYqlWithMode("select Path from plato.folder(\"\") where Type == \"table\"", 
NSQLTranslation::ESqlMode::DISCOVERY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: folder is not allowed in Discovery mode, code: 4600\n"); + } + + Y_UNIT_TEST(YsonFuncWithoutArgs) { + UNIT_ASSERT(SqlToYql("SELECT Yson::SerializeText(Yson::From());").IsOk()); + } + + Y_UNIT_TEST(CanNotUseOrderByInNonLastSelectInUnionAllChain) { + auto req = "pragma AnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input order by key\n" + "union all\n" + "select * from Input order by key limit 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(CanNotUseLimitInNonLastSelectInUnionAllChain) { + auto req = "pragma AnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input limit 1\n" + "union all\n" + "select * from Input order by key limit 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n"); + } + + Y_UNIT_TEST(CanNotUseDiscardInNonFirstSelectInUnionAllChain) { + auto req = "pragma AnsiOrderByLimitInUnionAll;\n" + "use plato;\n" + "\n" + "select * from Input\n" + "union all\n" + "discard select * from Input;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:6:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n"); + } + + Y_UNIT_TEST(CanNotUseIntoResultInNonLastSelectInUnionAllChain) { + auto req = "use plato;\n" + "pragma AnsiOrderByLimitInUnionAll;\n" + "\n" + "select * from Input\n" + "union all\n" + "discard select * from Input;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:6:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n"); + } + + Y_UNIT_TEST(YsonStrictInvalidPragma) { + 
auto res = SqlToYql("pragma yson.Strict = \"wrong\";"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:22: Error: Expected 'true', 'false' or no parameter for: Strict\n"); + } + + /* TableName(): three accepted forms, the error when neither service nor current cluster is given, and warning 4525 with JOIN and with aggregation. */ Y_UNIT_TEST(WarnTableNameInSomeContexts) { + UNIT_ASSERT(SqlToYql("use plato; select TableName() from Input;").IsOk()); + UNIT_ASSERT(SqlToYql("use plato; select TableName(\"aaaa\");").IsOk()); + UNIT_ASSERT(SqlToYql("select TableName(\"aaaa\", \"yt\");").IsOk()); + + auto res = SqlToYql("select TableName() from plato.Input;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: TableName requires either service name as second argument or current cluster name\n"); + + res = SqlToYql("use plato;\n" + "select TableName() from Input1 as a join Input2 as b using(key);"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:8: Warning: TableName() may produce empty result when used in ambiguous context (with JOIN), code: 4525\n"); + + res = SqlToYql("use plato;\n" + "select SOME(TableName()), key from Input group by key;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:13: Warning: TableName() will produce empty result when used with aggregation.\n" + "Please consult documentation for possible workaround, code: 4525\n"); + } + + /* SELECT DISTINCT with HAVING but no aggregation still translates, with warning 4526. NOTE(review): Distinc in the test name looks like a typo of Distinct. */ Y_UNIT_TEST(WarnOnDistincWithHavingWithoutAggregations) { + auto res = SqlToYql("select distinct key from plato.Input having key != '0';"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Warning: The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. 
Please use WHERE instead., code: 4526\n"); + } + + /* An unknown named node inside a FLATTEN BY expression is reported as Unknown name. */ Y_UNIT_TEST(FlattenByExprWithNestedNull) { + auto res = SqlToYql("USE plato;\n" + "\n" + "SELECT * FROM (SELECT 1 AS region_id)\n" + "FLATTEN BY (\n" + " CAST($unknown(region_id) AS List<String>) AS region\n" + ")"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:10: Error: Unknown name: $unknown\n"); + } + + /* An empty backtick-quoted symbol name after the dollar sign is rejected. */ Y_UNIT_TEST(EmptySymbolNameIsForbidden) { + auto req = " $`` = 1; select $``;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:5: Error: Empty symbol name is not allowed\n"); + } + + /* Comparing with NULL via != yields warning 4529; the query 1 or null yields no issues at all. */ Y_UNIT_TEST(WarnOnBinaryOpWithNullArg) { + auto req = "select * from plato.Input where cast(key as Int32) != NULL"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:52: Warning: Binary operation != will return NULL here, code: 4529\n"); + + req = "select 1 or null"; + res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), ""); + } + + /* Column references inside the TABLESAMPLE argument are rejected. */ Y_UNIT_TEST(ErrorIfTableSampleArgUsesColumns) { + auto req = "SELECT key FROM plato.Input TABLESAMPLE BERNOULLI(MIN_OF(100.0, CAST(subkey as Int32)));"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:70: Error: Column reference \"subkey\" is not allowed in current scope\n"); + } + + /* A derived column list on a table alias (not VALUES) is rejected. */ Y_UNIT_TEST(DerivedColumnListForSelectIsNotSupportedYet) { + auto req = "SELECT a,b,c FROM plato.Input as t(x,y,z);"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:35: Error: Derived column list is only supported for VALUES\n"); + } + + /* VALUES rows of different sizes are rejected. */ Y_UNIT_TEST(ErrorIfValuesHasDifferentCountOfColumns) { + auto req = "VALUES (1,2,3), (4,5);"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: All VALUES items should have same size: expecting 3, got 2\n"); + } + + 
/* A derived column list longer than the VALUES row width is rejected. */ Y_UNIT_TEST(ErrorIfDerivedColumnSizeExceedValuesColumnCount) { + auto req = "SELECT * FROM(VALUES (1,2), (3,4)) as t(x,y,z);"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:40: Error: Derived column list size exceeds column count in VALUES\n"); + } + + /* With pragma WarnUnnamedColumns, a derived column list shorter than the VALUES row width yields warning 4516. NOTE(review): Warno in the test name looks like a typo of Warn. */ Y_UNIT_TEST(WarnoOnAutogeneratedNamesForValues) { + auto req = "PRAGMA WarnUnnamedColumns;\n" + "SELECT * FROM (VALUES (1,2,3,4), (5,6,7,8)) as t(x,y);"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:16: Warning: Autogenerated column names column2...column3 will be used here, code: 4516\n"); + } + + /* Without any pragma, ORDER BY in a non-last UNION ALL subquery is an error. */ Y_UNIT_TEST(ErrUnionAllWithOrderByWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input order by key\n" + "union all\n" + "select * from Input order by key;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n"); + } + + /* Without any pragma, LIMIT in a non-last UNION ALL subquery is an error. */ Y_UNIT_TEST(ErrUnionAllWithLimitWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input limit 10\n" + "union all\n" + "select * from Input limit 1;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n"); + } + + /* Without any pragma, INTO RESULT in a non-last UNION ALL subquery is an error. */ Y_UNIT_TEST(ErrUnionAllWithIntoResultWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input into result aaa\n" + "union all\n" + "select * from Input;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:21: Error: INTO RESULT within UNION ALL is only allowed after last subquery\n"); + } + + /* Without any pragma, DISCARD on a non-first UNION ALL subquery is an error. */ Y_UNIT_TEST(ErrUnionAllWithDiscardWithoutExplicitLegacyMode) { + auto req = "use plato;\n" + "\n" + "select * from Input\n" + "union all\n" + "discard select * from Input;"; + + auto res = SqlToYql(req); + 
UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n"); + } + + /* The ignored-ORDER-BY warning (4504) is still reported together with the unknown correlation name error. */ Y_UNIT_TEST(ErrUnionAllKeepsIgnoredOrderByWarning) { + auto req = "use plato;\n" + "\n" + "SELECT * FROM (\n" + " SELECT * FROM Input\n" + " UNION ALL\n" + " SELECT t.* FROM Input AS t ORDER BY t.key\n" + ");"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:3: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n" + "<main>:6:39: Error: Unknown correlation name: t\n"); + } + + /* ORDER BY in a named subquery is ignored with warning 4504, yet an ORDER BY column missing from an inline subselect is still an error; over plato.Input only the warning is produced. */ Y_UNIT_TEST(ErrOrderByIgnoredButCheckedForMissingColumns) { + auto req = "$src = SELECT key FROM (SELECT 1 as key, 2 as subkey) ORDER BY x; SELECT * FROM $src;"; + ExpectFailWithError(req, "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n" + "<main>:1:64: Error: Column x is not in source column set\n"); + + req = "$src = SELECT key FROM plato.Input ORDER BY x; SELECT * FROM $src;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n"); + } + + /* A non-Interval literal for TTL is rejected. */ Y_UNIT_TEST(InvalidTtlInterval) { + auto req = R"( + USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key)) + WITH (TTL = 1 On CreatedAt); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:25: Error: Literal of Interval type is expected for TTL\n" + "<main>:4:25: Error: Invalid TTL settings\n"); + } + + /* An unsupported TTL unit keyword fails with a mismatched input message. */ Y_UNIT_TEST(InvalidTtlUnit) { + auto req = R"( + USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt AS PICOSECONDS); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "mismatched input 'PICOSECONDS' expecting {MICROSECONDS, MILLISECONDS, NANOSECONDS, 
SECONDS}"); + } + + /* An unknown changefeed SINK_TYPE is rejected. */ Y_UNIT_TEST(InvalidChangefeedSink) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (SINK_TYPE = "S3", MODE = "KEYS_ONLY", FORMAT = "json") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:55: Error: Unknown changefeed sink type: S3\n"); + } + + /* An unknown changefeed setting name is rejected. */ Y_UNIT_TEST(InvalidChangefeedSettings) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (SINK_TYPE = "local", FOO = "bar") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:64: Error: Unknown changefeed setting: FOO\n"); + } + + /* INITIAL_SCAN requires a Bool literal. */ Y_UNIT_TEST(InvalidChangefeedInitialScan) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", INITIAL_SCAN = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:95: Error: Literal of Bool type is expected for INITIAL_SCAN\n"); + } + + /* VIRTUAL_TIMESTAMPS requires a Bool literal. */ Y_UNIT_TEST(InvalidChangefeedVirtualTimestamps) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", VIRTUAL_TIMESTAMPS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:101: Error: Literal of Bool type is expected for VIRTUAL_TIMESTAMPS\n"); + } + + /* RESOLVED_TIMESTAMPS requires an Interval literal. */ Y_UNIT_TEST(InvalidChangefeedResolvedTimestamps) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RESOLVED_TIMESTAMPS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:102: Error: Literal of Interval type is expected for 
RESOLVED_TIMESTAMPS\n"); + } + + /* RETENTION_PERIOD requires an Interval literal. */ Y_UNIT_TEST(InvalidChangefeedRetentionPeriod) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RETENTION_PERIOD = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:99: Error: Literal of Interval type is expected for RETENTION_PERIOD\n"); + } + + /* TOPIC_MIN_ACTIVE_PARTITIONS requires an integer literal. */ Y_UNIT_TEST(InvalidChangefeedTopicPartitions) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", TOPIC_MIN_ACTIVE_PARTITIONS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:110: Error: Literal of integer type is expected for TOPIC_MIN_ACTIVE_PARTITIONS\n"); + } + + /* AWS_REGION requires a String literal. */ Y_UNIT_TEST(InvalidChangefeedAwsRegion) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", AWS_REGION = true) + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:93: Error: Literal of String type is expected for AWS_REGION\n"); + } + + /* Over a JOIN, a GROUPING SETS key without correlation name is rejected. */ Y_UNIT_TEST(ErrJoinWithGroupingSetsWithoutCorrelationName) { + auto req = "USE plato;\n" + "\n" + "SELECT k1, k2, subkey\n" + "FROM T1 AS a JOIN T2 AS b USING (key)\n" + "GROUP BY GROUPING SETS(\n" + " (a.key as k1, b.subkey as k2),\n" + " (k1),\n" + " (subkey)\n" + ");"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:8:4: Error: Columns in grouping sets should have correlation name, error in key: subkey\n"); + } + + /* Over a JOIN, a GROUP BY key without correlation name is rejected. */ Y_UNIT_TEST(ErrJoinWithGroupByWithoutCorrelationName) { + auto req = "USE plato;\n" + "\n" + "SELECT k1, k2,\n" + " value\n" + "FROM T1 AS a JOIN T2 AS b USING (key)\n" + "GROUP BY a.key as k1, b.subkey as k2,\n" + " value;"; + 
ExpectFailWithError(req, + "<main>:7:5: Error: Columns in GROUP BY should have correlation name, error in key: value\n"); + } + + /* Without FROM: filtering, aggregation and column references (in the projection and in GROUP BY) are errors. */ Y_UNIT_TEST(ErrWithMissingFrom) { + auto req = "select 1 as key where 1 > 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:25: Error: Filtering is not allowed without FROM\n"); + + req = "select 1 + count(*);"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Aggregation is not allowed without FROM\n"); + + req = "select 1 as key, subkey + value;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:18: Error: Column reference 'subkey'\n" + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:27: Error: Column reference 'value'\n"); + + req = "select count(1) group by key;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:26: Error: Column reference 'key'\n"); + } + + /* count and lead over a named window are rejected inside a lambda and in a SELECT without FROM. */ Y_UNIT_TEST(ErrWithMissingFromForWindow) { + auto req = "$c = () -> (1 + count(1) over w);\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:9: Error: Window and aggregation functions are not allowed in this context\n" + "<main>:1:17: Error: Failed to use aggregation function Count without window specification or in wrong place\n"); + + req = "$c = () -> (1 + lead(1) over w);\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:17: Error: Window functions are not allowed in this context\n" + "<main>:1:17: Error: Failed to use window function Lead without window specification or in wrong place\n"); + + req = "select 1 + count(1) over w window w as ();"; + ExpectFailWithError(req, + "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n" + "<main>:1:12: Error: 
Failed to use aggregation function Count without window specification or in wrong place\n"); + + req = "select 1 + lead(1) over w window w as ();"; + ExpectFailWithError(req, + "<main>:1:12: Error: Window functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n"); + } + + /* Same scenarios as the named-window test, with the window specified inline after OVER. */ Y_UNIT_TEST(ErrWithMissingFromForInplaceWindow) { + auto req = "$c = () -> (1 + count(1) over ());\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:26: Error: Window and aggregation functions are not allowed in this context\n"); + + req = "$c = () -> (1 + lead(1) over (rows between unbounded preceding and current row));\n" + "select $c();"; + ExpectFailWithError(req, + "<main>:1:25: Error: Window and aggregation functions are not allowed in this context\n"); + + req = "select 1 + count(1) over ();"; + ExpectFailWithError(req, + "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use aggregation function Count without window specification or in wrong place\n"); + + req = "select 1 + lead(1) over (rows between current row and unbounded following);"; + ExpectFailWithError(req, + "<main>:1:12: Error: Window functions are not allowed without FROM\n" + "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n"); + } + + /* DISTINCT in the arguments of a UDF, of a call result, of len or of a lambda is rejected. */ Y_UNIT_TEST(ErrDistinctInWrongPlace) { + auto req = "select Some::Udf(distinct key) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:18: Error: DISTINCT can only be used in aggregation functions\n"); + req = "select sum(key)(distinct foo) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:17: Error: DISTINCT can only be used in aggregation functions\n"); + + req = "select len(distinct foo) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:8: Error: DISTINCT can only be used in aggregation functions\n"); + + req = "$foo = ($x) -> ($x); select 
$foo(distinct key) from plato.Input;"; + ExpectFailWithError(req, + "<main>:1:34: Error: DISTINCT can only be used in aggregation functions\n"); + } + + /* Inline YQL:: text must parse to an AST root with exactly one child; zero and two children are errors. */ Y_UNIT_TEST(ErrForNotSingleChildInInlineAST) { + ExpectFailWithError("select YQL::\"\"", + "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n"); + ExpectFailWithError("select YQL::@@ \t@@", + "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n"); + auto req = "$lambda = YQL::@@(lambda '(x)(+ x x)) (lambda '(y)(+ y y))@@;\n" + "select ListMap([1, 2, 3], $lambda);"; + ExpectFailWithError(req, + "<main>:1:11: Error: Failed to parse YQL: expecting AST root node with single child, but got 2\n"); + } + + /* Empty column name in WITHOUT: the plain helper reports the double-quoted form as a string literal, the ANSI-lexer helper reports both forms as an empty column name. */ Y_UNIT_TEST(ErrEmptyColumnName) { + ExpectFailWithError("select * without \"\" from plato.Input", + "<main>:1:18: Error: String literal can not be used here\n"); + + ExpectFailWithError("select * without `` from plato.Input;", + "<main>:1:18: Error: Empty column name is not allowed\n"); + + ExpectFailWithErrorForAnsiLexer("select * without \"\" from plato.Input", + "<main>:1:18: Error: Empty column name is not allowed\n"); + + ExpectFailWithErrorForAnsiLexer("select * without `` from plato.Input;", + "<main>:1:18: Error: Empty column name is not allowed\n"); + } + + /* TableRows must be called with no arguments. */ Y_UNIT_TEST(ErrOnNonZeroArgumentsForTableRows) { + ExpectFailWithError("$udf=\"\";process plato.Input using $udf(TableRows(k))", + "<main>:1:40: Error: TableRows requires exactly 0 arguments\n"); + } + + /* GROUP BY over an aggregated value is rejected. */ Y_UNIT_TEST(ErrGroupByWithAggregationFunctionAndDistinctExpr) { + ExpectFailWithError("select * from plato.Input group by count(distinct key|key)", + "<main>:1:36: Error: Unable to GROUP BY aggregated values\n"); + } + + // FIXME: check if we can get old behaviour +#if 0 + Y_UNIT_TEST(ErrWithSchemaWithColumnsWithoutType) { + ExpectFailWithError("select * from plato.Input with COLUMNs", + "<main>:1:32: Error: Expected type after COLUMNS\n" + "<main>:1:32: Error: Failed to 
parse table hints\n"); + + ExpectFailWithError("select * from plato.Input with scheMa", + "<main>:1:32: Error: Expected type after SCHEMA\n" + "<main>:1:32: Error: Failed to parse table hints\n"); + } +#endif + + Y_UNIT_TEST(ErrCollectPreaggregatedInListLiteralWithoutFrom) { + ExpectFailWithError("SELECT([VARIANCE(DISTINCT[])])", + "<main>:1:1: Error: Column references are not allowed without FROM\n" + "<main>:1:9: Error: Column reference '_yql_preagg_Variance0'\n"); + } + + Y_UNIT_TEST(ErrGroupBySmartParenAsTuple) { + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (k, v,)", + "<main>:1:41: Error: Unexpected trailing comma in grouping elements list\n"); + } + + Y_UNIT_TEST(HandleNestedSmartParensInGroupBy) { + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (+() as k)", + "<main>:1:37: Error: Unable to GROUP BY constant expression\n"); + } + + Y_UNIT_TEST(ErrRenameWithAddColumn) { + ExpectFailWithError("USE plato; ALTER TABLE table RENAME TO moved, ADD COLUMN addc uint64", + "<main>:1:40: Error: RENAME TO can not be used together with another table action\n"); + } + + Y_UNIT_TEST(ErrAddColumnAndRename) { + // FIXME: fix positions in ALTER TABLE + ExpectFailWithError("USE plato; ALTER TABLE table ADD COLUMN addc uint64, RENAME TO moved", + "<main>:1:46: Error: RENAME TO can not be used together with another table action\n"); + } + + Y_UNIT_TEST(InvalidUuidValue) { + ExpectFailWithError("SELECT Uuid('123e4567ae89ba12d3aa456a426614174ab0')", + "<main>:1:8: Error: Invalid value \"123e4567ae89ba12d3aa456a426614174ab0\" for type Uuid\n"); + ExpectFailWithError("SELECT Uuid('123e4567ae89b-12d3-a456-426614174000')", + "<main>:1:8: Error: Invalid value \"123e4567ae89b-12d3-a456-426614174000\" for type Uuid\n"); + } + + Y_UNIT_TEST(WindowFunctionWithoutOver) { + ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input", + "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n"); + 
ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input GROUP BY key", + "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n"); + } + + /* CREATE USER and ALTER USER fail when no cluster was selected with USE. */ Y_UNIT_TEST(CreateAlterUserWithoutCluster) { + ExpectFailWithError("\n CREATE USER user ENCRYPTED PASSWORD 'foobar';", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n"); + ExpectFailWithError("ALTER USER CURRENT_USER RENAME TO $foo;", "<main>:1:1: Error: USE statement is missing - no default cluster is selected\n"); + } + + /* GRANT and REVOKE fail when no cluster was selected with USE. */ Y_UNIT_TEST(ModifyPermissionsWithoutCluster) { + ExpectFailWithError("\n GRANT CONNECT ON `/Root` TO user;", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n"); + ExpectFailWithError("\n REVOKE MANAGE ON `/Root` FROM user;", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n"); + } + + /* System role names are rejected in CREATE USER and in RENAME TO, while DROP GROUP listing current_User translates. */ Y_UNIT_TEST(ReservedRoleNames) { + ExpectFailWithError("USE plato; CREATE USER current_User;", "<main>:1:24: Error: System role CURRENT_USER can not be used here\n"); + ExpectFailWithError("USE plato; ALTER USER current_User RENAME TO Current_role", "<main>:1:46: Error: System role CURRENT_ROLE can not be used here\n"); + UNIT_ASSERT(SqlToYql("USE plato; DROP GROUP IF EXISTS a, b, c, current_User;").IsOk()); + } + + /* An unknown named node is still reported after pragma ClassicDivision is set to false. */ Y_UNIT_TEST(DisableClassicDivisionWithError) { + ExpectFailWithError("pragma ClassicDivision = 'false'; select $foo / 30;", "<main>:1:42: Error: Unknown name: $foo\n"); + } + + /* Aggregating an already aggregated DISTINCT expression is rejected. NOTE(review): Agrregated in the test name looks like a typo of Aggregated. */ Y_UNIT_TEST(AggregationOfAgrregatedDistinctExpr) { + ExpectFailWithError("select sum(sum(distinct x + 1)) from plato.Input", "<main>:1:12: Error: Aggregation of aggregated values is forbidden\n"); + } + + /* The hint foo placed after SELECT yields warning 4534 and no other issue text is expected. */ Y_UNIT_TEST(WarnForUnusedSqlHint) { + NYql::TAstParseResult res = SqlToYql("select * from plato.Input1 as a join /*+ merge() */ plato.Input2 as b using(key);\n" + "select --+ foo(bar)\n" + " 1;"); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:23: Warning: Hint foo 
will not be used, code: 4534\n"); + } + + /* A positional schema written as type AS column on a cluster mapped to the S3 provider yields deprecation warning 4535. */ Y_UNIT_TEST(WarnForDeprecatedSchema) { + NSQLTranslation::TTranslationSettings settings; + settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName; + NYql::TAstParseResult res = SqlToYql("select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);", settings); + UNIT_ASSERT(res.Root); + UNIT_ASSERT_STRING_CONTAINS(res.Issues.ToString(), "Warning: Deprecated syntax for positional schema: please use 'column type' instead of 'type AS column', code: 4535\n"); + } + + /* A column reference passed as the extra AggregationFactory argument for MAX_BY is rejected. */ Y_UNIT_TEST(ErrorOnColumnNameInMaxByLimit) { + ExpectFailWithError( + "SELECT AGGREGATE_BY(AsTuple(value, key), AggregationFactory(\"MAX_BY\", subkey)) FROM plato.Input;", + "<main>:1:42: Error: Source does not allow column references\n" + "<main>:1:71: Error: Column reference 'subkey'\n"); + } + + /* LIBRARY mode: an unused top-level named subquery is accepted, but one referenced from an exported subquery is an error. */ Y_UNIT_TEST(ErrorInLibraryWithTopLevelNamedSubquery) { + TString withUnusedSubq = "$unused = select max(key) from plato.Input;\n" + "\n" + "define subquery $foo() as\n" + " $count = select count(*) from plato.Input;\n" + " select * from plato.Input limit $count / 2;\n" + "end define;\n" + "export $foo;\n"; + UNIT_ASSERT(SqlToYqlWithMode(withUnusedSubq, NSQLTranslation::ESqlMode::LIBRARY).IsOk()); + + TString withTopLevelSubq = "$count = select count(*) from plato.Input;\n" + "\n" + "define subquery $foo() as\n" + " select * from plato.Input limit $count / 2;\n" + "end define;\n" + "export $foo;\n"; + auto res = SqlToYqlWithMode(withTopLevelSubq, NSQLTranslation::ESqlMode::LIBRARY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Named subquery can not be used as a top level statement in libraries\n"); + } + + /* A faulty SessionWindow init lambda is expected to be reported as the Min argument-count error; the SessionStart and SessionState calls add nothing to the expected text. */ Y_UNIT_TEST(SessionStartAndSessionStateShouldSurviveSessionWindowArgsError){ + TString query = R"( + $init = ($_row) -> (min(1, 2)); -- error: aggregation func min() can not be used here + $calculate = ($_row, $_state) -> (1); + $update = ($_row, $_state) -> (2); + SELECT + SessionStart() over w as 
session_start, + SessionState() over w as session_state, + FROM plato.Input as t + WINDOW w AS ( + PARTITION BY user, SessionWindow(ts + 1, $init, $update, $calculate) + ) + )"; + ExpectFailWithError(query, "<main>:2:33: Error: Aggregation function Min requires exactly 1 argument(s), given: 2\n"); + } +} + +/* Helper: translates req, requires a non-null root, and requires the whole issue text to be the single unused-symbol warning (code 4527) for symbol at row:col. */ void CheckUnused(const TString& req, const TString& symbol, unsigned row, unsigned col) { + auto res = SqlToYql(req); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), TStringBuilder() << "<main>:" << row << ":" << col << ": Warning: Symbol " << symbol << " is not used, code: 4527\n"); +} + +/* Warning 4527 for symbols that are defined but never used. */ Y_UNIT_TEST_SUITE(WarnUnused) { + /* An action or a subquery that is defined but never invoked. */ Y_UNIT_TEST(ActionOrSubquery) { + TString req = " $a()\n" + "as select 1;\n" + "end define;\n" + "\n" + "select 1;"; + CheckUnused("define action\n" + req, "$a", 2, 3); + CheckUnused("define subquery\n" + req, "$a", 2, 3); + } + + /* An imported symbol that is never used; with an alias the warning names the alias. */ Y_UNIT_TEST(Import) { + TString req = "import lib1 symbols\n" + " $sqr;\n" + "select 1;"; + CheckUnused(req, "$sqr", 2, 3); + + req = "import lib1 symbols\n" + " $sqr as\n" + " $sq;\n" + "select 1;"; + CheckUnused(req, "$sq", 3, 5); + } + + /* Named node assignments: a shadowed duplicate, an unused tuple element, and a value overwritten before use. */ Y_UNIT_TEST(NamedNodeStatement) { + TString req = " $a, $a = AsTuple(1, 2);\n" + "select $a;"; + CheckUnused(req, "$a", 1, 2); + req = "$a, $b = AsTuple(1, 2);\n" + "select $a;"; + CheckUnused(req, "$b", 1, 6); + CheckUnused(" $a = 1; $a = 2; select $a;", "$a", 1, 2); + } + + /* A declared parameter that is never used. */ Y_UNIT_TEST(Declare) { + CheckUnused("declare $a as String;select 1;", "$a", 1, 9); + } + + /* An action parameter that is never used in the body. */ Y_UNIT_TEST(ActionParams) { + TString req = "define action $a($x, $y) as\n" + " select $x;\n" + "end define;\n" + "\n" + "do $a(1,2);"; + CheckUnused(req, "$y", 1, 22); + } + + /* A subquery parameter that is never used in the body. */ Y_UNIT_TEST(SubqueryParams) { + TString req = "use plato;\n" + "define subquery $q($name, $x) as\n" + " select * from $name;\n" + "end define;\n" + "\n" + "select * from $q(\"Input\", 1);"; + CheckUnused(req, "$x", 2, 27); + } + + /* A FOR loop variable that is never used. */ Y_UNIT_TEST(For) { + TString req = "define action $a() as\n" + " select 1;\n" + "end define;\n" + 
"\n" + "for $i in ListFromRange(1, 10)\n" + "do $a();"; + CheckUnused(req, "$i", 5, 5); + } + + /* A lambda parameter that is never used. */ Y_UNIT_TEST(LambdaParams) { + TString req = "$lambda = ($x, $y) -> ($x);\n" + "select $lambda(1, 2);"; + CheckUnused(req, "$y", 1, 16); + } + + /* Unused or overwritten named nodes inside a lambda body. */ Y_UNIT_TEST(InsideLambdaBody) { + TString req = "$lambda = () -> {\n" + " $x = 1; return 1;\n" + "};\n" + "select $lambda();"; + CheckUnused(req, "$x", 2, 3); + req = "$lambda = () -> {\n" + " $x = 1; $x = 2; return $x;\n" + "};\n" + "select $lambda();"; + CheckUnused(req, "$x", 2, 3); + } + + /* Unused or overwritten named nodes inside an action body. */ Y_UNIT_TEST(InsideAction) { + TString req = "define action $a() as\n" + " $x = 1; select 1;\n" + "end define;\n" + "\n" + "do $a();"; + CheckUnused(req, "$x", 2, 3); + req = "define action $a() as\n" + " $x = 1; $x = 2; select $x;\n" + "end define;\n" + "\n" + "do $a();"; + CheckUnused(req, "$x", 2, 3); + } + + /* With 4527 promoted to an error, an outer action parameter used only in a nested action must not trigger it. */ Y_UNIT_TEST(NoWarnOnNestedActions) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "define action $action($b) as\n" + " define action $aaa() as\n" + " select $b;\n" + " end define;\n" + " do $aaa();\n" + "end define;\n" + "\n" + "do $action(1);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + /* With 4527 promoted to an error, a named node used only after a subquery definition must not trigger it. */ Y_UNIT_TEST(NoWarnForUsageAfterSubquery) { + auto req = "use plato;\n" + "pragma warning(\"error\", \"4527\");\n" + "\n" + "$a = 1;\n" + "\n" + "define subquery $q($table) as\n" + " select * from $table;\n" + "end define;\n" + "\n" + "select * from $q(\"Input\");\n" + "select $a;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } +} + +/* Rules for the anonymous name $_ : where it is forbidden and where it suppresses the unused-symbol check. */ Y_UNIT_TEST_SUITE(AnonymousNames) { + /* Reading $_ is an error, also when written with backticks. */ Y_UNIT_TEST(ReferenceAnonymousVariableIsForbidden) { + auto req = "$_ = 1; select $_;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Unable to reference anonymous name $_\n"); + + req = "$`_` = 1; select $`_`;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unable to reference anonymous name $_\n"); + } + + /* $_ cannot be used in DECLARE. */ Y_UNIT_TEST(Declare) { + auto req = "declare 
$_ as String;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:9: Error: Can not use anonymous name '$_' in DECLARE statement\n"); + } + + /* $_ cannot name an action or a subquery. */ Y_UNIT_TEST(ActionSubquery) { + auto req = "define action $_() as select 1; end define;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: Can not use anonymous name '$_' as ACTION name\n"); + + req = "define subquery $_() as select 1; end define;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Can not use anonymous name '$_' as SUBQUERY name\n"); + } + + /* $_ cannot be an import alias. */ Y_UNIT_TEST(Import) { + auto req = "import lib symbols $sqr as $_;"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Can not import anonymous name $_\n"); + } + + /* $_ cannot be exported from a library. */ Y_UNIT_TEST(Export) { + auto req = "export $_;"; + auto res = SqlToYqlWithMode(req, NSQLTranslation::ESqlMode::LIBRARY); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Can not export anonymous name $_\n"); + } + + /* With 4527 promoted to an error, repeated $_ action parameters are accepted. */ Y_UNIT_TEST(AnonymousInActionArgs) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "define action $a($_, $y, $_) as\n" + " select $y;\n" + "end define;\n" + "\n" + "do $a(1,2,3);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + /* With 4527 promoted to an error, repeated $_ subquery parameters are accepted. */ Y_UNIT_TEST(AnonymousInSubqueryArgs) { + auto req = "use plato;\n" + "pragma warning(\"error\", \"4527\");\n" + "define subquery $q($_, $y, $_) as\n" + " select * from $y;\n" + "end define;\n" + "\n" + "select * from $q(1,\"Input\",3);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + /* With 4527 promoted to an error, repeated $_ lambda parameters are accepted. */ Y_UNIT_TEST(AnonymousInLambdaArgs) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "$lambda = ($_, $x, $_) -> ($x);\n" + "select $lambda(1,2,3);"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + /* With 4527 promoted to an error, $_ as an EVALUATE FOR loop variable is accepted. */ Y_UNIT_TEST(AnonymousInFor) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "evaluate for $_ in ListFromRange(1, 10) do 
begin select 1; end do;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } + + /* With 4527 promoted to an error, assigning to $_ (alone or in a tuple unpack) is accepted. */ Y_UNIT_TEST(Assignment) { + auto req = "pragma warning(\"error\", \"4527\");\n" + "$_ = 1;\n" + "$_, $x, $_ = AsTuple(1,2,3);\n" + "select $x;"; + UNIT_ASSERT(SqlToYql(req).IsOk()); + } +} + +/* Parsing and translation of JSON_VALUE. */ Y_UNIT_TEST_SUITE(JsonValue) { + /* JSON_VALUE with only the JSON argument is a syntax error. */ Y_UNIT_TEST(JsonValueArgumentCount) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json));"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: mismatched input ')' expecting ','\n"); + } + + /* The JsonPath argument must be a string literal, not a named node. */ Y_UNIT_TEST(JsonValueJsonPathMustBeLiteralString) { + NYql::TAstParseResult res = SqlToYql("$jsonPath = \"strict $.key\"; select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), $jsonPath);"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Error: mismatched input '$' expecting STRING_VALUE\n"); + } + + /* The generated JsonValue line carries the path literal, SafeCast and the Json data type, and JsonValue occurs exactly once. */ Y_UNIT_TEST(JsonValueTranslation) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\");"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["JsonValue"]); + } + + /* For each RETURNING type in the list, the matched line must also contain that DataType. */ Y_UNIT_TEST(JsonValueReturningSection) { + for (const auto& typeName : {"Bool", "Int64", "Double", "String"}) { + NYql::TAstParseResult res = SqlToYql( + TStringBuilder() << "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" RETURNING " << typeName << ");" + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\"")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(TStringBuilder() << "DataType '" << typeName)); + }; + + TWordCountHive elementStat({typeName}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat[typeName] > 0); + } + } + + /* An unknown type name after RETURNING is rejected. */ Y_UNIT_TEST(JsonValueInvalidReturningType) { + NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{'key': 1238}@@ as Json), 'strict $.key' RETURNING invalid);"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Unknown simple type 'invalid'\n"); + } + + /* json_value and returning remain usable as named node names and as column aliases. */ Y_UNIT_TEST(JsonValueAndReturningInExpressions) { + NYql::TAstParseResult res = SqlToYql( + "USE plato\n;" + "$json_value = \"some string\";\n" + "SELECT $json_value;\n" + "SELECT 1 as json_value;\n" + "SELECT $json_value as json_value;\n" + "$returning = \"another string\";\n" + "SELECT $returning;\n" + "SELECT 1 as returning;\n" + "SELECT $returning as returning;\n" + ); + + UNIT_ASSERT(res.Root); + } + + /* Every pairing of the listed ON EMPTY and ON ERROR handlers must translate; each pair.first is the SQL clause and pair.second the text expected in the generated line. */ Y_UNIT_TEST(JsonValueValidCaseHandlers) { + const TVector<std::pair<TString, TString>> testCases = { + {"", "'DefaultValue (Null)"}, + {"NULL", "'DefaultValue (Null)"}, + {"ERROR", "'Error (Null)"}, + {"DEFAULT 123", "'DefaultValue (Int32 '\"123\")"}, + }; + + for (const auto& onEmpty : testCases) { + for (const auto& onError : testCases) { + TStringBuilder query; + query << "$json = CAST(@@{\"key\": 1238}@@ as Json);\n" + << "SELECT JSON_VALUE($json, \"strict $.key\""; + if (!onEmpty.first.empty()) { + query << " " << onEmpty.first << " ON EMPTY"; + } + if (!onError.first.empty()) { + query << " " << onError.first << " ON ERROR"; + } + query << ");\n"; + + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) 
{ + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(onEmpty.second + " " + onError.second)); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonValue"] > 0); + } + } + } + + Y_UNIT_TEST(JsonValueTooManyCaseHandlers) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON ERROR NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON EMPTY and/or 1 ON ERROR clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueTooManyOnEmpty) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON EMPTY clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueTooManyOnError) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON ERROR);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: Only 1 ON ERROR clause is expected\n" + ); + } + + Y_UNIT_TEST(JsonValueOnEmptyAfterOnError) { + NYql::TAstParseResult res = SqlToYql( + "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON EMPTY);\n" + ); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF( + Err2Str(res), + "<main>:1:52: Error: ON EMPTY clause must be before ON ERROR clause\n" + ); + } + + Y_UNIT_TEST(JsonValueNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_VALUE(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 
'Json)))")); + }; + + TWordCountHive elementStat({"JsonValue"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonValue"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonExists) { + Y_UNIT_TEST(JsonExistsValidHandlers) { + const TVector<std::pair<TString, TString>> testCases = { + {"", "(Just (Bool '\"false\"))"}, + {"TRUE ON ERROR", "(Just (Bool '\"true\"))"}, + {"FALSE ON ERROR", "(Just (Bool '\"false\"))"}, + {"UNKNOWN ON ERROR", "(Nothing (OptionalType (DataType 'Bool)))"}, + // NOTE: in this case we expect arguments of JsonExists callable to end immediately + // after variables. This parenthesis at the end of the expression is left on purpose + {"ERROR ON ERROR", "(Utf8 '\"strict $.key\") (JsonVariables))"}, + }; + + for (const auto& item : testCases) { + NYql::TAstParseResult res = SqlToYql( + TStringBuilder() << R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT JSON_EXISTS($json, "strict $.key" )" << item.first << ");\n" + ); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(item.second)); + }; + + TWordCountHive elementStat({"JsonExists"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonExists"] > 0); + } + } + + Y_UNIT_TEST(JsonExistsInvalidHandler) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + $default = false; + SELECT JSON_EXISTS($json, "strict $.key" $default ON ERROR); + )"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:53: Error: mismatched input '$' expecting {')', ERROR, FALSE, TRUE, UNKNOWN}\n"); + } + + Y_UNIT_TEST(JsonExistsNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_EXISTS(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, 
line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + TWordCountHive elementStat({"JsonExists"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonExists"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonQuery) { + Y_UNIT_TEST(JsonQueryValidHandlers) { + using TTestSuite = const TVector<std::pair<TString, TString>>; + TTestSuite wrapCases = { + {"", "'NoWrap"}, + {"WITHOUT WRAPPER", "'NoWrap"}, + {"WITHOUT ARRAY WRAPPER", "'NoWrap"}, + {"WITH WRAPPER", "'Wrap"}, + {"WITH ARRAY WRAPPER", "'Wrap"}, + {"WITH UNCONDITIONAL WRAPPER", "'Wrap"}, + {"WITH UNCONDITIONAL ARRAY WRAPPER", "'Wrap"}, + {"WITH CONDITIONAL WRAPPER", "'ConditionalWrap"}, + {"WITH CONDITIONAL ARRAY WRAPPER", "'ConditionalWrap"}, + }; + TTestSuite handlerCases = { + {"", "'Null"}, + {"ERROR", "'Error"}, + {"NULL", "'Null"}, + {"EMPTY ARRAY", "'EmptyArray"}, + {"EMPTY OBJECT", "'EmptyObject"}, + }; + + for (const auto& wrap : wrapCases) { + for (const auto& onError : handlerCases) { + for (const auto& onEmpty : handlerCases) { + TStringBuilder query; + query << R"($json = CAST(@@{"key": [123]}@@ as Json); + SELECT JSON_QUERY($json, "strict $.key" )" << wrap.first; + if (!onEmpty.first.empty()) { + if (wrap.first.StartsWith("WITH ")) { + continue; + } + query << " " << onEmpty.first << " ON EMPTY"; + } + if (!onError.first.empty()) { + query << " " << onError.first << " ON ERROR"; + } + query << ");\n"; + + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + const TString args = TStringBuilder() << wrap.second << " " << onEmpty.second << " " << onError.second; + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(args)); + }; + + Cout << wrap.first << " " << onEmpty.first << " " << onError.first << Endl; + + TWordCountHive elementStat({"JsonQuery"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonQuery"] > 0); + } + } + } 
+ } + + Y_UNIT_TEST(JsonQueryOnEmptyWithWrapper) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT JSON_QUERY($json, "strict $" WITH ARRAY WRAPPER EMPTY ARRAY ON EMPTY); + )"); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:38: Error: ON EMPTY is prohibited because WRAPPER clause is specified\n"); + } + + Y_UNIT_TEST(JsonQueryNullInput) { + NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_QUERY(NULL, "strict $.key");)"); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))")); + }; + + TWordCountHive elementStat({"JsonQuery"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonQuery"] > 0); + } +} + +Y_UNIT_TEST_SUITE(JsonPassing) { + Y_UNIT_TEST(SupportedVariableTypes) { + const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; + + for (const auto& function : functions) { + const auto query = Sprintf(R"( + pragma CompactNamedExprs; + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT %s( + $json, + "strict $.key" + PASSING + "string" as var1, + 1.234 as var2, + CAST(1 as Int64) as var3, + true as var4, + $json as var5 + ))", + function.data() + ); + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"string")))"), "Cannot find `var1`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var2" (Double '"1.234")))"), "Cannot find `var2`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (SafeCast (Int32 '"1") (DataType 'Int64))))"), "Cannot find `var3`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var4" (Bool '"true")))"), 
"Cannot find `var4`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var5" namedexprnode0))"), "Cannot find `var5`"); + }; + + TWordCountHive elementStat({"JsonVariables"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonVariables"] > 0); + } + } + + Y_UNIT_TEST(ValidVariableNames) { + const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"}; + + for (const auto& function : functions) { + const auto query = Sprintf(R"( + $json = CAST(@@{"key": 1238}@@ as Json); + SELECT %s( + $json, + "strict $.key" + PASSING + "one" as var1, + "two" as "VaR2", + "three" as `var3`, + "four" as VaR4 + ))", + function.data() + ); + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"one")))"), "Cannot find `var1`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR2" (String '"two")))"), "Cannot find `VaR2`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (String '"three")))"), "Cannot find `var3`"); + UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR4" (String '"four")))"), "Cannot find `VaR4`"); + }; + + TWordCountHive elementStat({"JsonVariables"}); + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT(elementStat["JsonVariables"] > 0); + } + } +} + +Y_UNIT_TEST_SUITE(MigrationToJsonApi) { + Y_UNIT_TEST(WarningOnDeprecatedJsonUdf) { + NYql::TAstParseResult res = SqlToYql(R"( + $json = CAST(@@{"key": 1234}@@ as Json); + SELECT Json::Parse($json); + )"); + + UNIT_ASSERT(res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:26: Warning: Json UDF is deprecated. 
Please use JSON API instead, code: 4506\n"); + } +} + +Y_UNIT_TEST_SUITE(AnsiIdentsNegative) { + Y_UNIT_TEST(EnableAnsiLexerFromRequestSpecialComments) { + auto req = "\n" + "\t --!ansi_lexer \n" + "-- Some comment\n" + "-- another comment\n" + "pragma SimpleColumns;\n" + "\n" + "select 1, '''' as empty;"; + + auto res = SqlToYql(req); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(AnsiLexerShouldNotBeEnabledHere) { + auto req = "$str = '\n" + "--!ansi_lexer\n" + "--!syntax_v1\n" + "';\n" + "\n" + "select 1, $str, \"\" as empty;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + } + + Y_UNIT_TEST(DoubleQuotesInDictsTuplesOrLists) { + auto req = "$d = { 'a': 1, \"b\": 2, 'c': 3,};"; + + auto res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Column reference \"b\" is not allowed in current scope\n"); + + req = "$t = (1, 2, \"a\");"; + + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:13: Error: Column reference \"a\" is not allowed in current scope\n"); + + req = "$l = ['a', 'b', \"c\"];"; + + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Column reference \"c\" is not allowed in current scope\n"); + } + + Y_UNIT_TEST(MultilineComments) { + auto req = "/*/**/ select 1;"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + req = "/*\n" + "--/*\n" + "*/ select 1;"; + res = SqlToYql(req); + UNIT_ASSERT(res.Root); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + req = "/*\n" + "/*\n" + "--*/\n" + "*/ select 1;"; + res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:0: Error: mismatched input 
'*' expecting {';', '(', '$', ALTER, ANALYZE, BACKUP, COMMIT, CREATE, DECLARE, DEFINE, DELETE, DISCARD, DO, DROP, EVALUATE, EXPLAIN, EXPORT, FOR, FROM, GRANT, IF, IMPORT, INSERT, PARALLEL, PRAGMA, PROCESS, REDUCE, REPLACE, RESTORE, REVOKE, ROLLBACK, SELECT, UPDATE, UPSERT, USE, VALUES}\n"); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.Root); + } +} + +Y_UNIT_TEST_SUITE(AnsiOptionalAs) { + Y_UNIT_TEST(OptionalAsInProjection) { + UNIT_ASSERT(SqlToYql("PRAGMA AnsiOptionalAs; SELECT a b, c FROM plato.Input;").IsOk()); + ExpectFailWithError("PRAGMA DisableAnsiOptionalAs;\n" + "SELECT a b, c FROM plato.Input;", + "<main>:2:10: Error: Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility\n"); + } + + Y_UNIT_TEST(OptionalAsWithKeywords) { + UNIT_ASSERT(SqlToYql("PRAGMA AnsiOptionalAs; SELECT a type, b data, c source FROM plato.Input;").IsOk()); + } +} + +Y_UNIT_TEST_SUITE(SessionWindowNegative) { + Y_UNIT_TEST(SessionWindowWithoutSource) { + ExpectFailWithError("SELECT 1 + SessionWindow(ts, 32);", + "<main>:1:12: Error: SessionWindow requires data source\n"); + } + + Y_UNIT_TEST(SessionWindowInProjection) { + ExpectFailWithError("SELECT 1 + SessionWindow(ts, 32) from plato.Input;", + "<main>:1:12: Error: SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression\n"); + } + + Y_UNIT_TEST(SessionWindowWithNonConstSecondArg) { + ExpectFailWithError( + "SELECT key, session_start FROM plato.Input\n" + "GROUP BY SessionWindow(ts, 32 + subkey) as session_start, key;", + + "<main>:2:10: Error: Source does not allow column references\n" + "<main>:2:33: Error: Column reference 'subkey'\n"); + } + + Y_UNIT_TEST(SessionWindowWithWrongNumberOfArgs) { + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY SessionWindow()", + "<main>:1:36: Error: SessionWindow requires either two or four arguments\n"); + ExpectFailWithError("SELECT * FROM plato.Input GROUP BY SessionWindow(key, subkey, 100)", + 
"<main>:1:36: Error: SessionWindow requires either two or four arguments\n"); + } + + Y_UNIT_TEST(DuplicateSessionWindow) { + ExpectFailWithError( + "SELECT\n" + " *\n" + "FROM plato.Input\n" + "GROUP BY\n" + " SessionWindow(ts, 10),\n" + " user,\n" + " SessionWindow(ts, 20)\n" + ";", + + "<main>:7:5: Error: Duplicate session window specification:\n" + "<main>:5:5: Error: Previous session window is declared here\n"); + + ExpectFailWithError( + "SELECT\n" + " MIN(key) over w\n" + "FROM plato.Input\n" + "WINDOW w AS (\n" + " PARTITION BY SessionWindow(ts, 10), user,\n" + " SessionWindow(ts, 20)\n" + ");", + + "<main>:6:5: Error: Duplicate session window specification:\n" + "<main>:5:18: Error: Previous session window is declared here\n"); + } + + Y_UNIT_TEST(SessionStartStateWithoutSource) { + ExpectFailWithError("SELECT 1 + SessionStart();", + "<main>:1:12: Error: SessionStart requires data source\n"); + ExpectFailWithError("SELECT 1 + SessionState();", + "<main>:1:12: Error: SessionState requires data source\n"); + } + + Y_UNIT_TEST(SessionStartStateWithoutGroupByOrWindow) { + ExpectFailWithError("SELECT 1 + SessionStart() from plato.Input;", + "<main>:1:12: Error: SessionStart can not be used without aggregation by SessionWindow\n"); + ExpectFailWithError("SELECT 1 + SessionState() from plato.Input;", + "<main>:1:12: Error: SessionState can not be used without aggregation by SessionWindow\n"); + } + + Y_UNIT_TEST(SessionStartStateWithGroupByWithoutSession) { + ExpectFailWithError("SELECT 1 + SessionStart() from plato.Input group by user;", + "<main>:1:12: Error: SessionStart can not be used here: SessionWindow specification is missing in GROUP BY\n"); + ExpectFailWithError("SELECT 1 + SessionState() from plato.Input group by user;", + "<main>:1:12: Error: SessionState can not be used here: SessionWindow specification is missing in GROUP BY\n"); + } + + Y_UNIT_TEST(SessionStartStateWithoutOverWithWindowWithoutSession) { + ExpectFailWithError("SELECT 1 + 
SessionStart(), MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionStart can not be used without aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n"); + ExpectFailWithError("SELECT 1 + SessionState(), MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionState can not be used without aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n"); + } + + Y_UNIT_TEST(SessionStartStateWithWindowWithoutSession) { + ExpectFailWithError("SELECT 1 + SessionStart() over w, MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionStart can not be used with window w: SessionWindow specification is missing in PARTITION BY\n"); + ExpectFailWithError("SELECT 1 + SessionState() over w, MIN(key) over w from plato.Input window w as ()", + "<main>:1:12: Error: SessionState can not be used with window w: SessionWindow specification is missing in PARTITION BY\n"); + } + + Y_UNIT_TEST(SessionStartStateWithSessionedWindow) { + ExpectFailWithError("SELECT 1 + SessionStart(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ", + "<main>:1:12: Error: SessionStart can not be used here: SessionWindow specification is missing in GROUP BY. Maybe you forgot to add OVER `window_name`?\n"); + ExpectFailWithError("SELECT 1 + SessionState(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ", + "<main>:1:12: Error: SessionState can not be used here: SessionWindow specification is missing in GROUP BY. 
Maybe you forgot to add OVER `window_name`?\n"); + } + + Y_UNIT_TEST(AggregationBySessionStateIsNotSupportedYet) { + ExpectFailWithError("SELECT SOME(1 + SessionState()), key from plato.Input group by key, SessionWindow(ts, 1);", + "<main>:1:17: Error: SessionState with GROUP BY is not supported yet\n"); + } + + Y_UNIT_TEST(SessionWindowInRtmr) { + NYql::TAstParseResult res = SqlToYql( + "SELECT * FROM plato.Input GROUP BY SessionWindow(ts, 10);", + 10, TString(NYql::RtmrProviderName)); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:54: Error: Streaming group by query must have a hopping window specification.\n"); + + res = SqlToYql(R"( + SELECT key, SUM(value) AS value FROM plato.Input + GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S"), SessionWindow(ts, 10); + )", 10, TString(NYql::RtmrProviderName)); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:13: Error: SessionWindow is unsupported for streaming sources\n"); + } +} + +Y_UNIT_TEST_SUITE(LibraSqlSugar) { + auto makeResult = [](TStringBuf settings) { + return SqlToYql( + TStringBuilder() + << settings + << "\n$udf1 = MyLibra::MakeLibraPreprocessor($settings);" + << "\n$udf2 = CustomLibra::MakeLibraPreprocessor($settings);" + << "\nPROCESS plato.Input USING $udf1(TableRow())" + << "\nUNION ALL" + << "\nPROCESS plato.Input USING $udf2(TableRow());" + ); + }; + + Y_UNIT_TEST(EmptySettings) { + auto res = makeResult(R"( + $settings = AsStruct(); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(OnlyEntities) { + auto res = makeResult(R"( + $settings = AsStruct( + AsList("A", "B", "C") AS Entities + ); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(EntitiesWithStrategy) { + auto res = makeResult(R"( + $settings = AsStruct( + AsList("A", "B", "C") AS Entities, + "blacklist" AS EntitiesStrategy + ); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(AllSettings) { + auto res = makeResult(R"( + $settings = AsStruct( + AsList("A", "B", "C") AS 
Entities, + "whitelist" AS EntitiesStrategy, + "path" AS BlockstatDict, + false AS ParseWithFat, + "map" AS Mode + ); + )"); + UNIT_ASSERT(res.IsOk()); + } + + Y_UNIT_TEST(BadStrategy) { + auto res = makeResult(R"( + $settings = AsStruct("bad" AS EntitiesStrategy); + )"); + UNIT_ASSERT_STRING_CONTAINS( + Err2Str(res), + "Error: MakeLibraPreprocessor got invalid entities strategy: expected 'whitelist' or 'blacklist'" + ); + } + + Y_UNIT_TEST(BadEntities) { + auto res = makeResult(R"( + $settings = AsStruct(AsList("A", 1) AS Entities); + )"); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "Error: MakeLibraPreprocessor entity must be string literal"); + } +} + +Y_UNIT_TEST_SUITE(TrailingQuestionsNegative) { + Y_UNIT_TEST(Basic) { + ExpectFailWithError("SELECT 1?;", "<main>:1:9: Error: Unexpected token '?' at the end of expression\n"); + ExpectFailWithError("SELECT 1? + 1;", "<main>:1:10: Error: mismatched input '+' expecting {<EOF>, ';'}\n"); + ExpectFailWithError("SELECT 1 + 1??? < 2", "<main>:1:13: Error: Unexpected token '?' at the end of expression\n"); + ExpectFailWithError("SELECT 1? > 2? > 3?", + "<main>:1:11: Error: Unexpected token '?' at the end of expression\n" + "<main>:1:16: Error: Unexpected token '?' at the end of expression\n" + "<main>:1:21: Error: Unexpected token '?' at the end of expression\n"); + } + + Y_UNIT_TEST(SmartParen) { + ExpectFailWithError("$x = 1; SELECT (Int32?, $x?)", "<main>:1:27: Error: Unexpected token '?' at the end of expression\n"); + ExpectFailWithError("SELECT (Int32, foo?)", "<main>:1:19: Error: Unexpected token '?' at the end of expression\n"); + } + + Y_UNIT_TEST(LambdaOptArgs) { + ExpectFailWithError("$l = ($x, $y?, $z??, $t?) -> ($x);", "<main>:1:18: Error: Expecting at most one '?' 
token here (for optional lambda parameters), but got 2\n"); + } +} + +Y_UNIT_TEST_SUITE(FlexibleTypes) { + Y_UNIT_TEST(AssumeOrderByType) { + UNIT_ASSERT(SqlToYql("PRAGMA FlexibleTypes; SELECT 1 AS int32 ASSUME ORDER BY int32").IsOk()); + } + + Y_UNIT_TEST(GroupingSets) { + UNIT_ASSERT(SqlToYql("PRAGMA FlexibleTypes; SELECT COUNT(*) AS cnt, text, uuid FROM plato.Input GROUP BY GROUPING SETS((uuid), (uuid, text));").IsOk()); + } + + Y_UNIT_TEST(WeakField) { + UNIT_ASSERT(SqlToYql("PRAGMA FlexibleTypes; SELECT WeakField(text, string) as text FROM plato.Input").IsOk()); + } + + Y_UNIT_TEST(Aggregation1) { + TString q = + "PRAGMA FlexibleTypes;\n" + "$foo = ($x, $const, $type) -> ($x || $const || FormatType($type));\n" + "SELECT $foo(SOME(x), 'aaa', String) FROM plato.Input GROUP BY y;"; + UNIT_ASSERT(SqlToYql(q).IsOk()); + } + + Y_UNIT_TEST(Aggregation2) { + TString q = + "PRAGMA FlexibleTypes;\n" + "SELECT 1 + String + MAX(key) FROM plato.Input;"; + UNIT_ASSERT(SqlToYql(q).IsOk()); + } +} + +Y_UNIT_TEST_SUITE(ExternalDeclares) { + Y_UNIT_TEST(BasicUsage) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "String"; + auto res = SqlToYqlWithSettings("select $foo;", settings); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "declare") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'String)))__")); + } + }; + + TWordCountHive elementStat = {{TString("declare"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]); + } + + Y_UNIT_TEST(DeclareOverrides) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "String"; + auto res = SqlToYqlWithSettings("declare $foo as Int32; select $foo;", settings); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + TVerifyLineFunc verifyLine 
= [](const TString& word, const TString& line) { + if (word == "declare") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'Int32)))__")); + } + }; + + TWordCountHive elementStat = {{TString("declare"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]); + } + + Y_UNIT_TEST(UnusedDeclareDoesNotProduceWarning) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "String"; + auto res = SqlToYqlWithSettings("select 1;", settings); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "declare") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'String)))__")); + } + }; + + TWordCountHive elementStat = {{TString("declare"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]); + } + + Y_UNIT_TEST(DeclaresWithInvalidTypesFails) { + NSQLTranslation::TTranslationSettings settings; + settings.DeclaredNamedExprs["foo"] = "List<BadType>"; + auto res = SqlToYqlWithSettings("select 1;", settings); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), + "<main>:0:5: Error: Unknown type: 'BadType'\n" + "<main>: Error: Failed to parse type for externally declared name 'foo'\n"); + } +} + +Y_UNIT_TEST_SUITE(ExternalDataSource) { + Y_UNIT_TEST(CreateExternalDataSourceWithAuthNone) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#"); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithAuthServiceAccount) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"SERVICE_ACCOUNT") '('"location" '"my-bucket") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"ObjectStorage"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithBasic) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="BASIC", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"source_type" '"PostgreSQL"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} 
}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithMdbBasic) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"MDB_BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"PostgreSQL"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithAws) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name", + AWS_REGION="ru-central-1" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"AWS") '('"aws_access_key_id_secret_name" '"secred_id_name") '('"aws_region" '"ru-central-1") '('"aws_secret_access_key_secret_name" '"secret_key_name") '('"location" '"protocol://host:port/") '('"source_type" '"PostgreSQL"))#"); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithToken) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="YT", + LOCATION="protocol://host:port/", + AUTH_METHOD="TOKEN", + TOKEN_SECRET_NAME="token_name" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"TOKEN") '('"location" '"protocol://host:port/") '('"source_type" '"YT") '('"token_secret_name" '"token_name"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceIfNotExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE IF NOT EXISTS MyDataSource WITH ( + 
SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectIfNotExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterExternalDataSource) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER EXTERNAL DATA SOURCE MyDataSource + SET (SOURCE_TYPE = "ObjectStorage", Login = "Admin"), + SET Location "bucket", + RESET (Auth_Method, Service_Account_Id, Service_Account_Secret_Name); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"location" '"bucket") '('"login" '"Admin") '('"source_type" '"ObjectStorage"))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"auth_method" '"service_account_id" '"service_account_secret_name")))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalDataSourceOrReplace) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + CREATE OR REPLACE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + 
UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectOrReplace")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateOrReplaceForUnsupportedTableTypesShouldFail) { + ExpectFailWithError(R"sql( + USE plato; + CREATE OR REPLACE TABLE t (a int32 not null, primary key(a, a)); + )sql" , "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE OR REPLACE TABLE t ( + Key Uint64, + Value1 String, + PRIMARY KEY (Key) + ) + WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + )sql" , "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n"); + } + + Y_UNIT_TEST(CreateExternalDataSourceWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource; + )sql" , "<main>:3:56: Error: mismatched input ';' expecting WITH\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + LOCATION="my-bucket", + AUTH_METHOD="NONE" + ); + )sql" , "<main>:5:33: Error: SOURCE_TYPE requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket" + ); + )sql" , "<main>:5:30: Error: AUTH_METHOD requires key\n"); + + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE1" + ); + )sql" , "<main>:6:33: Error: Unknown AUTH_METHOD = NONE1\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + 
SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT" + ); + )sql" , "<main>:6:33: Error: SERVICE_ACCOUNT_ID requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_ID="s1" + ); + )sql" , "<main>:7:40: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="SERVICE_ACCOUNT", + SERVICE_ACCOUNT_SECRET_NAME="s1" + ); + )sql" , "<main>:7:49: Error: SERVICE_ACCOUNT_ID requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="BASIC", + LOGIN="admin" + ); + )sql" , "<main>:7:27: Error: PASSWORD_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="BASIC", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:7:42: Error: LOGIN requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_ID requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + LOGIN="admin", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n"); + + 
ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + PASSWORD_SECRET_NAME="secret_name" + ); + )sql" , "<main>:9:42: Error: LOGIN requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="MDB_BASIC", + SERVICE_ACCOUNT_ID="sa", + SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name", + LOGIN="admin" + ); + )sql" , "<main>:9:27: Error: PASSWORD_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name", + AWS_REGION="ru-central-1" + ); + )sql" , "<main>:8:32: Error: AWS_ACCESS_KEY_ID_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name", + AWS_REGION="ru-central-1" + ); + )sql" , "<main>:8:32: Error: AWS_SECRET_ACCESS_KEY_SECRET_NAME requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL DATA SOURCE MyDataSource WITH ( + SOURCE_TYPE="PostgreSQL", + LOCATION="protocol://host:port/", + AUTH_METHOD="AWS", + AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name", + AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name" + ); + )sql" , "<main>:8:51: Error: AWS_REGION requires key\n"); + } + + Y_UNIT_TEST(DropExternalDataSourceWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL DATA SOURCE MyDataSource; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, 
const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalDataSource) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + DROP EXTERNAL DATA SOURCE MyDataSource; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource"); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalDataSourceIfExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL DATA SOURCE IF EXISTS MyDataSource; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "MyDataSource"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObjectIfExists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(ExternalTable) { + Y_UNIT_TEST(CreateExternalTable) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc 
verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="mydatasource", + LOCATION="/folder1/*" + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mydatasource"); + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mytable"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableObjectStorage) { + auto res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int, + year Int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*", + FORMAT="json_as_string", + `projection.enabled`="true", + `projection.year.type`="integer", + `projection.year.min`="2010", + `projection.year.max`="2022", + `projection.year.interval`="1", + `projection.month.type`="integer", + `projection.month.min`="1", + `projection.month.max`="12", + `projection.month.interval`="1", + `projection.month.digits`="2", + `storage.location.template`="${year}/${month}", + PARTITONED_BY = "[year, month]" + ); + )sql"); + UNIT_ASSERT_C(res.IsOk(), 
res.Issues.ToString()); + } + + Y_UNIT_TEST(CreateExternalTableIfNotExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE EXTERNAL TABLE IF NOT EXISTS mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, "create_if_not_exists"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableOrReplace) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + CREATE OR REPLACE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, "create_or_replace"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterExternalTableAddColumn) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER EXTERNAL TABLE mytable + ADD COLUMN my_column int32, + RESET (LOCATION); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == 
"Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('addColumns '('('"my_column" (AsOptionalType (DataType 'Int32))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location)))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterExternalTableDropColumn) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER EXTERNAL TABLE mytable + DROP COLUMN my_column, + SET (Location = "abc", Other_Prop = "42"), + SET x 'y'; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('dropColumns '('"my_column")#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('Other_Prop (String '"42")) '('x (String '"y")))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateExternalTableWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable; + )sql" , "<main>:3:45: Error: mismatched input ';' expecting '('\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ); + )sql" , "<main>:4:23: Error: DATA_SOURCE requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + DATA_SOURCE="/Root/mydatasource" + ); + )sql" , "<main>:6:33: Error: LOCATION requires 
key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int + ) WITH ( + LOCATION="/folder1/*" + ); + )sql" , "<main>:6:30: Error: DATA_SOURCE requires key\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE EXTERNAL TABLE mytable ( + a int, + PRIMARY KEY(a) + ) WITH ( + DATA_SOURCE="/Root/mydatasource", + LOCATION="/folder1/*" + ); + )sql" , "<main>:8:30: Error: PRIMARY KEY is not supported for external table\n"); + } + + Y_UNIT_TEST(DropExternalTable) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL TABLE MyExternalTable; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalTableWithTablePrefix) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + pragma TablePathPrefix='/aba'; + DROP EXTERNAL TABLE MyExternalTable; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyExternalTable"); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'tablescheme")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropExternalTableIfExists) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP EXTERNAL TABLE IF EXISTS MyExternalTable; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("tablescheme")); + 
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(TopicsDDL) { + void TestQuery(const TString& query, bool expectOk = true) { + TStringBuilder finalQuery; + + finalQuery << "use plato;" << Endl << query; + auto res = SqlToYql(finalQuery, 10, "kikimr"); + if (expectOk) { + UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + } else { + UNIT_ASSERT(!res.IsOk()); + } + } + + Y_UNIT_TEST(CreateTopicSimple) { + TestQuery(R"( + CREATE TOPIC topic1; + )"); + TestQuery(R"( + CREATE TOPIC `cluster1.topic1`; + )"); + TestQuery(R"( + CREATE TOPIC topic1 WITH (metering_mode = "str_value", partition_count_limit = 123, retention_period = Interval('PT1H')); + )"); + } + + Y_UNIT_TEST(CreateTopicConsumer) { + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1); + )"); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)); + )"); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)) WITH (supported_codecs = "1,2,3"); + )"); + } + + Y_UNIT_TEST(AlterTopicSimple) { + TestQuery(R"( + ALTER TOPIC topic1 SET (retention_period = Interval('PT1H')); + )"); + TestQuery(R"( + ALTER TOPIC topic1 SET (retention_storage_mb = 3, partition_count_limit = 50); + )"); + TestQuery(R"( + ALTER TOPIC topic1 RESET (supported_codecs, retention_period); + )"); + TestQuery(R"( + ALTER TOPIC topic1 RESET (partition_write_speed_bytes_per_second), + SET (partition_write_burst_bytes = 11111, min_active_partitions = 1); + )"); + } + Y_UNIT_TEST(AlterTopicConsumer) { + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER consumer1, + ADD CONSUMER consumer2 WITH (important = false, supported_codecs = "RAW"), + ALTER CONSUMER consumer3 SET (important = false, read_from = 1), + ALTER CONSUMER consumer3 RESET 
(supported_codecs), + DROP CONSUMER consumer4, + SET (partition_count_limit = 11, retention_period = Interval('PT1H')), + RESET(metering_mode) + )"); + } + Y_UNIT_TEST(DropTopic) { + TestQuery(R"( + DROP TOPIC topic1; + )"); + } + + Y_UNIT_TEST(TopicBadRequests) { + TestQuery(R"( + CREATE TOPIC topic1(); + )", false); + TestQuery(R"( + CREATE TOPIC topic1 SET setting1 = value1; + )", false); + TestQuery(R"( + ALTER TOPIC topic1 SET setting1 value1; + )", false); + TestQuery(R"( + ALTER TOPIC topic1 RESET setting1; + )", false); + + TestQuery(R"( + ALTER TOPIC topic1 DROP CONSUMER consumer4 WITH (k1 = v1); + )", false); + + TestQuery(R"( + CREATE TOPIC topic1 WITH (retention_period = 123); + )", false); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons1 WITH (important = false)); + )", false); + TestQuery(R"( + CREATE TOPIC topic1 (CONSUMER cons1 WITH (bad_option = false)); + )", false); + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER cons1, ALTER CONSUMER cons1 RESET (important); + )", false); + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER consumer1, + ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1), + ALTER CONSUMER consumer3 RESET (supported_codecs); + )", false); + TestQuery(R"( + ALTER TOPIC topic1 ADD CONSUMER consumer1, + ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1), + ALTER CONSUMER consumer3 SET (read_from = 2); + )", false); + } + + Y_UNIT_TEST(TopicWithPrefix) { + NYql::TAstParseResult res = SqlToYql(R"( + USE plato; + PRAGMA TablePathPrefix = '/database/path/to/tables'; + ALTER TOPIC `my_table/my_feed` ADD CONSUMER `my_consumer`; + )"); + UNIT_ASSERT(res.Root); + + TWordCountHive elementStat = {{TString("/database/path/to/tables/my_table/my_feed"), 0}, {"topic", 0}}; + VerifyProgram(res, elementStat); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["topic"]); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["/database/path/to/tables/my_table/my_feed"]); + } +} + 
// Tests for `PRAGMA BlockEngine`: accepted values and the diagnostic for an unknown one.
Y_UNIT_TEST_SUITE(BlockEnginePragma) {
    // Every accepted value must parse; all but "disable" must emit a BlockEngine Configure! node.
    Y_UNIT_TEST(Basic) {
        const TVector<TString> values = {"auto", "force", "disable"};
        for (const auto& value : values) {
            const auto query = TStringBuilder() << "pragma Blockengine='" << value << "'; select 1;";
            NYql::TAstParseResult res = SqlToYql(query);
            UNIT_ASSERT(res.Root);

            // Only "BlockEngine" is tracked below, so every reported line is expected
            // to be the Configure! statement carrying the requested value.
            TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
                Y_UNUSED(word);
                UNIT_ASSERT_STRING_CONTAINS(line, TStringBuilder() << R"(Configure! world (DataSource '"config") '"BlockEngine" '")" << value << "\"");
            };

            TWordCountHive elementStat({"BlockEngine"});
            VerifyProgram(res, elementStat, verifyLine);
            // VALUES_EQUAL (rather than a bare boolean assert) prints both counts on failure.
            UNIT_ASSERT_VALUES_EQUAL((value == "disable") ? 0 : 1, elementStat["BlockEngine"]);
        }
    }

    // An unsupported value is rejected with a message listing the allowed ones.
    Y_UNIT_TEST(UnknownSetting) {
        ExpectFailWithError("use plato; pragma BlockEngine='foo';",
            "<main>:1:31: Error: Expected `disable|auto|force' argument for: BlockEngine\n");
    }
}

// Tests for CREATE VIEW / DROP VIEW translation and a few view-related SELECT forms.
Y_UNIT_TEST_SUITE(TViewSyntaxTest) {
    // Smallest valid CREATE VIEW statement must translate.
    Y_UNIT_TEST(CreateViewSimple) {
        NYql::TAstParseResult res = SqlToYql(R"(
                USE plato;
                CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
            )"
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
    }

    // CREATE VIEW over a table: the Write! line must carry the view path and createObject.
    Y_UNIT_TEST(CreateViewFromTable) {
        constexpr const char* path = "/PathPrefix/TheView";
        constexpr const char* query = R"(
            SELECT * FROM SomeTable
        )";

        NYql::TAstParseResult res = SqlToYql(std::format(R"(
                    USE plato;
                    CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
                )",
                path,
                query
            )
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, path);
                UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
            }
        };
        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // CREATE VIEW with a JOIN query; intended to compare the stored query text
    // with the re-tokenized original.
    Y_UNIT_TEST(CheckReconstructedQuery) {
        constexpr const char* path = "/PathPrefix/TheView";
        constexpr const char* query = R"(
            SELECT * FROM FirstTable JOIN SecondTable ON FirstTable.key == SecondTable.key
        )";

        NYql::TAstParseResult res = SqlToYql(std::format(R"(
                    USE plato;
                    CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
                )",
                path,
                query
            )
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TString reconstructedQuery = ToString(Tokenize(query));
        // NOTE(review): "query_text" is not among the tracked words below, so this
        // branch presumably never runs and the reconstructed text is not actually
        // compared. Confirm against VerifyProgram before tracking "query_text" here.
        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
            if (word == "query_text") {
                UNIT_ASSERT_STRING_CONTAINS(line, reconstructedQuery);
            }
        };
        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // DROP VIEW by explicit path: the Write! line must carry the path and dropObject.
    Y_UNIT_TEST(DropView) {
        constexpr const char* path = "/PathPrefix/TheView";
        NYql::TAstParseResult res = SqlToYql(std::format(R"(
                    USE plato;
                    DROP VIEW `{}`;
                )",
                path
            )
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, path);
                UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
            }
        };
        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // TablePathPrefix must be prepended to a relative view name on CREATE VIEW.
    Y_UNIT_TEST(CreateViewWithTablePrefix) {
        NYql::TAstParseResult res = SqlToYql(R"(
                USE plato;
                PRAGMA TablePathPrefix='/PathPrefix';
                CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
            )"
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
                UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
            }
        };

        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // TablePathPrefix must be prepended to a relative view name on DROP VIEW.
    Y_UNIT_TEST(DropViewWithTablePrefix) {
        NYql::TAstParseResult res = SqlToYql(R"(
                USE plato;
                PRAGMA TablePathPrefix='/PathPrefix';
                DROP VIEW TheView;
            )"
        );
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        // The guard has to name the tracked word ("Write!"); it previously compared
        // against "Write", which is not tracked, leaving both assertions unexecuted.
        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write!") {
                UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
                UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
            }
        };

        TWordCountHive elementStat = { {"Write!"} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
    }

    // `WITH schema(...)` must produce a userschema struct: nullable column wrapped
    // in AsOptionalType, NOT NULL column left as a plain DataType.
    Y_UNIT_TEST(YtAlternativeSchemaSyntax) {
        NYql::TAstParseResult res = SqlToYql(R"(
            SELECT * FROM plato.Input WITH schema(y Int32, x String not null);
        )");
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "userschema") {
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
                    line.find(R"__('('('"userschema" (StructType '('"y" (AsOptionalType (DataType 'Int32))) '('"x" (DataType 'String))))))__"));
            }
        };

        TWordCountHive elementStat = {{TString("userschema"), 0}};
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]);
    }

    // A table-qualified column combined with `VIEW` must yield one project item
    // and one Read!, and no SqlAccess node.
    Y_UNIT_TEST(UseViewAndFullColumnId) {
        NYql::TAstParseResult res = SqlToYql("USE plato; SELECT Input.x FROM Input VIEW uitzicht;");
        UNIT_ASSERT(res.Root);

        TWordCountHive elementStat = {{TString("SqlAccess"), 0}, {"SqlProjectItem", 0}, {"Read!", 0}};
        VerifyProgram(res, elementStat);
        UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlAccess"]);
        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]);
    }
}

// Tests for `PRAGMA CompactNamedExprs` and the (Disable)ValidateUnusedExprs pragmas.
//
// The expected diagnostics embed line:column positions inside the raw-string
// queries, so the indentation of the SQL text in these literals is significant.
Y_UNIT_TEST_SUITE(CompactNamedExprs) {
    // Source-dependent callables bound to a named expression outside of any
    // source must be rejected with a callable-specific diagnostic.
    Y_UNIT_TEST(SourceCallablesInWrongContext) {
        TString query = R"(
            pragma CompactNamedExprs;
            $foo = %s();
            select $foo from plato.Input;
        )";

        // Callable name -> expected diagnostic. Iterating this map keeps the list of
        // callables and their expectations in a single place.
        const THashMap<TString, TString> errs = {
            {"TableRow", "<main>:3:20: Error: TableRow requires data source\n"},
            {"JoinTableRow", "<main>:3:20: Error: JoinTableRow requires data source\n"},
            {"TableRecordIndex", "<main>:3:20: Error: Unable to use function: TableRecord without source\n"},
            {"TablePath", "<main>:3:20: Error: Unable to use function: TablePath without source\n"},
            {"SystemMetadata", "<main>:3:20: Error: Unable to use function: SystemMetadata without source\n"},
        };

        for (const auto& [callable, expectedError] : errs) {
            const auto req = Sprintf(query.c_str(), callable.c_str());
            ExpectFailWithError(req, expectedError);
        }
    }

    // With ValidateUnusedExprs, errors inside unused named expressions and unused
    // subqueries must still be reported.
    Y_UNIT_TEST(ValidateUnusedExprs) {
        TString query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma ValidateUnusedExprs;

            $foo = count(1);
            select 1;
        )";
        ExpectFailWithError(query, "<main>:6:20: Error: Aggregation is not allowed in this context\n");
        query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma ValidateUnusedExprs;

            define subquery $x() as
                select count(1, 2);
            end define;
            select 1;
        )";
        ExpectFailWithError(query, "<main>:7:24: Error: Aggregation function Count requires exactly 1 argument(s), given: 2\n");
    }

    // With DisableValidateUnusedExprs, the same queries that fail in
    // ValidateUnusedExprs are expected to translate successfully.
    Y_UNIT_TEST(DisableValidateUnusedExprs) {
        TString query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma DisableValidateUnusedExprs;

            $foo = count(1);
            select 1;
        )";
        // The IsOk() result used to be discarded, so the test could not fail.
        auto unusedExprRes = SqlToYql(query);
        UNIT_ASSERT_C(unusedExprRes.IsOk(), unusedExprRes.Issues.ToString());
        query = R"(
            pragma warning("disable", "4527");
            pragma CompactNamedExprs;
            pragma DisableValidateUnusedExprs;

            define subquery $x() as
                select count(1, 2);
            end define;
            select 1;
        )";
        auto unusedSubqueryRes = SqlToYql(query);
        UNIT_ASSERT_C(unusedSubqueryRes.IsOk(), unusedSubqueryRes.Issues.ToString());
    }
}

// Tests for CREATE / ALTER / DROP RESOURCE POOL translation.
Y_UNIT_TEST_SUITE(ResourcePool) {
    // CREATE RESOURCE POOL: settings are emitted lower-cased and sorted by name,
    // integers as Int32 literals, and the write mode is createObject.
    Y_UNIT_TEST(CreateResourcePool) {
        NYql::TAstParseResult res = SqlToYql(R"sql(
                USE plato;
                CREATE RESOURCE POOL MyResourcePool WITH (
                    CONCURRENT_QUERY_LIMIT=20,
                    QUERY_CANCEL_AFTER_SECONDS=86400,
                    QUEUE_TYPE="FIFO"
                );
            )sql");
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write") {
                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"concurrent_query_limit" (Int32 '"20")) '('"query_cancel_after_seconds" (Int32 '"86400")) '('"queue_type" '"FIFO"))#");
                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
            }
        };

        TWordCountHive elementStat = { {TString("Write"), 0} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
    }

    // A missing WITH clause and duplicated setting names must be rejected.
    // Positions in the expected messages depend on the SQL indentation below.
    Y_UNIT_TEST(CreateResourcePoolWithBadArguments) {
        ExpectFailWithError(R"sql(
                USE plato;
                CREATE RESOURCE POOL MyResourcePool;
            )sql" , "<main>:3:51: Error: mismatched input ';' expecting WITH\n");

        ExpectFailWithError(R"sql(
                USE plato;
                CREATE RESOURCE POOL MyResourcePool WITH (
                    DUPLICATE_SETTING="first_value",
                    DUPLICATE_SETTING="second_value"
                );
            )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n");
    }

    // ALTER RESOURCE POOL: SET entries go to 'features, RESET entries to
    // 'resetFeatures, and the write mode is alterObject.
    Y_UNIT_TEST(AlterResourcePool) {
        NYql::TAstParseResult res = SqlToYql(R"sql(
                USE plato;
                ALTER RESOURCE POOL MyResourcePool
                    SET (CONCURRENT_QUERY_LIMIT = 30, Weight = 5, QUEUE_TYPE = "UNORDERED"),
                    RESET (Query_Cancel_After_Seconds, Query_Count_Limit);
            )sql");
        UNIT_ASSERT_C(res.Root, res.Issues.ToString());

        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
            if (word == "Write") {
                UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"concurrent_query_limit" (Int32 '"30")) '('"queue_type" '"UNORDERED") '('"weight" (Int32 '"5")))))#");
                UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"query_cancel_after_seconds" '"query_count_limit")))#");
            }
        };

        TWordCountHive elementStat = { {TString("Write"), 0} };
        VerifyProgram(res, elementStat, verifyLine);

        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
    }

Y_UNIT_TEST(DropResourcePool) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP RESOURCE POOL MyResourcePool; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(BackupCollection) { + Y_UNIT_TEST(CreateBackupCollection) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection WITH ( + STORAGE="local", + TAG="test" -- for testing purposes, not a real thing + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateBackupCollectionWithDatabase) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection DATABASE WITH ( + STORAGE="local", + TAG="test" -- for testing purposes, not a real thing + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" 
'"test"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('type 'database)")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateBackupCollectionWithTables) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection ( + TABLE someTable, + TABLE `prefix/anotherTable` + ) WITH ( + STORAGE="local", + TAG="test" -- for testing purposes, not a real thing + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('('('type 'table) '('path '"someTable")))#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('('('type 'table) '('path '"prefix/anotherTable")))#")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateBackupCollectionWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection; + )sql" , "<main>:3:55: Error: mismatched input ';' expecting {'(', DATABASE, WITH}\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TABLE TestCollection; + )sql" , "<main>:3:47: Error: mismatched input 'TestCollection' expecting {'(', DATABASE, WITH}\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION DATABASE `test` TestCollection; + )sql" , "<main>:3:50: Error: mismatched 
input '`test`' expecting {'(', DATABASE, WITH}\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection WITH ( + DUPLICATE_SETTING="first_value", + DUPLICATE_SETTING="second_value" + ); + )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); + + ExpectFailWithError(R"sql( + USE plato; + CREATE BACKUP COLLECTION TestCollection WITH ( + INT_SETTING=1 + ); + )sql" , "<main>:4:21: Error: INT_SETTING value should be a string literal\n"); + } + + Y_UNIT_TEST(AlterBackupCollection) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER BACKUP COLLECTION TestCollection + SET (STORAGE="remote"), -- also just for test + SET (TAG1 = "123"), + RESET (TAG2, TAG3); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('settings '('('"storage" '"remote") '('"tag1" '"123"))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetSettings '('"tag2" '"tag3")))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AlterBackupCollectionEntries) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER BACKUP COLLECTION TestCollection + DROP TABLE `test`, + ADD DATABASE; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('alterEntries)#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'table) 
'('path '"test") '('action 'drop)))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'database) '('action 'add)))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropBackupCollection) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP BACKUP COLLECTION TestCollection; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(ResourcePoolClassifier) { + Y_UNIT_TEST(CreateResourcePoolClassifier) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + RANK=20, + RESOURCE_POOL='wgUserQueries', + MEMBER_NAME='yandex_query@abc' + ); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"member_name" '"yandex_query@abc") '('"rank" (Int32 '"20")) '('"resource_pool" '"wgUserQueries"))#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(CreateResourcePoolClassifierWithBadArguments) { + ExpectFailWithError(R"sql( + USE plato; + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier; + )sql" , "<main>:3:72: Error: mismatched input ';' expecting WITH\n"); + 
+ ExpectFailWithError(R"sql( + USE plato; + CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH ( + DUPLICATE_SETTING="first_value", + DUPLICATE_SETTING="second_value" + ); + )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); + } + + Y_UNIT_TEST(AlterResourcePoolClassifier) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier + SET (RANK = 30, Weight = 5, MEMBER_NAME = "test@user"), + RESET (Resource_Pool); + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"member_name" '"test@user") '('"rank" (Int32 '"30")) '('"weight" (Int32 '"5")))))#"); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"resource_pool")))#"); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(DropResourcePoolClassifier) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier; + )sql"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(BacktickMatching) { + auto req = "select\n" + " 1 as `Schema has \\`RealCost\\``\n" + " -- foo`bar"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + res = 
SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + + req = "select 1 as `a``b`, 2 as ````, 3 as `\\x60a\\x60`, 4 as ```b```, 5 as `\\`c\\``"; + res = SqlToYql(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + res = SqlToYqlWithAnsiLexer(req); + UNIT_ASSERT(res.Root); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + } +} + +Y_UNIT_TEST_SUITE(Backup) { + Y_UNIT_TEST(Simple) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + BACKUP TestCollection; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'incremental")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'backup")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(Incremental) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + BACKUP TestCollection INCREMENTAL; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'incremental")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'backup")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(Restore) { + Y_UNIT_TEST(Simple) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + RESTORE TestCollection; + )sql"); 
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'restore")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(AtPoint) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + RESTORE TestCollection AT '2024-06-16_20-14-02'; + )sql"); + UNIT_ASSERT_C(res.Root, res.Issues.ToString()); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#")); + UNIT_ASSERT_STRING_CONTAINS(line, R"#('at '"2024-06-16_20-14-02")#"); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'restore")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } +} + +Y_UNIT_TEST_SUITE(ColumnFamily) { + Y_UNIT_TEST(CompressionLevel) { + NYql::TAstParseResult res = SqlToYql(R"( use plato; + CREATE TABLE tableName ( + Key Uint32 FAMILY default, + Value String FAMILY family1, + PRIMARY KEY (Key), + FAMILY default ( + DATA = "test", + COMPRESSION = "lz4", + COMPRESSION_LEVEL = 5 + ), + FAMILY family1 ( + DATA = "test", + COMPRESSION = "lz4", + COMPRESSION_LEVEL = 3 + ) + ); + )"); + UNIT_ASSERT(res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 0); + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("compression_level")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("5")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("3")); + } + }; + + TWordCountHive 
elementStat = { { TString("Write"), 0 }, { TString("compression_level"), 0 } }; + VerifyProgram(res, elementStat, verifyLine); + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + UNIT_ASSERT_VALUES_EQUAL(2, elementStat["compression_level"]); + } +} diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.h b/yql/essentials/sql/v1/sql_ut_antlr4.h new file mode 100644 index 00000000000..9a0029b67ad --- /dev/null +++ b/yql/essentials/sql/v1/sql_ut_antlr4.h @@ -0,0 +1,226 @@ + +#include <yql/essentials/providers/common/provider/yql_provider_names.h> +#include <yql/essentials/sql/sql.h> +#include <util/generic/map.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/string/split.h> +#include <deque> +#include <unordered_set> +using namespace NSQLTranslation; + +enum class EDebugOutput { + None, + ToCerr, +}; + +const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote | + NYql::TAstPrintFlags::AdaptArbitraryContent; + +inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) { + TStringStream s; + res.Issues.PrintTo(s); + + if (debug == EDebugOutput::ToCerr) { + Cerr << s.Str() << Endl; + } + return s.Str(); +} + +inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {}, + EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {}) +{ + google::protobuf::Arena arena; + const auto service = provider ? 
provider : TString(NYql::YtProviderName); + const TString cluster = "plato"; + settings.ClusterMapping[cluster] = service; + settings.ClusterMapping["hahn"] = NYql::YtProviderName; + settings.ClusterMapping["mon"] = NYql::SolomonProviderName; + settings.MaxErrors = maxErrors; + settings.Mode = mode; + settings.Arena = &arena; + settings.AnsiLexer = ansiLexer; + settings.Antlr4Parser = true; + settings.SyntaxVersion = 1; + auto res = SqlToYql(query, settings); + if (debug == EDebugOutput::ToCerr) { + Err2Str(res, debug); + } + return res; +} + +inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug); +} + +inline NYql::TAstParseResult SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) { + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, 10, {}, EDebugOutput::None, false, settings); +} + +inline void ExpectFailWithError(const TString& query, const TString& error) { + NYql::TAstParseResult res = SqlToYql(query); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), error); +} + +inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) { + bool ansiLexer = true; + return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug, ansiLexer); +} + +inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) { + NYql::TAstParseResult res = SqlToYqlWithAnsiLexer(query); + + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), error); +} + +inline TString GetPrettyPrint(const NYql::TAstParseResult& res) { + TStringStream yqlProgram; + res.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote); + 
return yqlProgram.Str(); +} + +inline TString Quote(const char* str) { + return TStringBuilder() << "'\"" << str << "\""; +} + +class TWordCountHive: public TMap<TString, unsigned> { +public: + TWordCountHive(std::initializer_list<TString> strings) { + for (auto& str: strings) { + emplace(str, 0); + } + } + + TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list) + : TMap(list) + { + } +}; + +typedef std::function<void (const TString& word, const TString& line)> TVerifyLineFunc; + +inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) { + const auto programm = GetPrettyPrint(res); + TVector<TString> yqlProgram; + Split(programm, "\n", yqlProgram); + for (const auto& line: yqlProgram) { + for (auto& counterIter: wordCounter) { + const auto& word = counterIter.first; + auto pos = line.find(word); + while (pos != TString::npos) { + ++counterIter.second; + if (verifyLine) { + verifyLine(word, line); + } + pos = line.find(word, pos + word.length()); + } + } + } + return programm; +} + +inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) { + TString pragma; + if (ansi.Defined()) { + pragma = *ansi ? 
"PRAGMA AnsiInForEmptyOrNullableItemsCollections;" : + "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;"; + } + + NYql::TAstParseResult res = SqlToYql(pragma + query); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { + Y_UNUSED(word); + if (!ansi.Defined()) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)")); + } else if (*ansi) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)")); + } + for (auto& hint : expectedHints) { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(hint)); + } + }; + TWordCountHive elementStat = {{TString("SqlIn"), 0}}; + VerifyProgram(res, elementStat, verifyLine); +} + +inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) { + VerifySqlInHints(query, expectedHints, false); + VerifySqlInHints(query, expectedHints, true); +} + +inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) { + NSQLTranslation::TTranslationSettings settings; + NSQLTranslation::TTableBindingSettings bindSettings; + bindSettings.ClusterType = "s3"; + bindSettings.Settings["cluster"] = "cluster"; + bindSettings.Settings["path"] = "path"; + bindSettings.Settings["format"] = "format"; + bindSettings.Settings["compression"] = "ccompression"; + bindSettings.Settings["bar"] = "1"; + // schema is not validated in this test but should be valid YSON text + bindSettings.Settings["schema"] = R"__("[ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ]])__"; + bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]"; + settings.Bindings[name] = bindSettings; + return settings; +} + +inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) { + std::deque<NYql::TAstNode const*> wishList{ root }; + 
std::unordered_set<NYql::TAstNode const*> visited; + while(!wishList.empty()){ + auto v = wishList.front(); + wishList.pop_front(); + if (!visitor(v)) + return; + visited.insert(v); + if (v->IsList()) { + for (ui32 i = 0; i != v->GetChildrenCount(); ++i) { + auto child = v->GetChild(i); + if (visited.find(child) == visited.cend()) { + wishList.push_back(child); + } + } + } + } +} + +inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){ + const NYql::TAstNode* result = nullptr; + AstBfs(root, [&result, childIndex, name](auto v) { + if (v->IsList() && v->GetChildrenCount() > childIndex && + v->GetChild(childIndex)->IsAtom() && v->GetChild(childIndex)->GetContent() == name) { + result = v; + return false; + } + return true; }); + return result; +} diff --git a/yql/essentials/sql/v1/sql_values.cpp b/yql/essentials/sql/v1/sql_values.cpp new file mode 100644 index 00000000000..c035489387f --- /dev/null +++ b/yql/essentials/sql/v1/sql_values.cpp @@ -0,0 +1,151 @@ +#include "sql_values.h" +#include "sql_group_by.h" +#include "sql_query.h" +#include "sql_select.h" +#include "sql_expression.h" +#include "source.h" + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +TSourcePtr TSqlValues::Build(const TRule_values_stmt& node, TPosition& valuesPos, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos) { + Token(node.GetToken1()); + valuesPos = Ctx.Pos(); + + TVector<TVector<TNodePtr>> rows; + const auto& rowList = node.GetRule_values_source_row_list2(); + if (!BuildRows(rowList, rows)) { + return nullptr; + } + + YQL_ENSURE(!rows.empty()); + const size_t columnsCount = rows.back().size(); + if (derivedColumns.size() > columnsCount) { + Ctx.Error(derivedColumnsPos) << "Derived column list size exceeds column count in VALUES"; + return nullptr; + } + + auto columns = derivedColumns; + if (Ctx.WarnUnnamedColumns && columns.size() < columnsCount) { + Ctx.Warning(valuesPos, 
TIssuesIds::YQL_UNNAMED_COLUMN) + << "Autogenerated column names column" << columns.size() << "...column" << columnsCount - 1 << " will be used here"; + } + + while (columns.size() < columnsCount) { + columns.push_back(TStringBuilder() << "column" << columns.size()); + } + + TVector<TNodePtr> labels; + for (size_t i = 0; i < columnsCount; ++i) { + labels.push_back(BuildQuotedAtom(derivedColumnsPos, columns[i])); + } + + TVector<TNodePtr> items; + for (auto& row : rows) { + YQL_ENSURE(!row.empty()); + YQL_ENSURE(row.size() == columnsCount); + items.push_back(BuildOrderedStructure(row.front()->GetPos(), row, labels)); + } + auto list = new TCallNodeImpl(valuesPos, "AsListMayWarn", items); + list = new TCallNodeImpl(valuesPos, "PersistableRepr", { list }); + list = new TCallNodeImpl(valuesPos, "AssumeColumnOrder", { list, BuildTuple(valuesPos, labels) }); + auto result = BuildNodeSource(valuesPos, list, false); + result->AllColumns(); + return result; +} + +bool TSqlValues::BuildRows(const TRule_values_source_row_list& node, TVector<TVector<TNodePtr>>& rows) { + rows = TVector<TVector<TNodePtr>> {{}}; + + + if (!BuildRow(node.GetRule_values_source_row1(), rows.back())) { + return false; + } + + const size_t rowSize = rows.back().size(); + + for (const auto& valuesSourceRow: node.GetBlock2()) { + rows.push_back({}); + if (!BuildRow(valuesSourceRow.GetRule_values_source_row2(), rows.back())) { + return false; + } + if (rows.back().size() != rowSize) { + Token(valuesSourceRow.GetRule_values_source_row2().GetToken1()); + Error() << "All VALUES items should have same size: expecting " << rowSize << ", got " << rows.back().size(); + return false; + } + } + return true; +} + +bool TSqlValues::BuildRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow) { + TSqlExpression sqlExpr(Ctx, Mode); + return ExprList(sqlExpr, outRow, inRow.GetRule_expr_list2()); +} + +TSourcePtr TSqlValues::ValuesSource(const TRule_values_source& node, const TVector<TString>& 
columnsHint, + const TString& operationName) +{ + Ctx.IncrementMonCounter("sql_features", "ValuesSource"); + TPosition pos(Ctx.Pos()); + switch (node.Alt_case()) { + case TRule_values_source::kAltValuesSource1: { + TVector<TVector<TNodePtr>> rows {{}}; + const auto& rowList = node.GetAlt_values_source1().GetRule_values_stmt1().GetRule_values_source_row_list2(); + if (!BuildRows(rowList, rows)) { + return nullptr; + } + return BuildWriteValues(pos, operationName, columnsHint, rows); + } + case TRule_values_source::kAltValuesSource2: { + TSqlSelect select(Ctx, Mode); + TPosition selectPos; + auto source = select.Build(node.GetAlt_values_source2().GetRule_select_stmt1(), selectPos); + if (!source) { + return nullptr; + } + return BuildWriteValues(pos, "UPDATE", columnsHint, std::move(source)); + } + default: + Ctx.IncrementMonCounter("sql_errors", "UnknownValuesSource"); + AltNotImplemented("values_source", node); + return nullptr; + } +} + +TSourcePtr TSqlIntoValues::Build(const TRule_into_values_source& node, const TString& operationName) { + switch (node.Alt_case()) { + case TRule_into_values_source::kAltIntoValuesSource1: { + auto alt = node.GetAlt_into_values_source1(); + TVector<TString> columnsHint; + if (alt.HasBlock1()) { + PureColumnListStr(alt.GetBlock1().GetRule_pure_column_list1(), *this, columnsHint); + } + return ValuesSource(alt.GetRule_values_source2(), columnsHint, operationName); + } + default: + Ctx.IncrementMonCounter("sql_errors", "DefaultValuesOrOther"); + AltNotImplemented("into_values_source", node); + return nullptr; + } +} + +TSourcePtr TSqlAsValues::Build(const TRule_values_source& node, const TString& operationName) { + switch (node.Alt_case()) { + case TRule_values_source::kAltValuesSource1: { + Ctx.IncrementMonCounter("sql_errors", "UnknownValuesSource"); + Error() << "AS VALUES statement is not supported for " << operationName << "."; + return nullptr; + } + case TRule_values_source::kAltValuesSource2: { + return ValuesSource(node, {}, 
operationName); + } + default: + Ctx.IncrementMonCounter("sql_errors", "UnknownValuesSource"); + AltNotImplemented("values_source", node); + return nullptr; + } +} + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/sql_values.h b/yql/essentials/sql/v1/sql_values.h new file mode 100644 index 00000000000..7e19d1d8a0d --- /dev/null +++ b/yql/essentials/sql/v1/sql_values.h @@ -0,0 +1,48 @@ +#pragma once + +#include "sql_translation.h" +#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h> + +namespace NSQLTranslationV1 { + +using namespace NSQLv1Generated; + +class TSqlValues: public TSqlTranslation { +public: + TSqlValues(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlTranslation(ctx, mode) + { + } + + TSourcePtr Build(const TRule_values_stmt& node, TPosition& valuesPos, const TVector<TString>& derivedColumns = {}, TPosition derivedColumnsPos = TPosition()); +protected: + bool BuildRows(const TRule_values_source_row_list& node, TVector<TVector<TNodePtr>>& rows); + + TSourcePtr ValuesSource(const TRule_values_source& node, const TVector<TString>& columnsHint, + const TString& operationName); + +private: + bool BuildRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow); +}; + +class TSqlIntoValues: public TSqlValues { +public: + TSqlIntoValues(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlValues(ctx, mode) + { + } + + TSourcePtr Build(const TRule_into_values_source& node, const TString& operationName); +}; + +class TSqlAsValues: public TSqlValues { +public: + TSqlAsValues(TContext& ctx, NSQLTranslation::ESqlMode mode) + : TSqlValues(ctx, mode) + { + } + + TSourcePtr Build(const TRule_values_source& node, const TString& operationName); +}; + +} // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/ut/ya.make b/yql/essentials/sql/v1/ut/ya.make new file mode 100644 index 00000000000..f7dddb9af18 --- /dev/null +++ b/yql/essentials/sql/v1/ut/ya.make @@ -0,0 +1,21 @@ 
+UNITTEST_FOR(yql/essentials/sql/v1) + +SRCS( + sql_ut.cpp + sql_match_recognize_ut.cpp +) + +PEERDIR( + library/cpp/regex/pcre + yql/essentials/public/udf/service/exception_policy + yql/essentials/core/sql_types + yql/essentials/sql + yql/essentials/sql/pg_dummy + yql/essentials/sql/v1/format +) + +TIMEOUT(300) + +SIZE(MEDIUM) + +END() diff --git a/yql/essentials/sql/v1/ut_antlr4/ya.make b/yql/essentials/sql/v1/ut_antlr4/ya.make new file mode 100644 index 00000000000..211ebf7fe2e --- /dev/null +++ b/yql/essentials/sql/v1/ut_antlr4/ya.make @@ -0,0 +1,21 @@ +UNITTEST_FOR(yql/essentials/sql/v1) + +SRCS( + sql_ut_antlr4.cpp + sql_match_recognize_ut.cpp +) + +PEERDIR( + library/cpp/regex/pcre + yql/essentials/public/udf/service/exception_policy + yql/essentials/core/sql_types + yql/essentials/sql + yql/essentials/sql/pg_dummy + yql/essentials/sql/v1/format +) + +TIMEOUT(300) + +SIZE(MEDIUM) + +END() diff --git a/yql/essentials/sql/v1/ya.make b/yql/essentials/sql/v1/ya.make new file mode 100644 index 00000000000..3da22939466 --- /dev/null +++ b/yql/essentials/sql/v1/ya.make @@ -0,0 +1,69 @@ +LIBRARY() + +PEERDIR( + library/cpp/charset + library/cpp/enumbitset + library/cpp/json + library/cpp/yson/node + yql/essentials/minikql + yql/essentials/public/udf + yql/essentials/sql/settings + yql/essentials/core/issue + yql/essentials/core/issue/protos + yql/essentials/core/sql_types + yql/essentials/parser/lexer_common + yql/essentials/parser/proto_ast/collect_issues + yql/essentials/parser/proto_ast/gen/v1 + yql/essentials/parser/proto_ast/gen/v1_ansi + yql/essentials/parser/proto_ast/gen/v1_proto_split + yql/essentials/parser/proto_ast/gen/v1_antlr4 + yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4 + yql/essentials/parser/pg_catalog + yql/essentials/sql/v1/lexer + yql/essentials/sql/v1/proto_parser +) + +SRCS( + aggregation.cpp + builtin.cpp + context.cpp + join.cpp + insert.cpp + list_builtin.cpp + match_recognize.cpp + node.cpp + select.cpp + source.cpp + sql.cpp + 
sql_call_expr.cpp + sql_expression.cpp + sql_group_by.cpp + sql_match_recognize.cpp + sql_into_tables.cpp + sql_query.cpp + sql_select.cpp + sql_translation.cpp + sql_values.cpp + query.cpp + object_processing.cpp +) + +YQL_LAST_ABI_VERSION() + +GENERATE_ENUM_SERIALIZATION(match_recognize.h) +GENERATE_ENUM_SERIALIZATION(node.h) +GENERATE_ENUM_SERIALIZATION(sql_call_param.h) + +END() + +RECURSE( + format + lexer + perf + proto_parser +) + +RECURSE_FOR_TESTS( + ut + ut_antlr4 +) |