summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/sql/v1
diff options
context:
space:
mode:
author vvvv <[email protected]> 2024-11-07 12:29:36 +0300
committer vvvv <[email protected]> 2024-11-07 13:49:47 +0300
commit d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
tree b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/sql/v1
parent 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
Moved other yql/essentials libs YQL-19206
init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/sql/v1')
-rw-r--r--yql/essentials/sql/v1/aggregation.cpp1469
-rw-r--r--yql/essentials/sql/v1/builtin.cpp3772
-rw-r--r--yql/essentials/sql/v1/context.cpp656
-rw-r--r--yql/essentials/sql/v1/context.h421
-rw-r--r--yql/essentials/sql/v1/format/sql_format.cpp3105
-rw-r--r--yql/essentials/sql/v1/format/sql_format.h35
-rw-r--r--yql/essentials/sql/v1/format/sql_format_ut.cpp51
-rw-r--r--yql/essentials/sql/v1/format/sql_format_ut.h1650
-rw-r--r--yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp52
-rw-r--r--yql/essentials/sql/v1/format/ut/ya.make7
-rw-r--r--yql/essentials/sql/v1/format/ut_antlr4/ya.make7
-rw-r--r--yql/essentials/sql/v1/format/ya.make26
-rw-r--r--yql/essentials/sql/v1/insert.cpp443
-rw-r--r--yql/essentials/sql/v1/join.cpp670
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.cpp77
-rw-r--r--yql/essentials/sql/v1/lexer/lexer.h9
-rw-r--r--yql/essentials/sql/v1/lexer/tsan.supp1
-rw-r--r--yql/essentials/sql/v1/lexer/ya.make19
-rw-r--r--yql/essentials/sql/v1/list_builtin.cpp142
-rw-r--r--yql/essentials/sql/v1/list_builtin.h160
-rw-r--r--yql/essentials/sql/v1/match_recognize.cpp254
-rw-r--r--yql/essentials/sql/v1/match_recognize.h130
-rw-r--r--yql/essentials/sql/v1/node.cpp3477
-rw-r--r--yql/essentials/sql/v1/node.h1567
-rw-r--r--yql/essentials/sql/v1/object_processing.cpp68
-rw-r--r--yql/essentials/sql/v1/object_processing.h105
-rw-r--r--yql/essentials/sql/v1/perf/parse.cpp70
-rw-r--r--yql/essentials/sql/v1/perf/ya.make14
-rw-r--r--yql/essentials/sql/v1/proto_parser/proto_parser.cpp152
-rw-r--r--yql/essentials/sql/v1/proto_parser/proto_parser.h22
-rw-r--r--yql/essentials/sql/v1/proto_parser/ya.make21
-rw-r--r--yql/essentials/sql/v1/query.cpp3567
-rw-r--r--yql/essentials/sql/v1/select.cpp3195
-rw-r--r--yql/essentials/sql/v1/source.cpp992
-rw-r--r--yql/essentials/sql/v1/source.h320
-rw-r--r--yql/essentials/sql/v1/sql.cpp247
-rw-r--r--yql/essentials/sql/v1/sql.h22
-rw-r--r--yql/essentials/sql/v1/sql_call_expr.cpp444
-rw-r--r--yql/essentials/sql/v1/sql_call_expr.h98
-rw-r--r--yql/essentials/sql/v1/sql_call_param.h20
-rw-r--r--yql/essentials/sql/v1/sql_expression.cpp2307
-rw-r--r--yql/essentials/sql/v1/sql_expression.h147
-rw-r--r--yql/essentials/sql/v1/sql_group_by.cpp475
-rw-r--r--yql/essentials/sql/v1/sql_group_by.h73
-rw-r--r--yql/essentials/sql/v1/sql_into_tables.cpp267
-rw-r--r--yql/essentials/sql/v1/sql_into_tables.h30
-rw-r--r--yql/essentials/sql/v1/sql_match_recognize.cpp377
-rw-r--r--yql/essentials/sql/v1/sql_match_recognize.h30
-rw-r--r--yql/essentials/sql/v1/sql_match_recognize_ut.cpp742
-rw-r--r--yql/essentials/sql/v1/sql_query.cpp3446
-rw-r--r--yql/essentials/sql/v1/sql_query.h86
-rw-r--r--yql/essentials/sql/v1/sql_select.cpp1470
-rw-r--r--yql/essentials/sql/v1/sql_select.h74
-rw-r--r--yql/essentials/sql/v1/sql_translation.cpp5149
-rw-r--r--yql/essentials/sql/v1/sql_translation.h342
-rw-r--r--yql/essentials/sql/v1/sql_ut.cpp7462
-rw-r--r--yql/essentials/sql/v1/sql_ut.h235
-rw-r--r--yql/essentials/sql/v1/sql_ut_antlr4.cpp7434
-rw-r--r--yql/essentials/sql/v1/sql_ut_antlr4.h226
-rw-r--r--yql/essentials/sql/v1/sql_values.cpp151
-rw-r--r--yql/essentials/sql/v1/sql_values.h48
-rw-r--r--yql/essentials/sql/v1/ut/ya.make21
-rw-r--r--yql/essentials/sql/v1/ut_antlr4/ya.make21
-rw-r--r--yql/essentials/sql/v1/ya.make69
64 files changed, 58239 insertions, 0 deletions
diff --git a/yql/essentials/sql/v1/aggregation.cpp b/yql/essentials/sql/v1/aggregation.cpp
new file mode 100644
index 00000000000..875ae7d97d6
--- /dev/null
+++ b/yql/essentials/sql/v1/aggregation.cpp
@@ -0,0 +1,1469 @@
+#include "node.h"
+#include "source.h"
+#include "context.h"
+
+#include <yql/essentials/ast/yql_type_string.h>
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+#include <array>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+namespace {
+ // Guards aggregations used over a window. When `src` reports a window name
+ // for which no window specification can be found, an error is written to
+ // `ctx` at `pos` and true ("blocked") is returned. In every other case,
+ // including a null `src` or a source without a window name, returns false.
+ bool BlockWindowAggregationWithoutFrameSpec(TPosition pos, TStringBuf name, ISource* src, TContext& ctx) {
+     if (!src) {
+         return false;
+     }
+
+     auto windowName = src->GetWindowName();
+     if (!windowName) {
+         return false;
+     }
+
+     auto windowSpec = src->FindWindowSpecification(ctx, *windowName);
+     if (windowSpec) {
+         return false;
+     }
+
+     ctx.Error(pos) << "Failed to use aggregation function " << name << " without window specification or in wrong place";
+     return true;
+ }
+
+ // Tells whether AggApply nodes should be produced. An explicitly set
+ // ctx.EmitAggApply wins; otherwise the answer is true exactly when the
+ // block engine is enabled or forced.
+ bool ShouldEmitAggApply(const TContext& ctx) {
+     return ctx.EmitAggApply.GetOrElse(ctx.BlockEngineEnable || ctx.BlockEngineForce);
+ }
+}
+
+// Traits factory names for which TAggregationFactory records an AggApply name.
+// The constructor strips the trailing 15 characters ("_traits_factory") from a
+// matching name, e.g. "sum_traits_factory" becomes "sum".
+static const THashSet<TString> AggApplyFuncs = {
+ "count_traits_factory",
+ "sum_traits_factory",
+ "avg_traits_factory",
+ "min_traits_factory",
+ "max_traits_factory",
+ "some_traits_factory",
+};
+
+// Base implementation of an aggregation function driven by a traits factory.
+//
+// If `func` is non-empty, the factory is bound in the constructor from
+// "window_module" (OverWindow / OverWindowDistinct) or "aggregate_module".
+// If `func` is empty, the factory is dynamic: InitAggr() takes it from the
+// second call argument and DoInit() initializes it against a fake source.
+class TAggregationFactory : public IAggregation {
+public:
+    // pos, name, func and aggMode are forwarded to IAggregation.
+    // multi        - GetApply() emits MultiAggregate instead of Apply/AggApply.
+    // validateArgs - when false, InitAggr() skips the argument count check for
+    //                non-factory calls and just stores the arguments in Exprs.
+    TAggregationFactory(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode,
+        bool multi = false, bool validateArgs = true)
+        : IAggregation(pos, name, func, aggMode), Factory(!func.empty() ?
+            BuildBind(Pos, aggMode == EAggregateMode::OverWindow || aggMode == EAggregateMode::OverWindowDistinct ? "window_module" : "aggregate_module", func) : nullptr),
+        Multi(multi), ValidateArgs(validateArgs), DynamicFactory(!Factory)
+    {
+        if (aggMode != EAggregateMode::OverWindow && !func.empty() && AggApplyFuncs.contains(func)) {
+            // Drop the 15-character "_traits_factory" suffix, e.g. "sum_traits_factory" -> "sum".
+            AggApplyName = func.substr(0, func.size() - 15);
+        }
+
+        if (!Factory) {
+            // A dynamic factory is initialized against a fake source in DoInit().
+            FakeSource = BuildFakeSource(pos);
+        }
+    }
+
+protected:
+    // Validates the argument count, captures the aggregated expression (and the
+    // dynamic factory, if any), runs Init() and, for non-factory calls, adds a
+    // "Member row <Name>" reference to `node`.
+    // Returns false after an error has been reported to `ctx`.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) override {
+        if (!ShouldEmitAggApply(ctx)) {
+            AggApplyName = "";
+        }
+
+        if (ValidateArgs || isFactory) {
+            // Dynamic factory: (expr, factory). Bound factory: (expr) for a call, nothing for a factory.
+            ui32 expectedArgs = ValidateArgs && !Factory ? 2 : (isFactory ? 0 : 1);
+            if (!Factory && ValidateArgs) {
+                YQL_ENSURE(!isFactory);
+            }
+
+            if (expectedArgs != exprs.size()) {
+                ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name
+                    << " requires exactly " << expectedArgs << " argument(s), given: " << exprs.size();
+                return false;
+            }
+        }
+
+        if (!ValidateArgs) {
+            Exprs = exprs;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        if (ValidateArgs) {
+            if (!Factory) {
+                // Dynamic factory: supplied by the caller as the second argument.
+                Factory = exprs[1];
+            }
+        }
+
+        if (!isFactory) {
+            if (ValidateArgs) {
+                Expr = exprs.front();
+            }
+
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (Expr && Expr->IsAsterisk() && AggApplyName == "count") {
+            // COUNT(*) maps to a dedicated AggApply function.
+            AggApplyName = "count_all";
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    // Returns the traits factory node (bound or dynamic).
+    TNodePtr AggregationTraitsFactory() const override {
+        return Factory;
+    }
+
+    // Builds the lambda that extracts the aggregated value from a row; with
+    // `many` the expression is wrapped into Unwrap first.
+    TNodePtr GetExtractor(bool many, TContext& ctx) const override {
+        Y_UNUSED(ctx);
+        return BuildLambda(Pos, Y("row"), Y("PersistableRepr", many ? Y("Unwrap", Expr) : Expr));
+    }
+
+    // Builds the node that applies the factory to `type` and the extractor:
+    //  - MultiAggregate when Multi is set;
+    //  - AggApply when allowed, the factory is not dynamic and an AggApply name is known;
+    //  - a plain Apply of the factory otherwise.
+    // Returns nullptr if no extractor could be built.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const override {
+        auto extractor = GetExtractor(many, ctx);
+        if (!extractor) {
+            return nullptr;
+        }
+
+        if (!Multi) {
+            if (!DynamicFactory && allowAggApply && !AggApplyName.empty()) {
+                return Y("AggApply", Q(AggApplyName), Y("ListItemType", type), extractor);
+            }
+
+            return Y("Apply", Factory, (DynamicFactory ? Y("ListItemType", type) : type),
+                extractor);
+        }
+
+        return Y("MultiAggregate",
+            Y("ListItemType", type),
+            extractor,
+            Factory);
+    }
+
+    // Initializes the argument expressions and, for a dynamic factory, the
+    // factory itself. Returns false after an error has been reported to `ctx`.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArgs) {
+            for (auto x : Exprs) {
+                if (!x->Init(ctx, src)) {
+                    return false;
+                }
+                if (x->IsAggregated() && !x->IsAggregationKey() && !IsOverWindow()) {
+                    ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        if (!Expr) {
+            return true;
+        }
+
+        if (!Expr->Init(ctx, src)) {
+            return false;
+        }
+        if (Expr->IsAggregated() && !Expr->IsAggregationKey() && !IsOverWindow()) {
+            ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+            return false;
+        }
+        if (AggMode == EAggregateMode::Distinct || AggMode == EAggregateMode::OverWindowDistinct) {
+            const auto column = Expr->GetColumnName();
+            if (!column) {
+                // TODO: improve TBasicAggrFunc::CollectPreaggregateExprs()
+                // The DISTINCT argument has no column name here. Report that
+                // condition at the expression position, like the other
+                // DISTINCT diagnostics below.
+                ctx.Error(Expr->GetPos()) << "DISTINCT qualifier may only be used with column references";
+                return false;
+            }
+            DistinctKey = *column;
+            YQL_ENSURE(src);
+            if (!IsGeneratedKeyColumn && src->GetJoin()) {
+                const auto sourcePtr = Expr->GetSourceName();
+                if (!sourcePtr || !*sourcePtr) {
+                    if (!src->IsGroupByColumn(DistinctKey)) {
+                        ctx.Error(Expr->GetPos()) << ErrorDistinctWithoutCorrelation(DistinctKey);
+                        return false;
+                    }
+                } else {
+                    // Qualify the key with its source name when aggregating over a join.
+                    DistinctKey = DotJoin(*sourcePtr, DistinctKey);
+                }
+            }
+            if (src->IsGroupByColumn(DistinctKey)) {
+                ctx.Error(Expr->GetPos()) << ErrorDistinctByGroupKey(DistinctKey);
+                return false;
+            }
+            // For DISTINCT the extractor receives the row itself.
+            Expr = AstNode("row");
+        }
+
+        if (FakeSource) {
+            // FakeSource is only created for a dynamic factory (see the constructor).
+            if (!Factory->Init(ctx, FakeSource.Get())) {
+                return false;
+            }
+
+            if (AggMode == EAggregateMode::OverWindow) {
+                // Wrap the user factory so that its result is converted with ToWindowTraits.
+                Factory = BuildLambda(Pos, Y("type", "extractor"), Y("block", Q(Y(
+                    Y("let", "x", Y("Apply", Factory, "type", "extractor")),
+                    Y("return", Y("ToWindowTraits", "x"))
+                ))));
+            }
+        }
+
+        return true;
+    }
+
+    TNodePtr Factory;          // traits factory: bound in the constructor or taken from the call arguments
+    TNodePtr Expr;             // aggregated expression (set when ValidateArgs is true)
+    bool Multi;                // emit MultiAggregate in GetApply()
+    bool ValidateArgs;         // enforce the argument count in InitAggr()
+    TString AggApplyName;      // AggApply function name; empty when AggApply is not used
+    TVector<TNodePtr> Exprs;   // raw arguments (set when ValidateArgs is false)
+
+private:
+    TSourcePtr FakeSource;     // source used to initialize a dynamic factory
+    bool DynamicFactory;       // true when no factory was bound in the constructor
+};
+
+// Concrete, clonable flavour of TAggregationFactory that keeps the default
+// argument validation and only forwards its constructor arguments.
+class TAggregationFactoryImpl final : public TAggregationFactory {
+public:
+    TAggregationFactoryImpl(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi)
+        : TAggregationFactory(pos, name, func, aggMode, multi)
+    {
+    }
+
+private:
+    // Creates a fresh node from the current position, name, function, mode and Multi flag.
+    TNodePtr DoClone() const final {
+        TNodePtr copy = new TAggregationFactoryImpl(Pos, Name, Func, AggMode, Multi);
+        return copy;
+    }
+};
+
+// Creates a TAggregationFactoryImpl from the given arguments.
+TAggregationPtr BuildFactoryAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi) {
+    TAggregationPtr aggregation(new TAggregationFactoryImpl(pos, name, func, aggMode, multi));
+    return aggregation;
+}
+
+// Aggregation over a (payload, key) pair with an optional trailing limit
+// argument. InitAggr() appends "2" to Func when the limit is present and "1"
+// otherwise, then re-binds Factory to the suffixed function name.
+class TKeyPayloadAggregationFactory final : public TAggregationFactory {
+public:
+ TKeyPayloadAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+ : TAggregationFactory(pos, name, factory, aggMode)
+ , FakeSource(BuildFakeSource(pos))
+ {}
+
+private:
+ // Accepts 2 or 3 arguments for a call (payload, key[, limit]) and 0 or 1 for a
+ // factory ([limit]). Returns false after an error has been reported to `ctx`.
+ bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+ ui32 adjustArgsCount = isFactory ? 0 : 2;
+ if (exprs.size() < adjustArgsCount || exprs.size() > 1 + adjustArgsCount) {
+ ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name << " requires "
+ << adjustArgsCount << " or " << (1 + adjustArgsCount) << " arguments, given: " << exprs.size();
+ return false;
+ }
+ if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+ return false;
+ }
+
+ if (!isFactory) {
+ Payload = exprs.front();
+ Key = exprs[1];
+ }
+
+ // The presence of the optional limit selects the "...2" or "...1" variant of the function.
+ if (1 + adjustArgsCount == exprs.size()) {
+ Limit = exprs.back();
+ Func += "2";
+ } else {
+ Func += "1";
+ }
+
+ // Re-bind to the suffixed name; only done when a factory was bound by the base constructor.
+ // NOTE(review): only OverWindow selects "window_module" here, while the base constructor
+ // also treats OverWindowDistinct as a window mode - confirm this difference is intended.
+ if (Factory) {
+ Factory = BuildBind(Pos, AggMode == EAggregateMode::OverWindow ? "window_module" : "aggregate_module", Func);
+ }
+
+ if (!isFactory) {
+ Name = src->MakeLocalName(Name);
+ }
+
+ if (!Init(ctx, src)) {
+ return false;
+ }
+
+ if (!isFactory) {
+ node.Add("Member", "row", Q(Name));
+ if (IsOverWindow()) {
+ src->AddTmpWindowColumn(Name);
+ }
+ }
+
+ return true;
+ }
+
+ // NOTE(review): Func is passed as it is now, i.e. with the "1"/"2" suffix if InitAggr()
+ // has already run on this node - confirm clones are only taken before InitAggr().
+ TNodePtr DoClone() const final {
+ return new TKeyPayloadAggregationFactory(Pos, Name, Func, AggMode);
+ }
+
+ // Lambda extracting the payload from a row (wrapped into Unwrap when `many`).
+ TNodePtr GetExtractor(bool many, TContext& ctx) const final {
+ Y_UNUSED(ctx);
+ return BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload);
+ }
+
+ // Applies the factory to `type`, a key lambda and a payload lambda (in that
+ // order), followed by the limit when one was given.
+ TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+ Y_UNUSED(ctx);
+ Y_UNUSED(allowAggApply);
+ auto apply = Y("Apply", Factory, type,
+ BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Key) : Key),
+ BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload));
+ AddFactoryArguments(apply);
+ return apply;
+ }
+
+ // Appends the limit to `apply` if a limit was supplied.
+ void AddFactoryArguments(TNodePtr& apply) const final {
+ if (Limit) {
+ apply = L(apply, Limit);
+ }
+ }
+
+ // Presumably maps factory lambda positions to call argument positions (key is
+ // argument 1, payload is argument 0) - TODO confirm against the caller.
+ std::vector<ui32> GetFactoryColumnIndices() const final {
+ return {1u, 0u};
+ }
+
+ // Initializes the limit against the fake source and key/payload against `src`.
+ // Only the key is checked for being an aggregated value.
+ bool DoInit(TContext& ctx, ISource* src) final {
+ if (Limit) {
+ if (!Limit->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+ }
+
+ // Factory mode: no key/payload were captured.
+ if (!Key) {
+ return true;
+ }
+
+ if (!Key->Init(ctx, src)) {
+ return false;
+ }
+ if (!Payload->Init(ctx, src)) {
+ return false;
+ }
+
+ if (Key->IsAggregated()) {
+ ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+ return false;
+ }
+ return true;
+ }
+
+ // Source used to initialize the limit argument.
+ TSourcePtr FakeSource;
+ TNodePtr Key, Payload, Limit;
+};
+
+// Creates a TKeyPayloadAggregationFactory from the given arguments.
+TAggregationPtr BuildKeyPayloadFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new TKeyPayloadAggregationFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// Aggregation over a (payload, predicate) pair: a call takes exactly two
+// arguments, a factory takes none.
+class TPayloadPredicateAggregationFactory final : public TAggregationFactory {
+public:
+    TPayloadPredicateAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+    {
+    }
+
+private:
+    // Checks the argument count, captures payload/predicate for a call, runs
+    // Init() and registers the result column. Returns false after an error
+    // has been reported to `ctx`.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        const bool asCall = !isFactory;
+        const ui32 requiredArgs = asCall ? 2 : 0;
+        if (exprs.size() != requiredArgs) {
+            ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name << " requires " <<
+                requiredArgs << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        if (asCall) {
+            Payload = exprs.front();
+            Predicate = exprs.back();
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (asCall) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        TNodePtr copy = new TPayloadPredicateAggregationFactory(Pos, Name, Func, AggMode);
+        return copy;
+    }
+
+    // Lambda extracting the payload from a row (wrapped into Unwrap when `many`).
+    TNodePtr GetExtractor(bool many, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        const TNodePtr payloadBody = many ? Y("Unwrap", Payload) : Payload;
+        return BuildLambda(Pos, Y("row"), payloadBody);
+    }
+
+    // Applies the factory to `type`, a payload lambda and a predicate lambda.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        const TNodePtr payloadLambda = BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload);
+        const TNodePtr predicateLambda = BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Predicate) : Predicate);
+        return Y("Apply", Factory, type, payloadLambda, predicateLambda);
+    }
+
+    std::vector<ui32> GetFactoryColumnIndices() const final {
+        return {0u, 1u};
+    }
+
+    // Initializes the predicate first, then the payload; an aggregated
+    // payload is rejected. Nothing to do when no predicate was captured.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!Predicate) {
+            return true;
+        }
+
+        if (!Predicate->Init(ctx, src) || !Payload->Init(ctx, src)) {
+            return false;
+        }
+
+        if (!Payload->IsAggregated()) {
+            return true;
+        }
+
+        ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+        return false;
+    }
+
+    TNodePtr Payload, Predicate;
+};
+
+// Creates a TPayloadPredicateAggregationFactory from the given arguments.
+TAggregationPtr BuildPayloadPredicateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new TPayloadPredicateAggregationFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// Aggregation over two expressions that are handed to the factory as a single
+// tuple: a call takes exactly two arguments, a factory takes none.
+class TTwoArgsAggregationFactory final : public TAggregationFactory {
+public:
+    TTwoArgsAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+    {
+    }
+
+private:
+    // Checks the argument count, captures both expressions for a call, runs
+    // Init() and registers the result column. Returns false after an error
+    // has been reported to `ctx`.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        const bool asCall = !isFactory;
+        const ui32 requiredArgs = asCall ? 2 : 0;
+        if (exprs.size() != requiredArgs) {
+            ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name << " requires " <<
+                requiredArgs << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        if (asCall) {
+            One = exprs.front();
+            Two = exprs.back();
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (asCall) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        TNodePtr copy = new TTwoArgsAggregationFactory(Pos, Name, Func, AggMode);
+        return copy;
+    }
+
+    // Lambda extracting the first expression from a row (wrapped into Unwrap when `many`).
+    TNodePtr GetExtractor(bool many, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        const TNodePtr firstBody = many ? Y("Unwrap", One) : One;
+        return BuildLambda(Pos, Y("row"), firstBody);
+    }
+
+    // Applies the factory to `type` and a lambda returning the (One, Two) tuple.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto pair = Q(Y(One, Two));
+        const TNodePtr pairLambda = BuildLambda(Pos, Y("row"), many ? Y("Unwrap", pair) : pair);
+        return Y("Apply", Factory, type, pairLambda);
+    }
+
+    // Initializes both expressions (first, then second); aggregated inputs are
+    // rejected unless the aggregation runs over a window. Nothing to do when
+    // no expressions were captured.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!One) {
+            return true;
+        }
+
+        if (!One->Init(ctx, src) || !Two->Init(ctx, src)) {
+            return false;
+        }
+
+        const bool hasAggregatedInput = One->IsAggregated() || Two->IsAggregated();
+        if (hasAggregatedInput && !IsOverWindow()) {
+            ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+            return false;
+        }
+
+        return true;
+    }
+
+    TNodePtr One, Two;
+};
+
+// Creates a TTwoArgsAggregationFactory from the given arguments.
+TAggregationPtr BuildTwoArgsFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new TTwoArgsAggregationFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// Histogram aggregation: a value expression plus an optional weight
+// (default: Double 1.0) and an optional number of intervals (default: Uint32 100).
+class THistogramAggregationFactory final : public TAggregationFactory {
+public:
+ THistogramAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+ : TAggregationFactory(pos, name, factory, aggMode)
+ , FakeSource(BuildFakeSource(pos))
+ , Weight(Y("Double", Q("1.0")))
+ , Intervals(Y("Uint32", Q("100")))
+ {}
+
+private:
+ // Parses the optional arguments, then delegates to the base InitAggr() with
+ // only the value expression (or no arguments in factory mode).
+ // Call forms: (value), (value, weight), (value, intervals), (value, weight, intervals).
+ // Factory forms: () or (intervals).
+ bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+ if (isFactory) {
+ if (exprs.size() > 1) {
+ ctx.Error(Pos) << "Aggregation function factory " << Name << " requires 0 or 1 argument(s), given: " << exprs.size();
+ return false;
+ }
+ } else {
+ if (exprs.empty() || exprs.size() > 3) {
+ ctx.Error(Pos) << "Aggregation function " << Name << " requires one, two or three arguments, given: " << exprs.size();
+ return false;
+ }
+ }
+
+ if (!isFactory) {
+ /// \todo: solve it with named arguments
+ // The last argument is treated as the intervals count only if it is an integer literal.
+ const auto integer = exprs.back()->IsIntegerLiteral();
+ switch (exprs.size()) {
+ case 2U:
+ // Two arguments: a non-integer second argument is the weight.
+ if (!integer) {
+ Weight = exprs.back();
+ }
+ break;
+ case 3U:
+ // Three arguments: (value, weight, intervals); intervals must be an integer literal.
+ if (!integer) {
+ ctx.Error(Pos) << "Aggregation function " << Name << " for case with 3 arguments should have third argument of integer type";
+ return false;
+ }
+ Weight = exprs[1];
+ break;
+ }
+ if (exprs.size() >= 2 && integer) {
+ Intervals = Y("Cast", exprs.back(), Q("Uint32"));
+ }
+ } else {
+ if (exprs.size() >= 1) {
+ const auto integer = exprs.back()->IsIntegerLiteral();
+ if (!integer) {
+ // NOTE(review): the message below misspells "integer" and says "second" although a
+ // factory takes at most one argument; it is runtime text and is left untouched here.
+ ctx.Error(Pos) << "Aggregation function factory " << Name << " should have second interger argument";
+ return false;
+ }
+
+ Intervals = Y("Cast", exprs.back(), Q("Uint32"));
+ }
+ }
+
+ return TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front()));
+ }
+
+ TNodePtr DoClone() const final {
+ return new THistogramAggregationFactory(Pos, Name, Func, AggMode);
+ }
+
+ // Applies the factory to `type`, a value lambda and a weight lambda, then
+ // appends the intervals count.
+ TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+ Y_UNUSED(ctx);
+ Y_UNUSED(allowAggApply);
+ auto apply = Y("Apply", Factory, type,
+ BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr),
+ BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Weight) : Weight));
+ AddFactoryArguments(apply);
+ return apply;
+ }
+
+ // Appends the intervals count to `apply`.
+ void AddFactoryArguments(TNodePtr& apply) const final {
+ apply = L(apply, Intervals);
+ }
+
+ std::vector<ui32> GetFactoryColumnIndices() const final {
+ return {0u, 1u};
+ }
+
+ // Initializes the weight against `src` and the intervals count against the
+ // fake source, then runs the base DoInit().
+ bool DoInit(TContext& ctx, ISource* src) final {
+ if (!Weight->Init(ctx, src)) {
+ return false;
+ }
+ if (!Intervals->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+
+ return TAggregationFactory::DoInit(ctx, src);
+ }
+
+ // Source used to initialize the intervals count.
+ TSourcePtr FakeSource;
+ TNodePtr Weight, Intervals;
+};
+
+// Creates a THistogramAggregationFactory from the given arguments.
+TAggregationPtr BuildHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new THistogramAggregationFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// Linear histogram aggregation: a value expression followed by up to three
+// optional parameters - bin size (default: Double 10.0), minimum (default:
+// -Max<double>()) and maximum (default: Max<double>()).
+class TLinearHistogramAggregationFactory final : public TAggregationFactory {
+public:
+    TLinearHistogramAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+        , FakeSource(BuildFakeSource(pos))
+        , BinSize(Y("Double", Q("10.0")))
+        , Minimum(Y("Double", Q(ToString(-1.0 * Max<double>()))))
+        , Maximum(Y("Double", Q(ToString(Max<double>()))))
+    {
+    }
+
+private:
+    // Picks up the optional parameters and delegates to the base InitAggr()
+    // with only the value expression (or no arguments in factory mode).
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        if (isFactory) {
+            if (exprs.size() > 3) {
+                ctx.Error(Pos) << "Aggregation function " << Name << " requires zero to three arguments, given: " << exprs.size();
+                return false;
+            }
+        } else if (exprs.empty() || exprs.size() > 4) {
+            ctx.Error(Pos) << "Aggregation function " << Name << " requires one to four arguments, given: " << exprs.size();
+            return false;
+        }
+
+        // Index of the first optional parameter: a call has the value expression in front.
+        const size_t firstOptional = isFactory ? 0 : 1;
+
+        if (exprs.size() > firstOptional) {
+            BinSize = exprs[firstOptional];
+        }
+
+        if (exprs.size() > firstOptional + 1) {
+            Minimum = exprs[firstOptional + 1];
+        }
+
+        if (exprs.size() > firstOptional + 2) {
+            Maximum = exprs[firstOptional + 2];
+        }
+
+        TVector<TNodePtr> baseArgs;
+        if (!isFactory) {
+            baseArgs.push_back(exprs.front());
+        }
+        return TAggregationFactory::InitAggr(ctx, isFactory, src, node, baseArgs);
+    }
+
+    TNodePtr DoClone() const final {
+        TNodePtr copy = new TLinearHistogramAggregationFactory(Pos, Name, Func, AggMode);
+        return copy;
+    }
+
+    // Applies the factory to `type`, a value lambda and the three parameters.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        const TNodePtr valueLambda = BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr);
+        return Y("Apply", Factory, type, valueLambda, BinSize, Minimum, Maximum);
+    }
+
+    // Appends bin size, minimum and maximum to `apply`.
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, BinSize, Minimum, Maximum);
+    }
+
+    // Initializes the three parameters against the fake source (bin size,
+    // minimum, maximum - in that order), then runs the base DoInit().
+    bool DoInit(TContext& ctx, ISource* src) final {
+        const bool paramsReady = BinSize->Init(ctx, FakeSource.Get())
+            && Minimum->Init(ctx, FakeSource.Get())
+            && Maximum->Init(ctx, FakeSource.Get());
+        if (!paramsReady) {
+            return false;
+        }
+
+        return TAggregationFactory::DoInit(ctx, src);
+    }
+
+    // Source used to initialize the histogram parameters.
+    TSourcePtr FakeSource;
+    TNodePtr BinSize, Minimum, Maximum;
+};
+
+// Creates a TLinearHistogramAggregationFactory from the given arguments.
+TAggregationPtr BuildLinearHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new TLinearHistogramAggregationFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// Percentile aggregation: a value expression plus an optional percentile
+// (default: Double 0.5). Several percentile aggregations can be merged into one
+// node via Join(); the merged node then carries all (name, percentile) pairs.
+class TPercentileFactory final : public TAggregationFactory {
+public:
+ TPercentileFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+ : TAggregationFactory(pos, name, factory, aggMode)
+ , FakeSource(BuildFakeSource(pos))
+ {}
+
+private:
+ // Returns the column name of the aggregated expression recorded by InitAggr()
+ // (nullptr in factory mode, where nothing is recorded).
+ // Presumably used by the caller to find aggregations that can be joined - TODO confirm.
+ const TString* GetGenericKey() const final {
+ return Column;
+ }
+
+ // Moves all percentiles of `aggr` into this node and leaves `aggr` empty.
+ // `aggr` must be a TPercentileFactory over the same column and aggregate mode.
+ void Join(IAggregation* aggr) final {
+ const auto percentile = dynamic_cast<TPercentileFactory*>(aggr);
+ YQL_ENSURE(percentile);
+ YQL_ENSURE(Column && percentile->Column && *Column == *percentile->Column);
+ YQL_ENSURE(AggMode == percentile->AggMode);
+ Percentiles.insert(percentile->Percentiles.cbegin(), percentile->Percentiles.cend());
+ percentile->Percentiles.clear();
+ }
+
+ // Accepts 1 or 2 arguments for a call (value[, percentile]) and 0 or 1 for a
+ // factory ([percentile]). Returns false after an error has been reported to `ctx`.
+ bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+ ui32 adjustArgsCount = isFactory ? 0 : 1;
+ if (exprs.size() < 0 + adjustArgsCount || exprs.size() > 1 + adjustArgsCount) {
+ ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name << " requires "
+ << (0 + adjustArgsCount) << " or " << (1 + adjustArgsCount) << " arguments, given: " << exprs.size();
+ return false;
+ }
+
+ if (!isFactory) {
+ Column = exprs.front()->GetColumnName();
+ }
+
+ // The base class only sees the value expression.
+ if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front())))
+ return false;
+
+ // Percentile argument: user supplied (initialized against the fake source) or the default 0.5.
+ TNodePtr x;
+ if (1 + adjustArgsCount == exprs.size()) {
+ x = exprs.back();
+ if (!x->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+ } else {
+ x = Y("Double", Q("0.5"));
+ }
+
+ if (isFactory) {
+ FactoryPercentile = x;
+ } else {
+ // Name was already replaced by the base InitAggr() with the local name.
+ Percentiles.emplace(Name, x);
+ }
+
+ return true;
+ }
+
+ TNodePtr DoClone() const final {
+ return new TPercentileFactory(Pos, Name, Func, AggMode);
+ }
+
+ // Applies the factory to `type`, a value lambda and the percentile(s): a single
+ // percentile is passed as is, several are passed as one quoted list.
+ // Requires Percentiles to be non-empty: cbegin() is dereferenced unconditionally.
+ TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+ Y_UNUSED(ctx);
+ Y_UNUSED(allowAggApply);
+ TNodePtr percentiles(Percentiles.cbegin()->second);
+
+ if (Percentiles.size() > 1U) {
+ percentiles = Y();
+ for (const auto& percentile : Percentiles) {
+ percentiles = L(percentiles, percentile.second);
+ }
+ percentiles = Q(percentiles);
+ }
+
+ return Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr), percentiles);
+ }
+
+ // Appends the factory-mode percentile to `apply`.
+ void AddFactoryArguments(TNodePtr& apply) const final {
+ apply = L(apply, FactoryPercentile);
+ }
+
+ // Builds the traits tuple (names, traits[, distinct key]).
+ // Returns {null, true} when this node holds no percentiles (e.g. after being
+ // joined into another node) and {null, false} when building failed.
+ std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const final {
+ if (Percentiles.empty())
+ return { TNodePtr(), true };
+
+ // A single name is passed as a quoted atom, several names as a quoted list.
+ TNodePtr names(Q(Percentiles.cbegin()->first));
+
+ if (Percentiles.size() > 1U) {
+ names = Y();
+ for (const auto& percentile : Percentiles)
+ names = L(names, Q(percentile.first));
+ names = Q(names);
+ }
+
+ // For DISTINCT the factory is applied to a list of the distinct key's member type.
+ const bool distinct = AggMode == EAggregateMode::Distinct;
+ const auto listType = distinct ? Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type;
+ auto apply = GetApply(listType, many, allowAggApply, ctx);
+ if (!apply) {
+ return { TNodePtr(), false };
+ }
+
+ auto wrapped = WrapIfOverState(apply, overState, many, ctx);
+ if (!wrapped) {
+ return { TNodePtr(), false };
+ }
+
+ return { distinct ?
+ Q(Y(names, wrapped, BuildQuotedAtom(Pos, DistinctKey))) :
+ Q(Y(names, wrapped)), true };
+ }
+
+ // Initializes every stored percentile node against `src`, then runs the base DoInit().
+ bool DoInit(TContext& ctx, ISource* src) final {
+ for (const auto& p : Percentiles) {
+ if (!p.second->Init(ctx, src)) {
+ return false;
+ }
+ }
+
+ return TAggregationFactory::DoInit(ctx, src);
+ }
+
+ // Source used to initialize a user-supplied percentile argument.
+ TSourcePtr FakeSource;
+ // Result name -> percentile node; can hold several entries after Join().
+ std::multimap<TString, TNodePtr> Percentiles;
+ // Percentile used in factory mode.
+ TNodePtr FactoryPercentile;
+ // Column name of the aggregated expression; stays nullptr in factory mode.
+ const TString* Column = nullptr;
+};
+
+// Creates a TPercentileFactory from the given arguments.
+TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new TPercentileFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// TopFreq aggregation: a value expression plus two optional parameters - the
+// number of items `n` and the buffer size. Missing parameters get defaults
+// built in InitAggr().
+class TTopFreqFactory final : public TAggregationFactory {
+public:
+ TTopFreqFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+ : TAggregationFactory(pos, name, factory, aggMode)
+ , FakeSource(BuildFakeSource(pos))
+ {}
+
+private:
+
+ //first - n, second - buffer
+ using TPair = std::pair<TNodePtr, TNodePtr>;
+
+ // Accepts 1 to 3 arguments for a call (value[, n[, buffer]]) and 0 to 2 for a
+ // factory ([n[, buffer]]). Returns false after an error has been reported to `ctx`.
+ bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+ ui32 adjustArgsCount = isFactory ? 0 : 1;
+ // Default buffer size is n * DefaultBufferC; the buffer is never smaller than MinBuffer.
+ const double DefaultBufferC = 1.5;
+ const ui32 MinBuffer = 100;
+
+ if (exprs.size() < adjustArgsCount || exprs.size() > 2 + adjustArgsCount) {
+ ctx.Error(Pos) << "Aggregation function " << (isFactory? "factory " : "") << Name <<
+ " requires " << adjustArgsCount << " to " << (2 + adjustArgsCount) << " arguments, given: " << exprs.size();
+ return false;
+ }
+
+ // The base class only sees the value expression.
+ if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front())))
+ return false;
+
+ TNodePtr n = Y("Null");
+ TNodePtr buffer = Y("Null");
+
+ if (1 + adjustArgsCount <= exprs.size()) {
+ n = exprs[adjustArgsCount];
+ if (!n->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+ n = Y("SafeCast", n, Q("Uint32"));
+ }
+
+ // A missing n (or a Null result of the cast above) falls back to 1.
+ n = Y("Coalesce", n, Y("Uint32", Q("1")));
+ if (2 + adjustArgsCount == exprs.size()) {
+ buffer = exprs[1 + adjustArgsCount];
+ if (!buffer->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+
+ buffer = Y("SafeCast", buffer, Q("Uint32"));
+ }
+
+ // buffer := max(coalesce(buffer, SafeCast(n * 1.5 as Uint32), MinBuffer), MinBuffer)
+ buffer = Y("Coalesce", buffer, Y("SafeCast", Y("*", n, Y("Double", Q(ToString(DefaultBufferC)))), Q("Uint32")));
+ buffer = Y("Coalesce", buffer, Y("Uint32", Q(ToString(MinBuffer))));
+ buffer = Y("Max", buffer, Y("Uint32", Q(ToString(MinBuffer))));
+
+ auto x = TPair{ n, buffer };
+ if (isFactory) {
+ TopFreqFactoryParams = x;
+ } else {
+ // Name was already replaced by the base InitAggr() with the local name.
+ TopFreqs.emplace(Name, x);
+ }
+
+ return true;
+ }
+
+ TNodePtr DoClone() const final {
+ return new TTopFreqFactory(Pos, Name, Func, AggMode);
+ }
+
+ // Applies the factory to `type`, a value lambda and the (n, buffer) parameters:
+ // a single pair is passed as is, several pairs as two quoted lists.
+ // Requires TopFreqs to be non-empty: cbegin() is dereferenced unconditionally.
+ TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+ Y_UNUSED(ctx);
+ Y_UNUSED(allowAggApply);
+ TPair topFreqs(TopFreqs.cbegin()->second);
+
+ if (TopFreqs.size() > 1U) {
+ topFreqs = { Y(), Y() };
+ for (const auto& topFreq : TopFreqs) {
+ topFreqs = { L(topFreqs.first, topFreq.second.first), L(topFreqs.second, topFreq.second.second) };
+ }
+ topFreqs = { Q(topFreqs.first), Q(topFreqs.second) };
+ }
+
+ auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr), topFreqs.first, topFreqs.second);
+ return apply;
+ }
+
+ // Appends the factory-mode (n, buffer) parameters to `apply`.
+ void AddFactoryArguments(TNodePtr& apply) const final {
+ apply = L(apply, TopFreqFactoryParams.first, TopFreqFactoryParams.second);
+ }
+
+ // Builds the traits tuple (names, traits[, distinct key]).
+ // Returns {null, true} when this node holds no entries and {null, false}
+ // when building failed.
+ std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const final {
+ if (TopFreqs.empty())
+ return { TNodePtr(), true };
+
+ // A single name is passed as a quoted atom, several names as a quoted list.
+ TNodePtr names(Q(TopFreqs.cbegin()->first));
+
+ if (TopFreqs.size() > 1U) {
+ names = Y();
+ for (const auto& topFreq : TopFreqs)
+ names = L(names, Q(topFreq.first));
+ names = Q(names);
+ }
+
+ // For DISTINCT the factory is applied to a list of the distinct key's member type.
+ const bool distinct = AggMode == EAggregateMode::Distinct;
+ const auto listType = distinct ? Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type;
+ auto apply = GetApply(listType, many, allowAggApply, ctx);
+ if (!apply) {
+ return { nullptr, false };
+ }
+
+ auto wrapped = WrapIfOverState(apply, overState, many, ctx);
+ if (!wrapped) {
+ return { nullptr, false };
+ }
+
+ return { distinct ?
+ Q(Y(names, wrapped, BuildQuotedAtom(Pos, DistinctKey))) :
+ Q(Y(names, wrapped)), true };
+ }
+
+ // Initializes every stored (n, buffer) pair against `src`, then runs the base DoInit().
+ bool DoInit(TContext& ctx, ISource* src) final {
+ for (const auto& topFreq : TopFreqs) {
+ if (!topFreq.second.first->Init(ctx, src)) {
+ return false;
+ }
+
+ if (!topFreq.second.second->Init(ctx, src)) {
+ return false;
+ }
+ }
+
+ return TAggregationFactory::DoInit(ctx, src);
+ }
+
+ // Result name -> (n, buffer) parameter nodes.
+ std::multimap<TString, TPair> TopFreqs;
+ // (n, buffer) used in factory mode.
+ TPair TopFreqFactoryParams;
+ // Source used to initialize user-supplied n / buffer arguments.
+ TSourcePtr FakeSource;
+};
+
+// Creates a TTopFreqFactory from the given arguments.
+TAggregationPtr BuildTopFreqFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation(new TTopFreqFactory(pos, name, factory, aggMode));
+    return aggregation;
+}
+
+// Aggregation factory for TOP-style functions that take an element count.
+// Non-factory call shape: (payload, count) when HasKey is false,
+// (payload, key, count) when HasKey is true. In factory form only the count is given.
+template <bool HasKey>
+class TTopAggregationFactory final : public TAggregationFactory {
+public:
+    TTopAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+        , FakeSource(BuildFakeSource(pos))
+    {}
+
+private:
+    // Validates the argument count, stores payload/key/count and registers the
+    // result column on `node` (non-factory form only). Returns false after reporting an error.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        ui32 adjustArgsCount = isFactory ? 1 : (HasKey ? 3 : 2);
+        if (exprs.size() != adjustArgsCount) {
+            ctx.Error(Pos) << "Aggregation function " << (isFactory ? "factory " : "") << Name << " requires "
+                << adjustArgsCount << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            Payload = exprs[0];
+            if (HasKey) {
+                Key = exprs[1];
+            }
+        }
+
+        // The count is always the last argument in both call shapes.
+        Count = exprs.back();
+
+        if (!isFactory) {
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TTopAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    // Builds the factory application with one extractor lambda per input column.
+    // In `many` mode every extractor result is wrapped in Unwrap.
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        TNodePtr apply;
+        if (HasKey) {
+            // Fix: the payload extractor used Y("Payload", ...) here, unlike every other
+            // extractor in this file, which wraps with "Unwrap" in `many` mode.
+            apply = Y("Apply", Factory, type,
+                BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Key) : Key),
+                BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload));
+        } else {
+            apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Payload) : Payload));
+        }
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Count);
+    }
+
+    // Argument indices in extractor order: GetApply passes the key extractor first
+    // (argument 1) and the payload extractor second (argument 0).
+    std::vector<ui32> GetFactoryColumnIndices() const final {
+        if (HasKey) {
+            return {1u, 0u};
+        } else {
+            return {0u};
+        }
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        // The count is initialized against FakeSource rather than the real source.
+        if (!Count->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+
+        // Factory form: no payload/key expressions were supplied.
+        if (!Payload) {
+            return true;
+        }
+
+        if (HasKey) {
+            if (!Key->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        if (!Payload->Init(ctx, src)) {
+            return false;
+        }
+
+        if ((HasKey && Key->IsAggregated()) || (!HasKey && Payload->IsAggregated())) {
+            ctx.Error(Pos) << "Aggregation of aggregated values is forbidden";
+            return false;
+        }
+        return true;
+    }
+
+    TSourcePtr FakeSource;
+    TNodePtr Key, Payload, Count;
+};
+
+// Creates a TTopAggregationFactory<HasKey>; both instantiations are emitted explicitly below.
+template <bool HasKey>
+TAggregationPtr BuildTopFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TTopAggregationFactory<HasKey>(pos, name, factory, aggMode);
+    return aggregation;
+}
+
+template TAggregationPtr BuildTopFactoryAggregation<false>(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+template TAggregationPtr BuildTopFactoryAggregation<true >(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+
+// Aggregation factory with an optional trailing precision argument
+// (default 14, accepted range 4..18 inclusive), passed to the factory as a Uint32.
+class TCountDistinctEstimateAggregationFactory final : public TAggregationFactory {
+public:
+    TCountDistinctEstimateAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+    {}
+
+private:
+    // Accepts [expr,] [precision]; the expression is present only in non-factory form.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        const ui32 minArgs = isFactory ? 0 : 1;
+        const ui32 maxArgs = 1 + minArgs;
+        if (exprs.size() < minArgs || exprs.size() > maxArgs) {
+            ctx.Error(Pos) << Name << " aggregation function " << (isFactory ? "factory " : "") << " requires " <<
+                minArgs << " or " << maxArgs << " argument(s), given: " << exprs.size();
+            return false;
+        }
+
+        Precision = 14;
+        if (maxArgs <= exprs.size()) {
+            // The precision, when given, follows the optional expression.
+            const auto& precisionArg = exprs[minArgs];
+            auto precisionPos = precisionArg->GetPos();
+            if (!Parseui32(precisionArg, Precision)) {
+                ctx.Error(precisionPos) << Name << ": invalid argument, numeric literal is expected";
+                return false;
+            }
+        }
+        if (Precision < 4 || Precision > 18) {
+            ctx.Error(Pos) << Name << ": precision is expected to be between 4 and 18 (inclusive), got " << Precision;
+            return false;
+        }
+
+        if (!isFactory) {
+            Expr = exprs[0];
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (isFactory) {
+            return true;
+        }
+
+        node.Add("Member", "row", Q(Name));
+        if (IsOverWindow()) {
+            src->AddTmpWindowColumn(Name);
+        }
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TCountDistinctEstimateAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        const TNodePtr extractorBody = many ? Y("Unwrap", Expr) : Expr;
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), extractorBody));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Y("Uint32", Q(ToString(Precision))));
+    }
+
+private:
+    ui32 Precision = 0;
+};
+
+// Creates a TCountDistinctEstimateAggregationFactory for the given position, name, factory and mode.
+TAggregationPtr BuildCountDistinctEstimateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TCountDistinctEstimateAggregationFactory(pos, name, factory, aggMode);
+    return aggregation;
+}
+
+// Aggregation factory for list-collecting aggregations with an optional limit argument.
+// When no limit is supplied, Uint64 0 is passed to the factory instead.
+class TListAggregationFactory final : public TAggregationFactory {
+public:
+    TListAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+private:
+    // Accepts [expr,] [limit]; the expression is present only in non-factory form.
+    // Returns false after reporting an error through ctx.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        ui32 adjustArgsCount = isFactory ? 0 : 1;
+        ui32 minArgs = (0 + adjustArgsCount);
+        ui32 maxArgs = (1 + adjustArgsCount);
+        if (exprs.size() < minArgs || exprs.size() > maxArgs) {
+            ctx.Error(Pos) << "List aggregation " << (isFactory ? "factory " : "") << "function require " << minArgs
+                << " or " << maxArgs << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        Limit = nullptr;
+        if (adjustArgsCount + 1U <= exprs.size()) {
+            // The limit is initialized against FakeSource rather than the real source.
+            Limit = exprs[adjustArgsCount];
+            if (!Limit->Init(ctx, FakeSource.Get())) {
+                return false;
+            }
+        }
+
+        if (!isFactory) {
+            Expr = exprs[0];
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TListAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        if (!Limit) {
+            apply = L(apply, Y("Uint64", Q("0")));
+        } else {
+            apply = L(apply, Limit);
+        }
+    }
+
+private:
+    TSourcePtr FakeSource;
+    TNodePtr Limit;
+};
+
+// Creates a TListAggregationFactory for the given position, name, factory and mode.
+TAggregationPtr BuildListFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TListAggregationFactory(pos, name, factory, aggMode);
+    return aggregation;
+}
+
+// Aggregation factory driven by user-supplied callables.
+// After the optional leading expression come 3 to 7 arguments: three mandatory
+// callables, up to three optional state callables, and an optional default value.
+class TUserDefinedAggregationFactory final : public TAggregationFactory {
+public:
+    TUserDefinedAggregationFactory(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+    {}
+
+private:
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        const ui32 offset = isFactory ? 0 : 1;
+        const bool tooFew = exprs.size() < (3 + offset);
+        if (tooFew || exprs.size() > (7 + offset)) {
+            ctx.Error(Pos) << "User defined aggregation function " << (isFactory ? "factory " : "") << " requires " <<
+                (3 + offset) << " to " << (7 + offset) << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        const TNodePtr& firstCallable = exprs[offset];
+        const TNodePtr& secondCallable = exprs[offset + 1];
+        const TNodePtr& thirdCallable = exprs[offset + 2];
+
+        Lambdas[0] = BuildLambda(Pos, Y("value", "parent"), Y("NamedApply", firstCallable, Q(Y("value")), Y("AsStruct"), Y("DependsOn", "parent")));
+        Lambdas[1] = BuildLambda(Pos, Y("value", "state", "parent"), Y("NamedApply", secondCallable, Q(Y("state", "value")), Y("AsStruct"), Y("DependsOn", "parent")));
+        // When the third callable has NullType the lambda yields Void instead of applying it.
+        Lambdas[2] = BuildLambda(Pos, Y("one", "two"), Y("IfType", thirdCallable, Y("NullType"),
+            BuildLambda(Pos, Y(), Y("Void")),
+            BuildLambda(Pos, Y(), Y("Apply", thirdCallable, "one", "two"))));
+
+        // Missing optional callables become identity lambdas over the state.
+        for (size_t slot = 3U; slot < Lambdas.size(); ++slot) {
+            const auto argIndex = offset + slot;
+            TNodePtr body;
+            if (argIndex >= exprs.size()) {
+                body = AstNode("state");
+            } else {
+                body = Y("Apply", exprs[argIndex], "state");
+            }
+            Lambdas[slot] = BuildLambda(Pos, Y("state"), body);
+        }
+
+        if (exprs.size() == (7 + offset)) {
+            DefVal = exprs[offset + 6];
+        } else {
+            DefVal = Y("Null");
+        }
+
+        // Only the leading expression (if any) is forwarded to the base implementation.
+        TVector<TNodePtr> baseArgs;
+        if (!isFactory) {
+            baseArgs.push_back(exprs.front());
+        }
+        return TAggregationFactory::InitAggr(ctx, isFactory, src, node, baseArgs);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TUserDefinedAggregationFactory(Pos, Name, Func, AggMode);
+    }
+
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        const TNodePtr extractorBody = many ? Y("Unwrap", Expr) : Expr;
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), extractorBody));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Lambdas[0], Lambdas[1], Lambdas[2], Lambdas[3], Lambdas[4], Lambdas[5], DefVal);
+    }
+
+    // Initializes all six lambdas, then the default value, then the base factory.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        for (const auto& lambda : Lambdas) {
+            if (!lambda->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        const bool defValReady = DefVal->Init(ctx, src);
+        return defValReady ? TAggregationFactory::DoInit(ctx, src) : false;
+    }
+
+    std::array<TNodePtr, 6> Lambdas;
+    TNodePtr DefVal;
+};
+
+// Creates a TUserDefinedAggregationFactory for the given position, name, factory and mode.
+TAggregationPtr BuildUserDefinedFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TUserDefinedAggregationFactory(pos, name, factory, aggMode);
+    return aggregation;
+}
+
+// COUNT aggregation: an asterisk argument is replaced by Void, and the expression
+// receives a count hint equal to its constness.
+class TCountAggregation final : public TAggregationFactory {
+public:
+    TCountAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, func, aggMode)
+    {}
+
+private:
+    TNodePtr DoClone() const final {
+        return new TCountAggregation(Pos, Name, Func, AggMode);
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        // Without an expression there is nothing to initialize; the base DoInit is not invoked.
+        if (!Expr) {
+            return true;
+        }
+
+        if (Expr->IsAsterisk()) {
+            Expr = Y("Void");
+        }
+
+        const bool exprReady = Expr->Init(ctx, src);
+        if (!exprReady) {
+            return false;
+        }
+
+        Expr->SetCountHint(Expr->IsConstant());
+        return TAggregationFactory::DoInit(ctx, src);
+    }
+};
+
+// Creates a TCountAggregation for the given position, name, function and mode.
+TAggregationPtr BuildCountAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TCountAggregation(pos, name, func, aggMode);
+    return aggregation;
+}
+
+// Aggregation backed by a PostgreSQL aggregate function named by PgFunc.
+// Emits Pg*Traits callables (window or plain, depending on AggMode), or AggApply
+// with a "pg_"-prefixed lowercase name when that path is enabled.
+class TPGFactoryAggregation final : public TAggregationFactory {
+public:
+    TPGFactoryAggregation(TPosition pos, const TString& name, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, "", aggMode, false, false)
+        , PgFunc(Name)
+    {}
+
+    // Runs the base initialization and, on success, prepares either the factory
+    // lambda (factory form) or the row lambda over the argument expressions.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) override {
+        auto ret = TAggregationFactory::InitAggr(ctx, isFactory, src, node, exprs);
+        if (ret) {
+            if (isFactory) {
+                Factory = BuildLambda(Pos, Y("type", "extractor"), Y(AggMode == EAggregateMode::OverWindow ? "PgWindowTraitsTuple" : "PgAggregationTraitsTuple",
+                    Q(PgFunc), Y("ListItemType", "type"), "extractor"));
+            } else {
+                Lambda = BuildLambda(Pos, Y("row"), exprs);
+            }
+        }
+
+        return ret;
+    }
+
+    // Always reports an error and returns nullptr.
+    TNodePtr GetExtractor(bool many, TContext& ctx) const override {
+        Y_UNUSED(many);
+        ctx.Error() << "Partial aggregation by PostgreSQL function isn't supported";
+        return nullptr;
+    }
+
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        // Only `many` is unused here; `ctx` and `allowAggApply` are both read below.
+        Y_UNUSED(many);
+        if (ShouldEmitAggApply(ctx) && allowAggApply && AggMode != EAggregateMode::OverWindow) {
+            return Y("AggApply",
+                Q("pg_" + to_lower(PgFunc)), Y("ListItemType", type), Lambda);
+        }
+
+        return Y(AggMode == EAggregateMode::OverWindow ? "PgWindowTraits" : "PgAggregationTraits",
+            Q(PgFunc), Y("ListItemType", type), Lambda);
+    }
+
+private:
+    TNodePtr DoClone() const final {
+        return new TPGFactoryAggregation(Pos, Name, AggMode);
+    }
+
+    TString PgFunc;
+    TNodePtr Lambda;
+};
+
+// Creates a TPGFactoryAggregation for the given position, PostgreSQL function name and mode.
+TAggregationPtr BuildPGFactoryAggregation(TPosition pos, const TString& name, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TPGFactoryAggregation(pos, name, aggMode);
+    return aggregation;
+}
+
+// Aggregation factory for NthValue: takes [expr,] index, where the expression is
+// present only in non-factory form and the index is passed on to the factory.
+class TNthValueFactoryAggregation final : public TAggregationFactory {
+public:
+    TNthValueFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode)
+        : TAggregationFactory(pos, name, factory, aggMode)
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+private:
+    // Validates the argument count, initializes the index and registers the result
+    // column on `node` (non-factory form only). Returns false after reporting an error.
+    bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
+        ui32 adjustArgsCount = isFactory ? 0 : 1;
+        ui32 expectedArgs = (1 + adjustArgsCount);
+        if (exprs.size() != expectedArgs) {
+            ctx.Error(Pos) << "NthValue aggregation " << (isFactory ? "factory " : "") << "function require "
+                << expectedArgs << " arguments, given: " << exprs.size();
+            return false;
+        }
+
+        if (BlockWindowAggregationWithoutFrameSpec(Pos, GetName(), src, ctx)) {
+            return false;
+        }
+
+        // The index is initialized against FakeSource rather than the real source.
+        Index = exprs[adjustArgsCount];
+        if (!Index->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+
+        if (!isFactory) {
+            Expr = exprs[0];
+            Name = src->MakeLocalName(Name);
+        }
+
+        if (!Init(ctx, src)) {
+            return false;
+        }
+
+        if (!isFactory) {
+            node.Add("Member", "row", Q(Name));
+            if (IsOverWindow()) {
+                src->AddTmpWindowColumn(Name);
+            }
+        }
+
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TNthValueFactoryAggregation(Pos, Name, Func, AggMode);
+    }
+
+    TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const final {
+        Y_UNUSED(ctx);
+        Y_UNUSED(allowAggApply);
+        auto apply = Y("Apply", Factory, type, BuildLambda(Pos, Y("row"), many ? Y("Unwrap", Expr) : Expr));
+        AddFactoryArguments(apply);
+        return apply;
+    }
+
+    void AddFactoryArguments(TNodePtr& apply) const final {
+        apply = L(apply, Index);
+    }
+
+private:
+    TSourcePtr FakeSource;
+    TNodePtr Index;
+};
+
+// Creates a TNthValueFactoryAggregation for the given position, name, factory and mode.
+TAggregationPtr BuildNthFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
+    TAggregationPtr aggregation = new TNthValueFactoryAggregation(pos, name, factory, aggMode);
+    return aggregation;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp
new file mode 100644
index 00000000000..e066cd846d7
--- /dev/null
+++ b/yql/essentials/sql/v1/builtin.cpp
@@ -0,0 +1,3772 @@
+#include "node.h"
+#include "context.h"
+
+#include "list_builtin.h"
+#include "match_recognize.h"
+
+#include <yql/essentials/ast/yql_type_string.h>
+#include <yql/essentials/public/udf/udf_data_type.h>
+#include <yql/essentials/core/sql_types/simple_types.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/public/issue/yql_issue_id.h>
+#include <yql/essentials/parser/pg_catalog/catalog.h>
+
+#include <library/cpp/charset/ci_string.h>
+#include <library/cpp/yson/node/node_io.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/util.h>
+#include <util/string/join.h>
+#include <util/system/env.h>
+
+#include <unordered_map>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+extern const char SubqueryExtendFor[] = "SubqueryExtendFor"; // These six arrays are defined here with external linkage; each one's text equals its own identifier.
+extern const char SubqueryUnionAllFor[] = "SubqueryUnionAllFor";
+extern const char SubqueryMergeFor[] = "SubqueryMergeFor";
+extern const char SubqueryUnionMergeFor[] = "SubqueryUnionMergeFor";
+extern const char SubqueryOrderBy[] = "SubqueryOrderBy";
+extern const char SubqueryAssumeOrderBy[] = "SubqueryAssumeOrderBy";
+
+// Builds a type-config atom for UDFs in the "clickhouse" namespace: a YSON map whose
+// "args" entry records type and value of every non-null literal argument, keyed by
+// the argument's position. Returns nullptr for any other namespace.
+TNodePtr MakeTypeConfig(const TPosition& pos, const TString& ns, const TVector<TNodePtr>& udfArgs) {
+    if (ns != "clickhouse") {
+        return nullptr;
+    }
+
+    auto settings = NYT::TNode::CreateMap();
+    auto literalArgs = NYT::TNode::CreateMap();
+    for (ui32 index = 0; index < udfArgs.size(); ++index) {
+        const auto& arg = udfArgs[index];
+        if (arg->IsNull() || !arg->IsLiteral()) {
+            continue;
+        }
+        literalArgs[ToString(index)] = NYT::TNode()
+            ("type", arg->GetLiteralType())
+            ("value", arg->GetLiteralValue());
+    }
+
+    settings["args"] = literalArgs;
+    return (TDeferredAtom(pos, NYT::NodeToYsonString(settings))).Build();
+}
+
+// When the scoped PragmaCheckedOps flag is set, rewrites "sum" to "checked_sum" and
+// "sumif" to "checked_sumif" in place; every other name is left untouched.
+void AdjustCheckedAggFuncName(TString& aggNormalizedName, TContext& ctx) {
+    if (ctx.Scoped->PragmaCheckedOps) {
+        if (aggNormalizedName == "sum") {
+            aggNormalizedName = "checked_sum";
+        } else if (aggNormalizedName == "sumif") {
+            aggNormalizedName = "checked_sumif";
+        }
+    }
+}
+
+// AST node for the GROUPING function: verifies that every argument is a grouping
+// column, registers the grouping on the source and becomes (Member row '<column>).
+class TGroupingNode final: public TAstListNode {
+public:
+    TGroupingNode(TPosition pos, const TVector<TNodePtr>& args)
+        : TAstListNode(pos)
+        , Args(args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!src) {
+            ctx.Error(Pos) << "Grouping function should have source";
+            return false;
+        }
+
+        const bool isJoin = src->GetJoin();
+        ISource* composite = src->GetCompositeSource();
+
+        TVector<TString> columns;
+        columns.reserve(Args.size());
+        for (const auto& arg: Args) {
+            auto namePtr = arg->GetColumnName();
+            if (!namePtr || !*namePtr) {
+                ctx.Error(Pos) << "GROUPING function should use columns as arguments";
+                return false;
+            }
+
+            // Under a JOIN the column is qualified with its source name when one is present.
+            TString column = *namePtr;
+            if (isJoin) {
+                auto sourceNamePtr = arg->GetSourceName();
+                if (sourceNamePtr && !sourceNamePtr->empty()) {
+                    column = DotJoin(*sourceNamePtr, column);
+                }
+            }
+
+            const bool isGroupingColumn = src->IsGroupByColumn(column)
+                || src->IsAlias(EExprSeat::GroupBy, *namePtr)
+                || (composite && composite->IsGroupByColumn(column));
+            if (!isGroupingColumn) {
+                ctx.Error(arg->GetPos()) << "Column '" << column << "' is not a grouping column";
+                return false;
+            }
+            columns.emplace_back(column);
+        }
+
+        TString groupingColumn;
+        if (!src->AddGrouping(ctx, columns, groupingColumn)) {
+            return false;
+        }
+
+        Nodes.push_back(BuildAtom(Pos, "Member"));
+        Nodes.push_back(BuildAtom(Pos, "row"));
+        Nodes.push_back(BuildQuotedAtom(Pos, groupingColumn));
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TGroupingNode(Pos, CloneContainer(Args));
+    }
+
+private:
+    const TVector<TNodePtr> Args;
+};
+
+// AST node for an aggregation function call bound to a data source.
+// Initialization hands the arguments to the wrapped IAggregation and registers it on the source.
+class TBasicAggrFunc final: public TAstListNode {
+public:
+    TBasicAggrFunc(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
+        : TAstListNode(pos)
+        , Name(name)
+        , Aggr(aggr)
+        , Args(args)
+    {}
+
+    TCiString GetName() const {
+        return Name;
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!src) {
+            ctx.Error(Pos) << "Unable to use aggregation function '" << Name << "' without data source";
+            return false;
+        }
+
+        return DoInitAggregation(ctx, src) ? TAstListNode::DoInit(ctx, src) : false;
+    }
+
+    // For DISTINCT modes whose first argument is not a plain column: labels that
+    // argument with a generated name, exports it through `exprs` and replaces it
+    // with a column reference to the generated name.
+    void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override {
+        if (Args.empty()) {
+            return;
+        }
+        const auto mode = Aggr->GetAggregationMode();
+        if (mode != EAggregateMode::Distinct && mode != EAggregateMode::OverWindowDistinct) {
+            return;
+        }
+
+        auto& firstArg = Args.front();
+
+        // The expression has to be initialized before it can report a column name,
+        // so the check is made on a clone.
+        auto probe = firstArg->Clone();
+        if (!probe->Init(ctx, &src) || probe->GetColumnName()) {
+            return;
+        }
+
+        auto tmpColumn = src.MakeLocalName("_yql_preagg_" + Name);
+        YQL_ENSURE(!firstArg->GetLabel());
+        firstArg->SetLabel(tmpColumn);
+
+        PreaggregateExpr = firstArg;
+        exprs.push_back(PreaggregateExpr);
+        firstArg = BuildColumn(firstArg->GetPos(), tmpColumn);
+
+        Aggr->MarkKeyColumnAsGenerated();
+    }
+
+    TNodePtr DoClone() const final {
+        TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
+        return new TBasicAggrFunc(Pos, Name, aggrClone, CloneContainer(Args));
+    }
+
+    TAggregationPtr GetAggregation() const override {
+        return Aggr;
+    }
+
+private:
+    bool DoInitAggregation(TContext& ctx, ISource* src) {
+        if (PreaggregateExpr) {
+            YQL_ENSURE(PreaggregateExpr->HasState(ENodeState::Initialized));
+            const bool nestedAggregation = PreaggregateExpr->IsAggregated()
+                && !PreaggregateExpr->IsAggregationKey()
+                && !Aggr->IsOverWindow();
+            if (nestedAggregation) {
+                ctx.Error(Aggr->GetPos()) << "Aggregation of aggregated values is forbidden";
+                return false;
+            }
+        }
+
+        return Aggr->InitAggr(ctx, false, src, *this, Args) ? src->AddAggregation(ctx, Aggr) : false;
+    }
+
+    // Const only when there is at least one argument and all arguments are constant; always Aggregated.
+    void DoUpdateState() const final {
+        const bool allConst = !Args.empty() && AllOf(Args, [](const auto& arg){ return arg->IsConstant(); });
+        State.Set(ENodeState::Const, allConst);
+        State.Set(ENodeState::Aggregated);
+    }
+
+    TNodePtr PreaggregateExpr;
+protected:
+    const TString Name;
+    TAggregationPtr Aggr;
+    TVector<TNodePtr> Args;
+};
+
+// AST node for an aggregation factory: translates to a lambda (type, extractor)
+// that applies the aggregation's traits factory.
+class TBasicAggrFactory final : public TAstListNode {
+public:
+    TBasicAggrFactory(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
+        : TAstListNode(pos)
+        , Name(name)
+        , Aggr(aggr)
+        , Args(args)
+    {}
+
+    TCiString GetName() const {
+        return Name;
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!DoInitAggregation(ctx)) {
+            return false;
+        }
+
+        auto traitsFactory = Aggr->AggregationTraitsFactory();
+        auto apply = Y("Apply", traitsFactory, Y("ListType", "type"));
+
+        const auto columnIndices = Aggr->GetFactoryColumnIndices();
+        if (columnIndices.size() == 1) {
+            apply = L(apply, "extractor");
+        } else {
+            // The main extractor returns a tuple; derive one extractor per requested element.
+            for (const auto columnIndex : columnIndices) {
+                auto partial = BuildLambda(Pos, Y("row"), Y("Nth", Y("Apply", "extractor", "row"), Q(ToString(columnIndex))));
+                apply = L(apply, partial);
+            }
+        }
+
+        Aggr->AddFactoryArguments(apply);
+        Lambda = BuildLambda(Pos, Y("type", "extractor"), apply);
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Lambda->Translate(ctx);
+    }
+
+    TNodePtr DoClone() const final {
+        TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
+        return new TBasicAggrFactory(Pos, Name, aggrClone, CloneContainer(Args));
+    }
+
+    TAggregationPtr GetAggregation() const override {
+        return Aggr;
+    }
+
+private:
+    // Factory form: the aggregation is initialized without a source.
+    bool DoInitAggregation(TContext& ctx) {
+        return Aggr->InitAggr(ctx, true, nullptr, *this, Args);
+    }
+
+protected:
+    const TString Name;
+    TAggregationPtr Aggr;
+    TVector<TNodePtr> Args;
+    TNodePtr Lambda;
+};
+
+using TAggrFuncPtr = THolder<TBasicAggrFunc>;
+
+// Node that turns an expression into an atom via MakeAtomFromExpression.
+// If no wrapped node was supplied, initialization fails with the `info` message.
+class TLiteralStringAtom: public INode {
+public:
+    TLiteralStringAtom(TPosition pos, TNodePtr node, const TString& info, const TString& prefix = {})
+        : INode(pos)
+        , Node(node)
+        , Info(info)
+        , Prefix(prefix)
+    {
+    }
+
+    // Initializes the wrapped node and builds the atom from it.
+    // Returns false (after reporting `Info`) when there is no wrapped node.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        // `src` is forwarded to Node->Init below, so it is not marked as unused.
+        if (!Node) {
+            ctx.Error(Pos) << Info;
+            return false;
+        }
+
+        if (!Node->Init(ctx, src)) {
+            return false;
+        }
+
+        Atom = MakeAtomFromExpression(Pos, ctx, Node, Prefix).Build();
+        return true;
+    }
+
+    // The literal accessors fall back to false / empty string until the atom is built.
+    bool IsLiteral() const override {
+        return Atom ? Atom->IsLiteral() : false;
+    }
+
+    TString GetLiteralType() const override {
+        return Atom ? Atom->GetLiteralType() : "";
+    }
+
+    TString GetLiteralValue() const override {
+        return Atom ? Atom->GetLiteralValue() : "";
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Atom->Translate(ctx);
+    }
+
+    TPtr DoClone() const final {
+        return new TLiteralStringAtom(GetPos(), SafeClone(Node), Info, Prefix);
+    }
+
+    // Mirrors the Const / Aggregated / OverWindow state of the built atom.
+    void DoUpdateState() const override {
+        YQL_ENSURE(Atom);
+        State.Set(ENodeState::Const, Atom->IsConstant());
+        State.Set(ENodeState::Aggregated, Atom->IsAggregated());
+        State.Set(ENodeState::OverWindow, Atom->IsOverWindow());
+    }
+private:
+    TNodePtr Node;
+    TNodePtr Atom;
+    TString Info;
+    TString Prefix;
+};
+
+// Literal-string atom built from a call that must have exactly one argument;
+// any other argument count leaves the wrapped node empty, so initialization fails.
+class TYqlAsAtom: public TLiteralStringAtom {
+public:
+    TYqlAsAtom(TPosition pos, const TVector<TNodePtr>& args)
+        : TLiteralStringAtom(pos, args.size() != 1 ? nullptr : args[0], "Literal string is required as argument")
+    {}
+};
+
+class TYqlData: public TCallNode { // Call node that builds a typed data value from a literal string; the op name is looked up as a data slot.
+public:
+ TYqlData(TPosition pos, const TString& type, const TVector<TNodePtr>& args)
+ : TCallNode(pos, type, 1, 1, args) // one argument by default; DoInit switches to three for Decimal
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override { // validates the literal for the slot and replaces Args[0] with a quoted atom; false after reporting an error
+ auto slot = NUdf::FindDataSlot(GetOpName());
+ if (!slot) {
+ ctx.Error(Pos) << "Unexpected type " << GetOpName();
+ return false;
+ }
+
+ if (*slot == NUdf::EDataSlot::Decimal) {
+ MinArgs = MaxArgs = 3; // Decimal takes (value, precision, scale)
+ }
+
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto stringNode = Args[0];
+ auto atom = stringNode->GetLiteral("String");
+ if (!atom) {
+ ctx.Error(Pos) << "Expected literal string as argument in " << GetOpName() << " function";
+ return false;
+ }
+
+ TString value;
+ if (*slot == NUdf::EDataSlot::Decimal) {
+ const auto precision = Args[1]->GetLiteral("Int32");
+ const auto scale = Args[2]->GetLiteral("Int32");
+
+ if (!NKikimr::NMiniKQL::IsValidDecimal(*atom)) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ ui8 stub; // parsed value is discarded; only parseability as ui8 is checked
+ if (!(precision && TryFromString<ui8>(*precision, stub))) {
+ ctx.Error(Pos) << "Invalid precision " << (precision ? precision->Quote() : "") << " for type " << GetOpName();
+ return false;
+ }
+
+ if (!(scale && TryFromString<ui8>(*scale, stub))) {
+ ctx.Error(Pos) << "Invalid scale " << (scale ? scale->Quote() : "") << " for type " << GetOpName();
+ return false;
+ }
+
+ Args[0] = BuildQuotedAtom(GetPos(), *atom); // Decimal path: all three arguments become quoted atoms
+ Args[1] = BuildQuotedAtom(GetPos(), *precision);
+ Args[2] = BuildQuotedAtom(GetPos(), *scale);
+ return TCallNode::DoInit(ctx, src);
+ } else if (NUdf::GetDataTypeInfo(*slot).Features & (NUdf::DateType | NUdf::TzDateType | NUdf::TimeIntervalType)) { // date/time/interval slots: parse, then re-serialize as a number
+ const auto out = NKikimr::NMiniKQL::ValueFromString(*slot, *atom);
+ if (!out) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ switch (*slot) { // the integer type read from the parsed value depends on the slot
+ case NUdf::EDataSlot::Date:
+ case NUdf::EDataSlot::TzDate:
+ value = ToString(out.Get<ui16>());
+ break;
+ case NUdf::EDataSlot::Date32:
+ case NUdf::EDataSlot::TzDate32:
+ value = ToString(out.Get<i32>());
+ break;
+ case NUdf::EDataSlot::Datetime:
+ case NUdf::EDataSlot::TzDatetime:
+ value = ToString(out.Get<ui32>());
+ break;
+ case NUdf::EDataSlot::Timestamp:
+ case NUdf::EDataSlot::TzTimestamp:
+ value = ToString(out.Get<ui64>());
+ break;
+ case NUdf::EDataSlot::Datetime64:
+ case NUdf::EDataSlot::Timestamp64:
+ case NUdf::EDataSlot::TzDatetime64:
+ case NUdf::EDataSlot::TzTimestamp64:
+ value = ToString(out.Get<i64>());
+ break;
+ case NUdf::EDataSlot::Interval:
+ case NUdf::EDataSlot::Interval64:
+ value = ToString(out.Get<i64>());
+ if ('T' == atom->back()) { // a trailing 'T' is rejected even though the value parsed
+ ctx.Error(Pos) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent.";
+ return false;
+ }
+ break;
+ default:
+ Y_ABORT("Unexpected data slot");
+ }
+
+ if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) { // timezone-aware slots append "," and the IANA timezone name
+ value += ",";
+ value += NKikimr::NMiniKQL::GetTimezoneIANAName(out.GetTimezoneId());
+ }
+ } else if (NUdf::EDataSlot::Uuid == *slot) {
+ char out[0x10]; // 16-byte buffer filled by ParseUuid
+ if (!NKikimr::NMiniKQL::ParseUuid(*atom, out)) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ value.assign(out, sizeof(out)); // the raw 16 bytes become the atom content
+ } else {
+ if (!NKikimr::NMiniKQL::IsValidStringValue(*slot, *atom)) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ value = *atom; // remaining slots: the literal text is passed through unchanged after validation
+ }
+
+ Args[0] = BuildQuotedAtom(GetPos(), value);
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TPtr DoClone() const final {
+ return new TYqlData(GetPos(), OpName, CloneContainer(Args));
+ }
+};
+
+// TableName([path[, service]]) call node.
+// Without arguments the path defaults to TablePath(DependsOn(row)); without an
+// explicit service the service given at construction is appended.
+class TTableName : public TCallNode {
+public:
+    TTableName(TPosition pos, const TVector<TNodePtr>& args, const TString& service)
+        : TCallNode(pos, "TableName", 0, 2, args)
+        , Service(service)
+        , EmptyArgs(args.empty())
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        if (Args.empty()) {
+            if (!src) {
+                ctx.Error(Pos) << "Unable to use TableName() without source";
+                return false;
+            }
+
+            // TODO: TablePath() and TableRecordIndex() have more strict limitations
+            if (src->GetJoin()) {
+                ctx.Warning(Pos,
+                    TIssuesIds::YQL_EMPTY_TABLENAME_RESULT) << "TableName() may produce empty result when used in ambiguous context (with JOIN)";
+            }
+
+            if (src->HasAggregations()) {
+                ctx.Warning(Pos,
+                    TIssuesIds::YQL_EMPTY_TABLENAME_RESULT) << "TableName() will produce empty result when used with aggregation.\n"
+                    "Please consult documentation for possible workaround";
+            }
+
+            Args.push_back(Y("TablePath", Y("DependsOn", "row")));
+        }
+
+        if (Args.size() != 2) {
+            // No explicit service argument: fall back to the one supplied at construction.
+            if (Service.empty()) {
+                ctx.Error(GetPos()) << GetOpName() << " requires either service name as second argument or current cluster name";
+                return false;
+            }
+
+            Args.push_back(BuildQuotedAtom(GetPos(), Service));
+        } else {
+            auto& serviceArg = Args[1];
+            auto literal = serviceArg->GetLiteral("String");
+            if (!literal) {
+                ctx.Error(serviceArg->GetPos()) << "Expected literal string as second argument in TableName function";
+                return false;
+            }
+
+            serviceArg = BuildQuotedAtom(serviceArg->GetPos(), *literal);
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TPtr DoClone() const final {
+        return new TTableName(GetPos(), CloneContainer(Args), Service);
+    }
+
+    // A call constructed without arguments is never constant.
+    void DoUpdateState() const override {
+        if (!EmptyArgs) {
+            TCallNode::DoUpdateState();
+            return;
+        }
+        State.Set(ENodeState::Const, false);
+    }
+
+private:
+    TString Service;
+    const bool EmptyArgs;
+};
+
+// ParseType('<type string>') node: the single literal string argument is parsed
+// into a type AST when the node is translated.
+class TYqlParseType final : public INode {
+public:
+    TYqlParseType(TPosition pos, const TVector<TNodePtr>& args)
+        : INode(pos)
+        , Args(args)
+    {}
+
+    // Returns the parsed type AST, or nullptr after reporting an error through ctx.
+    TAstNode* Translate(TContext& ctx) const override {
+        if (Args.size() != 1) {
+            ctx.Error(Pos) << "Expected 1 argument in ParseType function";
+            return nullptr;
+        }
+
+        const auto& typeArg = Args[0];
+        auto literal = typeArg->GetLiteral("String");
+        if (!literal) {
+            ctx.Error(typeArg->GetPos()) << "Expected literal string as argument in ParseType function";
+            return nullptr;
+        }
+
+        auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, typeArg->GetPos());
+        if (parsed) {
+            return parsed;
+        }
+
+        ctx.Error(typeArg->GetPos()) << "Failed to parse type";
+        return nullptr;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlParseType(Pos, CloneContainer(Args));
+    }
+
+    void DoUpdateState() const final {
+        State.Set(ENodeState::Const);
+    }
+private:
+    TVector<TNodePtr> Args;
+};
+
+// AddTimezone(value, timezone) call node: the second argument is wrapped in TimezoneId.
+class TYqlAddTimezone: public TCallNode {
+public:
+    TYqlAddTimezone(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "AddTimezone", 2, 2, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        const bool argsValid = ValidateArguments(ctx);
+        if (!argsValid) {
+            return false;
+        }
+
+        auto& timezoneArg = Args[1];
+        timezoneArg = Y("TimezoneId", timezoneArg);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlAddTimezone(Pos, CloneContainer(Args));
+    }
+};
+
+// PgType(<oid or name>) call node: accepts an integer literal holding a pg type oid
+// or a string literal holding a pg type name, and replaces it with the quoted type name.
+class TYqlPgType: public TCallNode {
+public:
+    TYqlPgType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgType", 1, 1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        auto& typeArg = Args[0];
+        ui32 oid;
+        if (typeArg->IsIntegerLiteral() && TryFromString<ui32>(typeArg->GetLiteralValue(), oid)) {
+            // Integer literal: resolve the oid to a type name.
+            if (!NPg::HasType(oid)) {
+                ctx.Error(typeArg->GetPos()) << "Unknown pg type oid: " << oid;
+                return false;
+            }
+            typeArg = BuildQuotedAtom(typeArg->GetPos(), NPg::LookupType(oid).Name);
+        } else if (typeArg->IsLiteral() && typeArg->GetLiteralType() == "String") {
+            // String literal: the name itself must be a known pg type.
+            if (!NPg::HasType(typeArg->GetLiteralValue())) {
+                ctx.Error(typeArg->GetPos()) << "Unknown pg type: " << typeArg->GetLiteralValue();
+                return false;
+            }
+            typeArg = BuildQuotedAtom(typeArg->GetPos(), typeArg->GetLiteralValue());
+        } else {
+            ctx.Error(typeArg->GetPos()) << "Expecting string literal with pg type name or integer literal with pg type oid";
+            return false;
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+
+    TNodePtr DoClone() const final {
+        return new TYqlPgType(Pos, CloneContainer(Args));
+    }
+};
+
+// PgConst(value, type, mods...): the value becomes an atom, and any arguments after
+// the type are folded together with the type into a single PgTypeMod call.
+class TYqlPgConst : public TCallNode {
+public:
+    TYqlPgConst(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgConst", 2, -1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx) || !Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        // Literals are quoted directly; anything else goes through MakeAtomFromExpression.
+        auto& value = Args[0];
+        if (value->IsLiteral()) {
+            value = BuildQuotedAtom(value->GetPos(), value->GetLiteralValue());
+        } else {
+            value = MakeAtomFromExpression(Pos, ctx, value).Build();
+        }
+
+        if (Args.size() > 2) {
+            TVector<TNodePtr> typeModArgs;
+            typeModArgs.push_back(Args[1]);
+            for (auto it = Args.begin() + 2; it != Args.end(); ++it) {
+                const auto& modifier = *it;
+                if (!modifier->IsLiteral()) {
+                    ctx.Error(modifier->GetPos()) << "Expecting literal";
+                    return false;
+                }
+
+                typeModArgs.push_back(BuildQuotedAtom(modifier->GetPos(), modifier->GetLiteralValue()));
+            }
+
+            // Drop the raw modifiers and append the combined PgTypeMod node instead.
+            Args.erase(Args.begin() + 2, Args.end());
+            Args.push_back(new TCallNodeImpl(Pos, "PgTypeMod", typeModArgs));
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlPgConst(Pos, CloneContainer(Args));
+    }
+};
+
+// PgCast(value, type, mods...): arguments after the type must be literals; they are
+// folded together with the type into a single PgTypeMod call.
+class TYqlPgCast : public TCallNode {
+public:
+    TYqlPgCast(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgCast", 2, -1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        const bool hasModifiers = Args.size() > 2;
+        if (hasModifiers) {
+            TVector<TNodePtr> typeModArgs;
+            typeModArgs.push_back(Args[1]);
+            for (auto it = Args.begin() + 2; it != Args.end(); ++it) {
+                const auto& modifier = *it;
+                if (!modifier->IsLiteral()) {
+                    ctx.Error(modifier->GetPos()) << "Expecting literal";
+                    return false;
+                }
+
+                typeModArgs.push_back(BuildQuotedAtom(modifier->GetPos(), modifier->GetLiteralValue()));
+            }
+
+            // Replace the raw modifiers with the combined PgTypeMod node.
+            Args.erase(Args.begin() + 2, Args.end());
+            Args.push_back(new TCallNodeImpl(Pos, "PgTypeMod", typeModArgs));
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlPgCast(Pos, CloneContainer(Args));
+    }
+};
+
+// PgOp(<operator string literal>, operands...): the first argument is turned into a quoted atom.
+class TYqlPgOp : public TCallNode {
+public:
+    TYqlPgOp(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgOp", 2, 3, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx) || !Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& opArg = Args[0];
+        const bool isStringLiteral = opArg->IsLiteral() && opArg->GetLiteralType() == "String";
+        if (!isStringLiteral) {
+            ctx.Error(opArg->GetPos()) << "Expecting string literal as first argument";
+            return false;
+        }
+
+        opArg = BuildQuotedAtom(opArg->GetPos(), opArg->GetLiteralValue());
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlPgOp(Pos, CloneContainer(Args));
+    }
+};
+
+// PgCall(<function name string literal>, args...): the name becomes a quoted atom and an
+// options list is inserted right after it: (('range)) when RangeFunction is set, empty otherwise.
+template <bool RangeFunction>
+class TYqlPgCall : public TCallNode {
+public:
+    TYqlPgCall(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "PgCall", 1, -1, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (!ValidateArguments(ctx) || !Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        const bool isStringLiteral = Args[0]->IsLiteral() && Args[0]->GetLiteralType() == "String";
+        if (!isStringLiteral) {
+            ctx.Error(Args[0]->GetPos()) << "Expecting string literal as first argument";
+            return false;
+        }
+
+        Args[0] = BuildQuotedAtom(Args[0]->GetPos(), Args[0]->GetLiteralValue());
+
+        TNodePtr options = RangeFunction ? Q(Y(Q(Y(Q("range"))))) : Q(Y());
+        Args.insert(Args.begin() + 1, options);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlPgCall<RangeFunction>(Pos, CloneContainer(Args));
+    }
+};
+
+// Two-argument call named by the template parameter; the first argument is wrapped
+// into EvaluateExpr before the generic call initialization.
+template <const char* Name>
+class TYqlSubqueryFor : public TCallNode {
+public:
+    TYqlSubqueryFor(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, Name, 2, 2, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (ValidateArguments(ctx)) {
+            auto& first = Args[0];
+            first = Y("EvaluateExpr", first);
+            return TCallNode::DoInit(ctx, src);
+        }
+        return false;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlSubqueryFor<Name>(Pos, CloneContainer(Args));
+    }
+};
+
+// Two-argument call named by the template parameter; the second argument is wrapped
+// into EvaluateExpr before the generic call initialization.
+template <const char* Name>
+class TYqlSubqueryOrderBy : public TCallNode {
+public:
+    TYqlSubqueryOrderBy(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, Name, 2, 2, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (ValidateArguments(ctx)) {
+            auto& second = Args[1];
+            second = Y("EvaluateExpr", second);
+            return TCallNode::DoInit(ctx, src);
+        }
+        return false;
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlSubqueryOrderBy<Name>(Pos, CloneContainer(Args));
+    }
+};
+
+
+// EnsureType (Strict) / EnsureConvertibleTo (non-strict) call with an optional third
+// argument that is converted into an atom.
+template <bool Strict>
+class TYqlTypeAssert : public TCallNode {
+public:
+    TYqlTypeAssert(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, Strict ? "EnsureType" : "EnsureConvertibleTo", 2, 3, args)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        if (Args.size() == 3) {
+            auto& message = Args[2];
+            if (!message->Init(ctx, src)) {
+                return false;
+            }
+            message = MakeAtomFromExpression(Pos, ctx, message).Build();
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlTypeAssert<Strict>(Pos, CloneContainer(Args));
+    }
+};
+
+// FromBytes(bytes, type): the type argument is passed through FormatType and then
+// converted into an atom.
+class TFromBytes final : public TCallNode {
+public:
+    TFromBytes(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "FromBytes", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& typeArg = Args[1];
+        typeArg = MakeAtomFromExpression(Pos, ctx, Y("FormatType", typeArg)).Build();
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TFromBytes(Pos, CloneContainer(Args));
+    }
+};
+
+// Shared base for two-argument calls whose second argument must become an atom.
+class TYqlTaggedBase : public TCallNode {
+public:
+    TYqlTaggedBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& tag = Args[1];
+        tag = MakeAtomFromExpression(Pos, ctx, tag).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+};
+
+class TYqlAsTagged final : public TYqlTaggedBase { // TYqlTaggedBase specialization that uses the callable name "AsTagged"
+public:
+ TYqlAsTagged(TPosition pos, const TVector<TNodePtr>& args)
+ : TYqlTaggedBase(pos, "AsTagged", args)
+ {}
+ // Clone keeps the position and clones every argument.
+ TNodePtr DoClone() const final {
+ return new TYqlAsTagged(Pos, CloneContainer(Args));
+ }
+};
+
+class TYqlUntag final : public TYqlTaggedBase { // TYqlTaggedBase specialization that uses the callable name "Untag"
+public:
+ TYqlUntag(TPosition pos, const TVector<TNodePtr>& args)
+ : TYqlTaggedBase(pos, "Untag", args)
+ {}
+ // Clone keeps the position and clones every argument.
+ TNodePtr DoClone() const final {
+ return new TYqlUntag(Pos, CloneContainer(Args));
+ }
+};
+
+// Variant(...) with exactly three arguments; the second one is converted into an atom.
+class TYqlVariant final : public TCallNode {
+public:
+    TYqlVariant(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "Variant", 3, 3, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& second = Args[1];
+        second = MakeAtomFromExpression(Pos, ctx, second).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlVariant(Pos, CloneContainer(Args));
+    }
+};
+
+// Enum(...) with exactly two arguments; the first one is converted into an atom.
+class TYqlEnum final : public TCallNode {
+public:
+    TYqlEnum(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "Enum", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& first = Args[0];
+        first = MakeAtomFromExpression(Pos, ctx, first).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlEnum(Pos, CloneContainer(Args));
+    }
+};
+
+// AsVariant(...) with exactly two arguments; the second one is converted into an atom.
+class TYqlAsVariant final : public TCallNode {
+public:
+    TYqlAsVariant(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "AsVariant", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& second = Args[1];
+        second = MakeAtomFromExpression(Pos, ctx, second).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlAsVariant(Pos, CloneContainer(Args));
+    }
+};
+
+// AsEnum(...) with exactly one argument, which is converted into an atom.
+class TYqlAsEnum final : public TCallNode {
+public:
+    TYqlAsEnum(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "AsEnum", 1, 1, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& only = Args[0];
+        only = MakeAtomFromExpression(Pos, ctx, only).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlAsEnum(Pos, CloneContainer(Args));
+    }
+};
+
+TNodePtr BuildFileNameArgument(TPosition pos, const TNodePtr& argument, const TString& prefix) { // Wraps `argument` into a TLiteralStringAtom created with the given `prefix`.
+ return new TLiteralStringAtom(pos, argument, "FilePath requires string literal as parameter", prefix); // NOTE(review): the message is presumably reported when `argument` is not a string literal — confirm in TLiteralStringAtom
+}
+
+// CRTP base for single-argument calls whose argument is wrapped by BuildFileNameArgument.
+// With IsFile set, the file alias prefix from the context settings is passed along;
+// otherwise an empty prefix is used. Literal queries are forwarded to the argument.
+template <typename TDerived, bool IsFile>
+class TYqlAtomBase: public TCallNode {
+public:
+    TYqlAtomBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 1, 1, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!Args.empty()) {
+            const TString prefix = IsFile ? ctx.Settings.FileAliasPrefix : TString();
+            Args[0] = BuildFileNameArgument(Pos, Args[0], prefix);
+        }
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TDerived(Pos, OpName, CloneContainer(Args));
+    }
+
+    bool IsLiteral() const override {
+        return !Args.empty() && Args[0]->IsLiteral();
+    }
+
+    TString GetLiteralType() const override {
+        if (Args.empty()) {
+            return "";
+        }
+        return Args[0]->GetLiteralType();
+    }
+
+    TString GetLiteralValue() const override {
+        if (Args.empty()) {
+            return "";
+        }
+        return Args[0]->GetLiteralValue();
+    }
+};
+
+class TYqlAtom final : public TYqlAtomBase<TYqlAtom, false> // IsFile = false: the base DoInit passes an empty prefix
+{
+ using TBase = TYqlAtomBase<TYqlAtom, false>;
+ using TBase::TBase; // reuse the base-class constructors
+};
+
+class TFileYqlAtom final : public TYqlAtomBase<TFileYqlAtom, true> // IsFile = true: the base DoInit passes ctx.Settings.FileAliasPrefix
+{
+ using TBase = TYqlAtomBase<TFileYqlAtom, true>;
+ using TBase::TBase; // reuse the base-class constructors
+};
+
+// Three-argument member access call; after all arguments are initialized the second
+// one is converted into an atom.
+class TTryMember final: public TCallNode {
+public:
+    TTryMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 3, 3, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.size() != 3) {
+            ctx.Error(Pos) << OpName << " requires exactly three arguments";
+            return false;
+        }
+        for (size_t i = 0; i < Args.size(); ++i) {
+            if (!Args[i]->Init(ctx, src)) {
+                return false;
+            }
+        }
+        auto& member = Args[1];
+        member = MakeAtomFromExpression(Pos, ctx, member).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TTryMember(Pos, OpName, CloneContainer(Args));
+    }
+};
+
+// Two user-visible arguments; DoInit appends the quoted Pretty flag as a third one
+// (which is why the arity passed to TCallNode is 3) and renames the call to FormatTypeDiff.
+template<bool Pretty>
+class TFormatTypeDiff final: public TCallNode {
+public:
+    TFormatTypeDiff(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 3, 3, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.size() != 2) {
+            ctx.Error(Pos) << OpName << " requires exactly 2 arguments";
+            return false;
+        }
+        for (size_t i = 0; i < Args.size(); ++i) {
+            if (!Args[i]->Init(ctx, src)) {
+                return false;
+            }
+        }
+        const TString prettyFlag = Pretty ? "true" : "false";
+        Args.push_back(Q(prettyFlag));
+        OpName = "FormatTypeDiff";
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TFormatTypeDiff<Pretty>(GetPos(), OpName, CloneContainer(Args));
+    }
+};
+
+// Three-argument call; after all arguments are initialized the second one
+// is converted into an atom.
+class TAddMember final: public TCallNode {
+public:
+    TAddMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 3, 3, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.size() != 3) {
+            ctx.Error(Pos) << OpName << " requires exactly three arguments";
+            return false;
+        }
+        for (size_t i = 0; i < Args.size(); ++i) {
+            if (!Args[i]->Init(ctx, src)) {
+                return false;
+            }
+        }
+        auto& member = Args[1];
+        member = MakeAtomFromExpression(Pos, ctx, member).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TAddMember(Pos, OpName, CloneContainer(Args));
+    }
+};
+
+// Two-argument call; after both arguments are initialized the second one
+// is converted into an atom.
+class TRemoveMember final: public TCallNode {
+public:
+    TRemoveMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.size() != 2) {
+            ctx.Error(Pos) << OpName << " requires exactly two arguments";
+            return false;
+        }
+        for (size_t i = 0; i < Args.size(); ++i) {
+            if (!Args[i]->Init(ctx, src)) {
+                return false;
+            }
+        }
+        auto& member = Args[1];
+        member = MakeAtomFromExpression(Pos, ctx, member).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TRemoveMember(Pos, OpName, CloneContainer(Args));
+    }
+};
+
+// Variadic call: every argument is wrapped into a quoted ('' arg) pair,
+// i.e. it is flattened with an empty prefix.
+class TCombineMembers final: public TCallNode {
+public:
+    TCombineMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 1, -1, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.empty()) {
+            ctx.Error(Pos) << "CombineMembers requires at least one argument";
+            return false;
+        }
+        for (auto& arg : Args) {
+            // flatten without prefix
+            arg = Q(Y(Q(""), arg));
+        }
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TCombineMembers(Pos, OpName, CloneContainer(Args));
+    }
+};
+
+// Variadic call: every argument must be a tuple of size 2 (prefix, struct). The prefix
+// is converted into an atom and the pair is re-emitted as a quoted list.
+class TFlattenMembers final: public TCallNode {
+public:
+    TFlattenMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, 1, -1, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.empty()) {
+            ctx.Error(Pos) << OpName << " requires at least one argument";
+            return false;
+        }
+        for (auto& arg : Args) {
+            if (!arg->Init(ctx, src)) {
+                return false;
+            }
+            if (arg->GetTupleSize() != 2) {
+                ctx.Error(Pos) << OpName << " requires arguments to be tuples of size 2: prefix and struct";
+                return false;
+            }
+            // flatten with prefix
+            arg = Q(Y(
+                MakeAtomFromExpression(Pos, ctx, arg->GetTupleElement(0)).Build(),
+                arg->GetTupleElement(1)
+            ));
+        }
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TFlattenMembers(Pos, OpName, CloneContainer(Args));
+    }
+};
+
+// Title-cases a type name, then title-cases again the part that follows a "Tz", "Json"
+// or "Dy" prefix (checked in that order) so the remainder starts with a capital as well.
+TString NormalizeTypeString(const TString& str) {
+    TString normalized = to_title(str);
+    const auto retitleAfter = [&normalized](const TString& prefix) {
+        if (normalized.StartsWith(prefix)) {
+            normalized = prefix + to_title(normalized.substr(prefix.size()));
+        }
+    };
+
+    retitleAfter("Tz");
+    retitleAfter("Json");
+    retitleAfter("Dy");
+
+    return normalized;
+}
+
+static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json", "JsonDocument",
+ "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal", "DyNumber",
+ "Date32", "Datetime64", "Timestamp64", "Interval64", "TzDate32", "TzDatetime64", "TzTimestamp64"}; // Type names accepted by GetDataTypeStringNode after NormalizeTypeString; also listed in its error message.
+// Takes argument #argNum of `node`, which must be a string literal naming one of
+// AvailableDataTypes (after NormalizeTypeString), and returns it as a quoted atom.
+// On failure an error is reported into ctx and an empty pointer is returned.
+// When outTypeStrPtr is given, it receives the normalized type name.
+TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) {
+    const auto describeExpectation = [&node, argNum]() {
+        static std::array<TString, 2> ordinals = {{"first", "second"}};
+        TStringBuilder text;
+        text << "At " << ordinals.at(argNum) << " argument of " << node.GetOpName() << " expected type string, available one of: "
+            << JoinRange(", ", AvailableDataTypes.begin(), AvailableDataTypes.end()) << ";";
+        return TString(text);
+    };
+
+    auto argNode = node.GetArgs().at(argNum);
+    const auto literal = argNode->GetLiteral("String");
+    if (!literal) {
+        ctx.Error(argNode->GetPos()) << describeExpectation();
+        return {};
+    }
+
+    TString typeName = NormalizeTypeString(*literal);
+    if (!AvailableDataTypes.contains(typeName)) {
+        ctx.Error(argNode->GetPos()) << "Bad type string: '" << typeName << "'. " << describeExpectation();
+        return {};
+    }
+    if (outTypeStrPtr) {
+        *outTypeStrPtr = typeName;
+    }
+    return argNode->Q(typeName);
+}
+
+// ParseFile(type, file): rewritten into Apply(Udf("File.ByLines", ...), FilePath(file)).
+// GetOpName keeps returning "ParseFile" although OpName is switched to "Apply".
+class TYqlParseFileOp final: public TCallNode {
+public:
+    TYqlParseFileOp(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "ParseFile", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        // The first argument names the data type of the parsed lines.
+        const auto itemType = GetDataTypeStringNode(ctx, *this, 0);
+        if (!itemType) {
+            return false;
+        }
+
+        const auto fileAlias = BuildFileNameArgument(Args[1]->GetPos(), Args[1], ctx.Settings.FileAliasPrefix);
+        OpName = "Apply";
+
+        auto udfUserType = Y("TupleType",
+            Y("TupleType", Y("DataType", itemType)),
+            Y("StructType"),
+            Y("TupleType"));
+        Args[0] = Y("Udf", Q("File.ByLines"), Y("Void"), udfUserType);
+        Args[1] = Y("FilePath", fileAlias);
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TString GetOpName() const override {
+        return "ParseFile";
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlParseFileOp(Pos, CloneContainer(Args));
+    }
+};
+
+// DataType(name[, p1[, p2]]): every argument is initialized against a private fake
+// source and converted into an atom.
+class TYqlDataType final : public TCallNode {
+public:
+    TYqlDataType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "DataType", 1, 3, args)
+    {
+        FakeSource = BuildFakeSource(pos);
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        for (auto& arg : Args) {
+            if (!arg->Init(ctx, FakeSource.Get())) {
+                return false;
+            }
+
+            arg = MakeAtomFromExpression(Pos, ctx, arg).Build();
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlDataType(Pos, CloneContainer(Args));
+    }
+
+private:
+    TSourcePtr FakeSource;
+};
+
+// ResourceType(tag): the single argument is converted into an atom.
+class TYqlResourceType final : public TCallNode {
+public:
+    TYqlResourceType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "ResourceType", 1, 1, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& tag = Args[0];
+        tag = MakeAtomFromExpression(Pos, ctx, tag).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlResourceType(Pos, CloneContainer(Args));
+    }
+};
+
+// TaggedType(type, tag): the second argument is converted into an atom.
+class TYqlTaggedType final : public TCallNode {
+public:
+    TYqlTaggedType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "TaggedType", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& tag = Args[1];
+        tag = MakeAtomFromExpression(Pos, ctx, tag).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlTaggedType(Pos, CloneContainer(Args));
+    }
+};
+
+// CallableType(optCountOrTuple, result, args...): a non-tuple first argument must be a
+// ui32 number and becomes a one-element quoted list; every following non-tuple argument
+// is wrapped into a one-element quoted list as well.
+class TYqlCallableType final : public TCallNode {
+public:
+    TYqlCallableType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "CallableType", 2, -1, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        auto& head = Args[0];
+        if (!head->GetTupleNode()) {
+            ui32 numOptArgs;
+            if (!Parseui32(head, numOptArgs)) {
+                ctx.Error(head->GetPos()) << "Expected either tuple or number of optional arguments";
+                return false;
+            }
+
+            head = Q(Y(BuildQuotedAtom(head->GetPos(), ToString(numOptArgs))));
+        }
+
+        // The arity check above guarantees Args[1] exists, so the result type and the
+        // argument types can be handled by one loop.
+        for (ui32 index = 1; index < Args.size(); ++index) {
+            auto& item = Args[index];
+            if (!item->GetTupleNode()) {
+                item = Q(Y(item));
+            }
+        }
+
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlCallableType(Pos, CloneContainer(Args));
+    }
+};
+
+// TupleElementType(type, index): the second argument is converted into an atom.
+class TYqlTupleElementType final : public TCallNode {
+public:
+    TYqlTupleElementType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "TupleElementType", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& element = Args[1];
+        element = MakeAtomFromExpression(Pos, ctx, element).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlTupleElementType(Pos, CloneContainer(Args));
+    }
+};
+
+// StructMemberType(type, member): the second argument is converted into an atom.
+class TYqlStructMemberType final : public TCallNode {
+public:
+    TYqlStructMemberType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "StructMemberType", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx) || !Args[1]->Init(ctx, src)) {
+            return false;
+        }
+
+        auto& member = Args[1];
+        member = MakeAtomFromExpression(Pos, ctx, member).Build();
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlStructMemberType(Pos, CloneContainer(Args));
+    }
+};
+
+// CallableArgumentType(type, index): the index must parse as ui32 and is re-emitted
+// as a quoted atom.
+class TYqlCallableArgumentType final : public TCallNode {
+public:
+    TYqlCallableArgumentType(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "CallableArgumentType", 2, 2, args)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        auto& indexArg = Args[1];
+        ui32 index;
+        if (!Parseui32(indexArg, index)) {
+            ctx.Error(indexArg->GetPos()) << "Expected index of the callable argument";
+            return false;
+        }
+
+        indexArg = BuildQuotedAtom(indexArg->GetPos(), ToString(index));
+        return TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlCallableArgumentType(Pos, CloneContainer(Args));
+    }
+};
+
+// Builds a (StructType '(label expr) ...) list; every member expression must carry a label.
+class TStructTypeNode : public TAstListNode {
+public:
+    TStructTypeNode(TPosition pos, const TVector<TNodePtr>& exprs)
+        : TAstListNode(pos)
+        , Exprs(exprs)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        Nodes.push_back(BuildAtom(Pos, "StructType", TNodeFlags::Default));
+        for (size_t i = 0; i < Exprs.size(); ++i) {
+            const auto& member = Exprs[i];
+            const auto& label = member->GetLabel();
+            if (!label) {
+                ctx.Error(member->GetPos()) << "Structure does not allow anonymous members";
+                return false;
+            }
+            Nodes.push_back(Q(Y(Q(label), member)));
+        }
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TStructTypeNode(Pos, CloneContainer(Exprs));
+    }
+
+private:
+    const TVector<TNodePtr> Exprs;
+};
+
+// If / IfStrict: the predicate is coalesced with Bool false, and a missing else-branch
+// is filled in with Null.
+template <bool IsStrict>
+class TYqlIf final: public TCallNode {
+public:
+    TYqlIf(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, IsStrict ? "IfStrict" : "If", 2, 3, args)
+    {}
+
+private:
+    TCallNode::TPtr DoClone() const override {
+        return new TYqlIf(GetPos(), CloneContainer(Args));
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        auto& predicate = Args[0];
+        predicate = Y("Coalesce", predicate, Y("Bool", Q("false")));
+
+        const bool elseMissing = Args.size() == 2;
+        if (elseMissing) {
+            Args.push_back(Y("Null"));
+        }
+        return TCallNode::DoInit(ctx, src);
+    }
+};
+
+// Substring-like call with 2..3 arguments: when only two are supplied,
+// a Null is appended as the third one.
+class TYqlSubstring final: public TCallNode {
+public:
+    TYqlSubstring(TPosition pos, const TString& name, const TVector<TNodePtr>& args)
+        : TCallNode(pos, name, 2, 3, args)
+    {}
+
+private:
+    TCallNode::TPtr DoClone() const override {
+        return new TYqlSubstring(GetPos(), OpName, CloneContainer(Args));
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        const bool thirdMissing = Args.size() == 2;
+        if (thirdMissing) {
+            Args.push_back(Y("Null"));
+        }
+        return TCallNode::DoInit(ctx, src);
+    }
+};
+
+class TYqlIn final: public TCallNode { // IN predicate node: DoInit rewrites the (key, collection, hints) arguments into an "SqlIn" call.
+public:
+ TYqlIn(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "IN", 3, 3, args)
+ {}
+
+private:
+ TNodePtr DoClone() const final {
+ return new TYqlIn(Pos, CloneContainer(Args));
+ }
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto key = Args[0];
+ auto inNode = Args[1];
+ auto hints = Args[2];
+
+ const auto pos = inNode->GetPos(); // every diagnostic and generated hint below uses the collection position
+
+ if (!key->Init(ctx, src)) {
+ return false;
+ }
+
+ if (!inNode->Init(ctx, inNode->GetSource() ? nullptr : src)) { // a collection that carries its own source is initialized without the outer one
+ return false;
+ }
+
+ if (inNode->GetLiteral("String")) { // a plain string on the right-hand side is rejected
+ ctx.Error(pos) << "Unable to use IN predicate with string argument, it won't search substring - "
+ "expecting tuple, list, dict or single column table source";
+ return false;
+ }
+
+ if (inNode->GetTupleSize() == 1) { // one-element tuple holding a subquery: only a warning is issued
+ auto singleElement = inNode->GetTupleElement(0);
+ // TODO: 'IN ((select ...))' is parsed exactly like 'IN (select ...)' instead of a single element tuple
+ if (singleElement->GetSource() || singleElement->IsSelect()) {
+ TStringBuf parenKind = singleElement->GetSource() ? "" : "external ";
+ ctx.Warning(pos,
+ TIssuesIds::YQL_CONST_SUBREQUEST_IN_LIST) << "Using subrequest in scalar context after IN, "
+ << "perhaps you should remove "
+ << parenKind << "parenthesis here";
+ }
+ }
+
+ TVector<TNodePtr> hintElements; // starts as a copy of the incoming hints; generated hints are appended below
+ for (size_t i = 0; i < hints->GetTupleSize(); ++i) {
+ hintElements.push_back(hints->GetTupleElement(i));
+ }
+
+ if (inNode->GetSource() || inNode->IsSelect()) {
+ hintElements.push_back(BuildHint(pos, "tableSource"));
+ }
+
+ if (!ctx.AnsiInForEmptyOrNullableItemsCollections.Defined()) { // setting not defined: "warnNoAnsi"; defined and true: "ansi"; defined and false: no hint
+ hintElements.push_back(BuildHint(pos, "warnNoAnsi"));
+ } else if (*ctx.AnsiInForEmptyOrNullableItemsCollections) {
+ hintElements.push_back(BuildHint(pos, "ansi"));
+ }
+
+ OpName = "SqlIn";
+ MinArgs = MaxArgs = 3;
+ Args = { // new argument order: collection (or its source), key, hints tuple
+ inNode->GetSource() ? inNode->GetSource() : inNode,
+ key,
+ BuildTuple(pos, hintElements)
+ };
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ static TNodePtr BuildHint(TPosition pos, const TString& name) { // a hint is a one-element tuple holding the quoted name
+ return BuildTuple(pos, { BuildQuotedAtom(pos, name, NYql::TNodeFlags::Default) });
+ }
+
+ TString GetOpName() const override { // fixed name, independent of OpName being switched to "SqlIn"
+ return "IN predicate";
+ }
+};
+
+// Base for "Udf" call nodes. The public constructors prepend the quoted UDF name to the
+// argument list; the protected one forwards an already prepared list unchanged.
+class TYqlUdfBase : public TCallNode {
+public:
+    TYqlUdfBase(TPosition pos, const TString& name)
+        : TCallNode(pos, "Udf", 1, 1, UdfArgs(pos, name))
+    {}
+
+    TYqlUdfBase(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
+        : TCallNode(pos, "Udf", argsCount, argsCount, UdfArgs(pos, name, &args))
+    {}
+
+protected:
+    TYqlUdfBase(TPosition pos, const TString& opName, ui32 minArgs, ui32 maxArgs, const TVector<TNodePtr>& args)
+        : TCallNode(pos, opName, minArgs, maxArgs, args)
+    {}
+
+private:
+    // Returns { 'name, args... }; `args` may be null when there is nothing to append.
+    static TVector<TNodePtr> UdfArgs(TPosition pos, const TString& name, const TVector<TNodePtr>* args = nullptr) {
+        TVector<TNodePtr> res;
+        res.push_back(BuildQuotedAtom(pos, name));
+        if (!args) {
+            return res;
+        }
+        res.insert(res.end(), args->begin(), args->end());
+        return res;
+    }
+
+    // After the generic state update the node is forced to non-aggregated and Const.
+    void DoUpdateState() const override {
+        TCallNode::DoUpdateState();
+        State.Set(ENodeState::Aggregated, false/*!RunConfig || RunConfig->IsAggregated()*/);
+        State.Set(ENodeState::Const, true /* FIXME: To avoid CheckAggregationLevel issue for non-const TypeOf. */);
+    }
+
+private:
+    TNodePtr RunConfig;
+};
+
+class TYqlUdf final : public TYqlUdfBase { // Concrete Udf node: adds only cloning on top of TYqlUdfBase.
+public:
+ TYqlUdf(TPosition pos, const TString& name)
+ : TYqlUdfBase(pos, name)
+ {}
+
+ TYqlUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
+ : TYqlUdfBase(pos, name, args, argsCount)
+ {}
+
+private:
+ TYqlUdf(const TYqlUdf& other) // copy used by DoClone: clones the arguments and keeps the arity limits
+ : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
+ {}
+
+ TNodePtr DoClone() const final {
+ return new TYqlUdf(*this);
+ }
+};
+
+// Udf node whose argument at index 3 is converted into an atom during DoInit.
+// NOTE(review): judging by the class name that argument is the UDF type config — confirm.
+class TYqlTypeConfigUdf final : public TYqlUdfBase {
+public:
+    TYqlTypeConfigUdf(TPosition pos, const TString& name)
+        : TYqlUdfBase(pos, name)
+    {}
+
+    TYqlTypeConfigUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
+        : TYqlUdfBase(pos, name, args, argsCount)
+    {}
+
+private:
+    // Copy used by DoClone: clones the arguments and keeps the arity limits.
+    TYqlTypeConfigUdf(const TYqlTypeConfigUdf& other)
+        : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!ValidateArguments(ctx)) {
+            return false;
+        }
+
+        // The arity configured through the constructors (1, or argsCount which defaults
+        // to 2) does not guarantee a fourth argument, so guard the indexed access.
+        constexpr size_t typeConfigIndex = 3;
+        if (Args.size() <= typeConfigIndex) {
+            ctx.Error(Pos) << "Udf with type config expects at least " << (typeConfigIndex + 1)
+                << " arguments, but got " << Args.size();
+            return false;
+        }
+
+        auto& typeConfig = Args[typeConfigIndex];
+        if (!typeConfig->Init(ctx, src)) {
+            return false;
+        }
+
+        typeConfig = MakeAtomFromExpression(Pos, ctx, typeConfig).Build();
+        return TYqlUdfBase::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlTypeConfigUdf(*this);
+    }
+};
+
+class TWeakFieldOp final: public TCallNode { // WeakField(column, type[, third]) call; DoInit repacks the arguments as (row, type, (column[, source])[, third]).
+public:
+ TWeakFieldOp(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "WeakField", 2, 3, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!src) {
+ ctx.Error(Pos) << GetCallExplain() << " unable use without source";
+ return false;
+ }
+
+ src->AllColumns(); // NOTE(review): presumably requests every column from the source — confirm in ISource
+
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ bool hasError = false; // all arguments are initialized even after a failure; the method fails afterwards
+ for (auto& arg: Args) {
+ if (!arg->Init(ctx, src)) {
+ hasError = true;
+ continue;
+ }
+ }
+
+ if (hasError) {
+ return false;
+ }
+
+ PrecacheState();
+
+ const auto memberPos = Args[0]->GetPos();
+ TVector<TNodePtr> repackArgs = {BuildAtom(memberPos, "row", NYql::TNodeFlags::Default)}; // new argument list, starting with the `row` atom
+ if (auto literal = Args[1]->GetLiteral("String")) { // a string-literal type is validated and normalized via GetDataTypeStringNode
+ TString targetType;
+ if (!GetDataTypeStringNode(ctx, *this, 1, &targetType)) {
+ return false;
+ }
+
+ repackArgs.push_back(Args[1]->Q(targetType));
+ } else {
+ repackArgs.push_back(Args[1]); // any other type expression is passed through unchanged
+ }
+
+ TVector<TNodePtr> column; // becomes the (name[, source]) tuple
+ auto namePtr = Args[0]->GetColumnName();
+ if (!namePtr || !*namePtr) {
+ ctx.Error(Pos) << GetCallExplain() << " expects column name as first argument";
+ return false;
+ }
+ auto memberName = *namePtr;
+ column.push_back(Args[0]->Q(*namePtr));
+
+ if (src->GetJoin() && !src->IsJoinKeysInitializing()) { // with a JOIN (outside join-key initialization) the column needs a correlation name
+ const auto sourcePtr = Args[0]->GetSourceName();
+ if (!sourcePtr || !*sourcePtr) {
+ ctx.Error(Pos) << GetOpName() << " required to have correlation name in case of JOIN for column at first parameter";
+ return false;
+ }
+ column.push_back(Args[0]->Q(*sourcePtr));
+ memberName = DotJoin(*sourcePtr, memberName);
+ }
+ if (!GetLabel()) { // an unlabeled node takes the (possibly source-qualified) member name as its label
+ SetLabel(memberName);
+ }
+ repackArgs.push_back(BuildTuple(memberPos, column));
+ if (Args.size() == 3) {
+ repackArgs.push_back(Args[2]);
+ }
+ ++MinArgs; // the repacked list is one element longer than the original one
+ ++MaxArgs;
+ Args.swap(repackArgs);
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TWeakFieldOp(Pos, CloneContainer(Args));
+ }
+};
+
+// TableRow() / JoinTableRow() node (Join selects the latter). Takes no arguments and
+// needs a real (non-fake) source. The node is either the plain `row` (optionally with
+// system members removed) or, for the non-Join variant under SimpleColumns with a JOIN,
+// a block that folds same-key columns and strips the join-label prefixes.
+template <bool Join>
+class TTableRow final : public INode {
+public:
+    TTableRow(TPosition pos, const TVector<TNodePtr>& args)
+        : TTableRow(pos, args.size())
+    {}
+
+    TTableRow(TPosition pos, ui32 argsCount)
+        : INode(pos)
+        , ArgsCount(argsCount)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!src || src->IsFake()) {
+            ctx.Error(Pos) << TStringBuilder() << (Join ? "Join" : "") << "TableRow requires data source";
+            return false;
+        }
+
+        if (ArgsCount > 0) {
+            // Name the function the user actually called, as the data-source error above does.
+            ctx.Error(Pos) << (Join ? "Join" : "") << "TableRow requires exactly 0 arguments";
+            return false;
+        }
+
+        src->AllColumns();
+        const bool isJoin = src->GetJoin();
+        if (!Join && ctx.SimpleColumns && isJoin) {
+            TNodePtr block = Y();
+            const auto& sameKeyMap = src->GetJoin()->GetSameKeysMap();
+            if (sameKeyMap) {
+                // For each same-key column: coalesce the per-source values into one member
+                // named after the column, then drop the source-qualified members.
+                block = L(block, Y("let", "flatSameKeys", "row"));
+                for (const auto& sameKeysPair: sameKeyMap) {
+                    const auto& column = sameKeysPair.first;
+                    auto keys = Y("Coalesce");
+                    auto sameSourceIter = sameKeysPair.second.begin();
+                    for (auto end = sameKeysPair.second.end(); sameSourceIter != end; ++sameSourceIter) {
+                        auto addKeyNode = Q(DotJoin(*sameSourceIter, column));
+                        keys = L(keys, Y("TryMember", "row", addKeyNode, Y("Null")));
+                    }
+
+                    block = L(block, Y("let", "flatSameKeys", Y("AddMember", "flatSameKeys", Q(column), keys)));
+                    sameSourceIter = sameKeysPair.second.begin();
+                    for (auto end = sameKeysPair.second.end(); sameSourceIter != end; ++sameSourceIter) {
+                        auto removeKeyNode = Q(DotJoin(*sameSourceIter, column));
+                        block = L(block, Y("let", "flatSameKeys", Y("ForceRemoveMember", "flatSameKeys", removeKeyNode)));
+                    }
+                }
+                block = L(block, Y("let", "row", "flatSameKeys"));
+            }
+
+            // Strip the "<label>." prefixes of every join label.
+            auto members = Y();
+            for (auto& joinLabel: src->GetJoin()->GetJoinLabels()) {
+                members = L(members, BuildQuotedAtom(Pos, joinLabel + "."));
+            }
+            block = L(block, Y("let", "res", Y("DivePrefixMembers", "row", Q(members))));
+
+            // Re-add the folded same-key columns to the result.
+            for (const auto& sameKeysPair: src->GetJoin()->GetSameKeysMap()) {
+                const auto& column = sameKeysPair.first;
+                auto addMemberKeyNode = Y("Member", "row", Q(column));
+                block = L(block, Y("let", "res", Y("AddMember", "res", Q(column), addMemberKeyNode)));
+            }
+
+            Node = Y("block", Q(L(block, Y("return", "res"))));
+        } else {
+            Node = ctx.EnableSystemColumns ? Y("RemoveSystemMembers", "row") : BuildAtom(Pos, "row", 0);
+        }
+        return true;
+    }
+
+    // Translates the node built by DoInit; DoInit must have succeeded before.
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, false);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TTableRow<Join>(Pos, ArgsCount);
+    }
+
+private:
+    const size_t ArgsCount;
+    TNodePtr Node;
+};
+
+TTableRows::TTableRows(TPosition pos, const TVector<TNodePtr>& args) // Convenience constructor: only the number of arguments is kept.
+ : TTableRows(pos, args.size()) // delegates to the (pos, argsCount) overload
+{}
+
+TTableRows::TTableRows(TPosition pos, ui32 argsCount) // Stores the argument count; DoInit() rejects any value above zero.
+ : INode(pos)
+ , ArgsCount(argsCount)
+{}
+
+// Rejects any arguments and prepares Node: the `inputRowsList` atom, wrapped in
+// RemoveSystemMembers when system columns are enabled. The source is not used.
+bool TTableRows::DoInit(TContext& ctx, ISource* /*src*/) {
+    if (ArgsCount > 0) {
+        ctx.Error(Pos) << "TableRows requires exactly 0 arguments";
+        return false;
+    }
+
+    if (ctx.EnableSystemColumns) {
+        Node = Y("RemoveSystemMembers", "inputRowsList");
+    } else {
+        Node = BuildAtom(Pos, "inputRowsList", 0);
+    }
+    return true;
+}
+
+TAstNode* TTableRows::Translate(TContext& ctx) const { // Emits the expression prepared by DoInit().
+ Y_DEBUG_ABORT_UNLESS(Node); // Node is only assigned by a successful DoInit()
+ return Node->Translate(ctx);
+}
+
+void TTableRows::DoUpdateState() const { // Marks the node as non-constant.
+ State.Set(ENodeState::Const, false);
+}
+
+TNodePtr TTableRows::DoClone() const { // New node with the same position and argument count; Node is not copied.
+ return MakeIntrusive<TTableRows>(Pos, ArgsCount);
+}
+
+TSessionWindow::TSessionWindow(TPosition pos, const TVector<TNodePtr>& args) // Captures the call arguments; their count is checked in DoInit().
+ : INode(pos)
+ , Args(args)
+ , FakeSource(BuildFakeSource(pos)) // DoInit() initializes every argument except the first against this source
+ , Valid(false) // set by MarkValid(); DoInit() fails while it is false
+{}
+
+void TSessionWindow::MarkValid() { // Permits DoInit() to succeed; without this call DoInit() reports a placement error.
+ YQL_ENSURE(!HasState(ENodeState::Initialized)); // must happen before the node is initialized
+ Valid = true;
+}
+
+// Builds the SessionWindowTraits call for an initialized SessionWindow.
+// `label` names the value whose type is taken with TypeOf.
+// Two-argument form (time expression, timeout): the traits get a time extractor
+// and a predicate lambda over (prev, curr) that is true when the values differ
+// and the timeout is less than |curr - prev| (a NULL comparison counts as true).
+// Four-argument form (order expression, init, update, calculate): the three
+// lambdas are passed through as given.
+TNodePtr TSessionWindow::BuildTraits(const TString& label) const {
+    YQL_ENSURE(HasState(ENodeState::Initialized));
+
+    auto trueNode = Y("Bool", Q("true"));
+
+    // Both forms sort by their first argument.
+    auto sortKey = BuildLambda(Pos, Y("row"), Y("PersistableRepr", Args[0]));
+    auto sortSpec = Y("SortTraits", Y("TypeOf", label), trueNode, sortKey);
+
+    if (Args.size() != 2) {
+        auto initLambda = Args[1];
+        auto updateLambda = Args[2];
+        auto calculateLambda = Args[3];
+
+        return Y("SessionWindowTraits",
+            Y("TypeOf", label),
+            sortSpec,
+            initLambda,
+            updateLambda,
+            calculateLambda);
+    }
+
+    auto timeExpr = Args[0];
+    auto timeoutExpr = Args[1];
+
+    // lhs < rhs ?? true
+    auto lessOrTrue = [&](auto lhs, auto rhs) {
+        return Y("Coalesce", Y("<", lhs, rhs), trueNode);
+    };
+
+    auto forwardDelta = Y("-", "curr", "prev");
+    auto backwardDelta = Y("-", "prev", "curr");
+    auto absDelta = Y("If", lessOrTrue("prev", "curr"), forwardDelta, backwardDelta);
+
+    auto valuesDiffer = Y("AggrNotEquals", "curr", "prev");
+    auto newSessionPred = Y("And", valuesDiffer, lessOrTrue(timeoutExpr, absDelta));
+    auto timeoutLambda = BuildLambda(timeoutExpr->GetPos(), Y("prev", "curr"), newSessionPred);
+
+    return Y("SessionWindowTraits",
+        Y("TypeOf", label),
+        sortSpec,
+        BuildLambda(Pos, Y("row"), timeExpr),
+        timeoutLambda);
+}
+
+// Validates the SessionWindow call and initializes its arguments.
+// Requires a real data source, two or four arguments, and a prior MarkValid().
+// The first argument is initialized against `src`; all remaining arguments are
+// initialized against the internal fake source. The four-argument form also
+// requests all columns from `src`.
+bool TSessionWindow::DoInit(TContext& ctx, ISource* src) {
+    if (!src || src->IsFake()) {
+        ctx.Error(Pos) << "SessionWindow requires data source";
+        return false;
+    }
+
+    const size_t argCount = Args.size();
+    if (argCount != 2 && argCount != 4) {
+        ctx.Error(Pos) << "SessionWindow requires either two or four arguments";
+        return false;
+    }
+
+    if (!Valid) {
+        ctx.Error(Pos) << "SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression";
+        return false;
+    }
+
+    if (argCount == 4) {
+        src->AllColumns();
+    }
+
+    if (!Args[0]->Init(ctx, src)) {
+        return false;
+    }
+
+    // Stop at the first argument that fails to initialize.
+    for (size_t i = 1; i < argCount; ++i) {
+        if (!Args[i]->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+TAstNode* TSessionWindow::Translate(TContext&) const { // Not meant to be called: the check below always fails.
+ YQL_ENSURE(false, "Translate is called for SessionWindow");
+ return nullptr; // NOTE(review): presumably unreachable because YQL_ENSURE(false, ...) does not return — confirm
+}
+
+void TSessionWindow::DoUpdateState() const { // Marks the node as non-constant.
+ State.Set(ENodeState::Const, false);
+}
+
+TNodePtr TSessionWindow::DoClone() const { // Clones the arguments; the constructor resets Valid to false, so the clone needs its own MarkValid().
+ return new TSessionWindow(Pos, CloneContainer(Args));
+}
+
+TString TSessionWindow::GetOpName() const { // Fixed operation name of this node.
+ return "SessionWindow";
+}
+
+// Node for SessionStart (IsStart == true) and SessionState (IsStart == false).
+// It resolves the SessionWindow it belongs to and reads the corresponding
+// member from `row`.
+template<bool IsStart>
+class TSessionStart final : public INode {
+public:
+    // Only the number of call arguments is kept; DoInit() requires it to be zero.
+    TSessionStart(TPosition pos, const TVector<TNodePtr>& args)
+        : INode(pos)
+        , ArgsCount(args.size())
+    {
+    }
+private:
+    // Used by DoClone().
+    TSessionStart(TPosition pos, size_t argsCount)
+        : INode(pos)
+        , ArgsCount(argsCount)
+    {}
+
+    // Finds the session window: from the named window specification when the
+    // source reports a window name, otherwise from the source's session window
+    // spec. Without a window name only SessionStart is accepted.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!src || src->IsFake()) {
+            ctx.Error(Pos) << GetOpName() << " requires data source";
+            return false;
+        }
+
+        if (ArgsCount > 0) {
+            ctx.Error(Pos) << GetOpName() << " requires exactly 0 arguments";
+            return false;
+        }
+
+        auto windowName = src->GetWindowName();
+        OverWindow = windowName != nullptr;
+
+        TNodePtr session;
+        if (OverWindow) {
+            auto windowSpec = src->FindWindowSpecification(ctx, *windowName);
+            if (!windowSpec) {
+                return false;
+            }
+            session = windowSpec->Session;
+            if (!session) {
+                ctx.Error(Pos) << GetOpName() << " can not be used with window " << *windowName << ": SessionWindow specification is missing in PARTITION BY";
+                return false;
+            }
+        } else {
+            session = src->GetSessionWindowSpec();
+            if (!session) {
+                TString hint;
+                if (src->IsOverWindowSource()) {
+                    hint = ". Maybe you forgot to add OVER `window_name`?";
+                }
+                if (src->HasAggregations()) {
+                    ctx.Error(Pos) << GetOpName() << " can not be used here: SessionWindow specification is missing in GROUP BY" << hint;
+                } else {
+                    ctx.Error(Pos) << GetOpName() << " can not be used without aggregation by SessionWindow" << hint;
+                }
+                return false;
+            }
+
+            if (!IsStart) {
+                ctx.Error(Pos) << GetOpName() << " with GROUP BY is not supported yet";
+                return false;
+            }
+        }
+
+        // The session window itself already failed; fail without a second message.
+        if (session->HasState(ENodeState::Failed)) {
+            return false;
+        }
+
+        YQL_ENSURE(session->HasState(ENodeState::Initialized));
+        YQL_ENSURE(session->GetLabel());
+
+        Node = Y("Member", "row", BuildQuotedAtom(Pos, session->GetLabel()));
+        if (OverWindow) {
+            // Over a window the session member is a struct: pick the requested field.
+            const char* field = IsStart ? "start" : "state";
+            Node = Y("Member", Node, BuildQuotedAtom(Pos, field));
+        }
+        return true;
+    }
+
+    // Emits the expression prepared by DoInit().
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, false);
+        if (OverWindow) {
+            State.Set(ENodeState::OverWindow, true);
+            return;
+        }
+        if (IsStart) {
+            State.Set(ENodeState::Aggregated, true);
+        }
+    }
+
+    // The clone starts uninitialized: Node and OverWindow are not copied.
+    TNodePtr DoClone() const override {
+        return new TSessionStart<IsStart>(Pos, ArgsCount);
+    }
+
+    TString GetOpName() const override {
+        if (IsStart) {
+            return "SessionStart";
+        }
+        return "SessionState";
+    }
+
+    const size_t ArgsCount;
+    bool OverWindow = false;
+    TNodePtr Node;
+};
+
+THoppingWindow::THoppingWindow(TPosition pos, const TVector<TNodePtr>& args) // Captures the call arguments; their count is checked in DoInit().
+ : INode(pos)
+ , Args(args)
+ , FakeSource(BuildFakeSource(pos)) // DoInit() initializes both arguments against this source
+ , Valid(false) // set by MarkValid(); DoInit() fails while it is false
+{}
+
+void THoppingWindow::MarkValid() { // Permits DoInit() to succeed; without this call DoInit() reports a placement error.
+ YQL_ENSURE(!HasState(ENodeState::Initialized)); // must happen before the node is initialized
+ Valid = true;
+}
+
+// Builds the HoppingTraits call for an initialized HoppingWindow.
+// `label` names a list-typed value; its item type becomes the first argument.
+// The time extractor reads the "write_time" system metadata of the row.
+// Interval is deliberately passed twice, followed by the fixed 'true and 'v2 atoms.
+TNodePtr THoppingWindow::BuildTraits(const TString& label) const {
+    YQL_ENSURE(HasState(ENodeState::Initialized));
+
+    auto itemType = Y("ListItemType", Y("TypeOf", label));
+    auto writeTime = Y("SystemMetadata", Y("String", Q("write_time")), Y("DependsOn", "row"));
+    auto timeExtractor = BuildLambda(Pos, Y("row"), Y("Just", writeTime));
+
+    return Y(
+        "HoppingTraits",
+        itemType,
+        timeExtractor,
+        Hop,
+        Interval,
+        Interval,
+        Q("true"),
+        Q("v2"));
+}
+
+// Validates the HoppingWindow call and prepares Hop and Interval.
+// Requires a real data source, exactly two arguments and a prior MarkValid().
+// Both arguments are initialized against the internal fake source and then
+// converted by ProcessIntervalParam().
+bool THoppingWindow::DoInit(TContext& ctx, ISource* src) {
+    if (!src || src->IsFake()) {
+        ctx.Error(Pos) << "HoppingWindow requires data source";
+        return false;
+    }
+
+    if (Args.size() != 2) {
+        ctx.Error(Pos) << "HoppingWindow requires two arguments";
+        return false;
+    }
+
+    if (!Valid) {
+        ctx.Error(Pos) << "HoppingWindow can only be used as a top-level GROUP BY expression";
+        return false;
+    }
+
+    auto hopArg = Args[0];
+    auto intervalArg = Args[1];
+    // The second argument is not initialized when the first one fails.
+    if (!hopArg->Init(ctx, FakeSource.Get()) || !intervalArg->Init(ctx, FakeSource.Get())) {
+        return false;
+    }
+
+    Hop = ProcessIntervalParam(hopArg);
+    Interval = ProcessIntervalParam(intervalArg);
+    return true;
+}
+
+TAstNode* THoppingWindow::Translate(TContext&) const { // Not meant to be called: the check below always fails.
+ YQL_ENSURE(false, "Translate is called for HoppingWindow");
+ return nullptr; // NOTE(review): presumably unreachable because YQL_ENSURE(false, ...) does not return — confirm
+}
+
+void THoppingWindow::DoUpdateState() const { // Marks the node as non-constant.
+ State.Set(ENodeState::Const, false);
+}
+
+TNodePtr THoppingWindow::DoClone() const { // Clones the arguments; the constructor resets Valid to false, and Hop/Interval are not copied.
+ return new THoppingWindow(Pos, CloneContainer(Args));
+}
+
+TString THoppingWindow::GetOpName() const { // Fixed operation name of this node.
+ return "HoppingWindow";
+}
+
+// Converts a hop/interval argument into the node used by BuildTraits():
+// a String literal is wrapped into an "Interval" data node, anything else is
+// wrapped into EvaluateExpr.
+TNodePtr THoppingWindow::ProcessIntervalParam(const TNodePtr& node) const {
+    const bool isStringLiteral = static_cast<bool>(node->GetLiteral("String"));
+    if (isStringLiteral) {
+        return new TYqlData(node->GetPos(), "Interval", {node});
+    }
+
+    return Y("EvaluateExpr", node);
+}
+
+// Builds the three-element TupleType describing a UDF call from a flat
+// argument list:
+//   1. TupleType of TypeOf(arg) for every argument,
+//   2. an empty StructType,
+//   3. customUserType, or an empty TupleType when it is null.
+TNodePtr BuildUdfUserTypeArg(TPosition pos, const TVector<TNodePtr>& args, TNodePtr customUserType) {
+    TVector<TNodePtr> argTypes;
+    for (auto& arg : args) {
+        const TVector<TNodePtr> typeOfArgs(1, arg);
+        argTypes.push_back(new TCallNodeImpl(pos, "TypeOf", typeOfArgs));
+    }
+
+    TNodePtr thirdItem = customUserType;
+    if (!thirdItem) {
+        thirdItem = new TCallNodeImpl(pos, "TupleType", {});
+    }
+
+    TVector<TNodePtr> userTypeItems;
+    userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", argTypes));
+    userTypeItems.push_back(new TCallNodeImpl(pos, "StructType", {}));
+    userTypeItems.push_back(thirdItem);
+
+    return new TCallNodeImpl(pos, "TupleType", userTypeItems);
+}
+
+// Builds the three-element TupleType describing a UDF call that has named
+// arguments: TypeOf(positionalArgs), TypeOf(namedArgs), and customUserType
+// (or an empty TupleType when it is null).
+// positionalArgs must be non-null: it is also used to create the TypeOf nodes.
+TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) {
+    TNodePtr thirdItem = customUserType;
+    if (!thirdItem) {
+        thirdItem = new TCallNodeImpl(pos, "TupleType", {});
+    }
+
+    TVector<TNodePtr> userTypeItems;
+    userTypeItems.reserve(3);
+    userTypeItems.push_back(positionalArgs->Y("TypeOf", positionalArgs));
+    userTypeItems.push_back(positionalArgs->Y("TypeOf", namedArgs));
+    userTypeItems.push_back(thirdItem);
+
+    return new TCallNodeImpl(pos, "TupleType", userTypeItems);
+}
+
+// Builds the extra argument list for a generic UDF node.
+// Returns an empty vector when generic UDFs are disabled. Otherwise returns:
+//   [0] a list node holding the single atom "Void",
+//   [1] the user type (from positional/named args when namedArgs is set,
+//       otherwise from the flat `args`),
+//   [2] typeConfig, only when it is non-null.
+TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector<TNodePtr>& args,
+        TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TNodePtr typeConfig) {
+    TVector<TNodePtr> udfArgs;
+    if (!ctx.Settings.EnableGenericUdfs) {
+        return udfArgs;
+    }
+
+    TNodePtr voidList = new TAstListNodeImpl(pos);
+    voidList->Add(new TAstAtomNodeImpl(pos, "Void", 0));
+    udfArgs.push_back(voidList);
+
+    TNodePtr userType;
+    if (namedArgs) {
+        userType = BuildUdfUserTypeArg(pos, positionalArgs, namedArgs, customUserType);
+    } else {
+        userType = BuildUdfUserTypeArg(pos, args, customUserType);
+    }
+    udfArgs.push_back(userType);
+
+    if (typeConfig) {
+        udfArgs.push_back(typeConfig);
+    }
+
+    return udfArgs;
+}
+
+// Builds the node that invokes `module.name` with the given arguments.
+// Two shapes are produced:
+//   * Apply / NamedApply over a callable, when the module is "@yql" (compared
+//     in lower case) or generic UDFs are disabled;
+//   * SqlCall otherwise, with the arguments
+//       'module.name, (PositionalArgs[, namedArgs]),
+//       [user type], [type config], [run config].
+//     The optional tail is positional, so a placeholder (empty TupleType or an
+//     empty quoted atom) is inserted when a later item is present but an
+//     earlier one is not.
+// positionalArgs must be a tuple node whenever namedArgs is set.
+TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
+    TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig)
+{
+    const TString fullName = module + "." + name;
+
+    TNodePtr callable;
+    if (to_lower(module) == "@yql") {
+        callable = BuildCallable(pos, module, name, {});
+    } else if (!ctx.Settings.EnableGenericUdfs) {
+        auto udfVar = ctx.AddSimpleUdf(fullName);
+        callable = new TAstAtomNodeImpl(pos, udfVar, TNodeFlags::ArbitraryContent);
+    }
+
+    if (callable) {
+        TVector<TNodePtr> applyArgs;
+        applyArgs.reserve(args.size() + 1);
+        applyArgs.push_back(callable);
+        for (const auto& arg : args) {
+            applyArgs.push_back(arg);
+        }
+        return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs);
+    }
+
+    TVector<TNodePtr> sqlCallArgs;
+    sqlCallArgs.push_back(BuildQuotedAtom(pos, fullName));
+    if (namedArgs) {
+        auto positionalTuple = positionalArgs->GetTupleNode();
+        YQL_ENSURE(positionalTuple);
+        TNodePtr positionalNode = new TCallNodeImpl(pos, "PositionalArgs", positionalTuple->Elements());
+        sqlCallArgs.push_back(BuildTuple(pos, { positionalNode, namedArgs }));
+    } else {
+        TNodePtr positionalNode = new TCallNodeImpl(pos, "PositionalArgs", args);
+        sqlCallArgs.push_back(BuildTuple(pos, { positionalNode }));
+    }
+
+    // optional arguments
+    const bool hasTypeConfig = !typeConfig.Empty();
+
+    if (customUserType) {
+        sqlCallArgs.push_back(customUserType);
+    } else if (hasTypeConfig) {
+        // placeholder user type so that the type config keeps its position
+        sqlCallArgs.push_back(new TCallNodeImpl(pos, "TupleType", {}));
+    }
+
+    if (hasTypeConfig) {
+        sqlCallArgs.push_back(typeConfig.Build());
+    } else if (runConfig) {
+        // placeholder type config so that the run config keeps its position
+        sqlCallArgs.push_back(BuildQuotedAtom(pos, ""));
+    }
+
+    if (runConfig) {
+        sqlCallArgs.push_back(runConfig);
+    }
+
+    return new TCallNodeImpl(pos, "SqlCall", sqlCallArgs);
+}
+
+// Reference to a callable `Module::Name`. DoInit() resolves it to one of:
+//   * a plain function node            (module "yql"),
+//   * a parsed s-expression            (module "@yql"; Name holds the source),
+//   * a `bind` to an imported module   (module listed in ModuleMapping),
+//   * a UDF / SqlReduceUdf node        (everything else).
+// Module is a TCiString, so the module comparisons are case-insensitive.
+class TCallableNode final: public INode {
+public:
+    TCallableNode(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce)
+        : INode(pos)
+        , Module(module)
+        , Name(name)
+        , Args(args)
+        , ForReduce(forReduce)
+    {}
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Module == "yql") {
+            Node = new TFuncNodeImpl(Pos, Name);
+        } else if (Module == "@yql") {
+            auto parsedName = StringContent(ctx, Pos, Name);
+            if (!parsedName) {
+                return false;
+            }
+
+            const TString yql("(" + parsedName->Content + ")");
+            TAstParseResult ast = ParseAst(yql, ctx.Pool.get());
+            /// TODO: do not drop warnings
+            if (!ast.IsOk()) {
+                ctx.Error(Pos) << "Failed to parse YQL: " << ast.Issues.ToString();
+                return false;
+            }
+
+            const auto rootCount = ast.Root->GetChildrenCount();
+            if (rootCount != 1) {
+                ctx.Error(Pos) << "Failed to parse YQL: expecting AST root node with single child, but got " << rootCount;
+                return false;
+            }
+            Node = AstNode(ast.Root->GetChild(0));
+
+            if (src) {
+                src->AllColumns();
+            }
+        } else if (ctx.Settings.ModuleMapping.contains(Module)) {
+            Node = Y("bind", Module + "_module", Q(Name));
+            if (src) {
+                src->AllColumns();
+            }
+        } else {
+            TNodePtr customUserType = nullptr;
+            const bool isTensorflowRunBatch = Module == "Tensorflow" && Name == "RunBatch";
+            if (isTensorflowRunBatch && Args.size() > 2) {
+                // The third argument is consumed here: it becomes the custom user
+                // type, extended with the PassThrough member type taken from the
+                // second argument, and is removed from the argument list.
+                auto passThroughAtom = Q("PassThrough");
+                auto passThroughType = Y("StructMemberType", Y("ListItemType", Y("TypeOf", Args[1])), passThroughAtom);
+                customUserType = Y("AddMemberType", Args[2], passThroughAtom, passThroughType);
+                Args.erase(Args.begin() + 2);
+            }
+
+            // Datetime (always) and Yson (under PragmaYsonFast) are redirected to
+            // the module with the "2" suffix. Module itself is rewritten.
+            if ("Datetime" == Module || ("Yson" == Module && ctx.PragmaYsonFast)) {
+                Module.append('2');
+            }
+
+            TNodePtr typeConfig = MakeTypeConfig(Pos, to_lower(Module), Args);
+            if (!ForReduce) {
+                auto udfArgs = BuildUdfArgs(ctx, Pos, Args, nullptr, nullptr, customUserType, typeConfig);
+                Node = BuildUdf(ctx, Pos, Module, Name, udfArgs);
+            } else {
+                TNodePtr userType = customUserType;
+                if (!userType) {
+                    userType = new TCallNodeImpl(Pos, "TupleType", {});
+                }
+
+                TVector<TNodePtr> reduceArgs;
+                reduceArgs.push_back(BuildQuotedAtom(Pos, TString(Module) + "." + Name));
+                reduceArgs.push_back(userType);
+                if (typeConfig) {
+                    reduceArgs.push_back(typeConfig);
+                }
+                Node = new TCallNodeImpl(Pos, "SqlReduceUdf", reduceArgs);
+            }
+        }
+        return Node->Init(ctx, src);
+    }
+
+    // Emits the node resolved by DoInit().
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    const TString* FuncName() const override {
+        return &Name;
+    }
+
+    const TString* ModuleName() const override {
+        return &Module;
+    }
+
+    // Const / Aggregated are inherited from the resolved node.
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, Node->IsConstant());
+        State.Set(ENodeState::Aggregated, Node->IsAggregated());
+    }
+
+    // The clone copies the current Module and Args and starts unresolved.
+    TNodePtr DoClone() const override {
+        return new TCallableNode(Pos, Module, Name, CloneContainer(Args), ForReduce);
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+private:
+    TCiString Module;
+    TString Name;
+    TVector<TNodePtr> Args;
+    TNodePtr Node;
+    const bool ForReduce;
+};
+
+TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce) { // Factory for TCallableNode; every parameter is forwarded unchanged.
+ return new TCallableNode(pos, module, name, args, forReduce);
+}
+
+// Builds the node referring to the UDF `module.name`:
+//   * module "@yql" (compared in lower case) -> a callable node;
+//   * no extra arguments -> an atom naming the variable that the context
+//     registers for this simple UDF;
+//   * otherwise -> a TYqlUdf node carrying the arguments.
+TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) {
+    if (to_lower(module) == "@yql") {
+        return BuildCallable(pos, module, name, args);
+    }
+
+    auto qualifiedName = module + "." + name;
+    if (args.empty()) {
+        auto udfVar = ctx.AddSimpleUdf(qualifiedName);
+        return new TAstAtomNodeImpl(pos, udfVar, TNodeFlags::ArbitraryContent);
+    }
+
+    return new TYqlUdf(pos, qualifiedName, args, args.size() + 1);
+}
+
+// Declaration of a script UDF: ModuleName selects the script engine and
+// FuncName the function inside the script. The last argument is the script
+// body; when two arguments are given the first one is the function type.
+class TScriptUdf final: public INode {
+public:
+    TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args)
+        : INode(pos)
+        , ModuleName(moduleName)
+        , FuncName(funcName)
+        , Args(args)
+    {}
+
+    // Checks the argument count (modules whose name contains "Python" may omit
+    // the type), initializes the script and the type, and builds the ScriptUdf
+    // call stored in Node.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos;
+        if (isPython) {
+            if (Args.size() < 1 || Args.size() > 2) {
+                ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters";
+                return false;
+            }
+        } else if (Args.size() != 2) {
+            ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters";
+            return false;
+        }
+
+        auto nameAtom = BuildQuotedAtom(Pos, FuncName);
+        auto scriptNode = Args.back();
+        if (!scriptNode->Init(ctx, src)) {
+            return false;
+        }
+
+        // Long inline scripts are replaced by an alias unless named expressions
+        // are being compacted.
+        auto scriptLiteral = Args.back()->GetLiteral("String");
+        const bool scriptIsLong = scriptLiteral && scriptLiteral->size() > SQL_MAX_INLINE_SCRIPT_LEN;
+        if (!ctx.CompactNamedExprs && scriptIsLong) {
+            scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode));
+        }
+
+        INode::TPtr type;
+        if (Args.size() == 2) {
+            type = Args[0];
+        } else {
+            // Python supports getting functions signatures right from docstrings
+            auto signatureFunc = Y("bind", "core_module", Q("PythonFuncSignature"));
+            auto signatureCall = Y("Apply",
+                signatureFunc,
+                Q(ModuleName),
+                scriptNode,
+                Y("String", nameAtom)
+            );
+            type = Y("EvaluateType", Y("ParseTypeHandle", signatureCall));
+        }
+
+        if (!type->Init(ctx, src)) {
+            return false;
+        }
+
+        Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode);
+        return true;
+    }
+
+    // Emits the ScriptUdf call built by DoInit().
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, true);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args));
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+private:
+    TString ModuleName;
+    TString FuncName;
+    TVector<TNodePtr> Args;
+    TNodePtr Node;
+};
+
+// ToDict family of builtins. The caller passes one argument; DoInit() appends
+// two lambdas selecting element 0 and element 1 of each item, plus a quoted
+// option tuple ('Sorted | 'Hashed | 'Auto, 'Mode). Sorted takes precedence
+// over Hashed.
+template <bool Sorted, bool Hashed>
+class TYqlToDict final: public TCallNode {
+public:
+    // The underlying "ToDict" call is declared with exactly four arguments,
+    // which is the count reached after DoInit() has appended its three.
+    TYqlToDict(TPosition pos, const TString& mode, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "ToDict", 4, 4, args)
+        , Mode(mode)
+    {}
+
+private:
+    TCallNode::TPtr DoClone() const override {
+        return new TYqlToDict<Sorted, Hashed>(GetPos(), Mode, CloneContainer(Args));
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (Args.size() != 1) {
+            ctx.Error(Pos) << "ToDict required exactly one argument";
+            return false;
+        }
+
+        const char* dictKind = "Auto";
+        if (Sorted) {
+            dictKind = "Sorted";
+        } else if (Hashed) {
+            dictKind = "Hashed";
+        }
+
+        auto firstOfItem = BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0")));
+        Args.push_back(firstOfItem);
+        auto secondOfItem = BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1")));
+        Args.push_back(secondOfItem);
+        Args.push_back(Q(Y(Q(dictKind), Q(Mode))));
+        return TCallNode::DoInit(ctx, src);
+    }
+private:
+    TString Mode;
+};
+
+// Time boundary of the current hopping window.
+// With IsStart == false the node reads the window time member of `row`;
+// with IsStart == true it subtracts the window interval from that member.
+// The member is "_yql_time" for the legacy hopping specification and the
+// HoppingWindow label otherwise.
+template <bool IsStart>
+class THoppingTime final: public TAstListNode {
+public:
+    // `args` is accepted only so the class fits the common builtin factory
+    // signature; it is ignored.
+    THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {})
+        : TAstListNode(pos)
+    {
+        Y_UNUSED(args);
+    }
+
+private:
+    TNodePtr DoClone() const override {
+        return new THoppingTime(GetPos());
+    }
+
+    // Returns false (with an error reported at Pos) when there is no source or
+    // the source carries neither a legacy nor a regular hopping window spec.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        // A missing source cannot provide a hopping window specification either,
+        // so report the same error instead of dereferencing a null pointer.
+        if (!src) {
+            ctx.Error(Pos) << "No hopping window parameters in aggregation";
+            return false;
+        }
+
+        auto legacySpec = src->GetLegacyHoppingWindowSpec();
+        auto spec = src->GetHoppingWindowSpec();
+        if (!legacySpec && !spec) {
+            ctx.Error(Pos) << "No hopping window parameters in aggregation";
+            return false;
+        }
+
+        Nodes.clear();
+
+        const auto fieldName = legacySpec
+            ? "_yql_time"
+            : spec->GetLabel();
+
+        if (!IsStart) {
+            Add("Member", "row", Q(fieldName));
+            return true;
+        }
+
+        // The interval is needed only for the window start. Verify the
+        // downcast instead of dereferencing a possibly null result.
+        THoppingWindow* window = nullptr;
+        if (!legacySpec) {
+            window = dynamic_cast<THoppingWindow*>(spec.Get());
+            YQL_ENSURE(window, "Hopping window specification is not a HoppingWindow node");
+        }
+        const auto interval = legacySpec
+            ? legacySpec->Interval
+            : window->Interval;
+
+        Add("Sub",
+            Y("Member", "row", Q(fieldName)),
+            interval);
+        return true;
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Aggregated, true);
+    }
+};
+
+class TInvalidBuiltin final: public INode { // Placeholder node whose initialization always fails with a stored message.
+public:
+ TInvalidBuiltin(TPosition pos, const TString& info)
+ : INode(pos)
+ , Info(info) // error text reported by DoInit()
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource*) override { // Reports Info at Pos and fails unconditionally; the source is ignored.
+ ctx.Error(Pos) << Info;
+ return false;
+ }
+
+ TAstNode* Translate(TContext&) const override { // Nothing to emit: DoInit() never succeeds.
+ return nullptr;
+ }
+
+ TPtr DoClone() const override { // Copy with the same position and message.
+ return new TInvalidBuiltin(GetPos(), Info);
+ }
+private:
+ TString Info;
+};
+
+enum EAggrFuncTypeCallback { // Selects the aggregation builder that BuildAggrFuncFactoryCallback dispatches to.
+ NORMAL, // BuildFactoryAggregation
+ KEY_PAYLOAD, // BuildKeyPayloadFactoryAggregation
+ PAYLOAD_PREDICATE, // BuildPayloadPredicateFactoryAggregation
+ TWO_ARGS, // BuildTwoArgsFactoryAggregation
+ COUNT, // BuildCountAggregation
+ HISTOGRAM, // BuildHistogramFactoryAggregation
+ LINEAR_HISTOGRAM, // BuildLinearHistogramFactoryAggregation
+ PERCENTILE, // BuildPercentileFactoryAggregation
+ TOPFREQ, // BuildTopFreqFactoryAggregation
+ TOP, // BuildTopFactoryAggregation<false>
+ TOP_BY, // BuildTopFactoryAggregation<true>
+ COUNT_DISTINCT_ESTIMATE, // BuildCountDistinctEstimateFactoryAggregation
+ LIST, // BuildListFactoryAggregation
+ UDAF, // BuildUserDefinedFactoryAggregation
+ PG, // BuildPGFactoryAggregation (called without a factory name)
+ NTH_VALUE // BuildNthFactoryAggregation
+};
+
+struct TCoreFuncInfo { // Value type stored in TCoreFuncMap: a name plus two argument-count bounds.
+ TString Name;
+ ui32 MinArgs; // NOTE(review): presumably the minimum accepted argument count — confirm at the use site
+ ui32 MaxArgs; // NOTE(review): presumably the maximum accepted argument count — confirm at the use site
+};
+
+using TAggrFuncFactoryCallback = std::function<INode::TPtr(TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory)>; // creates the node for one aggregate function call
+using TAggrFuncFactoryCallbackMap = std::unordered_map<TString, TAggrFuncFactoryCallback, THash<TString>>; // aggregate factory callbacks keyed by TString
+using TBuiltinFactoryCallback = std::function<TNodePtr(TPosition pos, const TVector<TNodePtr>& args)>; // creates the node for one builtin call
+using TBuiltinFactoryCallbackMap = std::unordered_map<TString, TBuiltinFactoryCallback, THash<TString>>; // builtin factory callbacks keyed by TString
+using TCoreFuncMap = std::unordered_map<TString, TCoreFuncInfo, THash<TString>>; // TCoreFuncInfo entries keyed by TString
+
+// Creates the callback that builds the node for one aggregate function.
+//   functionName          name used in error messages and passed to the result node
+//   factoryName           factory name handed to the aggregation builder
+//   type                  selects the aggregation builder
+//   functionNameOverride  when non-empty, replaces functionName for the builder only
+//   validModes            when non-empty, the only aggregate modes accepted
+// The callback returns a TInvalidBuiltin for a rejected mode, a
+// TBasicAggrFactory when called with isFactory == true, and a TBasicAggrFunc
+// otherwise.
+TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
+    const TString& functionName,
+    const TString& factoryName,
+    EAggrFuncTypeCallback type = NORMAL,
+    const TString& functionNameOverride = TString(),
+    const TVector<EAggregateMode>& validModes = {}) {
+
+    TString realFunctionName = functionNameOverride;
+    if (realFunctionName.empty()) {
+        realFunctionName = functionName;
+    }
+
+    return [functionName, realFunctionName, factoryName, type, validModes] (TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory) -> INode::TPtr {
+        const bool modeRejected = !validModes.empty() && !IsIn(validModes, aggMode);
+        if (modeRejected) {
+            TString errorText;
+            // A function valid only over a window gets a dedicated hint.
+            if (TVector{EAggregateMode::OverWindow} == validModes) {
+                errorText = TStringBuilder()
+                    << "Can't use window function " << functionName << " without window specification (OVER keyword is missing)";
+            } else {
+                errorText = TStringBuilder()
+                    << "Can't use " << functionName << " in " << ToString(aggMode) << " aggregation mode";
+            }
+            return INode::TPtr(new TInvalidBuiltin(pos, errorText));
+        }
+
+        TAggregationPtr aggregation = nullptr;
+        switch (type) {
+        case NORMAL:
+            aggregation = BuildFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case KEY_PAYLOAD:
+            aggregation = BuildKeyPayloadFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case PAYLOAD_PREDICATE:
+            aggregation = BuildPayloadPredicateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case TWO_ARGS:
+            aggregation = BuildTwoArgsFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case COUNT:
+            aggregation = BuildCountAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case HISTOGRAM:
+            aggregation = BuildHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case LINEAR_HISTOGRAM:
+            aggregation = BuildLinearHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case PERCENTILE:
+            aggregation = BuildPercentileFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case TOPFREQ:
+            aggregation = BuildTopFreqFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case TOP:
+            aggregation = BuildTopFactoryAggregation<false>(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case TOP_BY:
+            aggregation = BuildTopFactoryAggregation<true>(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case COUNT_DISTINCT_ESTIMATE:
+            aggregation = BuildCountDistinctEstimateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case LIST:
+            aggregation = BuildListFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case UDAF:
+            aggregation = BuildUserDefinedFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        case PG:
+            aggregation = BuildPGFactoryAggregation(pos, realFunctionName, aggMode);
+            break;
+        case NTH_VALUE:
+            aggregation = BuildNthFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+            break;
+        }
+
+        if (!isFactory) {
+            return new TBasicAggrFunc(pos, functionName, aggregation, args);
+        }
+
+        // skip function name: in factory mode it is the first argument
+        const TVector<TNodePtr> factoryArgs(args.begin() + 1, args.end());
+        return new TBasicAggrFactory(pos, functionName, aggregation, factoryArgs);
+    };
+}
+
+TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback( // Overload taking validModes as the third parameter.
+ const TString& functionName,
+ const TString& factoryName,
+ const TVector<EAggregateMode>& validModes,
+ EAggrFuncTypeCallback type = NORMAL,
+ const TString& functionNameOverride = TString()) {
+ return BuildAggrFuncFactoryCallback(functionName, factoryName, type, functionNameOverride, validModes); // forwards to the primary overload with the arguments reordered
+}
+
+template<typename TType> // TType must be constructible as TType(pos, args)
+TBuiltinFactoryCallback BuildSimpleBuiltinFactoryCallback() { // Returns a callback that creates TType(pos, args).
+ return [] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, args);
+ };
+}
+
+template<typename TType> // TType must be constructible as TType(pos, name, args)
+TBuiltinFactoryCallback BuildNamedBuiltinFactoryCallback(const TString& name) { // Returns a callback that creates TType(pos, name, args).
+ return [name] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { // name is captured by value
+ return new TType(pos, name, args);
+ };
+}
+
+template<typename TType> // TType must be constructible as TType(pos, minArgs, maxArgs, args)
+TBuiltinFactoryCallback BuildArgcBuiltinFactoryCallback(i32 minArgs, i32 maxArgs) { // Returns a callback that creates TType(pos, minArgs, maxArgs, args).
+ return [minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, minArgs, maxArgs, args);
+ };
+}
+
+template<typename TType> // TType must be constructible as TType(pos, name, minArgs, maxArgs, args)
+TBuiltinFactoryCallback BuildNamedArgcBuiltinFactoryCallback(const TString& name, i32 minArgs, i32 maxArgs) { // Returns a callback that creates TType(pos, name, minArgs, maxArgs, args).
+ return [name, minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { // all three values are captured by value
+ return new TType(pos, name, minArgs, maxArgs, args);
+ };
+}
+
+template<typename TType> // TType must be constructible as TType(reqArgsCount, pos, name, minArgs, maxArgs, args)
+TBuiltinFactoryCallback BuildNamedDepsArgcBuiltinFactoryCallback(ui32 reqArgsCount, const TString& name, i32 minArgs, i32 maxArgs) { // Returns a callback that creates TType(reqArgsCount, pos, name, minArgs, maxArgs, args).
+ return [reqArgsCount, name, minArgs, maxArgs](TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { // note: reqArgsCount is passed before pos
+ return new TType(reqArgsCount, pos, name, minArgs, maxArgs, args);
+ };
+}
+
+template<typename TType> // TType must be constructible as TType(pos, args, bool)
+TBuiltinFactoryCallback BuildBoolBuiltinFactoryCallback(bool arg) { // Returns a callback that creates TType(pos, args, arg).
+ return [arg] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr { // the flag follows args in the constructor call
+ return new TType(pos, args, arg);
+ };
+}
+
+template<typename TType> // TType must be constructible as TType(pos, name, "Bool", defaultValue, 1, args)
+TBuiltinFactoryCallback BuildFoldBuiltinFactoryCallback(const TString& name, const TString& defaultValue) { // Returns a callback that creates TType with the given name and default value.
+ return [name, defaultValue] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, name, "Bool", defaultValue, 1, args); // the "Bool" and 1 arguments are fixed here
+ };
+}
+
+// Builds the quoted two-element list (quote (args[0] second)), where `second`
+// is args[1] when present and the list (Null) otherwise.
+// `args` must contain at least one element.
+TNodePtr MakePair(TPosition pos, const TVector<TNodePtr>& args) {
+    TNodePtr second;
+    if (args.size() > 1) {
+        second = args[1];
+    } else {
+        second = new TAstListNodeImpl(pos,{ new TAstAtomNodeImpl(pos, "Null", TNodeFlags::Default) });
+    }
+
+    TNodePtr pair = new TAstListNodeImpl(pos, { args[0], second });
+    TNodePtr quoteAtom = new TAstAtomNodeImpl(pos, "quote", TNodeFlags::Default);
+
+    return new TAstListNodeImpl(pos, { quoteAtom, pair });
+}
+
+struct TBuiltinFuncData {
+ const TBuiltinFactoryCallbackMap BuiltinFuncs;
+ const TAggrFuncFactoryCallbackMap AggrFuncs;
+ const TCoreFuncMap CoreFuncs;
+
+ TBuiltinFuncData():
+ BuiltinFuncs(MakeBuiltinFuncs()),
+ AggrFuncs(MakeAggrFuncs()),
+ CoreFuncs(MakeCoreFuncs())
+ {
+ }
+
+ TBuiltinFactoryCallbackMap MakeBuiltinFuncs() {
+ TBuiltinFactoryCallbackMap builtinFuncs = {
+ // Branching
+ {"if", BuildSimpleBuiltinFactoryCallback<TYqlIf<false>>()},
+ {"ifstrict", BuildSimpleBuiltinFactoryCallback<TYqlIf<true>>() },
+
+ // String builtins
+ {"len", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"length", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"charlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"characterlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"substring", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("Substring")},
+ {"find", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("Find")},
+ {"rfind", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("RFind")},
+ {"byteat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ByteAt", 2, 2) },
+ {"startswith", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StartsWith", 2, 2)},
+ {"endswith", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EndsWith", 2, 2)},
+
+ // Numeric builtins
+ {"abs", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Abs", 1, 1) },
+ {"tobytes", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToBytes", 1, 1) },
+ {"frombytes", BuildSimpleBuiltinFactoryCallback<TFromBytes>() },
+
+ // Compare builtins
+ {"minof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
+ {"maxof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
+ {"greatest", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
+ {"least", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
+ {"in", BuildSimpleBuiltinFactoryCallback<TYqlIn>()},
+
+ // List builtins
+ {"aslist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListMayWarn", 0, -1)},
+ {"asliststrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListStrict", 0, -1) },
+ {"listlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)},
+ {"listhasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)},
+ {"listextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListExtend", 0, -1)},
+ {"listextendstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListExtendStrict", 0, -1)},
+ {"listunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUnionAll", 0, -1) },
+ {"listzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListZip", -1, -1)},
+ {"listzipall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListZipAll", -1, -1)},
+ {"listenumerate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListEnumerate", 1, 3)},
+ {"listreverse", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListReverse", 1, 1)},
+ {"listskip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSkip", 2, 2)},
+ {"listtake", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTake", 2, 2)},
+ {"listhead", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListHead", 1, 1)},
+ {"listlast", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListLast", 1, 1)},
+ {"listsort", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
+ {"listsortasc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
+ {"listsortdesc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(false)},
+ {"listmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(false)},
+ {"listflatmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(true)},
+ {"listfilter", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListFilter")},
+ {"listany", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAny", 1, 1)},
+ {"listall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAll", 1, 1)},
+ {"listhas", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListHas", 2, 2)},
+ {"listmax", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListMax", 1, 1)},
+ {"listmin", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListMin", 1, 1)},
+ {"listsum", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSum", 1, 1)},
+ {"listfold", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFold", 3, 3)},
+ {"listfold1", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFold1", 3, 3)},
+ {"listfoldmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFoldMap", 3, 3)},
+ {"listfold1map", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFold1Map", 3, 3)},
+ {"listavg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAvg", 1, 1)},
+ {"listconcat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListConcat", 1, 2)},
+ {"listextract", BuildSimpleBuiltinFactoryCallback<TListExtractBuiltin>()},
+ {"listuniq", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUniq", 1, 1)},
+ {"listuniqstable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUniqStable", 1, 1)},
+ {"listcreate", BuildSimpleBuiltinFactoryCallback<TListCreateBuiltin>()},
+ {"listfromrange", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFromRange", 2, 3) },
+ {"listreplicate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Replicate", 2, 2) },
+ {"listtakewhile", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListTakeWhile") },
+ {"listskipwhile", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListSkipWhile") },
+ {"listtakewhileinclusive", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListTakeWhileInclusive") },
+ {"listskipwhileinclusive", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListSkipWhileInclusive") },
+ {"listcollect", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCollect", 1, 1) },
+ {"listnotnull", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListNotNull", 1, 1)},
+ {"listflatten", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFlatten", 1, 1)},
+ {"listtop", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTop", 2, 3)},
+ {"listtopasc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopAsc", 2, 3)},
+ {"listtopdesc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopDesc", 2, 3)},
+ {"listtopsort", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSort", 2, 3)},
+ {"listtopsortasc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSortAsc", 2, 3)},
+ {"listtopsortdesc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSortDesc", 2, 3)},
+
+ // Dict builtins
+ {"dictlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)},
+ {"dicthasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)},
+ {"dictcreate", BuildSimpleBuiltinFactoryCallback<TDictCreateBuiltin>()},
+ {"setcreate", BuildSimpleBuiltinFactoryCallback<TSetCreateBuiltin>()},
+ {"asdict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictMayWarn", 0, -1)},
+ {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictStrict", 0, -1)},
+ {"asset", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSetMayWarn", 0, -1)},
+ {"assetstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSetStrict", 0, -1)},
+ {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, false>>("One")},
+ {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, false>>("Many")},
+ {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true, false>>("One")},
+ {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true, false>>("Many")},
+ {"tohasheddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, true>>("One")},
+ {"tohashedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, true>>("Many")},
+ {"dictkeys", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeys", 1, 1) },
+ {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloads", 1, 1) },
+ {"dictitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictItems", 1, 1) },
+ {"dictlookup", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Lookup", 2, 2) },
+ {"dictcontains", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Contains", 2, 2) },
+
+ // Atom builtins
+ {"asatom", BuildSimpleBuiltinFactoryCallback<TYqlAsAtom>()},
+ {"secureparam", BuildNamedBuiltinFactoryCallback<TYqlAtom>("SecureParam")},
+
+ {"void", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Void", 0, 0)},
+ {"emptylist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyList", 0, 0)},
+ {"emptydict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyDict", 0, 0)},
+ {"callable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Callable", 2, 2)},
+ {"way", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Way", 1, 1) },
+ {"variant", BuildSimpleBuiltinFactoryCallback<TYqlVariant>() },
+ {"enum", BuildSimpleBuiltinFactoryCallback<TYqlEnum>() },
+ {"asvariant", BuildSimpleBuiltinFactoryCallback<TYqlAsVariant>() },
+ {"asenum", BuildSimpleBuiltinFactoryCallback<TYqlAsEnum>() },
+ {"astagged", BuildSimpleBuiltinFactoryCallback<TYqlAsTagged>() },
+ {"untag", BuildSimpleBuiltinFactoryCallback<TYqlUntag>() },
+ {"parsetype", BuildSimpleBuiltinFactoryCallback<TYqlParseType>() },
+ {"ensuretype", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<true>>() },
+ {"ensureconvertibleto", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<false>>() },
+ {"ensure", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Ensure", 2, 3) },
+ {"evaluateexpr", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateExpr", 1, 1) },
+ {"evaluateatom", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateAtom", 1, 1) },
+ {"evaluatetype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateType", 1, 1) },
+ {"unwrap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unwrap", 1, 2) },
+ {"just", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Just", 1, 1) },
+ {"nothing", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nothing", 1, 1) },
+ {"formattype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatType", 1, 1) },
+ {"formattypediff", BuildNamedBuiltinFactoryCallback<TFormatTypeDiff<false>>("FormatTypeDiff") },
+ {"formattypediffpretty", BuildNamedBuiltinFactoryCallback<TFormatTypeDiff<true>>("FormatTypeDiffPretty") },
+ {"pgtype", BuildSimpleBuiltinFactoryCallback<TYqlPgType>() },
+ {"pgconst", BuildSimpleBuiltinFactoryCallback<TYqlPgConst>() },
+ {"pgop", BuildSimpleBuiltinFactoryCallback<TYqlPgOp>() },
+ {"pgcall", BuildSimpleBuiltinFactoryCallback<TYqlPgCall<false>>() },
+ {"pgrangecall", BuildSimpleBuiltinFactoryCallback<TYqlPgCall<true>>() },
+ {"pgcast", BuildSimpleBuiltinFactoryCallback<TYqlPgCast>() },
+ {"frompg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromPg", 1, 1) },
+ {"topg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToPg", 1, 1) },
+ {"pgor", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgOr", 2, 2) },
+ {"pgand", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgAnd", 2, 2) },
+ {"pgnot", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgNot", 1, 1) },
+ {"pgarray", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgArray", 1, -1) },
+ {"typeof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeOf", 1, 1) },
+ {"instanceof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("InstanceOf", 1, 1) },
+ {"datatype", BuildSimpleBuiltinFactoryCallback<TYqlDataType>() },
+ {"optionaltype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalType", 1, 1) },
+ {"listtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListType", 1, 1) },
+ {"streamtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamType", 1, 1) },
+ {"dicttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictType", 2, 2) },
+ {"tupletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleType", 0, -1) },
+ {"generictype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("GenericType", 0, 0) },
+ {"unittype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnitType", 0, 0) },
+ {"voidtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidType", 0, 0) },
+ {"resourcetype", BuildSimpleBuiltinFactoryCallback<TYqlResourceType>() },
+ {"taggedtype", BuildSimpleBuiltinFactoryCallback<TYqlTaggedType>() },
+ {"varianttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantType", 1, 1) },
+ {"callabletype", BuildSimpleBuiltinFactoryCallback<TYqlCallableType>() },
+ {"optionalitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalItemType", 1, 1) },
+ {"listitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListItemType", 1, 1) },
+ {"streamitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamItemType", 1, 1) },
+ {"dictkeytype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeyType", 1, 1) },
+ {"dictpayloadtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloadType", 1, 1) },
+ {"tupleelementtype", BuildSimpleBuiltinFactoryCallback<TYqlTupleElementType>() },
+ {"structmembertype", BuildSimpleBuiltinFactoryCallback<TYqlStructMemberType>() },
+ {"callableresulttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableResultType", 1, 1) },
+ {"callableargumenttype", BuildSimpleBuiltinFactoryCallback<TYqlCallableArgumentType>() },
+ {"variantunderlyingtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantUnderlyingType", 1, 1) },
+ {"fromysonsimpletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromYsonSimpleType", 2, 2) },
+ {"currentutcdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDate", 0, -1) },
+ {"currentutcdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDatetime", 0, -1) },
+ {"currentutctimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcTimestamp", 0, -1) },
+ { "currenttzdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzDate", 1, -1) },
+ { "currenttzdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzDatetime", 1, -1) },
+ { "currenttztimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzTimestamp", 1, -1) },
+ {"currentoperationid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationId", 0, 0) },
+ {"currentoperationsharedid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationSharedId", 0, 0) },
+ {"currentauthenticateduser", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentAuthenticatedUser", 0, 0) },
+ {"addtimezone", BuildSimpleBuiltinFactoryCallback<TYqlAddTimezone>() },
+ {"removetimezone", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("RemoveTimezone", 1, 1) },
+ {"pickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Pickle", 1, 1) },
+ {"stablepickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StablePickle", 1, 1) },
+ {"unpickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unpickle", 2, 2) },
+
+ {"typehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeHandle", 1, 1) },
+ {"parsetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ParseTypeHandle", 1, 1) },
+ {"typekind", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeKind", 1, 1) },
+ {"datatypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeComponents", 1, 1) },
+ {"datatypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeHandle", 1, 1) },
+ {"optionaltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalTypeHandle", 1, 1) },
+ {"listtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTypeHandle", 1, 1) },
+ {"streamtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamTypeHandle", 1, 1) },
+ {"tupletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeComponents", 1, 1) },
+ {"tupletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeHandle", 1, 1) },
+ {"structtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeComponents", 1, 1) },
+ {"structtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeHandle", 1, 1) },
+ {"dicttypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeComponents", 1, 1) },
+ {"dicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeHandle", 2, 2) },
+ {"resourcetypetag", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeTag", 1, 1) },
+ {"resourcetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeHandle", 1, 1) },
+ {"taggedtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeComponents", 1, 1) },
+ {"taggedtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeHandle", 2, 2) },
+ {"varianttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantTypeHandle", 1, 1) },
+ {"voidtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidTypeHandle", 0, 0) },
+ {"nulltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("NullTypeHandle", 0, 0) },
+ {"emptylisttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyListTypeHandle", 0, 0) },
+ {"emptydicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyDictTypeHandle", 0, 0) },
+ {"callabletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeComponents", 1, 1) },
+ {"callableargument", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableArgument", 1, 3) },
+ {"callabletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeHandle", 2, 4) },
+ {"pgtypename", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgTypeName", 1, 1) },
+ {"pgtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("PgTypeHandle", 1, 1) },
+ {"formatcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatCode", 1, 1) },
+ {"worldcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("WorldCode", 0, 0) },
+ {"atomcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AtomCode", 1, 1) },
+ {"listcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCode", 0, -1) },
+ {"funccode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FuncCode", 1, -1) },
+ {"lambdacode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaCode", 1, 2) },
+ {"evaluatecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateCode", 1, 1) },
+ {"reprcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ReprCode", 1, 1) },
+ {"quotecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("QuoteCode", 1, 1) },
+ {"lambdaargumentscount", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaArgumentsCount", 1, 1) },
+ {"lambdaoptionalargumentscount", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaOptionalArgumentsCount", 1, 1) },
+ {"subqueryextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryExtend", 1, -1) },
+ {"subqueryunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryUnionAll", 1, -1) },
+ {"subquerymerge", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryMerge", 1, -1) },
+ {"subqueryunionmerge", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryUnionMerge", 1, -1) },
+ {"subqueryextendfor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryExtendFor>>() },
+ {"subqueryunionallfor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryUnionAllFor>>() },
+ {"subquerymergefor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryMergeFor>>() },
+ {"subqueryunionmergefor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryUnionMergeFor>>() },
+ {"subqueryorderby", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryOrderBy<SubqueryOrderBy>>() },
+ {"subqueryassumeorderby", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryOrderBy<SubqueryAssumeOrderBy>>() },
+
+ // Tuple builtins
+ {"astuple", BuildSimpleBuiltinFactoryCallback<TTupleNode>()},
+
+ // Struct builtins
+ {"trymember", BuildNamedBuiltinFactoryCallback<TTryMember>("TryMember")},
+ {"addmember", BuildNamedBuiltinFactoryCallback<TAddMember>("AddMember")},
+ {"replacemember", BuildNamedBuiltinFactoryCallback<TAddMember>("ReplaceMember")},
+ {"removemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("RemoveMember")},
+ {"forceremovemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("ForceRemoveMember")},
+ {"combinemembers", BuildNamedBuiltinFactoryCallback<TCombineMembers>("FlattenMembers")},
+ {"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")},
+ {"staticmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticMap", 2, 2) },
+ {"staticzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticZip", 1, -1) },
+ {"structunion", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructUnion", 2, 3)},
+ {"structintersection", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructIntersection", 2, 3)},
+ {"structdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructDifference", 2, 2)},
+ {"structsymmetricdifference", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructSymmetricDifference", 2, 2)},
+ {"staticfold", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold", 3, 3)},
+ {"staticfold1", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticFold1", 3, 3)},
+
+ // File builtins
+ {"filepath", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("FilePath")},
+ {"filecontent", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("FileContent")},
+ {"folderpath", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("FolderPath") },
+ {"files", BuildNamedBuiltinFactoryCallback<TFileYqlAtom>("Files")},
+ {"parsefile", BuildSimpleBuiltinFactoryCallback<TYqlParseFileOp>()},
+
+ // Misc builtins
+ {"coalesce", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1)},
+ {"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) },
+ {"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) },
+ {"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)},
+ {"assumestrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AssumeStrict", 1, 1)},
+ {"assumenonstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AssumeNonStrict", 1, 1)},
+ {"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)},
+ {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)},
+ {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) },
+ {"tablepath", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TablePath") },
+ {"tablerecordindex", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") },
+ {"tablerow", BuildSimpleBuiltinFactoryCallback<TTableRow<false>>() },
+ {"jointablerow", BuildSimpleBuiltinFactoryCallback<TTableRow<true>>() },
+ {"tablerows", BuildSimpleBuiltinFactoryCallback<TTableRows>() },
+ {"weakfield", BuildSimpleBuiltinFactoryCallback<TWeakFieldOp>()},
+ {"version", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Version", 0, 0)},
+
+ {"systemmetadata", BuildNamedArgcBuiltinFactoryCallback<TCallDirectRow>("SystemMetadata", 1, -1)},
+
+ // Hint builtins
+ {"grouping", BuildSimpleBuiltinFactoryCallback<TGroupingNode>()},
+
+ // Window functions
+ {"rownumber", BuildNamedArgcBuiltinFactoryCallback<TWinRowNumber>("RowNumber", 0, 0)},
+ {"rank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("Rank", 0, 1)},
+ {"denserank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("DenseRank", 0, 1)},
+ {"lead", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lead", 1, 2)},
+ {"lag", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lag", 1, 2)},
+ {"percentrank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("PercentRank", 0, 1)},
+ {"cumedist", BuildNamedArgcBuiltinFactoryCallback<TWinCumeDist>("CumeDist", 0, 0)},
+ {"ntile", BuildNamedArgcBuiltinFactoryCallback<TWinNTile>("NTile", 1, 1)},
+
+ // Session window
+ {"sessionwindow", BuildSimpleBuiltinFactoryCallback<TSessionWindow>()},
+ {"sessionstart", BuildSimpleBuiltinFactoryCallback<TSessionStart<true>>()},
+ {"sessionstate", BuildSimpleBuiltinFactoryCallback<TSessionStart<false>>()},
+
+ // New hopping
+ {"hoppingwindow", BuildSimpleBuiltinFactoryCallback<THoppingWindow>()},
+
+ // Hopping intervals time functions
+ {"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()},
+ {"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()},
+
+ // MatchRecognize navigation functions
+ {"first", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("FIRST")},
+ {"last", BuildNamedBuiltinFactoryCallback<TMatchRecognizeNavigate>("LAST")},
+ };
+ return builtinFuncs;
+ }
+
+ TAggrFuncFactoryCallbackMap MakeAggrFuncs() { // Builds and returns the table that maps aggregate-function names to their factory callbacks.
+ constexpr auto OverWindow = EAggregateMode::OverWindow; // shorthand for the window-function entries at the end of the table
+ // Keys are written in lower case; several keys may share the same canonical name and traits factory (aliases).
+ TAggrFuncFactoryCallbackMap aggrFuncs = {
+ {"min", BuildAggrFuncFactoryCallback("Min", "min_traits_factory")},
+ {"max", BuildAggrFuncFactoryCallback("Max", "max_traits_factory")},
+ // MinBy / MaxBy are registered with the KEY_PAYLOAD kind
+ {"minby", BuildAggrFuncFactoryCallback("MinBy", "min_by_traits_factory", KEY_PAYLOAD)},
+ {"maxby", BuildAggrFuncFactoryCallback("MaxBy", "max_by_traits_factory", KEY_PAYLOAD)},
+ // Sums; the *If variant is registered with the PAYLOAD_PREDICATE kind
+ {"sum", BuildAggrFuncFactoryCallback("Sum", "sum_traits_factory")},
+ {"sumif", BuildAggrFuncFactoryCallback("SumIf", "sum_if_traits_factory", PAYLOAD_PREDICATE) },
+ // Checked sums (note: unlike the other keys, these contain an underscore)
+ {"checked_sum", BuildAggrFuncFactoryCallback("CheckedSum", "checked_sum_traits_factory")},
+ {"checked_sumif", BuildAggrFuncFactoryCallback("CheckedSumIf", "checked_sum_if_traits_factory", PAYLOAD_PREDICATE) },
+ // Some / SomeValue share some_traits_factory
+ {"some", BuildAggrFuncFactoryCallback("Some", "some_traits_factory")},
+ {"somevalue", BuildAggrFuncFactoryCallback("SomeValue", "some_traits_factory")},
+ // Counting; only "count" passes an explicit kind (COUNT)
+ {"count", BuildAggrFuncFactoryCallback("Count", "count_traits_factory", COUNT)},
+ {"countif", BuildAggrFuncFactoryCallback("CountIf", "count_if_traits_factory")},
+ // Boolean aggregates; "every" and "booland" share and_traits_factory
+ {"every", BuildAggrFuncFactoryCallback("Every", "and_traits_factory")},
+ {"booland", BuildAggrFuncFactoryCallback("BoolAnd", "and_traits_factory")},
+ {"boolor", BuildAggrFuncFactoryCallback("BoolOr", "or_traits_factory")},
+ {"boolxor", BuildAggrFuncFactoryCallback("BoolXor", "xor_traits_factory")},
+ // Bitwise aggregates
+ {"bitand", BuildAggrFuncFactoryCallback("BitAnd", "bit_and_traits_factory")},
+ {"bitor", BuildAggrFuncFactoryCallback("BitOr", "bit_or_traits_factory")},
+ {"bitxor", BuildAggrFuncFactoryCallback("BitXor", "bit_xor_traits_factory")},
+ // Averages
+ {"avg", BuildAggrFuncFactoryCallback("Avg", "avg_traits_factory")},
+ {"avgif", BuildAggrFuncFactoryCallback("AvgIf", "avg_if_traits_factory", PAYLOAD_PREDICATE) },
+ // List aggregation: three spellings each for AggregateList and AggregateListDistinct
+ {"agglist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
+ {"aggrlist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
+ {"aggregatelist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
+ {"agglistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
+ {"aggrlistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
+ {"aggregatelistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
+ // Median and Percentile share percentile_traits_factory
+ {"median", BuildAggrFuncFactoryCallback("Median", "percentile_traits_factory", PERCENTILE)},
+ {"percentile", BuildAggrFuncFactoryCallback("Percentile", "percentile_traits_factory", PERCENTILE)},
+ // Mode and TopFreq share topfreq_traits_factory
+ {"mode", BuildAggrFuncFactoryCallback("Mode", "topfreq_traits_factory", TOPFREQ) },
+ {"topfreq", BuildAggrFuncFactoryCallback("TopFreq", "topfreq_traits_factory", TOPFREQ) },
+ // Top / Bottom and their *By variants
+ {"top", BuildAggrFuncFactoryCallback("Top", "top_traits_factory", TOP)},
+ {"bottom", BuildAggrFuncFactoryCallback("Bottom", "bottom_traits_factory", TOP)},
+ {"topby", BuildAggrFuncFactoryCallback("TopBy", "top_by_traits_factory", TOP_BY)},
+ {"bottomby", BuildAggrFuncFactoryCallback("BottomBy", "bottom_by_traits_factory", TOP_BY)},
+ // Histograms; "histogram*" maps to the AdaptiveWard variants and "loghistogram*" to the Logarithmic ones (these short names pass an extra trailing name argument)
+ {"histogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM, "Histogram")},
+ {"histogramcdf", BuildAggrFuncFactoryCallback("AdaptiveWardHistogramCDF", "histogram_cdf_adaptive_ward_traits_factory", HISTOGRAM, "HistogramCDF")},
+ {"adaptivewardhistogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM)},
+ {"adaptivewardhistogramcdf", BuildAggrFuncFactoryCallback("AdaptiveWardHistogramCDF", "histogram_cdf_adaptive_ward_traits_factory", HISTOGRAM)},
+ {"adaptiveweighthistogram", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogram", "histogram_adaptive_weight_traits_factory", HISTOGRAM)},
+ {"adaptiveweighthistogramcdf", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogramCDF", "histogram_cdf_adaptive_weight_traits_factory", HISTOGRAM)},
+ {"adaptivedistancehistogram", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogram", "histogram_adaptive_distance_traits_factory", HISTOGRAM)},
+ {"adaptivedistancehistogramcdf", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogramCDF", "histogram_cdf_adaptive_distance_traits_factory", HISTOGRAM)},
+ {"blockwardhistogram", BuildAggrFuncFactoryCallback("BlockWardHistogram", "histogram_block_ward_traits_factory", HISTOGRAM)},
+ {"blockwardhistogramcdf", BuildAggrFuncFactoryCallback("BlockWardHistogramCDF", "histogram_cdf_block_ward_traits_factory", HISTOGRAM)},
+ {"blockweighthistogram", BuildAggrFuncFactoryCallback("BlockWeightHistogram", "histogram_block_weight_traits_factory", HISTOGRAM)},
+ {"blockweighthistogramcdf", BuildAggrFuncFactoryCallback("BlockWeightHistogramCDF", "histogram_cdf_block_weight_traits_factory", HISTOGRAM)},
+ {"linearhistogram", BuildAggrFuncFactoryCallback("LinearHistogram", "histogram_linear_traits_factory", LINEAR_HISTOGRAM)},
+ {"linearhistogramcdf", BuildAggrFuncFactoryCallback("LinearHistogramCDF", "histogram_cdf_linear_traits_factory", LINEAR_HISTOGRAM)},
+ {"logarithmichistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM)},
+ {"logarithmichistogramcdf", BuildAggrFuncFactoryCallback("LogarithmicHistogramCDF", "histogram_cdf_logarithmic_traits_factory", LINEAR_HISTOGRAM)},
+ {"loghistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogram")},
+ {"loghistogramcdf", BuildAggrFuncFactoryCallback("LogarithmicHistogramCDF", "histogram_cdf_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogramCDF")},
+ // HyperLogLog registered under three names
+ {"hyperloglog", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE)},
+ {"hll", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "HLL")},
+ {"countdistinctestimate", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "CountDistinctEstimate")},
+ // Variance and standard deviation; plain "variance"/"stddev" use the same traits factories as the *Sample entries
+ {"variance", BuildAggrFuncFactoryCallback("Variance", "variance_0_1_traits_factory")},
+ {"stddev", BuildAggrFuncFactoryCallback("StdDev", "variance_1_1_traits_factory")},
+ {"populationvariance", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"variancepopulation", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"populationstddev", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"stddevpopulation", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"varpop", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"stddevpop", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"varp", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"stddevp", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"variancesample", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
+ {"stddevsample", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
+ {"varsamp", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
+ {"stddevsamp", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
+ {"vars", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
+ {"stddevs", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
+ // Correlation and covariance (TWO_ARGS kind); "covariance"/"covar" map to CovarianceSample
+ {"correlation", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS)},
+ {"corr", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS, "Corr")},
+ {"covariance", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covariance")},
+ {"covariancesample", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS)},
+ {"covarsamp", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarSamp")},
+ {"covar", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covar")},
+ {"covars", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarS")},
+ {"covariancepopulation", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS)},
+ {"covarpop", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarPop")},
+ {"covarp", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarP")},
+ // UDAF entry (presumably "user-defined aggregation function" — confirm against the UDAF kind's implementation)
+ {"udaf", BuildAggrFuncFactoryCallback("UDAF", "udaf_traits_factory", UDAF)},
+
+ // Window functions
+ {"firstvalue", BuildAggrFuncFactoryCallback("FirstValue", "first_value_traits_factory", {OverWindow})},
+ {"lastvalue", BuildAggrFuncFactoryCallback("LastValue", "last_value_traits_factory", {OverWindow})},
+ {"nthvalue", BuildAggrFuncFactoryCallback("NthValue", "nth_value_traits_factory", {OverWindow}, NTH_VALUE)},
+ {"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})},
+ {"lastvalueignorenulls", BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})},
+ {"nthvalueignorenulls", BuildAggrFuncFactoryCallback("NthValueIgnoreNulls", "nth_value_ignore_nulls_traits_factory", {OverWindow}, NTH_VALUE)},
+ };
+ return aggrFuncs;
+ }
+
+ // Builds the table of functions that BuildBuiltinFunc redirects to the "core" namespace.
+ // Each key is a lower-case name; each value lists the target function name followed by
+ // two argument-count bounds (read by the caller as Name / MinArgs / MaxArgs).
+ TCoreFuncMap MakeCoreFuncs() {
+ return {
+ // List lookup
+ {"listindexof", {"IndexOf", 2, 2}},
+ // Bit manipulation
+ {"testbit", {"TestBit", 2, 2}},
+ {"setbit", {"SetBit", 2, 2}},
+ {"clearbit", {"ClearBit", 2, 2}},
+ {"flipbit", {"FlipBit", 2, 2}},
+ // Set operations
+ {"toset", {"ToSet", 1, 1}},
+ {"setisdisjoint", {"SetIsDisjoint", 2, 2}},
+ {"setintersection", {"SetIntersection", 2, 3}},
+ {"setincludes", {"SetIncludes", 2, 2}},
+ {"setunion", {"SetUnion", 2, 3}},
+ {"setdifference", {"SetDifference", 2, 2}},
+ {"setsymmetricdifference", {"SetSymmetricDifference", 2, 3}},
+ // Aggregation helpers
+ {"listaggregate", {"ListAggregate", 2, 2}},
+ {"dictaggregate", {"DictAggregate", 2, 2}},
+ {"aggregatetransforminput", {"AggregateTransformInput", 2, 2}},
+ {"aggregatetransformoutput", {"AggregateTransformOutput", 2, 2}},
+ {"aggregateflatten", {"AggregateFlatten", 1, 1}},
+ // Struct member manipulation
+ {"choosemembers", {"ChooseMembers", 2, 2}},
+ {"removemembers", {"RemoveMembers", 2, 2}},
+ {"forceremovemembers", {"ForceRemoveMembers", 2, 2}},
+ {"structmembers", {"StructMembers", 1, 1}},
+ {"gathermembers", {"GatherMembers", 1, 1}},
+ {"renamemembers", {"RenameMembers", 2, 2}},
+ {"forcerenamemembers", {"ForceRenameMembers", 2, 2}},
+ {"spreadmembers", {"SpreadMembers", 2, 2}},
+ {"forcespreadmembers", {"ForceSpreadMembers", 2, 2}},
+ // List <-> tuple conversion
+ {"listfromtuple", {"ListFromTuple", 1, 1}},
+ {"listtotuple", {"ListToTuple", 2, 2}},
+ };
+ }
+};
+
+// Resolves a SQL call `originalNameSpace::name(args)` into an AST node.
+//
+// Dispatch order implemented below:
+//   1. empty namespace + name found in the core-function table -> namespace becomes "core";
+//   2. YQL:: s-expression calls;
+//   3. namespaces listed in ctx.Settings.ModuleMapping (bind + Apply);
+//   4. regex UDF modules (hyperscan / pcre / pire / re2*);
+//   5. DateTime::Parse, Pg::, MakeLibraPreprocessor, script UDF namespaces;
+//   6. namespace-less builtins: type constructors, aggregation functions, builtin callbacks, struct helpers;
+//   7. everything else: a generic UDF call assembled via BuildSqlCall.
+//
+// Returns nullptr if any element of args is null. Most user errors are reported by returning a
+// TInvalidBuiltin node rather than by failing here.
+// mustUseNamed (may be null): when it points to true, args is treated as {positional tuple, named struct};
+// the branches that consume the named form reset the flag to false.
+TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVector<TNodePtr>& args,
+ const TString& originalNameSpace, EAggregateMode aggMode, bool* mustUseNamed, bool warnOnYqlNameSpace) {
+
+ const TBuiltinFuncData* funcData = Singleton<TBuiltinFuncData>();
+ const TBuiltinFactoryCallbackMap& builtinFuncs = funcData->BuiltinFuncs;
+ const TAggrFuncFactoryCallbackMap& aggrFuncs = funcData->AggrFuncs;
+ const TCoreFuncMap& coreFuncs = funcData->CoreFuncs;
+
+ // A null argument makes the whole call resolve to nullptr.
+ for (auto& arg: args) {
+ if (!arg) {
+ return nullptr;
+ }
+ }
+
+ TString normalizedName(name);
+ TString nameSpace(originalNameSpace);
+ TString ns = to_lower(nameSpace);
+ if (ns.empty()) {
+ TMaybe<TIssue> error = NormalizeName(pos, normalizedName);
+ if (!error.Empty()) {
+ return new TInvalidBuiltin(pos, error->GetMessage());
+ }
+
+ auto coreFunc = coreFuncs.find(normalizedName);
+ if (coreFunc != coreFuncs.end()) {
+ ns = "core";
+ name = coreFunc->second.Name;
+ if (args.size() < coreFunc->second.MinArgs || args.size() > coreFunc->second.MaxArgs) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << name << " expected from "
+ << coreFunc->second.MinArgs << " to " << coreFunc->second.MaxArgs << " arguments, but got: " << args.size());
+ }
+
+ // Variable-arity core functions get the actual argument count appended to the exported name.
+ if (coreFunc->second.MinArgs != coreFunc->second.MaxArgs) {
+ name += ToString(args.size());
+ }
+ }
+ }
+
+ TString moduleResource;
+ if (ctx.Settings.ModuleMapping.contains(ns)) {
+ moduleResource = ctx.Settings.ModuleMapping.at(ns);
+ }
+
+ if (ns == "js") {
+ ns = "javascript";
+ nameSpace = "JavaScript";
+ }
+
+ if (ns == "datetime2") {
+ ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_DATETIME2) << "DateTime2:: is a temporary alias for DateTime:: which will be removed in the future, use DateTime:: instead";
+ }
+
+ // DateTime:: is routed to the DateTime2 module; this runs after the deprecation check above,
+ // so only an explicit DateTime2:: spelling triggers the warning.
+ if (ns == "datetime") {
+ ns = "datetime2";
+ nameSpace = "DateTime2";
+ }
+
+ // Remap the script type derived from the namespace before it is used for script UDF creation below.
+ auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(ns);
+ switch (scriptType) {
+ case NKikimr::NMiniKQL::EScriptType::Python:
+ case NKikimr::NMiniKQL::EScriptType::Python3:
+ case NKikimr::NMiniKQL::EScriptType::ArcPython3:
+ scriptType = NKikimr::NMiniKQL::EScriptType::Python3;
+ break;
+ case NKikimr::NMiniKQL::EScriptType::Python2:
+ scriptType = NKikimr::NMiniKQL::EScriptType::ArcPython2;
+ break;
+ case NKikimr::NMiniKQL::EScriptType::SystemPython2:
+ scriptType = NKikimr::NMiniKQL::EScriptType::Python2;
+ break;
+ default:
+ break;
+ }
+
+ if (ns == "yql" || ns == "@yql") {
+ if (warnOnYqlNameSpace && GetEnv("YQL_DETERMINISTIC_MODE").empty()) {
+ ctx.Warning(pos, TIssuesIds::YQL_S_EXPRESSIONS_CALL)
+ << "It is not recommended to directly access s-expressions functions via YQL::" << Endl
+ << "This mechanism is mostly intended for temporary workarounds or internal testing purposes";
+ }
+
+ // Only "yql" returns here; "@yql" falls through to the generic UDF path at the end of the function.
+ if (ns == "yql") {
+ return new TCallNodeImpl(pos, name, -1, -1, args);
+ }
+ } else if (moduleResource) {
+ // Module-mapped namespace: bind the export from "<ns>_module" and Apply it to the arguments.
+ auto exportName = ns == "core" ? name : "$" + name;
+ TVector<TNodePtr> applyArgs = {
+ new TCallNodeImpl(pos, "bind", {
+ BuildAtom(pos, ns + "_module", 0), BuildQuotedAtom(pos, exportName)
+ })
+ };
+ applyArgs.insert(applyArgs.end(), args.begin(), args.end());
+ return new TCallNodeImpl(pos, "Apply", applyArgs);
+ } else if (ns == "hyperscan" || ns == "pcre" || ns == "pire" || ns.StartsWith("re2")) {
+ TString moduleName(nameSpace);
+ moduleName.to_title();
+ if ((args.size() == 1 || args.size() == 2) && (name.StartsWith("Multi") || (ns.StartsWith("re2") && name == "Capture"))) {
+ TVector<TNodePtr> multiArgs{
+ ns.StartsWith("re2") && name == "Capture" ? MakePair(pos, args) : args[0],
+ new TCallNodeImpl(pos, "Void", 0, 0, {}),
+ args[0]
+ };
+ auto fullName = moduleName + "." + name;
+ return new TYqlTypeConfigUdf(pos, fullName, multiArgs, multiArgs.size() + 1);
+ } else if (!(ns.StartsWith("re2") && name == "Options")) {
+ auto newArgs = args;
+ if (ns.StartsWith("re2")) {
+ // convert run config is tuple of string and optional options
+ if (args.size() == 1 || args.size() == 2) {
+ newArgs[0] = MakePair(pos, args);
+ if (args.size() == 2) {
+ newArgs.pop_back();
+ }
+ } else {
+ return new TInvalidBuiltin(pos, TStringBuilder() << ns << "." << name << " expected one or two arguments.");
+ }
+ }
+
+ return BuildUdf(ctx, pos, moduleName, name, newArgs);
+ }
+ // Re2*::Options is not handled in this branch and continues to the generic UDF path below.
+ } else if (ns == "datetime2" && (name == "Parse")) {
+ return BuildUdf(ctx, pos, nameSpace, name, args);
+ } else if (ns == "pg") {
+ const bool isAggregateFunc = NYql::NPg::HasAggregation(name, NYql::NPg::EAggKind::Normal);
+ if (isAggregateFunc) {
+ if (aggMode == EAggregateMode::Distinct) {
+ return new TInvalidBuiltin(pos, "Distinct is not supported yet for PG aggregation ");
+ }
+
+ return BuildAggrFuncFactoryCallback(name, "", EAggrFuncTypeCallback::PG)(pos, args, aggMode, false);
+ } else {
+ TVector<TNodePtr> pgCallArgs;
+ pgCallArgs.push_back(BuildLiteralRawString(pos, name));
+ pgCallArgs.insert(pgCallArgs.end(), args.begin(), args.end());
+ return new TYqlPgCall<false>(pos, pgCallArgs);
+ }
+ } else if (name == "MakeLibraPreprocessor") {
+ if (args.size() != 1) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << name << " requires exactly one argument");
+ }
+
+ // Settings collected from the struct argument; serialized to YSON as the third UDF argument.
+ auto settings = NYT::TNode::CreateMap();
+
+ auto makeUdfArgs = [&args, &pos, &settings]() {
+ return TVector<TNodePtr> {
+ args[0],
+ new TCallNodeImpl(pos, "Void", {}),
+ BuildQuotedAtom(pos, NYT::NodeToYsonString(settings))
+ };
+ };
+
+ auto structNode = args[0]->GetStructNode();
+ if (!structNode) {
+ // An explicit AsStruct(...) call is accepted as is, with empty settings.
+ if (auto callNode = args[0]->GetCallNode()) {
+ if (callNode->GetOpName() == "AsStruct") {
+ return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs());
+ }
+ }
+
+ return new TInvalidBuiltin(pos, TStringBuilder() << name << " requires struct as argument");
+ }
+
+ for (const auto& item : structNode->GetExprs()) {
+ const auto& label = item->GetLabel();
+ if (label == "Entities") {
+ auto callNode = item->GetCallNode();
+ if (!callNode || callNode->GetOpName() != "AsListMayWarn") {
+ return new TInvalidBuiltin(pos, TStringBuilder() << name << " entities must be list of strings");
+ }
+
+ auto entities = NYT::TNode::CreateList();
+ for (const auto& entity : callNode->GetArgs()) {
+ if (!entity->IsLiteral() || entity->GetLiteralType() != "String") {
+ return new TInvalidBuiltin(pos, TStringBuilder() << name << " entity must be string literal");
+ }
+ entities.Add(entity->GetLiteralValue());
+ }
+
+ settings(label, std::move(entities));
+ } else if (label == "EntitiesStrategy") {
+ if (!item->IsLiteral() || item->GetLiteralType() != "String") {
+ return new TInvalidBuiltin(
+ pos, TStringBuilder() << name << " entities strategy must be string literal"
+ );
+ }
+
+ if (!EqualToOneOf(item->GetLiteralValue(), "whitelist", "blacklist")) {
+ return new TInvalidBuiltin(
+ pos,
+ TStringBuilder() << name << " got invalid entities strategy: expected 'whitelist' or 'blacklist'"
+ );
+ }
+
+ settings(label, item->GetLiteralValue());
+ } else if (label == "Mode") {
+ if (!item->IsLiteral() || item->GetLiteralType() != "String") {
+ return new TInvalidBuiltin(
+ pos, TStringBuilder() << name << " mode must be string literal"
+ );
+ }
+
+ settings(label, item->GetLiteralValue());
+ } else if (EqualToOneOf(label, "BlockstatDict", "ParseWithFat")) {
+ // Accepted but not copied into the settings map.
+ continue;
+ } else {
+ // NOTE(review): "Mode" is accepted above but is missing from the "supported" list in this message.
+ return new TInvalidBuiltin(
+ pos,
+ TStringBuilder()
+ << name << " got unsupported setting: " << label
+ << "; supported: Entities, EntitiesStrategy, BlockstatDict, ParseWithFat" );
+ }
+ }
+
+ return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs());
+ } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) {
+ auto scriptName = NKikimr::NMiniKQL::IsCustomPython(scriptType) ? nameSpace : TString(NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType));
+ return new TScriptUdf(pos, scriptName, name, args);
+ } else if (ns.empty()) {
+ // Namespace-less call: first try to interpret the name as a type constructor.
+ if (auto simpleType = LookupSimpleType(normalizedName, ctx.FlexibleTypes, /* isPgType = */ false)) {
+ const auto type = *simpleType;
+ if (NUdf::FindDataSlot(type)) {
+ YQL_ENSURE(type != "Decimal");
+ return new TYqlData(pos, type, args);
+ }
+
+ if (type.StartsWith("pg") || type.StartsWith("_pg")) {
+ TVector<TNodePtr> pgConstArgs;
+ if (!args.empty()) {
+ // Insert a PgType node right after the first argument; the "pg" / "_pg" prefix is
+ // stripped from the type name, keeping the leading underscore in the "_pg" case.
+ pgConstArgs.push_back(args.front());
+ pgConstArgs.push_back(new TCallNodeImpl(pos, "PgType", { BuildQuotedAtom(pos,
+ TString(type.StartsWith("pg") ? "" : "_") + type.substr(type.StartsWith("pg") ? 2 : 3), TNodeFlags::Default) }));
+ pgConstArgs.insert(pgConstArgs.end(), args.begin() + 1, args.end());
+ }
+ return new TYqlPgConst(pos, pgConstArgs);
+ } else if (type == "Void" || type == "EmptyList" || type == "EmptyDict") {
+ return new TCallNodeImpl(pos, type, 0, 0, args);
+ } else {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "Can not create objects of type " << type);
+ }
+ }
+
+ if (normalizedName == "decimal") {
+ // Two arguments build a Decimal DataType node; any other count builds a Decimal data value.
+ if (args.size() == 2) {
+ TVector<TNodePtr> dataTypeArgs = { BuildQuotedAtom(pos, "Decimal", TNodeFlags::Default) };
+ for (auto& arg : args) {
+ if (auto literal = arg->GetLiteral("Int32")) {
+ dataTypeArgs.push_back(BuildQuotedAtom(pos, *literal, TNodeFlags::Default));
+ } else {
+ dataTypeArgs.push_back(MakeAtomFromExpression(ctx.Pos(), ctx, arg).Build());
+ }
+ }
+ return new TCallNodeImpl(pos, "DataType", dataTypeArgs);
+ }
+ return new TYqlData(pos, "Decimal", args);
+ }
+
+ if (normalizedName == "tablename") {
+ return new TTableName(pos, args, ctx.Scoped->CurrService);
+ }
+
+ if (normalizedName == "aggregationfactory") {
+ // NOTE(review): "requries" in the messages of this function is a typo in user-visible text;
+ // confirm no tests match on the exact wording before correcting it.
+ if (args.size() < 1 || !args[0]->GetLiteral("String")) {
+ return new TInvalidBuiltin(pos, "AGGREGATION_FACTORY requries a function name");
+ }
+
+ auto aggNormalizedName = *args[0]->GetLiteral("String");
+ auto error = NormalizeName(pos, aggNormalizedName);
+ if (!error.Empty()) {
+ return new TInvalidBuiltin(pos, error->GetMessage());
+ }
+
+ if (aggNormalizedName == "aggregateby") {
+ return new TInvalidBuiltin(pos, "AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
+ }
+
+ if (aggNormalizedName == "multiaggregateby") {
+ return new TInvalidBuiltin(pos, "MULTI_AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
+ }
+
+ if (aggMode == EAggregateMode::Distinct || aggMode == EAggregateMode::OverWindowDistinct) {
+ return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions");
+ }
+
+ if (to_lower(*args[0]->GetLiteral("String")).StartsWith("pg::")) {
+ // This local `name` shadows the function parameter; it holds the part after the "pg::" prefix.
+ auto name = args[0]->GetLiteral("String")->substr(4);
+ const bool isAggregateFunc = NYql::NPg::HasAggregation(name, NYql::NPg::EAggKind::Normal);
+ if (!isAggregateFunc) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String"));
+ }
+
+ return BuildAggrFuncFactoryCallback(name, "", EAggrFuncTypeCallback::PG)(pos, args, aggMode, true);
+ }
+
+ AdjustCheckedAggFuncName(aggNormalizedName, ctx);
+
+ auto aggrCallback = aggrFuncs.find(aggNormalizedName);
+ if (aggrCallback == aggrFuncs.end()) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String"));
+ }
+
+ return (*aggrCallback).second(pos, args, aggMode, true).Release();
+ }
+
+ if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") {
+ const bool multi = (normalizedName == "multiaggregateby");
+ if (args.size() != 2) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << (multi ? "MULTI_AGGREGATE_BY" : "AGGREGATE_BY") << " requries two arguments");
+ }
+
+ auto name = multi ? "MultiAggregateBy" : "AggregateBy";
+ auto aggr = BuildFactoryAggregation(pos, name, "", aggMode, multi);
+ return new TBasicAggrFunc(pos, name, aggr, args);
+ }
+
+ AdjustCheckedAggFuncName(normalizedName, ctx);
+
+ // Aggregation functions take precedence over plain builtins with the same normalized name.
+ auto aggrCallback = aggrFuncs.find(normalizedName);
+ if (aggrCallback != aggrFuncs.end()) {
+ return (*aggrCallback).second(pos, args, aggMode, false).Release();
+ }
+ if (aggMode == EAggregateMode::Distinct || aggMode == EAggregateMode::OverWindowDistinct) {
+ return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions");
+ }
+
+ auto builtinCallback = builtinFuncs.find(normalizedName);
+ if (builtinCallback != builtinFuncs.end()) {
+ return (*builtinCallback).second(pos, args);
+ } else if (normalizedName == "udf") {
+ if (mustUseNamed && *mustUseNamed) {
+ *mustUseNamed = false;
+ }
+ return new TUdfNode(pos, args);
+ } else if (normalizedName == "asstruct" || normalizedName == "structtype") {
+ if (args.empty()) {
+ return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args);
+ }
+
+ // Only the fully-named form is accepted: the positional tuple (args[0]) must be empty.
+ if (mustUseNamed && *mustUseNamed) {
+ *mustUseNamed = false;
+ YQL_ENSURE(args.size() == 2);
+ Y_DEBUG_ABORT_UNLESS(args[0]->GetTupleNode());
+ auto posArgs = args[0]->GetTupleNode();
+ if (posArgs->IsEmpty()) {
+ if (normalizedName == "asstruct") {
+ return args[1];
+ } else {
+ Y_DEBUG_ABORT_UNLESS(args[1]->GetStructNode());
+ auto namedArgs = args[1]->GetStructNode();
+ return new TStructTypeNode(pos, namedArgs->GetExprs());
+ }
+ }
+ }
+ return new TInvalidBuiltin(pos, TStringBuilder() <<
+ (normalizedName == "asstruct" ? "AsStruct" : "StructType") <<
+ " requires all argument to be named");
+ } else if (normalizedName == "expandstruct") {
+ if (mustUseNamed) {
+ if (!*mustUseNamed) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument");
+ }
+ *mustUseNamed = false;
+ }
+ YQL_ENSURE(args.size() == 2);
+ Y_DEBUG_ABORT_UNLESS(args[0]->GetTupleNode());
+ Y_DEBUG_ABORT_UNLESS(args[1]->GetStructNode());
+ auto posArgs = args[0]->GetTupleNode();
+ if (posArgs->GetTupleSize() != 1) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named");
+ }
+
+ TVector<TNodePtr> flattenMembersArgs = {
+ BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}),
+ BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}),
+ };
+ return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs);
+ } else if (normalizedName == "sqlexternalfunction") {
+ return new TCallNodeImpl(pos, "SqlExternalFunction", args);
+ } else {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name);
+ }
+ }
+
+ // Generic UDF path: reached for every namespace that did not return above.
+ TNodePtr positionalArgs;
+ TNodePtr namedArgs;
+ if (mustUseNamed && *mustUseNamed) {
+ YQL_ENSURE(args.size() == 2);
+ positionalArgs = args[0];
+ namedArgs = args[1];
+ *mustUseNamed = false;
+ }
+
+ TVector<TNodePtr> usedArgs = args;
+
+ TNodePtr customUserType = nullptr;
+ if (ns == "json") {
+ ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF) << "Json UDF is deprecated. Please use JSON API instead";
+
+ // Json:: calls are redirected to the Yson module, renaming Serialize/Parse to their *Json variants.
+ ns = "yson";
+ nameSpace = "Yson";
+ if (name == "Serialize") {
+ name = "SerializeJson";
+ }
+ else if (name == "Parse") {
+ name = "ParseJson";
+ }
+ }
+
+ if (ctx.PragmaYsonFast && ns == "yson") {
+ ns.append('2');
+ nameSpace.append('2');
+ }
+
+ if (ns.StartsWith("yson")) {
+ // For ConvertTo the second argument is pulled out and passed on as the custom user type.
+ if (name == "ConvertTo" && usedArgs.size() > 1) {
+ customUserType = usedArgs[1];
+ usedArgs.erase(usedArgs.begin() + 1);
+ }
+
+ if (name == "Serialize") {
+ if (usedArgs) {
+ usedArgs.resize(1U);
+ }
+ } else if (ctx.PragmaYsonFast && name == "SerializeJsonEncodeUtf8") {
+ // Rewritten into SerializeJson with a named EncodeUtf8 = true argument.
+ name = "SerializeJson";
+ if (usedArgs.size() < 2U) {
+ usedArgs.emplace_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast));
+ }
+ positionalArgs = BuildTuple(pos, usedArgs);
+ auto encodeUtf8 = BuildLiteralBool(pos, true);
+ encodeUtf8->SetLabel("EncodeUtf8");
+ namedArgs = BuildStructure(pos, {encodeUtf8});
+ usedArgs = {positionalArgs, namedArgs};
+ } else if (name.StartsWith("From")) {
+ if (usedArgs) {
+ usedArgs.resize(1U);
+ }
+ name = "From";
+ } else if (name == "GetLength" || name.StartsWith("ConvertTo") || name.StartsWith("Parse") || name.StartsWith("SerializeJson")) {
+ // Append the pragma-derived options node when the caller did not pass one.
+ if (usedArgs.size() < 2U) {
+ usedArgs.emplace_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast));
+ }
+ } else if (name == "Contains" || name.StartsWith("Lookup") || name.StartsWith("YPath")) {
+ if (usedArgs.size() < 3U) {
+ usedArgs.push_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast));
+ }
+ }
+ }
+
+ if (ns == "datetime2" && name == "Update") {
+ if (namedArgs) {
+ TStructNode* castedNamedArgs = namedArgs->GetStructNode();
+ Y_DEBUG_ABORT_UNLESS(castedNamedArgs);
+ auto exprs = castedNamedArgs->GetExprs();
+ // A named "Timezone" argument is wrapped into TimezoneId(...) and relabeled "TimezoneId".
+ for (auto& arg : exprs) {
+ if (arg->GetLabel() == "Timezone") {
+ arg = new TCallNodeImpl(pos, "TimezoneId", 1, 1, { arg });
+ arg->SetLabel("TimezoneId");
+ }
+ }
+
+ // Replace the named-arguments struct (last element of usedArgs) with the rebuilt one.
+ namedArgs = BuildStructure(pos, exprs);
+ usedArgs.pop_back();
+ usedArgs.push_back(namedArgs);
+ };
+ }
+
+ TNodePtr typeConfig = MakeTypeConfig(pos, ns, usedArgs);
+ return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, TDeferredAtom(typeConfig, ctx), nullptr);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/context.cpp b/yql/essentials/sql/v1/context.cpp
new file mode 100644
index 00000000000..4637a3be9e9
--- /dev/null
+++ b/yql/essentials/sql/v1/context.cpp
@@ -0,0 +1,656 @@
+#include "context.h"
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/utils/yql_panic.h>
+#include <yql/essentials/utils/yql_paths.h>
+
+#include <util/folder/pathsplit.h>
+#include <util/string/join.h>
+#include <util/stream/null.h>
+
+#ifdef GetMessage
+#undef GetMessage
+#endif
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+namespace {
+
+// Returns a node for `path` with `prefixPath` prepended.
+// An empty prefix leaves the path untouched; a literal path is joined immediately;
+// a non-literal path is joined through a BuildTablePath call node.
+TNodePtr AddTablePathPrefix(TContext& ctx, TStringBuf prefixPath, const TDeferredAtom& path) {
+    if (prefixPath.empty()) {
+        return path.Build();
+    }
+
+    if (const TString* literalPath = path.GetLiteral()) {
+        return BuildQuotedAtom(path.Build()->GetPos(), BuildTablePath(prefixPath, *literalPath));
+    }
+
+    // Non-literal path: wrap it into String(...) and join it with the prefix as an expression.
+    const auto rawPath = path.Build();
+    TNodePtr stringPath = new TCallNodeImpl(rawPath->GetPos(), "String", { rawPath });
+    auto prefixLiteral = BuildLiteralRawString(stringPath->GetPos(), TString(prefixPath));
+    TNodePtr joinedPath = new TCallNodeImpl(stringPath->GetPos(), "BuildTablePath", { prefixLiteral, stringPath });
+
+    TDeferredAtom prefixed;
+    MakeTableFromExpression(ctx.Pos(), ctx, joinedPath, prefixed);
+    return prefixed.Build();
+}
+
+// Pointer to a plain bool member of TContext.
+using TPragmaField = bool TContext::*;
+
+// Flag name -> bool member of TContext assigned when the flag appears in the translation settings.
+THashMap<TStringBuf, TPragmaField> CTX_PRAGMA_FIELDS = {
+    // ANSI compatibility switches
+    {"AnsiOptionalAs", &TContext::AnsiOptionalAs},
+    {"WarnOnAnsiAliasShadowing", &TContext::WarnOnAnsiAliasShadowing},
+    // Join handling
+    {"PullUpFlatMapOverJoin", &TContext::PragmaPullUpFlatMapOverJoin},
+    {"FilterPushdownOverJoinOptionalSide", &TContext::FilterPushdownOverJoinOptionalSide},
+    {"RotateJoinTree", &TContext::RotateJoinTree},
+    // Engine selection
+    {"DqEngineEnable", &TContext::DqEngineEnable},
+    {"DqEngineForce", &TContext::DqEngineForce},
+    {"RegexUseRe2", &TContext::PragmaRegexUseRe2},
+    {"OrderedColumns", &TContext::OrderedColumns},
+    {"BogousStarInGroupByOverJoin", &TContext::BogousStarInGroupByOverJoin},
+    {"CoalesceJoinKeysOnQualifiedAll", &TContext::CoalesceJoinKeysOnQualifiedAll},
+    {"UnorderedSubqueries", &TContext::UnorderedSubqueries},
+    {"FlexibleTypes", &TContext::FlexibleTypes},
+    {"AnsiCurrentRow", &TContext::AnsiCurrentRow},
+    {"EmitStartsWith", &TContext::EmitStartsWith},
+    {"AnsiLike", &TContext::AnsiLike},
+    {"UseBlocks", &TContext::UseBlocks},
+    {"BlockEngineEnable", &TContext::BlockEngineEnable},
+    {"BlockEngineForce", &TContext::BlockEngineForce},
+    {"UnorderedResult", &TContext::UnorderedResult},
+    {"CompactNamedExprs", &TContext::CompactNamedExprs},
+    {"ValidateUnusedExprs", &TContext::ValidateUnusedExprs},
+    {"AnsiImplicitCrossJoin", &TContext::AnsiImplicitCrossJoin},
+    {"DistinctOverWindow", &TContext::DistinctOverWindow},
+};
+
+// Pointer to a TMaybe<bool> member of TContext.
+using TPragmaMaybeField = TMaybe<bool> TContext::*;
+
+// Flag name -> TMaybe<bool> member of TContext assigned when the flag appears in the translation settings.
+THashMap<TStringBuf, TPragmaMaybeField> CTX_PRAGMA_MAYBE_FIELDS = {
+    {"AnsiRankForNullableKeys", &TContext::AnsiRankForNullableKeys},
+    {"AnsiInForEmptyOrNullableItemsCollections", &TContext::AnsiInForEmptyOrNullableItemsCollections},
+    {"EmitAggApply", &TContext::EmitAggApply},
+    {"CompactGroupBy", &TContext::CompactGroupBy},
+};
+
+} // namespace
+
+// Initializes the translation context from the settings: registers the libraries, creates the
+// root scope (with the default cluster, if one is configured) and applies the pragma flags.
+TContext::TContext(const NSQLTranslation::TTranslationSettings& settings,
+                   const NSQLTranslation::TSQLHints& hints,
+                   TIssues& issues)
+    : ClusterMapping(settings.ClusterMapping)
+    , PathPrefix(settings.PathPrefix)
+    , ClusterPathPrefixes(settings.ClusterPathPrefixes)
+    , SQLHints(hints)
+    , Settings(settings)
+    , Pool(new TMemoryPool(4096))
+    , Issues(issues)
+    , IncrementMonCounterFunction(settings.IncrementCounter)
+    , HasPendingErrors(false)
+    , DqEngineEnable(Settings.DqDefaultAuto->Allow())
+    , AnsiQuotedIdentifiers(settings.AnsiLexer)
+    , BlockEngineEnable(Settings.BlockDefaultAuto->Allow())
+{
+    for (const auto& libraryName : settings.Libraries) {
+        Libraries.emplace(libraryName, TLibraryStuff());
+    }
+
+    // Root scope.
+    Scoped = MakeIntrusive<TScopedState>();
+    AllScopes.push_back(Scoped);
+    Scoped->UnicodeLiterals = settings.UnicodeLiterals;
+    if (settings.DefaultCluster) {
+        Scoped->CurrCluster = TDeferredAtom({}, settings.DefaultCluster);
+        auto defaultService = GetClusterProvider(settings.DefaultCluster);
+        YQL_ENSURE(defaultService);
+        Scoped->CurrService = *defaultService;
+    }
+
+    Position.File = settings.File;
+
+    for (const auto& flag : settings.Flags) {
+        TStringBuf key = flag;
+        auto boolField = CTX_PRAGMA_FIELDS.FindPtr(key);
+        auto maybeField = CTX_PRAGMA_MAYBE_FIELDS.FindPtr(key);
+
+        // A flag that is not known verbatim but starts with "Disable" is looked up again
+        // without the prefix and, if found, sets the field to false.
+        const bool disabled = !boolField && !maybeField && key.SkipPrefix("Disable");
+        if (disabled) {
+            boolField = CTX_PRAGMA_FIELDS.FindPtr(key);
+            maybeField = CTX_PRAGMA_MAYBE_FIELDS.FindPtr(key);
+        }
+
+        const bool value = !disabled;
+        if (boolField) {
+            this->*(*boolField) = value;
+        } else if (maybeField) {
+            this->*(*maybeField) = value;
+        }
+    }
+
+    DiscoveryMode = (Settings.Mode == NSQLTranslation::ESqlMode::DISCOVERY);
+}
+
+// Clears every scope that was ever registered in AllScopes.
+TContext::~TContext() {
+    for (auto& scope : AllScopes) {
+        scope->Clear();
+    }
+}
+
+// Returns the context's current position (the Position member).
+const NYql::TPosition& TContext::Pos() const {
+ return Position;
+}
+
+// Generates a name of the form "<name><index>"; the index starts at 0 for each distinct
+// base name and is incremented on every call.
+TString TContext::MakeName(const TString& name) {
+    // emplace leaves an existing counter untouched and returns an iterator to it.
+    auto& nextIndex = GenIndexes.emplace(name, 0).first->second;
+    TString generated = TStringBuilder() << name << nextIndex;
+    ++nextIndex;
+    return generated;
+}
+
+// Pushes `blocks` onto the CurrentBlocks stack; `blocks` must not be null.
+void TContext::PushCurrentBlocks(TBlocks* blocks) {
+ YQL_ENSURE(blocks);
+ CurrentBlocks.push_back(blocks);
+}
+
+// Pops the top entry of the CurrentBlocks stack; the stack must not be empty.
+void TContext::PopCurrentBlocks() {
+ YQL_ENSURE(!CurrentBlocks.empty());
+ CurrentBlocks.pop_back();
+}
+
+// Returns the blocks at the top of the CurrentBlocks stack; the stack must not be empty.
+TBlocks& TContext::GetCurrentBlocks() const {
+ YQL_ENSURE(!CurrentBlocks.empty());
+ return *CurrentBlocks.back();
+}
+
+// Starts an error issue at the current position; see the overload taking an explicit position.
+IOutputStream& TContext::Error(NYql::TIssueCode code) {
+ return Error(Pos(), code);
+}
+
+// Marks the context as having pending errors and starts an S_ERROR issue at `pos`.
+// The returned stream is the one produced by MakeIssue.
+IOutputStream& TContext::Error(NYql::TPosition pos, NYql::TIssueCode code) {
+ HasPendingErrors = true;
+ return MakeIssue(TSeverityIds::S_ERROR, code, pos);
+}
+
+// Starts an S_WARNING issue at `pos`; MakeIssue applies the warning policy to it.
+IOutputStream& TContext::Warning(NYql::TPosition pos, NYql::TIssueCode code) {
+ return MakeIssue(TSeverityIds::S_WARNING, code, pos);
+}
+
+// Starts an S_INFO issue with code TIssuesIds::INFO at `pos`.
+IOutputStream& TContext::Info(NYql::TPosition pos) {
+ return MakeIssue(TSeverityIds::S_INFO, TIssuesIds::INFO, pos);
+}
+
+// Adds a warning-policy rule that applies `action` to the issue code `code`.
+// The rule is built by round-tripping both values through their string forms.
+void TContext::SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action) {
+    const TString codeText = ToString(code);
+    const TString actionText = ToString(action);
+
+    TWarningRule parsedRule;
+    TString parseError;
+    const auto status = TWarningRule::ParseFrom(codeText, actionText, parsedRule, parseError);
+    YQL_ENSURE(status == TWarningRule::EParseResult::PARSE_OK);
+
+    WarningPolicy.AddRule(parsedRule);
+}
+
+// Removes and returns the hints registered for the token at `tokenPos`;
+// returns an empty vector when there are none.
+TVector<NSQLTranslation::TSQLHint> TContext::PullHintForToken(NYql::TPosition tokenPos) {
+    auto found = SQLHints.find(tokenPos);
+    if (found == SQLHints.end()) {
+        return {};
+    }
+
+    TVector<NSQLTranslation::TSQLHint> hints = std::move(found->second);
+    SQLHints.erase(found);
+    return hints;
+}
+
+// Emits a warning for the first hint that is still present in SQLHints, if any.
+void TContext::WarnUnusedHints() {
+    if (SQLHints.empty()) {
+        return;
+    }
+
+    // Only the first remaining entry is reported.
+    const auto& remaining = SQLHints.begin()->second;
+    YQL_ENSURE(!remaining.empty());
+    const NSQLTranslation::TSQLHint& unusedHint = remaining.front();
+    Warning(unusedHint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Hint " << unusedHint.Name << " will not be used";
+}
+
+// Appends a new issue and returns a stream that writes into its message.
+// Returns Cnull (the message is discarded) when the issue is suppressed by the warning policy
+// or by the issue-count limits; throws TTooManyErrors when Settings.MaxErrors is reached.
+IOutputStream& TContext::MakeIssue(ESeverity severity, TIssueCode code, NYql::TPosition pos) {
+    // Warnings may be promoted to errors or silenced by the warning policy.
+    if (severity == TSeverityIds::S_WARNING) {
+        switch (WarningPolicy.GetAction(code)) {
+            case EWarningAction::ERROR:
+                severity = TSeverityIds::S_ERROR;
+                HasPendingErrors = true;
+                break;
+            case EWarningAction::DISABLE:
+                return Cnull;
+            default:
+                break;
+        }
+    }
+
+    if (severity >= TSeverityIds::S_WARNING) {
+        // Issues of this severity are dropped once more than half of the budget is used.
+        if (Issues.Size() > Settings.MaxErrors / 2) {
+            return Cnull;
+        }
+    } else {
+        // One cell left: fill it with our internal "Too many issues" error.
+        if (Settings.MaxErrors == Issues.Size() + 1) {
+            Issues.AddIssue(TIssue(NYql::TPosition(), TString(TStringBuf("Too many issues"))));
+            Issues.back().SetCode(UNEXPECTED_ERROR, TSeverityIds::S_ERROR);
+        }
+
+        if (Settings.MaxErrors <= Issues.Size()) {
+            ythrow NProtoAST::TTooManyErrors() << "Too many issues";
+        }
+    }
+
+    Issues.AddIssue(TIssue(pos, TString()));
+    auto& issue = Issues.back();
+    issue.Severity = severity;
+    issue.IssueCode = code;
+    IssueMsgHolder.Reset(new TStringOutput(*issue.MutableMessage()));
+    return *IssueMsgHolder;
+}
+
+// Returns true when `cluster` is a literal name that is not resolved by the cluster mapping,
+// is not covered by the AssumeYdbOnClusterWithSlash rule, and a dynamic cluster provider is configured.
+bool TContext::IsDynamicCluster(const TDeferredAtom& cluster) const {
+    const TString* literalName = cluster.GetLiteral();
+    if (!literalName) {
+        return false;
+    }
+
+    TString ignoredNormalizedName;
+    if (ClusterMapping.GetClusterProvider(*literalName, ignoredNormalizedName)) {
+        return false;
+    }
+
+    if (Settings.AssumeYdbOnClusterWithSlash && literalName->StartsWith('/')) {
+        return false;
+    }
+
+    const bool hasDynamicProvider = !Settings.DynamicClusterProvider.empty();
+    return hasDynamicProvider;
+}
+
+// Sets the table path prefix.
+// Without `arg` the global prefix is set; with `arg` naming a provider (yt / kikimr / rtmr) the
+// per-provider prefix is set; otherwise `arg` must be a known cluster and its prefix is set.
+// Returns false (after reporting an error) when `arg` is neither a provider nor a known cluster.
+bool TContext::SetPathPrefix(const TString& value, TMaybe<TString> arg) {
+    if (!arg.Defined()) {
+        PathPrefix = value;
+        return true;
+    }
+
+    const TString& target = *arg;
+    const bool isProvider = target == YtProviderName
+        || target == KikimrProviderName
+        || target == RtmrProviderName;
+    if (isProvider) {
+        ProviderPathPrefixes[target] = value;
+        return true;
+    }
+
+    TString normalizedClusterName;
+    if (!GetClusterProvider(target, normalizedClusterName)) {
+        Error() << "Unknown cluster or provider: " << target;
+        IncrementMonCounter("sql_errors", "BadPragmaValue");
+        return false;
+    }
+
+    ClusterPathPrefixes[normalizedClusterName] = value;
+    return true;
+}
+
+// Builds the node for `path`, prepending the prefix that applies to `service` / `cluster`, if any.
+TNodePtr TContext::GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path) {
+    const TStringBuf applicablePrefix = GetPrefixPath(service, cluster);
+    if (!applicablePrefix) {
+        return path.Build();
+    }
+    return AddTablePathPrefix(*this, applicablePrefix, path);
+}
+
+// Picks the path prefix for a table reference. Precedence: non-empty cluster prefix (literal
+// cluster names only), then non-empty provider prefix, then the global prefix.
+// Dynamic clusters never get a prefix. Returns an empty buffer when nothing applies.
+TStringBuf TContext::GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const {
+    if (IsDynamicCluster(cluster)) {
+        return {};
+    }
+
+    if (const TString* clusterName = cluster.GetLiteral()) {
+        const auto* perCluster = ClusterPathPrefixes.FindPtr(*clusterName);
+        if (perCluster && !perCluster->empty()) {
+            return *perCluster;
+        }
+    }
+
+    const auto* perProvider = ProviderPathPrefixes.FindPtr(service);
+    if (perProvider && !perProvider->empty()) {
+        return *perProvider;
+    }
+
+    if (!PathPrefix.empty()) {
+        return PathPrefix;
+    }
+
+    return {};
+}
+
+// Registers `node` in UniversalAliases under a freshly generated name derived from `baseName`
+// and returns an atom carrying that name.
+TNodePtr TContext::UniversalAlias(const TString& baseName, TNodePtr&& node) {
+    const auto generatedAlias = MakeName(baseName);
+    UniversalAliases.emplace(generatedAlias, node);
+    return BuildAtom(node->GetPos(), generatedAlias, TNodeFlags::Default);
+}
+
+// A variable counts as declared when it is present in Variables and is not marked as weak.
+bool TContext::IsAlreadyDeclared(const TString& varName) const {
+    const bool isKnown = Variables.find(varName) != Variables.end();
+    return isKnown && !WeakVariables.contains(varName);
+}
+
+// Records a variable declaration.
+// A weak declaration must be the first one for `varName` (enforced by YQL_ENSURE) and is
+// remembered in WeakVariables. A non-weak declaration overrides any previous entry and
+// clears the weak mark, if there was one.
+void TContext::DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak) {
+    if (isWeak) {
+        auto inserted = Variables.emplace(varName, std::make_pair(pos, typeNode));
+        YQL_ENSURE(inserted.second);
+        WeakVariables.insert(varName);
+    } else {
+        // Erase by key: the variable is usually not weak, and erasing the end iterator
+        // returned by find() is not a valid operation for standard containers.
+        WeakVariables.erase(varName);
+        Variables[varName] = std::make_pair(pos, typeNode);
+    }
+}
+
+// Registers `name` as an exported symbol.
+// Fails (reporting an error at `pos`) for anonymous names, duplicates and unknown symbols.
+bool TContext::AddExport(TPosition pos, const TString& name) {
+    // Every rejection message is "<reason><name>".
+    const auto reject = [&](TStringBuf reason) {
+        Error(pos) << reason << name;
+        return false;
+    };
+
+    if (IsAnonymousName(name)) {
+        return reject("Can not export anonymous name ");
+    }
+    if (Exports.contains(name)) {
+        return reject("Duplicate export symbol: ");
+    }
+    if (!Scoped->LookupNode(name)) {
+        return reject("Unable to export unknown symbol: ");
+    }
+
+    Exports.emplace(name);
+    return true;
+}
+
+// Returns the alias for the module identified by `modulePath`, creating one on first use.
+// The path components are joined with '/'; paths not starting with '/' are prefixed with
+// Settings.FileAliasPrefix.
+TString TContext::AddImport(const TVector<TString>& modulePath) {
+    YQL_ENSURE(!modulePath.empty());
+
+    TString fullPath = JoinRange("/", modulePath.cbegin(), modulePath.cend());
+    if (!fullPath.StartsWith('/')) {
+        fullPath = Settings.FileAliasPrefix + fullPath;
+    }
+
+    auto known = ImportModuleAliases.find(fullPath);
+    if (known != ImportModuleAliases.end()) {
+        return known->second;
+    }
+
+    // First import of this module: the alias is derived from the last path component.
+    const TString freshAlias = MakeName(TStringBuilder() << modulePath.back() << "_module");
+    return ImportModuleAliases.emplace(fullPath, freshAlias).first->second;
+}
+
+// Returns the generated name for `udf`, assigning "Udf<N>" on first use, where N is the
+// size of SimpleUdfs after the entry has been inserted.
+TString TContext::AddSimpleUdf(const TString& udf) {
+    auto& assignedName = SimpleUdfs[udf];
+    if (!assignedName.empty()) {
+        return assignedName;
+    }
+
+    assignedName = TStringBuilder() << "Udf" << SimpleUdfs.size();
+    return assignedName;
+}
+
+// Stores `version` for `packageName`, overwriting any previously stored version.
+void TContext::SetPackageVersion(const TString& packageName, ui32 version) {
+ PackageVersions[packageName] = version;
+}
+
+// Records that `cluster` of `service` is used in this scope; repeated uses are ignored.
+// Literal clusters are deduplicated by name, expression clusters by node pointer.
+void TScopedState::UseCluster(const TString& service, const TDeferredAtom& cluster) {
+    YQL_ENSURE(!cluster.Empty());
+
+    const bool firstUse = cluster.GetLiteral()
+        ? Local.UsedPlainClusters.insert(*cluster.GetLiteral()).second
+        : Local.UsedExprClusters.insert(cluster.Build().Get()).second;
+    if (!firstUse) {
+        return;
+    }
+
+    Local.UsedClusters.push_back({service, cluster});
+}
+
+// Registers a cluster expression once: assigns it a generated name and stores it wrapped
+// into EvaluateAtom, keyed by the node pointer.
+void TScopedState::AddExprCluster(TNodePtr expr, TContext& ctx) {
+    auto key = expr.Get();
+    if (Local.ExprClustersMap.count(key) != 0) {
+        return;
+    }
+
+    auto generatedName = ctx.MakeName("cluster");
+    auto evaluated = expr->Y("EvaluateAtom", expr);
+    Local.ExprClustersMap.insert({key, {generatedName, evaluated}});
+    Local.ExprClusters.push_back(expr);
+}
+
+// Returns the (service, cluster) pairs recorded by UseCluster, in insertion order.
+const TVector<std::pair<TString, TDeferredAtom>>& TScopedState::GetUsedClusters() {
+ return Local.UsedClusters;
+}
+
+// Returns the node to use for `cluster` in the generated AST.
+// Literal clusters are returned as built. Expression clusters are either wrapped into
+// EvaluateAtom in place (CompactNamedExprs) or registered and referenced by generated name.
+TNodePtr TScopedState::WrapCluster(const TDeferredAtom& cluster, TContext& ctx) {
+    auto node = cluster.Build();
+    if (cluster.GetLiteral()) {
+        return node;
+    }
+
+    if (ctx.CompactNamedExprs) {
+        return node->Y("EvaluateAtom", node);
+    }
+
+    AddExprCluster(node, ctx);
+    auto registered = Local.ExprClustersMap.find(node.Get());
+    YQL_ENSURE(registered != Local.ExprClustersMap.end());
+    return node->AstNode(registered->second.first);
+}
+
+// Resets this scope by assigning a default-constructed TScopedState over it.
+void TScopedState::Clear() {
+ *this = TScopedState();
+}
+
+// Looks up a named node; returns nullptr when `name` is unknown.
+// On success the front entry for the name is marked as used and a clone of its node is returned.
+TNodePtr TScopedState::LookupNode(const TString& name) {
+    auto found = NamedNodes.find(name);
+    if (found == NamedNodes.end()) {
+        return nullptr;
+    }
+
+    Y_DEBUG_ABORT_UNLESS(!found->second.empty());
+    auto& current = found->second.front();
+    current->IsUsed = true;
+    return current->Node->Clone();
+}
+
+// Returns true if any input table of `source`, or any cluster used in the current scope,
+// belongs to a provider other than YT.
+bool TContext::HasNonYtProvider(const ISource& source) const {
+    TTableList tableList;
+    source.GetInputTables(tableList);
+
+    for (const auto& table : tableList) {
+        if (table.Service != YtProviderName) {
+            return true;
+        }
+    }
+
+    // Each entry is a (service, cluster) pair.
+    for (const auto& usedCluster : Scoped->Local.UsedClusters) {
+        if (usedCluster.first != YtProviderName) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Unordered reading is used for a source only when HasNonYtProvider() is false for it.
+bool TContext::UseUnordered(const ISource& source) const {
+    const bool hasForeignProvider = HasNonYtProvider(source);
+    return !hasForeignProvider;
+}
+
+// Unordered reading is used for a table only when its service is YtProviderName.
+bool TContext::UseUnordered(const TTableRef& table) const {
+    return table.Service == YtProviderName;
+}
+
+
+// Returns the column-reference state that must be applied while translating
+// argument number `argIndex` of the call `module::func`, or an empty TMaybe when
+// the argument has no special rule.
+//
+// Lookup name: for an empty module the function name is lower-cased; for module
+// "YQL" (any case) the name is "YQL::" + func; otherwise module + "::" + func
+// with the original spelling.
+TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex) {
+    // Functions for which column references are denied in every argument.
+    static const TSet<TStringBuf> denyForAllArgs = {
+        "datatype",
+        "optionaltype",
+        "listtype",
+        "streamtype",
+        "dicttype",
+        "tupletype",
+        "resourcetype",
+        "taggedtype",
+        "varianttype",
+        "callabletype",
+        "optionalitemtype",
+        "listitemtype",
+        "streamitemtype",
+        "dictkeytype",
+        "dictpayloadtype",
+        "tupleelementtype",
+        "structmembertype",
+        "callableresulttype",
+        "callableargumenttype",
+        "variantunderlyingtype",
+    };
+    // (function, argument index) -> state for that single argument.
+    static const TMap<std::pair<TStringBuf, size_t>, EColumnRefState> positionalArgsCustomStatus = {
+        { {"frombytes", 1}, EColumnRefState::Deny },
+        { {"enum", 0}, EColumnRefState::Deny },
+        { {"asenum", 0}, EColumnRefState::Deny },
+        { {"variant", 1}, EColumnRefState::Deny },
+        { {"variant", 2}, EColumnRefState::Deny },
+        { {"asvariant", 1}, EColumnRefState::Deny },
+        { {"astagged", 1}, EColumnRefState::Deny },
+        { {"ensuretype", 1}, EColumnRefState::Deny },
+        { {"ensuretype", 2}, EColumnRefState::Deny },
+        { {"ensureconvertibleto", 1}, EColumnRefState::Deny },
+        { {"ensureconvertibleto", 2}, EColumnRefState::Deny },
+
+        { {"nothing", 0}, EColumnRefState::Deny },
+        { {"formattype", 0}, EColumnRefState::Deny },
+        { {"instanceof", 0}, EColumnRefState::Deny },
+        { {"pgtype", 0}, EColumnRefState::AsPgType },
+        { {"pgconst", 0}, EColumnRefState::Deny },
+        { {"pgconst", 1}, EColumnRefState::AsPgType },
+        { {"pgcast", 1}, EColumnRefState::AsPgType },
+
+        { {"unpickle", 0}, EColumnRefState::Deny },
+        { {"typehandle", 0}, EColumnRefState::Deny },
+
+        { {"listcreate", 0}, EColumnRefState::Deny },
+        { {"setcreate", 0}, EColumnRefState::Deny },
+        { {"dictcreate", 0}, EColumnRefState::Deny },
+        { {"dictcreate", 1}, EColumnRefState::Deny },
+        { {"weakfield", 1}, EColumnRefState::Deny },
+
+        { {"Yson::ConvertTo", 1}, EColumnRefState::Deny },
+    };
+
+    const auto makeLookupName = [&module, &func]() -> TString {
+        if (module.empty()) {
+            return to_lower(func);
+        }
+        if (to_upper(module) == "YQL") {
+            return "YQL::" + func;
+        }
+        return module + "::" + func;
+    };
+    const TString normalized = makeLookupName();
+
+    const bool isFirstArgOfTypeOf = (argIndex == 0) && (normalized == "typeof");
+    if (isFirstArgOfTypeOf) {
+        // TODO: more such cases?
+        return ctx.GetTopLevelColumnReferenceState();
+    }
+
+    if (denyForAllArgs.contains(normalized)) {
+        return EColumnRefState::Deny;
+    }
+
+    const auto custom = positionalArgsCustomStatus.find(std::make_pair(normalized, argIndex));
+    if (custom == positionalArgsCustomStatus.end()) {
+        return {};
+    }
+    return custom->second;
+}
+
+// Binds the translation helper to the context it reads from and reports to.
+TTranslation::TTranslation(TContext& ctx)
+    : Ctx(ctx)
+{}
+
+// Gives access to the context passed to the constructor.
+TContext& TTranslation::Context() {
+    TContext& bound = Ctx;
+    return bound;
+}
+
+IOutputStream& TTranslation::Error() {
+ return Ctx.Error();
+}
+
+// Resolves the named node `name` in the current scope.
+// The anonymous name "$_" can never be referenced: an error is reported and
+// nullptr is returned. An unknown name is reported as an error as well; the
+// (empty) lookup result is then still passed through SafeClone().
+TNodePtr TTranslation::GetNamedNode(const TString& name) {
+    if (IsAnonymousName(name)) {
+        Ctx.Error() << "Unable to reference anonymous name " << name;
+        return nullptr;
+    }
+    auto found = Ctx.Scoped->LookupNode(name);
+    if (!found) {
+        Ctx.Error() << "Unknown name: " << name;
+    }
+    return SafeClone(found);
+}
+
+// Defines a named node in the current scope and returns the name it was stored under.
+// For the anonymous name "$_" a unique "$_yql_anonymous_name_<N>" is generated.
+// `builder` receives the effective name and must return the node to store.
+// The new definition is pushed in front of older definitions of the same name,
+// so it shadows them until PopNamedNode() is called.
+TString TTranslation::PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder) {
+    const bool anonymous = IsAnonymousName(name);
+    TString resultName = anonymous
+        ? "$_yql_anonymous_name_" + ToString(Ctx.AnonymousNameIndex++)
+        : name;
+    if (anonymous) {
+        YQL_ENSURE(Ctx.Scoped->NamedNodes.find(resultName) == Ctx.Scoped->NamedNodes.end());
+    }
+
+    auto node = builder(resultName);
+    Y_DEBUG_ABORT_UNLESS(node);
+
+    auto& namedNodes = Ctx.Scoped->NamedNodes;
+    auto slot = namedNodes.find(resultName);
+    if (slot == namedNodes.end()) {
+        const auto inserted = namedNodes.insert(std::make_pair(resultName, TDeque<TNodeWithUsageInfoPtr>()));
+        Y_DEBUG_ABORT_UNLESS(inserted.second);
+        slot = inserted.first;
+    }
+
+    slot->second.push_front(MakeIntrusive<TNodeWithUsageInfo>(node, namePos, Ctx.ScopeLevel));
+    return resultName;
+}
+
+// Convenience overload: stores the already built `node` under `name`
+// (the effective name is ignored when producing the node).
+TString TTranslation::PushNamedNode(NYql::TPosition namePos, const TString &name, NSQLTranslationV1::TNodePtr node) {
+    const TNodeBuilderByName returnSameNode = [node](const TString&) {
+        return node;
+    };
+    return PushNamedNode(namePos, name, returnSameNode);
+}
+
+// Defines `name` as an atom whose content is the effective (possibly generated)
+// name itself, positioned at `namePos`.
+TString TTranslation::PushNamedAtom(TPosition namePos, const TString& name) {
+    return PushNamedNode(namePos, name, [namePos](const TString& effectiveName) {
+        return BuildAtom(namePos, effectiveName);
+    });
+}
+
+// Removes the front (most recently pushed) definition of `name`.
+// If that definition was never looked up, a YQL_UNUSED_SYMBOL warning is issued,
+// unless errors are already pending or the name starts with "$_".
+// The map entry is erased once no definitions of the name remain.
+void TTranslation::PopNamedNode(const TString& name) {
+    auto& namedNodes = Ctx.Scoped->NamedNodes;
+    const auto entry = namedNodes.find(name);
+    Y_DEBUG_ABORT_UNLESS(entry != namedNodes.end());
+    auto& definitions = entry->second;
+    Y_DEBUG_ABORT_UNLESS(definitions.size() > 0);
+
+    const auto& top = definitions.front();
+    const bool reportUnused = !top->IsUsed && !Ctx.HasPendingErrors && !name.StartsWith("$_");
+    if (reportUnused) {
+        Ctx.Warning(top->NamePos, TIssuesIds::YQL_UNUSED_SYMBOL) << "Symbol " << name << " is not used";
+    }
+
+    definitions.pop_front();
+    if (definitions.empty()) {
+        namedNodes.erase(entry);
+    }
+}
+
+// Issues a YQL_UNUSED_SYMBOL warning for every named node that was defined at
+// the current scope level and never looked up. Names starting with "$_" are
+// exempt. Nothing is reported while errors are pending.
+void TTranslation::WarnUnusedNodes() const {
+    if (Ctx.HasPendingErrors) {
+        // result is not reliable in this case
+        return;
+    }
+    for (const auto& [name, items]: Ctx.Scoped->NamedNodes) {
+        if (!name.StartsWith("$_")) {
+            for (const auto& item : items) {
+                if (item->IsUsed || item->Level != Ctx.ScopeLevel) {
+                    continue;
+                }
+                Ctx.Warning(item->NamePos, TIssuesIds::YQL_UNUSED_SYMBOL) << "Symbol " << name << " is not used";
+            }
+        }
+    }
+}
+
+// Reads the string field named "Descr" from the sub-message of `node` that is
+// stored in field `d`.
+TString GetDescription(const google::protobuf::Message& node, const google::protobuf::FieldDescriptor* d) {
+    const auto& alternative = node.GetReflection()->GetMessage(node, d);
+    const auto* descrField = d->message_type()->FindFieldByName("Descr");
+    return alternative.GetReflection()->GetString(alternative, descrField);
+}
+
+// Returns the description of the alternative whose field number in `descr` is `altCase`.
+TString TTranslation::AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const {
+    const auto* altField = descr->FindFieldByNumber(altCase);
+    return GetDescription(node, altField);
+}
+
+void TTranslation::AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr) {
+ Error() << ruleName << ": alternative is not implemented yet: " << AltDescription(node, altCase, descr);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/context.h b/yql/essentials/sql/v1/context.h
new file mode 100644
index 00000000000..4aa766e34a0
--- /dev/null
+++ b/yql/essentials/sql/v1/context.h
@@ -0,0 +1,421 @@
+#pragma once
+
+#include "source.h"
+#include "sql.h"
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/core/issue/protos/issue_id.pb.h>
+#include <yql/essentials/public/issue/yql_warning.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+#include <yql/essentials/sql/cluster_mapping.h>
+
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+
+#include <util/generic/hash.h>
+#include <util/generic/map.h>
+#include <util/generic/maybe.h>
+#include <util/generic/set.h>
+#include <util/generic/deque.h>
+#include <util/generic/vector.h>
+
+// Unified token ids: the lexer token id is shifted left by 16 bits and the low
+// bit tells the grammars apart (0 for ANTLR3, 1 for ANTLR4), which mirrors
+// TTranslation::UnifiedToken(). The replacement lists are fully parenthesized so
+// that the macros expand safely inside any surrounding expression.
+#define ANTLR3_TOKEN(NAME) (SQLv1LexerTokens::TOKEN_##NAME << 16)
+#define ANTLR4_TOKEN(NAME) ((SQLv1Antlr4Lexer::TOKEN_##NAME << 16) + 1)
+// True when the unified form of token id ID equals token NAME of either grammar.
+#define IS_TOKEN(ID, NAME) (UnifiedToken(ID) == ANTLR3_TOKEN(NAME) || UnifiedToken(ID) == ANTLR4_TOKEN(NAME))
+
+namespace NSQLTranslationV1 {
+ // True for the placeholder name "$_", which denotes an anonymous named node.
+ inline bool IsAnonymousName(const TString& name) {
+     const bool isPlaceholder = (name == "$_");
+     return isPlaceholder;
+ }
+
+ // True when `service` is one of the two providers treated as streaming here:
+ // NYql::RtmrProviderName or NYql::PqProviderName.
+ inline bool IsStreamingService(const TString& service) {
+     if (service == NYql::RtmrProviderName) {
+         return true;
+     }
+     return service == NYql::PqProviderName;
+ }
+
+
+ struct TNodeWithUsageInfo : public TThrRefBase { // one definition of a named node plus the bookkeeping used for "unused symbol" warnings
+ explicit TNodeWithUsageInfo(const TNodePtr& node, TPosition namePos, int level)
+ : Node(node)
+ , NamePos(namePos)
+ , Level(level)
+ {}
+
+ TNodePtr Node; // the node bound to the name
+ TPosition NamePos; // position used when the unused-symbol warning is issued
+ int Level = 0; // scope level at definition time (TContext::ScopeLevel when pushed)
+ bool IsUsed = false; // set by TScopedState::LookupNode() when the name is resolved
+ };
+
+ using TNodeWithUsageInfoPtr = TIntrusivePtr<TNodeWithUsageInfo>;
+ using TNamedNodesMap = THashMap<TString, TDeque<TNodeWithUsageInfoPtr>>; // name -> definitions; the front element is the most recently pushed one
+ using TBlocks = TVector<TNodePtr>;
+
+ struct TScopedState : public TThrRefBase { // state of one scope; held through TScopedStatePtr
+ TString CurrService;
+ TDeferredAtom CurrCluster;
+ bool PragmaClassicDivision = true;
+ bool PragmaCheckedOps = false;
+ bool StrictJoinKeyTypes = false;
+ bool UnicodeLiterals = false;
+ bool WarnUntypedStringLiterals = false;
+ TNamedNodesMap NamedNodes; // named nodes visible in this scope
+
+ struct TLocal {
+ TVector<std::pair<TString, TDeferredAtom>> UsedClusters; // (service, cluster) pairs in first-use order, filled by UseCluster()
+ THashSet<TString> UsedPlainClusters; // literal cluster names already present in UsedClusters
+ THashSet<INode*> UsedExprClusters; // nodes of non-literal clusters already present in UsedClusters
+ THashMap<INode*, std::pair<TString, TNodePtr>> ExprClustersMap; // node -> (generated name, "EvaluateAtom" wrapper), filled by AddExprCluster()
+ TVector<TNodePtr> ExprClusters; // expressions registered by AddExprCluster(), in registration order
+ };
+
+ TLocal Local;
+
+ void UseCluster(const TString& service, const TDeferredAtom& cluster);
+ const TVector<std::pair<TString, TDeferredAtom>>& GetUsedClusters();
+ TNodePtr WrapCluster(const TDeferredAtom& cluster, TContext& ctx);
+ void AddExprCluster(TNodePtr expr, TContext& ctx);
+ void Clear(); // assigns a default-constructed TScopedState to *this
+ TNodePtr LookupNode(const TString& name); // nullptr when `name` is unknown; otherwise marks the definition as used and returns a clone
+ };
+
+ using TScopedStatePtr = TIntrusivePtr<TScopedState>;
+
+ class TColumnRefScope;
+ enum class EColumnRefState { // how a column reference met in the current expression is treated
+ Deny, // TContext::CheckColumnReference() reports an error in this state
+ Allow,
+ AsStringLiteral,
+ AsPgType,
+ MatchRecognize, // required by TContext::GetMatchRecognizeDefineVar()
+ };
+
+ class TContext { // translation context: current position, issue reporting, cluster resolution, scopes and pragma-controlled flags
+ public:
+ TContext(const NSQLTranslation::TTranslationSettings& settings,
+ const NSQLTranslation::TSQLHints& hints,
+ NYql::TIssues& issues);
+
+ virtual ~TContext();
+
+ const NYql::TPosition& Pos() const;
+
+ void PushCurrentBlocks(TBlocks* blocks);
+ void PopCurrentBlocks();
+ TBlocks& GetCurrentBlocks() const;
+
+ TString MakeName(const TString& name);
+
+ IOutputStream& Error(NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
+ IOutputStream& Error(NYql::TPosition pos, NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
+ IOutputStream& Warning(NYql::TPosition pos, NYql::TIssueCode code);
+ IOutputStream& Info(NYql::TPosition pos);
+
+ void SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action);
+
+ const TString& Token(const NSQLv1Generated::TToken& token) { // side effect: moves Position to the token, then returns the token text
+ Position.Row = token.GetLine();
+ Position.Column = token.GetColumn() + 1;
+ return token.GetValue();
+ }
+
+ TPosition TokenPosition(const NSQLv1Generated::TToken& token) { // same coordinates as Token() stores, but Position itself is left untouched
+ TPosition pos = Position;
+ pos.Row = token.GetLine();
+ pos.Column = token.GetColumn() + 1;
+ return pos;
+ }
+
+ inline void IncrementMonCounter(const TString& name, const TString& value) { // no-op when no counter function is set
+ if (IncrementMonCounterFunction) {
+ IncrementMonCounterFunction(name, value);
+ }
+ }
+
+ bool HasCluster(const TString& cluster) const {
+ return GetClusterProvider(cluster).Defined();
+ }
+
+ TMaybe<TString> GetClusterProvider(const TString& cluster) const {
+ TString unusedNormalizedClusterName;
+ return GetClusterProvider(cluster, unusedNormalizedClusterName);
+ }
+
+ TMaybe<TString> GetClusterProvider(const TString& cluster, TString& normalizedClusterName) const { // ClusterMapping first, then the two settings-driven fallbacks below
+ auto provider = ClusterMapping.GetClusterProvider(cluster, normalizedClusterName);
+ if (!provider) {
+ if (Settings.AssumeYdbOnClusterWithSlash && cluster.StartsWith('/')) { // fallback 1: a name starting with '/' maps to KikimrProviderName
+ normalizedClusterName = cluster;
+ return TString(NYql::KikimrProviderName);
+ }
+ if (Settings.DynamicClusterProvider) { // fallback 2: relative names are prefixed with Settings.PathPrefix
+ normalizedClusterName = cluster.StartsWith('/') ? cluster : Settings.PathPrefix + "/" + cluster;
+ return Settings.DynamicClusterProvider;
+ }
+ return Nothing();
+ }
+
+ return provider;
+ }
+
+ bool IsDynamicCluster(const TDeferredAtom& cluster) const;
+ bool HasNonYtProvider(const ISource& source) const;
+ bool UseUnordered(const ISource& source) const;
+ bool UseUnordered(const TTableRef& table) const;
+
+ bool SetPathPrefix(const TString& value, TMaybe<TString> arg = TMaybe<TString>());
+
+ TNodePtr GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path);
+ TStringBuf GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const;
+
+ TNodePtr UniversalAlias(const TString& baseName, TNodePtr&& node);
+
+ void BodyPart() { // after this call IsParseHeading() returns false
+ IntoHeading = false;
+ }
+
+ bool IsParseHeading() const {
+ return IntoHeading;
+ }
+
+ bool IsAlreadyDeclared(const TString& varName) const;
+ void DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak = false);
+
+ bool AddExport(TPosition symbolPos, const TString& symbolName);
+ TString AddImport(const TVector<TString>& modulePath);
+ TString AddSimpleUdf(const TString& udf);
+ void SetPackageVersion(const TString& packageName, ui32 version);
+
+ bool IsStreamingService(const TStringBuf service) const;
+
+ bool CheckColumnReference(TPosition pos, const TString& name) { // returns false and reports an error when the current state is Deny
+ const bool allowed = GetColumnReferenceState() != EColumnRefState::Deny;
+ if (!allowed) {
+ Error(pos) << "Column reference \"" << name << "\" is not allowed " << NoColumnErrorContext;
+ IncrementMonCounter("sql_errors", "ColumnReferenceInScopeIsNotAllowed");
+ }
+ return allowed;
+ }
+
+ EColumnRefState GetColumnReferenceState() const {
+ return ColumnReferenceState;
+ }
+
+ EColumnRefState GetTopLevelColumnReferenceState() const {
+ return TopLevelColumnReferenceState;
+ }
+
+ TStringBuf GetMatchRecognizeDefineVar() const { // only valid while the state is EColumnRefState::MatchRecognize
+ YQL_ENSURE(EColumnRefState::MatchRecognize == ColumnReferenceState,
+ "DefineVar can only be accessed within processing of MATCH_RECOGNIZE lambdas");
+ return MatchRecognizeDefineVar;
+ }
+
+ TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos);
+ void WarnUnusedHints();
+
+ private:
+ IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos);
+
+ private:
+ NYql::TPosition Position; // updated by Token()
+ THolder<TStringOutput> IssueMsgHolder;
+ NSQLTranslation::TClusterMapping ClusterMapping;
+ TString PathPrefix;
+ THashMap<TString, TString> ProviderPathPrefixes;
+ THashMap<TString, TString> ClusterPathPrefixes;
+ bool IntoHeading = true;
+ NSQLTranslation::TSQLHints SQLHints;
+
+ friend class TColumnRefScope; // the scope guard saves and restores the four fields below
+
+ EColumnRefState ColumnReferenceState = EColumnRefState::Deny;
+ EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny;
+ TString MatchRecognizeDefineVar;
+ TString NoColumnErrorContext = "in current scope"; // tail of the "Column reference ... is not allowed" message
+ TVector<TBlocks*> CurrentBlocks;
+
+ public:
+ THashMap<TString, std::pair<TPosition, TNodePtr>> Variables;
+ THashSet<TString> WeakVariables;
+ NSQLTranslation::TTranslationSettings Settings;
+ std::unique_ptr<TMemoryPool> Pool;
+ NYql::TIssues& Issues;
+ TMap<TString, TNodePtr> UniversalAliases;
+ THashSet<TString> Exports;
+ THashMap<TString, TString> ImportModuleAliases;
+ THashMap<TString, TString> RequiredModules;
+ TMap<TString, TString> SimpleUdfs;
+ NSQLTranslation::TIncrementMonCounterFunction IncrementMonCounterFunction;
+ TScopedStatePtr Scoped; // state of the current scope
+ int ScopeLevel = 0;
+ size_t AnonymousNameIndex = 0; // suffix counter for generated "$_yql_anonymous_name_<N>" names
+ TDeque<TScopedStatePtr> AllScopes;
+ bool HasPendingErrors; // NOTE(review): no default member initializer here; presumably set by the constructor - confirm
+ THashMap<TString, ui32> GenIndexes;
+ using TWinSpecsRef = std::reference_wrapper<TWinSpecs>;
+ TDeque<TWinSpecsRef> WinSpecsScopes;
+ bool PragmaRefSelect = false;
+ bool PragmaSampleSelect = false;
+ bool PragmaAllowDotInAlias = false;
+ bool PragmaInferSchema = false;
+ bool PragmaAutoCommit = false;
+ bool PragmaUseTablePrefixForEach = false;
+ bool SimpleColumns = true;
+ bool CoalesceJoinKeysOnQualifiedAll = false;
+ bool PragmaDirectRead = false;
+ bool PragmaYsonFast = true;
+ bool PragmaYsonAutoConvert = false;
+ bool PragmaYsonStrict = true;
+ bool PragmaRegexUseRe2 = true;
+ bool PragmaPullUpFlatMapOverJoin = true;
+ bool FilterPushdownOverJoinOptionalSide = false;
+ bool RotateJoinTree = true;
+ bool WarnUnnamedColumns = false;
+ bool DiscoveryMode = false;
+ bool EnableSystemColumns = true;
+ bool DqEngineEnable = false;
+ bool DqEngineForce = false;
+ TString CostBasedOptimizer;
+ TMaybe<bool> JsonQueryReturnsJsonDocument;
+ TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections;
+ TMaybe<bool> AnsiRankForNullableKeys = true;
+ const bool AnsiQuotedIdentifiers;
+ bool AnsiOptionalAs = true;
+ bool OrderedColumns = false;
+ bool PositionalUnionAll = false;
+ bool BogousStarInGroupByOverJoin = false;
+ bool UnorderedSubqueries = true;
+ bool PragmaDataWatermarks = true;
+ bool WarnOnAnsiAliasShadowing = true;
+ ui32 ResultRowsLimit = 0;
+ ui64 ResultSizeLimit = 0;
+ ui32 PragmaGroupByLimit = 1 << 6; // i.e. 64
+ ui32 PragmaGroupByCubeLimit = 5;
+ // if FlexibleTypes=true, emit TypeOrMember callable and resolve Type/Column uncertainty on type annotation stage, otherwise always emit Type
+ bool FlexibleTypes = false;
+ // see YQL-10265
+ bool AnsiCurrentRow = false;
+ TMaybe<bool> YsonCastToString;
+ using TLiteralWithPosition = std::pair<TString, TPosition>;
+ using TLibraryStuff = std::tuple<TPosition, std::optional<TLiteralWithPosition>, std::optional<TLiteralWithPosition>>;
+ std::unordered_map<TString, TLibraryStuff> Libraries; // alias -> optional file with token
+ using TPackageStuff = std::tuple<
+ TPosition, TLiteralWithPosition,
+ std::optional<TLiteralWithPosition>
+ >;
+
+ std::unordered_map<TString, TPackageStuff> Packages; // alias -> url with optional token
+
+ using TOverrideLibraryStuff = std::tuple<TPosition>;
+ std::unordered_map<TString, TOverrideLibraryStuff> OverrideLibraries; // alias -> position
+
+ THashMap<TString, ui32> PackageVersions; // package name -> version, written by SetPackageVersion()
+ NYql::TWarningPolicy WarningPolicy;
+ TString PqReadByRtmrCluster;
+ bool EmitStartsWith = true;
+ TMaybe<bool> EmitAggApply;
+ bool UseBlocks = false;
+ bool AnsiLike = false;
+ bool FeatureR010 = false; //Row pattern recognition: FROM clause
+ TMaybe<bool> CompactGroupBy;
+ bool BlockEngineEnable = false;
+ bool BlockEngineForce = false;
+ bool UnorderedResult = false;
+ ui64 ParallelModeCount = 0;
+ bool CompactNamedExprs = false; // when set, TScopedState::WrapCluster() wraps cluster expressions in place instead of registering them
+ bool ValidateUnusedExprs = false;
+ bool AnsiImplicitCrossJoin = false; // select * from A,B
+ bool DistinctOverWindow = false;
+ };
+
+ // Scope guard for the column-reference settings of a TContext.
+ // The constructor remembers the current states, the "no column" error context
+ // and the MATCH_RECOGNIZE define variable, then installs the requested state;
+ // the destructor puts the remembered values back.
+ class TColumnRefScope {
+ public:
+     // `state` always becomes the current column-reference state; it also becomes
+     // the top-level state when `isTopLevelExpr` is true. A non-empty `defineVar`
+     // is accepted only together with EColumnRefState::MatchRecognize.
+     TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "")
+         : PrevTop(ctx.TopLevelColumnReferenceState)
+         , Prev(ctx.ColumnReferenceState)
+         , PrevErr(ctx.NoColumnErrorContext)
+         , PrevDefineVar(ctx.MatchRecognizeDefineVar)
+         , Ctx(ctx)
+     {
+         Ctx.ColumnReferenceState = state;
+         if (isTopLevelExpr) {
+             Ctx.TopLevelColumnReferenceState = state;
+         }
+         YQL_ENSURE(defineVar.empty() || EColumnRefState::MatchRecognize == state, "Internal logic error");
+         Ctx.MatchRecognizeDefineVar = defineVar;
+     }
+
+     // Replaces the tail of the "Column reference ... is not allowed" message
+     // for the lifetime of this guard.
+     void SetNoColumnErrContext(const TString& msg) {
+         Ctx.NoColumnErrorContext = msg;
+     }
+
+     ~TColumnRefScope() {
+         Ctx.TopLevelColumnReferenceState = PrevTop;
+         Ctx.ColumnReferenceState = Prev;
+         Ctx.NoColumnErrorContext.swap(PrevErr);
+         Ctx.MatchRecognizeDefineVar.swap(PrevDefineVar);
+     }
+ private:
+     const EColumnRefState PrevTop;
+     const EColumnRefState Prev;
+     TString PrevErr;
+     TString PrevDefineVar;
+     TContext& Ctx;
+ };
+
+ TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex); // column-reference state for argument `argIndex` of module::func; empty when there is no special rule
+
+ // Base for translation helpers: keeps a reference to the TContext and offers
+ // shortcuts for tokens, identifiers, named nodes and "alternative is not
+ // implemented" diagnostics.
+ class TTranslation {
+ protected:
+     using TSetType = TSet<ui32>;
+
+ protected:
+     TTranslation(TContext& ctx);
+
+ public:
+     TContext& Context();
+     IOutputStream& Error();
+
+     // Forwards to TContext::Token(), which also moves the context position to the token.
+     const TString& Token(const NSQLv1Generated::TToken& token) {
+         return Ctx.Token(token);
+     }
+
+     // Grammar-independent token id: lexer id shifted left by 16 bits plus the
+     // Antlr4Parser setting in the low bits (compare with the *_TOKEN macros).
+     ui32 UnifiedToken(ui32 id) const {
+         return Ctx.Settings.Antlr4Parser + (id << 16);
+     }
+
+     // Identifier content of a token; goes through Token(), so the position is updated too.
+     TString Identifier(const NSQLv1Generated::TToken& token) {
+         return IdContent(Ctx, Token(token));
+     }
+
+     // Identifier content of an already extracted string.
+     TString Identifier(const TString& str) const {
+         return IdContent(Ctx, str);
+     }
+
+     TNodePtr GetNamedNode(const TString& name);
+
+     // Builds the node to store; the argument is the effective (possibly generated) name.
+     using TNodeBuilderByName = std::function<TNodePtr(const TString& effectiveName)>;
+     TString PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder);
+     TString PushNamedNode(TPosition namePos, const TString& name, TNodePtr node);
+     TString PushNamedAtom(TPosition namePos, const TString& name);
+     void PopNamedNode(const TString& name);
+     void WarnUnusedNodes() const;
+
+     // Typed front-end: takes the alternative number and the descriptor from TNode.
+     template <typename TNode>
+     void AltNotImplemented(const TString& ruleName, const TNode& node) {
+         AltNotImplemented(ruleName, node.Alt_case(), node, TNode::descriptor());
+     }
+
+     // Typed front-end: description of the alternative currently set in `node`.
+     template <typename TNode>
+     TString AltDescription(const TNode& node) const {
+         return AltDescription(node, node.Alt_case(), TNode::descriptor());
+     }
+
+ protected:
+     void AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr);
+     TString AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const;
+
+ protected:
+     TContext& Ctx;
+ };
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp
new file mode 100644
index 00000000000..463c52ede46
--- /dev/null
+++ b/yql/essentials/sql/v1/format/sql_format.cpp
@@ -0,0 +1,3105 @@
+#include "sql_format.h"
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+#include <yql/essentials/core/sql_types/simple_types.h>
+
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
+
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+
+#include <library/cpp/protobuf/util/simple_reflection.h>
+#include <library/cpp/resource/resource.h>
+
+#include <util/string/builder.h>
+#include <util/string/split.h>
+#include <util/string/strip.h>
+#include <util/string/subst.h>
+#include <util/generic/hash_set.h>
+
+
+namespace NSQLFormat {
+
+namespace {
+
+using namespace NSQLv1Generated;
+
+using NSQLTranslation::TParsedToken;
+using NSQLTranslation::TParsedTokenList;
+using TTokenIterator = TParsedTokenList::const_iterator;
+
+// Returns the first position in [curr, end) whose token is not named "WS",
+// or `end` when only whitespace tokens remain.
+TTokenIterator SkipWS(TTokenIterator curr, TTokenIterator end) {
+    for (; curr != end; ++curr) {
+        if (curr->Name != "WS") {
+            break;
+        }
+    }
+    return curr;
+}
+
+// Returns the first position in [curr, end) whose token is neither "WS" nor
+// "COMMENT", or `end` when no such token remains.
+TTokenIterator SkipWSOrComment(TTokenIterator curr, TTokenIterator end) {
+    for (; curr != end; ++curr) {
+        if (curr->Name != "WS" && curr->Name != "COMMENT") {
+            break;
+        }
+    }
+    return curr;
+}
+
+// Checks that `formattedQuery` has the same tokens as `query` once "WS" tokens
+// are ignored. Token names must match. Content is compared case-insensitively
+// when the original content equals the token name ignoring ASCII case, and
+// exactly otherwise.
+bool Validate(const TParsedTokenList& query, const TParsedTokenList& formattedQuery) {
+    const auto sameToken = [](const auto& original, const auto& formatted) -> bool {
+        if (original.Name != formatted.Name) {
+            return false;
+        }
+        if (AsciiEqualsIgnoreCase(original.Name, original.Content)) {
+            return AsciiEqualsIgnoreCase(original.Content, formatted.Content);
+        }
+        return original.Content == formatted.Content;
+    };
+
+    const auto inEnd = query.end();
+    const auto outEnd = formattedQuery.end();
+    auto in = query.begin();
+    auto out = formattedQuery.begin();
+
+    for (;;) {
+        in = SkipWS(in, inEnd);
+        out = SkipWS(out, outEnd);
+        if (in == inEnd || out == outEnd) {
+            break;
+        }
+        if (!sameToken(*in, *out)) {
+            return false;
+        }
+        ++in;
+        ++out;
+    }
+    // Both sequences must be exhausted at the same time.
+    return in == inEnd && out == outEnd;
+}
+
+enum EParenType { // classification returned by an advance callback for the token(s) it consumed
+ Open, // an opening construct: SkipToNextBalanced() increases the nesting level
+ Close, // a closing construct: SkipToNextBalanced() decreases the nesting level
+ None // anything else
+};
+
+using TAdvanceCallback = std::function<EParenType(TTokenIterator& curr, TTokenIterator end)>; // must move `curr` forward past what it consumed
+
+// Walks tokens from `begin` using `advance` to classify and consume them, and
+// returns the position right after the closing construct that brings the
+// nesting level back to zero. Returns `end` when a closing construct appears
+// before any opening one, or when the input ends first.
+TTokenIterator SkipToNextBalanced(TTokenIterator begin, TTokenIterator end, const TAdvanceCallback& advance) {
+    i64 depth = 0;
+    for (TTokenIterator pos = begin; pos != end;) {
+        const EParenType kind = advance(pos, end);
+        if (kind == EParenType::Open) {
+            ++depth;
+            continue;
+        }
+        if (kind != EParenType::Close) {
+            continue;
+        }
+        --depth;
+        if (depth < 0) {
+            return end;
+        }
+        if (depth == 0) {
+            return pos;
+        }
+    }
+    return end;
+}
+
+// Returns the position right after the SEMICOLON that ends the statement
+// starting at `begin`, or `end` when there is no such semicolon.
+// Semicolons nested inside balanced constructs are skipped: `{ ... }`,
+// `DEFINE ACTION|SUBQUERY ... END DEFINE` and `DO BEGIN ... END DO`.
+TTokenIterator GetNextStatementBegin(TTokenIterator begin, TTokenIterator end) {
+    // `{` opens, `}` closes; always consumes exactly one token.
+    TAdvanceCallback advanceLambdaBody = [](TTokenIterator& curr, TTokenIterator end) -> EParenType {
+        Y_UNUSED(end);
+        if (curr->Name == "LBRACE_CURLY") {
+            ++curr;
+            return EParenType::Open;
+        } else if (curr->Name == "RBRACE_CURLY") {
+            ++curr;
+            return EParenType::Close;
+        } else {
+            ++curr;
+            return EParenType::None;
+        }
+    };
+
+    // `DEFINE ACTION` / `DEFINE SUBQUERY` opens, `END DEFINE` closes (whitespace
+    // and comments may separate the two keywords). Otherwise one token is consumed.
+    TAdvanceCallback advanceAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType {
+        auto tmp = curr;
+        if (curr->Name == "DEFINE") {
+            ++curr;
+            curr = SkipWSOrComment(curr, end);
+            if (curr != end && (curr->Name == "ACTION" || curr->Name == "SUBQUERY")) {
+                ++curr;
+                return EParenType::Open;
+            }
+        } else if (curr->Name == "END") {
+            ++curr;
+            curr = SkipWSOrComment(curr, end);
+            if (curr != end && curr->Name == "DEFINE") {
+                ++curr;
+                return EParenType::Close;
+            }
+        }
+
+        // not a keyword pair: rewind and step over a single token
+        curr = tmp;
+        ++curr;
+        return EParenType::None;
+    };
+
+    // `DO BEGIN` opens, `END DO` closes; otherwise one token is consumed.
+    TAdvanceCallback advanceInlineAction = [](TTokenIterator& curr, TTokenIterator end) -> EParenType {
+        auto tmp = curr;
+        if (curr->Name == "DO") {
+            ++curr;
+            curr = SkipWSOrComment(curr, end);
+            if (curr != end && curr->Name == "BEGIN") {
+                ++curr;
+                return EParenType::Open;
+            }
+        } else if (curr->Name == "END") {
+            ++curr;
+            curr = SkipWSOrComment(curr, end);
+            if (curr != end && curr->Name == "DO") {
+                ++curr;
+                return EParenType::Close;
+            }
+        }
+
+        // not a keyword pair: rewind and step over a single token
+        curr = tmp;
+        ++curr;
+        return EParenType::None;
+    };
+
+    // Built once and iterated by pointer, so that no std::function object is
+    // copied while scanning tokens.
+    const TAdvanceCallback* const advancers[] = {&advanceLambdaBody, &advanceAction, &advanceInlineAction};
+
+    TTokenIterator curr = begin;
+    while (curr != end) {
+        bool matched = false;
+        for (const TAdvanceCallback* advancer : advancers) {
+            // probe on a copy: the callback moves the iterator it is given
+            TTokenIterator probe = curr;
+            if ((*advancer)(probe, end) == EParenType::Open) {
+                curr = SkipToNextBalanced(curr, end, *advancer);
+                matched = true;
+                if (curr == end) {
+                    return curr;
+                }
+            }
+        }
+        if (matched) {
+            continue;
+        }
+        if (curr->Name == "SEMICOLON") {
+            ++curr;
+            break;
+        }
+        ++curr;
+    }
+
+    return curr;
+}
+
+// Fills `output` with statement boundaries of [begin, end): the first element
+// is `begin`, each following element is the start of the next statement and
+// the last one is `end`. For an empty range `output` is left empty.
+void SplitByStatements(TTokenIterator begin, TTokenIterator end, TVector<TTokenIterator>& output) {
+    output.clear();
+    if (begin == end) {
+        return;
+    }
+    output.push_back(begin);
+    for (auto boundary = begin; boundary != end;) {
+        boundary = GetNextStatementBegin(boundary, end);
+        output.push_back(boundary);
+    }
+}
+
+enum class EScope { // kind of grammar scope a visitor is currently inside (looked up per message descriptor in TStaticData::ScopeDispatch)
+ Default, // pushed by the visitors' Process() before visiting
+ TypeName,
+ Identifier, // TObfuscatingVisitor replaces tokens in this scope with "id" (outside function calls)
+ DoubleQuestion
+};
+
+class TPrettyVisitor;
+using TPrettyFunctor = std::function<void(TPrettyVisitor&, const NProtoBuf::Message& msg)>; // custom visit routine of TPrettyVisitor for one message type
+class TObfuscatingVisitor;
+using TObfuscatingFunctor = std::function<void(TObfuscatingVisitor&, const NProtoBuf::Message& msg)>; // custom visit routine of TObfuscatingVisitor for one message type
+
+struct TStaticData { // lookup tables shared by the visitors; obtained through GetInstance()
+ TStaticData();
+ static const TStaticData& GetInstance() {
+ return *Singleton<TStaticData>();
+ }
+
+ THashSet<TString> Keywords;
+ THashMap<const NProtoBuf::Descriptor*, EScope> ScopeDispatch; // message type -> scope pushed while that message is visited
+ THashMap<const NProtoBuf::Descriptor*, TPrettyFunctor> PrettyVisitDispatch;
+ THashMap<const NProtoBuf::Descriptor*, TObfuscatingFunctor> ObfuscatingVisitDispatch; // message type -> custom handler; other types get the generic field walk
+};
+
+// Calls (obj->*Func) for every message-typed field value of `msg`, going through
+// the fields in the order given by `descr` and, for a repeated field, through
+// its values in index order. Non-message fields are ignored.
+template <typename T, void (T::*Func)(const NProtoBuf::Message&)>
+void VisitAllFieldsImpl(T* obj, const NProtoBuf::Descriptor* descr, const NProtoBuf::Message& msg) {
+    for (int fieldIdx = 0; fieldIdx < descr->field_count(); ++fieldIdx) {
+        NProtoBuf::TConstField field(msg, descr->field(fieldIdx));
+        if (!field.IsMessage()) {
+            continue;
+        }
+        for (size_t valueIdx = 0; valueIdx < field.Size(); ++valueIdx) {
+            const auto* child = field.template Get<NProtoBuf::Message>(valueIdx);
+            (obj->*Func)(*child);
+        }
+    }
+}
+
+// Turns a parsed statement into a single-space separated token string in which
+// identifiers are replaced by "id" and literal/pragma values by fixed
+// placeholders ("0", "0.0", "'str'", "false").
+class TObfuscatingVisitor {
+friend struct TStaticData;
+public:
+    TObfuscatingVisitor()
+        : StaticData(TStaticData::GetInstance())
+    {}
+
+    // Visits `msg` starting in the default scope and returns the text produced.
+    TString Process(const NProtoBuf::Message& msg) {
+        Scopes.push_back(EScope::Default);
+        Visit(msg);
+        return SB;
+    }
+
+private:
+    // Emits one token, preceded by a space unless it is the first one.
+    void VisitToken(const TToken& token) {
+        const auto& str = token.GetValue();
+        if (str == "<EOF>") {
+            return;
+        }
+
+        if (First) {
+            First = false;
+        } else {
+            SB << ' ';
+        }
+
+        // a "$" token cancels the pending function-call mode
+        if (FuncCall && str == "$") {
+            FuncCall = false;
+        }
+
+        const bool inIdentifier = (Scopes.back() == EScope::Identifier) && !FuncCall;
+        if (inIdentifier) {
+            // "$" and simple type aliases stay readable, any other identifier is hidden
+            const bool keepVerbatim = str == "$" || NYql::LookupSimpleTypeBySqlAlias(str, true);
+            if (keepVerbatim) {
+                SB << str;
+            } else {
+                SB << "id";
+            }
+        } else if (NextToken) {
+            // placeholder prepared by VisitPragmaValue() / VisitLiteralValue()
+            SB << *NextToken;
+            NextToken = Nothing();
+        } else {
+            SB << str;
+        }
+    }
+
+    // Prepares the placeholder for alternatives 1, 3 and 4 of pragma_value.
+    void VisitPragmaValue(const TRule_pragma_value& msg) {
+        switch (msg.Alt_case()) {
+            case TRule_pragma_value::kAltPragmaValue1:
+                NextToken = "0";
+                break;
+            case TRule_pragma_value::kAltPragmaValue3:
+                NextToken = "'str'";
+                break;
+            case TRule_pragma_value::kAltPragmaValue4:
+                NextToken = "false";
+                break;
+            default:
+                break;
+        }
+        VisitAllFields(TRule_pragma_value::GetDescriptor(), msg);
+    }
+
+    // Prepares the placeholder for alternatives 1, 2, 3 and 9 of literal_value.
+    void VisitLiteralValue(const TRule_literal_value& msg) {
+        switch (msg.Alt_case()) {
+            case TRule_literal_value::kAltLiteralValue1:
+                NextToken = "0";
+                break;
+            case TRule_literal_value::kAltLiteralValue2:
+                NextToken = "0.0";
+                break;
+            case TRule_literal_value::kAltLiteralValue3:
+                NextToken = "'str'";
+                break;
+            case TRule_literal_value::kAltLiteralValue9:
+                NextToken = "false";
+                break;
+            default:
+                break;
+        }
+
+        VisitAllFields(TRule_literal_value::GetDescriptor(), msg);
+    }
+
+    // Alternative 7 of atom_expr is visited in function-call mode; the mode is
+    // always off afterwards.
+    void VisitAtomExpr(const TRule_atom_expr& msg) {
+        if (msg.Alt_case() == TRule_atom_expr::kAltAtomExpr7) {
+            FuncCall = true;
+        }
+
+        VisitAllFields(TRule_atom_expr::GetDescriptor(), msg);
+        FuncCall = false;
+    }
+
+    // Alternative 6 of in_atom_expr is visited in function-call mode; the mode
+    // is always off afterwards.
+    void VisitInAtomExpr(const TRule_in_atom_expr& msg) {
+        if (msg.Alt_case() == TRule_in_atom_expr::kAltInAtomExpr6) {
+            FuncCall = true;
+        }
+
+        VisitAllFields(TRule_in_atom_expr::GetDescriptor(), msg);
+        FuncCall = false;
+    }
+
+    void VisitUnaryCasualSubexpr(const TRule_unary_casual_subexpr& msg) {
+        VisitCasualSubexprImpl(msg);
+    }
+
+    void VisitInUnaryCasualSubexpr(const TRule_in_unary_casual_subexpr& msg) {
+        VisitCasualSubexprImpl(msg);
+    }
+
+    // Shared body of the two casual-subexpr visitors. Only the first suffix
+    // block is inspected: when it is alternative 2, the head (Block1) is visited
+    // in function-call mode, which is switched off again before the suffix
+    // itself is visited.
+    template <typename TSubexpr>
+    void VisitCasualSubexprImpl(const TSubexpr& msg) {
+        const auto& suffixBlocks = msg.GetRule_unary_subexpr_suffix2().GetBlock1();
+        const bool invoke = !suffixBlocks.empty()
+            && suffixBlocks.begin()->GetBlock1().Alt_case() == TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2;
+
+        if (invoke) {
+            FuncCall = true;
+        }
+
+        Visit(msg.GetBlock1());
+        if (invoke) {
+            FuncCall = false;
+        }
+
+        Visit(msg.GetRule_unary_subexpr_suffix2());
+    }
+
+    // Visits one message: enters the scope registered for its type (if any),
+    // runs the custom handler or the generic field walk, then leaves the scope.
+    void Visit(const NProtoBuf::Message& msg) {
+        const NProtoBuf::Descriptor* descr = msg.GetDescriptor();
+        const auto scopePtr = StaticData.ScopeDispatch.FindPtr(descr);
+        if (scopePtr) {
+            Scopes.push_back(*scopePtr);
+        }
+
+        if (const auto funcPtr = StaticData.ObfuscatingVisitDispatch.FindPtr(descr)) {
+            (*funcPtr)(*this, msg);
+        } else {
+            VisitAllFields(descr, msg);
+        }
+
+        if (scopePtr) {
+            Scopes.pop_back();
+        }
+    }
+
+    void VisitAllFields(const NProtoBuf::Descriptor* descr, const NProtoBuf::Message& msg) {
+        VisitAllFieldsImpl<TObfuscatingVisitor, &TObfuscatingVisitor::Visit>(this, descr, msg);
+    }
+
+    const TStaticData& StaticData;
+    TStringBuilder SB;              // output accumulated so far
+    bool First = true;              // no token has been written yet
+    TMaybe<TString> NextToken;      // placeholder to write instead of the next plain token
+    TVector<EScope> Scopes;         // stack of entered scopes
+    bool FuncCall = false;          // identifiers are written verbatim while set
+};
+
+class TPrettyVisitor {
+friend struct TStaticData;
+public:
+ // `parsedTokens` are the tokens of the statement being formatted, `comments`
+ // the comment tokens that Process() writes out.
+ TPrettyVisitor(const TParsedTokenList& parsedTokens, const TParsedTokenList& comments)
+     : StaticData(TStaticData::GetInstance())
+     , ParsedTokens(parsedTokens)
+     , Comments(comments)
+ {}
+
+ // Formats `msg` and returns the text. Works in two passes over the message:
+ // MarkTokens() first, then Visit(); each pass must consume exactly
+ // ParsedTokens.size() tokens and leave MarkTokenStack empty. Comments not yet
+ // written are appended at the end. `addLine` receives AddLine, or true when
+ // AddLine was never set.
+ TString Process(const NProtoBuf::Message& msg, bool& addLine) {
+     Scopes.push_back(EScope::Default);
+     MarkedTokens.reserve(ParsedTokens.size());
+
+     // pass 1: collect per-token marks
+     MarkTokens(msg);
+     Y_ENSURE(MarkTokenStack.empty());
+     Y_ENSURE(TokenIndex == ParsedTokens.size());
+
+     // pass 2: produce the output
+     TokenIndex = 0;
+     Visit(msg);
+     Y_ENSURE(TokenIndex == ParsedTokens.size());
+     Y_ENSURE(MarkTokenStack.empty());
+
+     while (LastComment < Comments.size()) {
+         const auto text = Comments[LastComment].Content;
+         AddComment(text);
+         ++LastComment;
+     }
+     addLine = AddLine.GetOrElse(true);
+
+     return SB;
+ }
+
+private:
+ struct TTokenInfo { // marks stored per token in MarkedTokens
+ bool OpeningBracket = false; // cleared by MarkTokens() for the "{" that precedes a lambda body
+ bool ClosingBracket = false;
+ bool BracketForcedExpansion = false;
+ ui32 ClosingBracketIndex = 0; // NOTE(review): presumably the index of the matching closing bracket token - confirm
+ };
+
+ using TMarkTokenStack = TVector<ui32>;
+
+ // Writes `s` character by character; only the first character may trigger
+ // indentation.
+ void Out(TStringBuf s) {
+     bool firstChar = true;
+     for (const char c : s) {
+         Out(c, firstChar);
+         firstChar = false;
+     }
+ }
+
+ // Writes a single character and keeps OutLine / OutColumn up to date.
+ //
+ // Line breaks ('\n' or '\r') reset OutColumn. "\r", "\n" and "\r\n" each
+ // count as ONE new line, so the '\n' of a "\r\n" pair does not bump OutLine.
+ // Any other character is preceded by CurrentIndent spaces when it starts a
+ // line and useIndent is true; a negative CurrentIndent is treated as 0.
+ void Out(char c, bool useIndent = true) {
+     if (c == '\n' || c == '\r') {
+         // This has to be evaluated BEFORE c is appended: afterwards
+         // SB.back() is c itself and the "\r\n" case could never be detected.
+         const bool completesCrLf = (c == '\n' && !SB.empty() && SB.back() == '\r');
+         SB << c;
+         if (!completesCrLf) {
+             ++OutLine;
+         }
+         OutColumn = 0;
+         return;
+     }
+
+     if (!OutColumn && useIndent) {
+         const ui32 indent = (CurrentIndent >= 0) ? CurrentIndent : 0;
+         for (ui32 i = 0; i < indent; ++i) {
+             SB << ' ';
+         }
+     }
+
+     SB << c;
+     ++OutColumn;
+ }
+
+ // Ends the current output line; does nothing when the line is still empty.
+ void NewLine() {
+     if (OutColumn == 0) {
+         return;
+     }
+
+     Out('\n');
+ }
+
+ // Appends a comment to the output. A "--" comment that would directly follow
+ // a '-' already in the buffer is separated from it by a space first.
+ void AddComment(TStringBuf text) {
+     const bool touchesDash = text.StartsWith("--") && !SB.empty() && SB.back() == '-';
+     if (touchesDash) {
+         Out(' ');
+     }
+
+     Out(text);
+ }
+
+ // First pass over the parse tree: walks every message recursively and
+ // records bracket information for each token via MarkToken().
+ //
+ // Along the way it maintains:
+ //  - Scopes / InsideType from the ScopeDispatch table;
+ //  - InsideExpr, the nesting depth of expr / in_expr rules, which is
+ //    temporarily reset to 0 below certain rules;
+ //  - ForceExpandedLine / ForceExpandedColumn, the position PopBracket()
+ //    compares bracket openers against;
+ //  - AddLine, decided once from the first sql_stmt_core encountered.
+ void MarkTokens(const NProtoBuf::Message& msg) {
+     const NProtoBuf::Descriptor* const descriptor = msg.GetDescriptor();
+     const auto scope = StaticData.ScopeDispatch.FindPtr(descriptor);
+     if (scope) {
+         if (*scope == EScope::TypeName) {
+             ++InsideType;
+         }
+
+         Scopes.push_back(*scope);
+     }
+
+     // Remembers the position of an anchor token as the forced expansion point.
+     const auto forceExpandAt = [this](const TToken& anchor) {
+         ForceExpandedColumn = anchor.GetColumn();
+         ForceExpandedLine = anchor.GetLine();
+     };
+
+     bool resetExprDepth = false;
+     if (descriptor == TToken::GetDescriptor()) {
+         MarkToken(dynamic_cast<const TToken&>(msg));
+     } else if (descriptor == TRule_sql_stmt_core::GetDescriptor()) {
+         if (AddLine.Empty()) {
+             const auto& stmt = dynamic_cast<const TRule_sql_stmt_core&>(msg);
+             AddLine = !IsSimpleStatement(stmt).GetOrElse(false);
+         }
+     } else if (descriptor == TRule_lambda_body::GetDescriptor()) {
+         // The token right before a lambda body must be its "{"; it is not
+         // treated as an expression bracket.
+         Y_ENSURE(TokenIndex >= 1);
+         const auto braceIndex = TokenIndex - 1;
+         Y_ENSURE(braceIndex < ParsedTokens.size());
+         const auto& brace = ParsedTokens[braceIndex];
+         Y_ENSURE(brace.Content == "{");
+         MarkedTokens[braceIndex].OpeningBracket = false;
+         ForceExpandedColumn = brace.LinePos;
+         ForceExpandedLine = brace.Line;
+     } else if (descriptor == TRule_in_atom_expr::GetDescriptor()) {
+         const auto& atom = dynamic_cast<const TRule_in_atom_expr&>(msg);
+         resetExprDepth = (atom.Alt_case() == TRule_in_atom_expr::kAltInAtomExpr7);
+     } else if (descriptor == TRule_select_kind_parenthesis::GetDescriptor()) {
+         const auto& select = dynamic_cast<const TRule_select_kind_parenthesis&>(msg);
+         resetExprDepth = (select.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis2);
+     } else if (descriptor == TRule_window_specification::GetDescriptor()) {
+         const auto& window = dynamic_cast<const TRule_window_specification&>(msg);
+         const auto& details = window.GetRule_window_specification_details2();
+         // Only a non-empty specification forces expansion.
+         if (details.HasBlock1() || details.HasBlock2() || details.HasBlock3() || details.HasBlock4()) {
+             forceExpandAt(window.GetToken1());
+         }
+         resetExprDepth = true;
+     } else if (descriptor == TRule_exists_expr::GetDescriptor()) {
+         forceExpandAt(dynamic_cast<const TRule_exists_expr&>(msg).GetToken2());
+         resetExprDepth = true;
+     } else if (descriptor == TRule_case_expr::GetDescriptor()) {
+         forceExpandAt(dynamic_cast<const TRule_case_expr&>(msg).GetToken1());
+     }
+
+     const bool isExpr = descriptor == TRule_expr::GetDescriptor()
+         || descriptor == TRule_in_expr::GetDescriptor();
+     if (isExpr) {
+         ++InsideExpr;
+     }
+
+     // Saved after the increment so that the restore below keeps it.
+     const ui64 savedExprDepth = InsideExpr;
+     if (resetExprDepth) {
+         InsideExpr = 0;
+     }
+
+     VisitAllFieldsImpl<TPrettyVisitor, &TPrettyVisitor::MarkTokens>(this, descriptor, msg);
+     if (resetExprDepth) {
+         InsideExpr = savedExprDepth;
+     }
+
+     if (scope) {
+         if (*scope == EScope::TypeName) {
+             --InsideType;
+         }
+
+         Scopes.pop_back();
+     }
+
+     if (isExpr) {
+         --InsideExpr;
+     }
+ }
+
+ // Registers one token in MarkedTokens and advances TokenIndex. "<EOF>" is
+ // ignored entirely. Opening brackets are pushed on MarkTokenStack; closing
+ // brackets are matched through PopBracket(). "<" and ">" count as brackets
+ // only while InsideType is non-zero.
+ void MarkToken(const TToken& token) {
+     const auto& text = token.GetValue();
+     if (text == "<EOF>") {
+         return;
+     }
+
+     MarkedTokens.emplace_back();
+     const bool opensBracket = text == "(" || text == "[" || text == "{" || text == "<|"
+         || (InsideType && text == "<");
+     if (opensBracket) {
+         MarkTokenStack.push_back(TokenIndex);
+         MarkedTokens[TokenIndex].OpeningBracket = (InsideExpr > 0);
+     } else if (text == ")") {
+         PopBracket("(");
+     } else if (text == "]") {
+         PopBracket("[");
+     } else if (text == "}") {
+         PopBracket("{");
+     } else if (text == "|>") {
+         PopBracket("<|");
+     } else if (InsideType && text == ">") {
+         PopBracket("<");
+     }
+
+     ++TokenIndex;
+ }
+
+ // Matches the closing bracket at TokenIndex with the innermost open bracket,
+ // which must have the text `expected`. If the opener was marked as an
+ // expression bracket, both ends are linked and flagged; the forced-expansion
+ // flag is set when the opener is on ForceExpandedLine at or before
+ // ForceExpandedColumn.
+ void PopBracket(const TString& expected) {
+     Y_ENSURE(!MarkTokenStack.empty());
+     const auto openIndex = MarkTokenStack.back();
+     Y_ENSURE(openIndex < ParsedTokens.size());
+     const auto& opener = ParsedTokens[openIndex];
+     Y_ENSURE(opener.Content == expected);
+
+     auto& openInfo = MarkedTokens[openIndex];
+     if (openInfo.OpeningBracket) {
+         const bool forced = opener.Line == ForceExpandedLine && opener.LinePos <= ForceExpandedColumn;
+         auto& closeInfo = MarkedTokens[TokenIndex];
+         openInfo.ClosingBracketIndex = TokenIndex;
+         openInfo.BracketForcedExpansion = forced;
+         closeInfo.BracketForcedExpansion = forced;
+         closeInfo.ClosingBracket = true;
+     }
+
+     MarkTokenStack.pop_back();
+ }
+
+ // Second-pass dispatcher: raises the AfterInvokeExpr / AfterUnaryOp flags
+ // for the corresponding rules, enters the scope registered for the message
+ // (tracking InsideType), runs the dedicated handler from PrettyVisitDispatch
+ // or the generic field walk, and finally leaves the scope.
+ void Visit(const NProtoBuf::Message& msg) {
+     const NProtoBuf::Descriptor* const descriptor = msg.GetDescriptor();
+     // Debug aid: Cerr << descriptor->name() << "\n";
+     const auto scope = StaticData.ScopeDispatch.FindPtr(descriptor);
+     if (descriptor == TRule_invoke_expr::GetDescriptor()) {
+         AfterInvokeExpr = true;
+     }
+
+     if (descriptor == TRule_unary_op::GetDescriptor()) {
+         AfterUnaryOp = true;
+     }
+
+     const bool isTypeName = scope && *scope == EScope::TypeName;
+     if (scope) {
+         if (isTypeName) {
+             ++InsideType;
+         }
+
+         Scopes.push_back(*scope);
+     }
+
+     if (const auto handler = StaticData.PrettyVisitDispatch.FindPtr(descriptor)) {
+         (*handler)(*this, msg);
+     } else {
+         VisitAllFields(descriptor, msg);
+     }
+
+     if (scope) {
+         if (isTypeName) {
+             --InsideType;
+         }
+
+         Scopes.pop_back();
+     }
+ }
+
+ // Classifies a statement by its grammar alternative:
+ //   true  - listed one-line statements, plus named-node and DO statements
+ //           that use their first alternative;
+ //   false - IF and FOR;
+ //   empty - everything else (callers pick their own default).
+ TMaybe<bool> IsSimpleStatement(const TRule_sql_stmt_core& msg) {
+     using TCore = TRule_sql_stmt_core;
+     switch (msg.Alt_case()) {
+         case TCore::kAltSqlStmtCore1:  // pragma
+         case TCore::kAltSqlStmtCore5:  // drop table
+         case TCore::kAltSqlStmtCore6:  // use
+         case TCore::kAltSqlStmtCore8:  // commit
+         case TCore::kAltSqlStmtCore11: // rollback
+         case TCore::kAltSqlStmtCore12: // declare
+         case TCore::kAltSqlStmtCore13: // import
+         case TCore::kAltSqlStmtCore14: // export
+         case TCore::kAltSqlStmtCore32: // drop external data source
+         case TCore::kAltSqlStmtCore34: // drop replication
+             return true;
+         case TCore::kAltSqlStmtCore3: { // named nodes
+             const auto& named = msg.GetAlt_sql_stmt_core3().GetRule_named_nodes_stmt1();
+             if (named.GetBlock3().HasAlt1()) {
+                 return true;
+             }
+             return {};
+         }
+         case TCore::kAltSqlStmtCore17: { // do
+             const auto& doStmt = msg.GetAlt_sql_stmt_core17().GetRule_do_stmt1();
+             if (doStmt.GetBlock2().HasAlt1()) {
+                 return true;
+             }
+             return {};
+         }
+         case TCore::kAltSqlStmtCore19: // if
+         case TCore::kAltSqlStmtCore20: // for
+             return false;
+         default:
+             return {};
+     }
+ }
+
+ // Visits every element of a repeated protobuf field, in order.
+ template <typename T>
+ void VisitRepeated(const ::google::protobuf::RepeatedPtrField<T>& field) {
+     for (auto it = field.begin(); it != field.end(); ++it) {
+         Visit(*it);
+     }
+ }
+
+ // Handler for define_action_or_subquery_body: visits the leading repeated
+ // block, then (if present) the first statement followed by the remaining
+ // ones. A raw '\n' is written before every following statement that
+ // IsSimpleStatement() does not classify as simple.
+ void VisitDefineActionOrSubqueryBody(const TRule_define_action_or_subquery_body& msg) {
+     VisitRepeated(msg.GetBlock1());
+     if (!msg.HasBlock2()) {
+         return;
+     }
+
+     const auto& b = msg.GetBlock2();
+     Visit(b.GetRule_sql_stmt_core1());
+     // Iterate by const reference: the elements are protobuf messages, and
+     // taking them by value deep-copied a whole statement per iteration.
+     for (const auto& block : b.GetBlock2()) {
+         VisitRepeated(block.GetBlock1());
+         if (!IsSimpleStatement(block.GetRule_sql_stmt_core2()).GetOrElse(false)) {
+             Out('\n');
+         }
+         Visit(block.GetRule_sql_stmt_core2());
+     }
+
+     VisitRepeated(b.GetBlock3());
+ }
+
+ // Handler for pragma_stmt: starts a fresh line, emits the leading token as a
+ // keyword, an optional "prefix." part, the pragma name, and then either the
+ // parenthesised value list (second alternative of block 4) or block 4 as is.
+ void VisitPragma(const TRule_pragma_stmt& msg) {
+     PosFromToken(msg.GetToken1());
+     NewLine();
+     VisitKeyword(msg.GetToken1());
+     // Bound by const reference: copying here duplicated a protobuf
+     // sub-message for no benefit.
+     const auto& prefix = msg.GetRule_opt_id_prefix_or_type2();
+     if (prefix.HasBlock1()) {
+         Visit(prefix.GetBlock1().GetRule_an_id_or_type1());
+         VisitKeyword(prefix.GetBlock1().GetToken2());
+         AfterDot = true;
+     }
+
+     Visit(msg.GetRule_an_id3());
+     if (msg.GetBlock4().HasAlt2()) {
+         // Same flag as after an invoke expression: it suppresses the space
+         // before the next token (see VisitTokenImpl).
+         AfterInvokeExpr = true;
+         const auto& alt2 = msg.GetBlock4().GetAlt2();
+         VisitKeyword(alt2.GetToken1());
+         Visit(alt2.GetRule_pragma_value2());
+         VisitRepeated(alt2.GetBlock3());
+         VisitKeyword(alt2.GetToken4());
+     } else {
+         Visit(msg.GetBlock4());
+     }
+ }
+
+ // Moves the comment cursor to the first token of a select_kind_partial: the
+ // DISCARD token when present, otherwise the leading token of the process /
+ // reduce / select core. Throws for an unknown alternative.
+ void PosFromPartial(const TRule_select_kind_partial& partial) {
+     const auto& kind = partial.GetRule_select_kind1();
+     if (kind.HasBlock1()) { // DISCARD
+         PosFromToken(kind.GetBlock1().GetToken1());
+         return;
+     }
+
+     const auto& core = kind.GetBlock2();
+     switch (core.Alt_case()) {
+         case TRule_select_kind_TBlock2::kAlt1:
+             PosFromToken(core.GetAlt1().GetRule_process_core1().GetToken1());
+             return;
+         case TRule_select_kind_TBlock2::kAlt2:
+             PosFromToken(core.GetAlt2().GetRule_reduce_core1().GetToken1());
+             return;
+         case TRule_select_kind_TBlock2::kAlt3: {
+             const auto& selCore = core.GetAlt3().GetRule_select_core1();
+             PosFromToken(selCore.HasBlock1() ? selCore.GetBlock1().GetToken1() : selCore.GetToken2());
+             return;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Handler for select_stmt: positions the comment cursor on the first token
+ // of the first select, then emits that select and every following
+ // "operator / select" pair, each part starting on its own line.
+ void VisitSelect(const TRule_select_stmt& msg) {
+     const auto& head = msg.GetRule_select_kind_parenthesis1();
+     const bool unparenthesized =
+         head.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis1;
+     if (unparenthesized) {
+         PosFromPartial(head.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1());
+     } else {
+         PosFromToken(head.GetAlt_select_kind_parenthesis2().GetToken1());
+     }
+
+     NewLine();
+     Visit(head);
+     for (const auto& next : msg.GetBlock2()) {
+         NewLine();
+         Visit(next.GetRule_select_op1());
+         NewLine();
+         Visit(next.GetRule_select_kind_parenthesis2());
+     }
+ }
+
+ // Handler for select_unparenthesized_stmt: same layout as VisitSelect(), but
+ // the first select is always a select_kind_partial.
+ void VisitSelectUnparenthesized(const TRule_select_unparenthesized_stmt& msg) {
+     const auto& head = msg.GetRule_select_kind_partial1();
+     PosFromPartial(head);
+     NewLine();
+     Visit(head);
+     for (const auto& next : msg.GetBlock2()) {
+         NewLine();
+         Visit(next.GetRule_select_op1());
+         NewLine();
+         Visit(next.GetRule_select_kind_parenthesis2());
+     }
+ }
+
+ // Handler for named_nodes_stmt: emits the bind parameter list and the token
+ // that follows it, then the right-hand side. The first alternative of the
+ // right-hand side is emitted inline; a sub-select is emitted on its own
+ // lines with one extra indent level (inside its brackets when it has them).
+ // Throws for an unknown alternative.
+ void VisitNamedNodes(const TRule_named_nodes_stmt& msg) {
+     const auto& params = msg.GetRule_bind_parameter_list1();
+     PosFromToken(params.GetRule_bind_parameter1().GetToken1());
+     NewLine();
+     Visit(params);
+     Visit(msg.GetToken2());
+
+     const auto& rhs = msg.GetBlock3();
+     if (rhs.Alt_case() == TRule_named_nodes_stmt::TBlock3::kAlt1) {
+         Visit(rhs.GetAlt1());
+         return;
+     }
+
+     if (rhs.Alt_case() != TRule_named_nodes_stmt::TBlock3::kAlt2) {
+         ythrow yexception() << "Alt is not supported";
+     }
+
+     const auto& source = rhs.GetAlt2().GetRule_subselect_stmt1().GetBlock1();
+     switch (source.Alt_case()) {
+         case TRule_subselect_stmt::TBlock1::kAlt1: {
+             const auto& bracketed = source.GetAlt1();
+             Visit(bracketed.GetToken1());
+             NewLine();
+             PushCurrentIndent();
+             Visit(bracketed.GetRule_select_stmt2());
+             PopCurrentIndent();
+             NewLine();
+             Visit(bracketed.GetToken3());
+             break;
+         }
+
+         case TRule_subselect_stmt::TBlock1::kAlt2: {
+             const auto& bare = source.GetAlt2();
+             NewLine();
+             PushCurrentIndent();
+             Visit(bare);
+             PopCurrentIndent();
+             break;
+         }
+
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Handler for create_table_stmt: the header goes on one line, every table
+ // entry on its own line with one extra indent level, the token after the
+ // entries back at the outer level, and each optional trailing block
+ // (11..15) on a separate line.
+ void VisitCreateTable(const TRule_create_table_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     Visit(lead);
+     Visit(msg.GetBlock2());
+     Visit(msg.GetBlock3());
+     Visit(msg.GetBlock4());
+     Visit(msg.GetRule_simple_table_ref5());
+     Visit(msg.GetToken6());
+
+     PushCurrentIndent();
+     NewLine();
+     Visit(msg.GetRule_create_table_entry7());
+     for (const auto& more : msg.GetBlock8()) {
+         Visit(more.GetToken1());
+         NewLine();
+         Visit(more.GetRule_create_table_entry2());
+     }
+     if (msg.HasBlock9()) {
+         Visit(msg.GetBlock9());
+     }
+     PopCurrentIndent();
+
+     NewLine();
+     Visit(msg.GetToken10());
+
+     // Emits an optional trailing block on a line of its own.
+     const auto onOwnLine = [this](const auto& block) {
+         NewLine();
+         Visit(block);
+     };
+     if (msg.HasBlock11()) {
+         onOwnLine(msg.GetBlock11());
+     }
+     if (msg.HasBlock12()) {
+         onOwnLine(msg.GetBlock12());
+     }
+     if (msg.HasBlock13()) {
+         onOwnLine(msg.GetBlock13());
+     }
+     if (msg.HasBlock14()) {
+         onOwnLine(msg.GetBlock14());
+     }
+     if (msg.HasBlock15()) {
+         onOwnLine(msg.GetBlock15());
+     }
+ }
+
+ // Handler for drop_table_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitDropTable(const TRule_drop_table_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_table_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for analyze_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitAnalyze(const TRule_analyze_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_analyze_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for backup_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitBackup(const TRule_backup_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_backup_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for restore_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitRestore(const TRule_restore_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_restore_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for use_stmt: writes the comments located up to the first token,
+ // starts a fresh line and emits all fields generically.
+ void VisitUse(const TRule_use_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_use_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for into_table_stmt: the statement starts with one of six
+ // alternatives; the first token of the active one is used to position the
+ // comment cursor before the whole statement is emitted generically on a
+ // fresh line. Throws for an unknown alternative.
+ void VisitIntoTable(const TRule_into_table_stmt& msg) {
+     using TAlt = TRule_into_table_stmt_TBlock1::AltCase;
+     const auto& head = msg.GetBlock1();
+     const TToken* lead = nullptr;
+     switch (head.Alt_case()) {
+         case TAlt::kAlt1:
+             lead = &head.GetAlt1().GetToken1();
+             break;
+         case TAlt::kAlt2:
+             lead = &head.GetAlt2().GetToken1();
+             break;
+         case TAlt::kAlt3:
+             lead = &head.GetAlt3().GetToken1();
+             break;
+         case TAlt::kAlt4:
+             lead = &head.GetAlt4().GetToken1();
+             break;
+         case TAlt::kAlt5:
+             lead = &head.GetAlt5().GetToken1();
+             break;
+         case TAlt::kAlt6:
+             lead = &head.GetAlt6().GetToken1();
+             break;
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+
+     PosFromToken(*lead);
+     NewLine();
+     VisitAllFields(TRule_into_table_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for commit_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitCommit(const TRule_commit_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_commit_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for update_stmt.
+ //
+ // First alternative of block 3: the token after the table reference goes on
+ // its own line, followed by either
+ //   - a list of set clauses, one per line, indented one level; or
+ //   - a multi-column assignment: the bracketed target list (one target per
+ //     line, indented) and, after the two tokens that follow it, the value
+ //     source (indented), closed by the final token on its own line.
+ // An optional block 3 of that alternative is then emitted on a new line.
+ //
+ // Second alternative of block 3: its first token on a new line, followed by
+ // an into_values_source.
+ //
+ // Throws for an unknown alternative.
+ void VisitUpdate(const TRule_update_stmt& msg) {
+     PosFromToken(msg.GetToken1());
+     NewLine();
+     Visit(msg.GetToken1());
+     Visit(msg.GetRule_simple_table_ref2());
+     switch (msg.GetBlock3().Alt_case()) {
+         case TRule_update_stmt_TBlock3::kAlt1: {
+             const auto& alt = msg.GetBlock3().GetAlt1();
+             NewLine();
+             Visit(alt.GetToken1());
+             const auto& choice = alt.GetRule_set_clause_choice2();
+             NewLine();
+
+             switch (choice.Alt_case()) {
+                 case TRule_set_clause_choice::kAltSetClauseChoice1: {
+                     const auto& clauses = choice.GetAlt_set_clause_choice1().GetRule_set_clause_list1();
+                     PushCurrentIndent();
+                     Visit(clauses.GetRule_set_clause1());
+                     for (auto& block : clauses.GetBlock2()) {
+                         Visit(block.GetToken1());
+                         NewLine();
+                         Visit(block.GetRule_set_clause2());
+                     }
+
+                     PopCurrentIndent();
+                     break;
+                 }
+                 case TRule_set_clause_choice::kAltSetClauseChoice2: {
+                     const auto& multiColumn = choice.GetAlt_set_clause_choice2().GetRule_multiple_column_assignment1();
+                     const auto& targets = multiColumn.GetRule_set_target_list1();
+                     Visit(targets.GetToken1());
+                     NewLine();
+                     PushCurrentIndent();
+                     Visit(targets.GetRule_set_target2());
+                     for (auto& block : targets.GetBlock3()) {
+                         Visit(block.GetToken1());
+                         NewLine();
+                         Visit(block.GetRule_set_target2());
+                     }
+
+                     NewLine();
+                     PopCurrentIndent();
+                     Visit(targets.GetToken4());
+                     Visit(multiColumn.GetToken2());
+                     Visit(multiColumn.GetToken3());
+                     NewLine();
+                     const auto& simpleValues = multiColumn.GetRule_simple_values_source4();
+                     switch (simpleValues.Alt_case()) {
+                         case TRule_simple_values_source::kAltSimpleValuesSource1: {
+                             const auto& exprs = simpleValues.GetAlt_simple_values_source1().GetRule_expr_list1();
+                             PushCurrentIndent();
+                             Visit(exprs.GetRule_expr1());
+                             for (const auto& block : exprs.GetBlock2()) {
+                                 Visit(block.GetToken1());
+                                 NewLine();
+                                 Visit(block.GetRule_expr2());
+                             }
+
+                             PopCurrentIndent();
+                             break;
+                         }
+                         case TRule_simple_values_source::kAltSimpleValuesSource2: {
+                             PushCurrentIndent();
+                             Visit(simpleValues.GetAlt_simple_values_source2());
+                             PopCurrentIndent();
+                             break;
+                         }
+                         default:
+                             ythrow yexception() << "Alt is not supported";
+                     }
+
+                     NewLine();
+                     Visit(multiColumn.GetToken5());
+                     break;
+                 }
+                 default:
+                     ythrow yexception() << "Alt is not supported";
+             }
+
+             // Every PushCurrentIndent() above is matched by a
+             // PopCurrentIndent() inside the switch, so the indent level is
+             // back where this statement started. No further pop may happen
+             // here: an unmatched one would shift the indentation of whatever
+             // is emitted after this UPDATE (e.g. inside an action body).
+             // NOTE(review): assumes PushCurrentIndent/PopCurrentIndent are a
+             // symmetric pair - confirm against their definitions.
+             if (alt.HasBlock3()) {
+                 NewLine();
+                 Visit(alt.GetBlock3());
+             }
+
+             break;
+         }
+         case TRule_update_stmt_TBlock3::kAlt2: {
+             const auto& alt = msg.GetBlock3().GetAlt2();
+             NewLine();
+             Visit(alt.GetToken1());
+             Visit(alt.GetRule_into_values_source2());
+             break;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Handler for delete_stmt: header on one line; the optional block 4 (either
+ // of its two alternatives) starts on a new line. Throws for an unknown
+ // alternative.
+ void VisitDelete(const TRule_delete_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     Visit(lead);
+     Visit(msg.GetToken2());
+     Visit(msg.GetRule_simple_table_ref3());
+     if (!msg.HasBlock4()) {
+         return;
+     }
+
+     const auto& tail = msg.GetBlock4();
+     switch (tail.Alt_case()) {
+         case TRule_delete_stmt_TBlock4::kAlt1:
+             NewLine();
+             Visit(tail.GetAlt1());
+             break;
+         case TRule_delete_stmt_TBlock4::kAlt2:
+             NewLine();
+             Visit(tail.GetAlt2());
+             break;
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Handler for rollback_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitRollback(const TRule_rollback_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_rollback_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for declare_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitDeclare(const TRule_declare_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_declare_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for import_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitImport(const TRule_import_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_import_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for export_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitExport(const TRule_export_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_export_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_table_stmt: header (two keywords and the table
+ // reference) on one line, then one action per line, indented one level.
+ void VisitAlterTable(const TRule_alter_table_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+     VisitKeyword(msg.GetToken2());
+     Visit(msg.GetRule_simple_table_ref3());
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_alter_table_action4());
+     for (const auto& next : msg.GetBlock5()) {
+         Visit(next.GetToken1());
+         NewLine();
+         Visit(next.GetRule_alter_table_action2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for alter_table_store_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitAlterTableStore(const TRule_alter_table_store_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_alter_table_store_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_external_table_stmt: header (three keywords and the
+ // table reference) on one line, then one action per line, indented one
+ // level.
+ void VisitAlterExternalTable(const TRule_alter_external_table_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+     VisitKeyword(msg.GetToken2());
+     VisitKeyword(msg.GetToken3());
+     Visit(msg.GetRule_simple_table_ref4());
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_alter_external_table_action5());
+     for (const auto& next : msg.GetBlock6()) {
+         Visit(next.GetToken1());
+         NewLine();
+         Visit(next.GetRule_alter_external_table_action2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for do_stmt. After the leading keyword it emits either
+ //   - a call action (CALL): callee, bracket token, optional arguments and
+ //     closing token on the same line; or
+ //   - an inline action (INLINE): opening keyword, the body on its own lines
+ //     with one extra indent level, and the two closing keywords on a new
+ //     line.
+ // Throws for an unknown alternative.
+ void VisitDo(const TRule_do_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+
+     const auto& action = msg.GetBlock2();
+     switch (action.Alt_case()) {
+         case TRule_do_stmt_TBlock2::kAlt1: { // CALL
+             const auto& call = action.GetAlt1().GetRule_call_action1();
+             Visit(call.GetBlock1());
+             // Suppresses the space before the next token (see VisitTokenImpl).
+             AfterInvokeExpr = true;
+             Visit(call.GetToken2());
+             if (call.HasBlock3()) {
+                 Visit(call.GetBlock3());
+             }
+
+             Visit(call.GetToken4());
+             break;
+         }
+         case TRule_do_stmt_TBlock2::kAlt2: { // INLINE
+             const auto& inlined = action.GetAlt2().GetRule_inline_action1();
+             VisitKeyword(inlined.GetToken1());
+             PushCurrentIndent();
+             NewLine();
+             Visit(inlined.GetRule_define_action_or_subquery_body2());
+             PopCurrentIndent();
+             NewLine();
+             VisitKeyword(inlined.GetToken3());
+             VisitKeyword(inlined.GetToken4());
+             break;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Handler for define_action_or_subquery_stmt: the signature (two keywords,
+ // name, bracketed optional parameters, AS) on one line, the body on its own
+ // lines with one extra indent level, and the two closing keywords on a new
+ // line.
+ void VisitAction(const TRule_define_action_or_subquery_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+     VisitKeyword(msg.GetToken2());
+     Visit(msg.GetRule_bind_parameter3());
+
+     // Suppresses the space before the next token (see VisitTokenImpl).
+     AfterInvokeExpr = true;
+     Visit(msg.GetToken4());
+     if (msg.HasBlock5()) {
+         Visit(msg.GetBlock5());
+     }
+     Visit(msg.GetToken6());
+     VisitKeyword(msg.GetToken7()); // AS
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_define_action_or_subquery_body8());
+     PopCurrentIndent();
+
+     NewLine();
+     VisitKeyword(msg.GetToken9());
+     VisitKeyword(msg.GetToken10());
+ }
+
+ // Handler for if_stmt: optional prefix block, the leading token and the
+ // condition on one line; the DO statement indented one level; and, when
+ // block 5 is present, its first token on its own line followed by another
+ // indented DO statement.
+ void VisitIf(const TRule_if_stmt& msg) {
+     const bool hasPrefix = msg.HasBlock1();
+     PosFromToken(hasPrefix ? msg.GetBlock1().GetToken1() : msg.GetToken2());
+     NewLine();
+     if (hasPrefix) {
+         Visit(msg.GetBlock1());
+     }
+
+     Visit(msg.GetToken2());
+     Visit(msg.GetRule_expr3());
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_do_stmt4());
+     PopCurrentIndent();
+     if (!msg.HasBlock5()) {
+         return;
+     }
+
+     const auto& elseBranch = msg.GetBlock5();
+     NewLine();
+     Visit(elseBranch.GetToken1());
+     NewLine();
+     PushCurrentIndent();
+     Visit(elseBranch.GetRule_do_stmt2());
+     PopCurrentIndent();
+ }
+
+ // Handler for for_stmt: up to two optional prefix blocks, then the leading
+ // token, bind parameter, separator token and expression on one line; the DO
+ // statement indented one level; and, when block 8 is present, its first
+ // token on its own line followed by another indented DO statement.
+ void VisitFor(const TRule_for_stmt& msg) {
+     const bool hasFirstPrefix = msg.HasBlock1();
+     const bool hasSecondPrefix = msg.HasBlock2();
+     if (hasFirstPrefix) {
+         PosFromToken(msg.GetBlock1().GetToken1());
+     } else if (hasSecondPrefix) {
+         PosFromToken(msg.GetBlock2().GetToken1());
+     } else {
+         PosFromToken(msg.GetToken3());
+     }
+
+     NewLine();
+     if (hasFirstPrefix) {
+         Visit(msg.GetBlock1());
+     }
+
+     if (hasSecondPrefix) {
+         Visit(msg.GetBlock2());
+     }
+
+     Visit(msg.GetToken3());
+     Visit(msg.GetRule_bind_parameter4());
+     Visit(msg.GetToken5());
+     Visit(msg.GetRule_expr6());
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_do_stmt7());
+     PopCurrentIndent();
+     if (!msg.HasBlock8()) {
+         return;
+     }
+
+     const auto& elseBranch = msg.GetBlock8();
+     NewLine();
+     Visit(elseBranch.GetToken1());
+     NewLine();
+     PushCurrentIndent();
+     Visit(elseBranch.GetRule_do_stmt2());
+     PopCurrentIndent();
+ }
+
+ // Handler for values_stmt: the keyword on its own line, then one source row
+ // per line, indented one level.
+ void VisitValues(const TRule_values_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+
+     const auto& rows = msg.GetRule_values_source_row_list2();
+     PushCurrentIndent();
+     NewLine();
+     Visit(rows.GetRule_values_source_row1());
+     for (const auto& next : rows.GetBlock2()) {
+         Visit(next.GetToken1());
+         NewLine();
+         Visit(next.GetRule_values_source_row2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for grant_permissions_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitGrantPermissions(const TRule_grant_permissions_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_grant_permissions_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for revoke_permissions_stmt: writes the comments located up to
+ // the first token, starts a fresh line and emits all fields generically.
+ void VisitRevokePermissions(const TRule_revoke_permissions_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_revoke_permissions_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_user_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitCreateUser(const TRule_create_user_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_user_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_user_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitAlterUser(const TRule_alter_user_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_alter_user_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_group_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitCreateGroup(const TRule_create_group_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_group_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_group_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitAlterGroup(const TRule_alter_group_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_alter_group_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for drop_role_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitDropRole(const TRule_drop_role_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_role_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for upsert_object_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitUpsertObject(const TRule_upsert_object_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_upsert_object_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_object_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitCreateObject(const TRule_create_object_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_object_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_object_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitAlterObject(const TRule_alter_object_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_alter_object_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for drop_object_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitDropObject(const TRule_drop_object_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_object_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_topic_stmt: header on one line; the optional entry
+ // list with one entry per line (indented one level) and its closing token
+ // back at the outer level; the optional settings clause with its settings
+ // on indented lines and its closing token on a line of its own.
+ void VisitCreateTopic(const TRule_create_topic_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+     VisitKeyword(msg.GetToken2());
+     Visit(msg.GetBlock3());
+     Visit(msg.GetRule_topic_ref4());
+
+     if (msg.HasBlock5()) {
+         const auto& entries = msg.GetBlock5().GetRule_create_topic_entries1();
+         PushCurrentIndent();
+         Visit(entries.GetToken1());
+         NewLine();
+         Visit(entries.GetRule_create_topic_entry2());
+         for (const auto& next : entries.GetBlock3()) {
+             Visit(next.GetToken1());
+             NewLine();
+             Visit(next.GetRule_create_topic_entry2());
+         }
+         NewLine();
+         PopCurrentIndent();
+         Visit(entries.GetToken4());
+     }
+
+     if (msg.HasBlock6()) {
+         const auto& settings = msg.GetBlock6().GetRule_with_topic_settings1();
+         VisitKeyword(settings.GetToken1());
+         VisitKeyword(settings.GetToken2());
+         PushCurrentIndent();
+         NewLine();
+         Visit(settings.GetRule_topic_settings3());
+         PopCurrentIndent();
+         NewLine();
+         VisitKeyword(settings.GetToken4());
+     }
+ }
+
+ // Handler for alter_topic_stmt: header on one line, then one action per
+ // line, indented one level.
+ void VisitAlterTopic(const TRule_alter_topic_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitKeyword(lead);
+     VisitKeyword(msg.GetToken2());
+     Visit(msg.GetBlock3());
+     Visit(msg.GetRule_topic_ref4());
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_alter_topic_action5());
+     for (const auto& next : msg.GetBlock6()) {
+         Visit(next.GetToken1());
+         NewLine();
+         Visit(next.GetRule_alter_topic_action2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for drop_topic_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitDropTopic(const TRule_drop_topic_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_topic_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_external_data_source_stmt: writes the comments located
+ // up to the first token, starts a fresh line and emits all fields
+ // generically.
+ void VisitCreateExternalDataSource(const TRule_create_external_data_source_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_external_data_source_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_external_data_source_stmt: header (four tokens and the
+ // object reference) on one line, then one action per line, indented one
+ // level.
+ void VisitAlterExternalDataSource(const TRule_alter_external_data_source_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitToken(lead);
+     VisitToken(msg.GetToken2());
+     VisitToken(msg.GetToken3());
+     VisitToken(msg.GetToken4());
+     Visit(msg.GetRule_object_ref5());
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_alter_external_data_source_action6());
+     for (const auto& next : msg.GetBlock7()) {
+         Visit(next.GetToken1()); // comma
+         NewLine();
+         Visit(next.GetRule_alter_external_data_source_action2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for drop_external_data_source_stmt: writes the comments located
+ // up to the first token, starts a fresh line and emits all fields
+ // generically.
+ void VisitDropExternalDataSource(const TRule_drop_external_data_source_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_external_data_source_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_view_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitCreateView(const TRule_create_view_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_view_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for drop_view_stmt: writes the comments located up to the first
+ // token, starts a fresh line and emits all fields generically.
+ void VisitDropView(const TRule_drop_view_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_view_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_replication_stmt: writes the comments located up to
+ // the first token, starts a fresh line and emits all fields generically.
+ void VisitCreateAsyncReplication(const TRule_create_replication_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_replication_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_replication_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitAlterAsyncReplication(const TRule_alter_replication_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_alter_replication_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for drop_replication_stmt: writes the comments located up to the
+ // first token, starts a fresh line and emits all fields generically.
+ void VisitDropAsyncReplication(const TRule_drop_replication_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_replication_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_resource_pool_stmt: writes the comments located up to
+ // the first token, starts a fresh line and emits all fields generically.
+ void VisitCreateResourcePool(const TRule_create_resource_pool_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_resource_pool_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_resource_pool_stmt: header (three tokens and the object
+ // reference) on one line, then one action per line, indented one level.
+ void VisitAlterResourcePool(const TRule_alter_resource_pool_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitToken(lead);
+     VisitToken(msg.GetToken2());
+     VisitToken(msg.GetToken3());
+     Visit(msg.GetRule_object_ref4());
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_alter_resource_pool_action5());
+     for (const auto& next : msg.GetBlock6()) {
+         Visit(next.GetToken1()); // comma
+         NewLine();
+         Visit(next.GetRule_alter_resource_pool_action2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for drop_resource_pool_stmt: writes the comments located up to
+ // the first token, starts a fresh line and emits all fields generically.
+ void VisitDropResourcePool(const TRule_drop_resource_pool_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_resource_pool_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_backup_collection_stmt: writes the comments located up
+ // to the first token, starts a fresh line and emits all fields generically.
+ void VisitCreateBackupCollection(const TRule_create_backup_collection_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_backup_collection_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_backup_collection_stmt: header (leading token and the
+ // collection reference) on one line, then either a list of actions or a
+ // list of entries, one per line and indented one level. Throws for an
+ // unknown alternative.
+ void VisitAlterBackupCollection(const TRule_alter_backup_collection_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitToken(lead);
+     Visit(msg.GetRule_backup_collection2());
+
+     NewLine();
+     PushCurrentIndent();
+     const auto& body = msg.GetBlock3();
+     switch (body.Alt_case()) {
+         case TRule_alter_backup_collection_stmt_TBlock3::kAlt1: {
+             const auto& actions = body.GetAlt1().GetRule_alter_backup_collection_actions1();
+             Visit(actions.GetRule_alter_backup_collection_action1());
+             for (const auto& next : actions.GetBlock2()) {
+                 Visit(next.GetToken1()); // comma
+                 NewLine();
+                 Visit(next.GetRule_alter_backup_collection_action2());
+             }
+             break;
+         }
+         case TRule_alter_backup_collection_stmt_TBlock3::kAlt2: {
+             const auto& entries = body.GetAlt2().GetRule_alter_backup_collection_entries1();
+             Visit(entries.GetRule_alter_backup_collection_entry1());
+             for (const auto& next : entries.GetBlock2()) {
+                 Visit(next.GetToken1()); // comma
+                 NewLine();
+                 Visit(next.GetRule_alter_backup_collection_entry2());
+             }
+             break;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+
+     PopCurrentIndent();
+ }
+
+ // Handler for drop_backup_collection_stmt: writes the comments located up
+ // to the first token, starts a fresh line and emits all fields generically.
+ void VisitDropBackupCollection(const TRule_drop_backup_collection_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_backup_collection_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for create_resource_pool_classifier_stmt: writes the comments
+ // located up to the first token, starts a fresh line and emits all fields
+ // generically.
+ void VisitCreateResourcePoolClassifier(const TRule_create_resource_pool_classifier_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_create_resource_pool_classifier_stmt::GetDescriptor(), msg);
+ }
+
+ // Handler for alter_resource_pool_classifier_stmt: header (four tokens and
+ // the object reference) on one line, then one action per line, indented one
+ // level.
+ void VisitAlterResourcePoolClassifier(const TRule_alter_resource_pool_classifier_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitToken(lead);
+     VisitToken(msg.GetToken2());
+     VisitToken(msg.GetToken3());
+     VisitToken(msg.GetToken4());
+     Visit(msg.GetRule_object_ref5());
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_alter_resource_pool_classifier_action6());
+     for (const auto& next : msg.GetBlock7()) {
+         Visit(next.GetToken1()); // comma
+         NewLine();
+         Visit(next.GetRule_alter_resource_pool_classifier_action2());
+     }
+     PopCurrentIndent();
+ }
+
+ // Handler for drop_resource_pool_classifier_stmt: writes the comments
+ // located up to the first token, starts a fresh line and emits all fields
+ // generically.
+ void VisitDropResourcePoolClassifier(const TRule_drop_resource_pool_classifier_stmt& msg) {
+     const auto& lead = msg.GetToken1();
+     PosFromToken(lead);
+     NewLine();
+     VisitAllFields(TRule_drop_resource_pool_classifier_stmt::GetDescriptor(), msg);
+ }
+
+ // Generic traversal: delegates to VisitAllFieldsImpl with Visit() as the
+ // per-message callback.
+ void VisitAllFields(const NProtoBuf::Descriptor* descr, const NProtoBuf::Message& msg) {
+     using TSelf = TPrettyVisitor;
+     VisitAllFieldsImpl<TSelf, &TSelf::Visit>(this, descr, msg);
+ }
+
+ // Writes, in order, every pending comment whose position is not after
+ // (LastLine, LastColumn), and stops at the first comment that lies beyond
+ // that position.
+ void WriteComments() {
+     for (; LastComment < Comments.size(); ++LastComment) {
+         const auto& comment = Comments[LastComment];
+         const bool beyondCursor = comment.Line > LastLine
+             || (comment.Line == LastLine && comment.LinePos > LastColumn);
+         if (beyondCursor) {
+             break;
+         }
+
+         AddComment(comment.Content);
+     }
+ }
+
+ // Moves the tracked source position to the given grammar token and flushes
+ // the comments that precede (or coincide with) that position.
+ void PosFromToken(const TToken& token) {
+     LastColumn = token.GetColumn();
+     LastLine = token.GetLine();
+     WriteComments();
+ }
+
+ // Same as PosFromToken, but takes the position from a lexer-level TParsedToken
+ // (Line / LinePos fields) instead of a grammar token.
+ void PosFromParsedToken(const TParsedToken& token) {
+     LastColumn = token.LinePos;
+     LastLine = token.Line;
+     WriteComments();
+ }
+
+ // Writes a token without forcing it to upper case (keyword normalization is
+ // still applied by VisitTokenImpl when the current scope allows it).
+ void VisitToken(const TToken& token) {
+     constexpr bool forceKeyword = false;
+     VisitTokenImpl(token, forceKeyword);
+ }
+
+ // Writes a token that is known to be a keyword: its text is always upper-cased.
+ void VisitKeyword(const TToken& token) {
+     constexpr bool forceKeyword = true;
+     VisitTokenImpl(token, forceKeyword);
+ }
+
+ // Writes a single token to the output.
+ //
+ // Steps, in order:
+ //   1. sync the source position (this also flushes pending comments);
+ //   2. decide whether a separating space is needed before the token;
+ //   3. refresh the "what was the previous token" flags used by the next call;
+ //   4. normalize keyword case (always when forceKeyword, otherwise only in the
+ //      Default scope and only for known keywords);
+ //   5. handle marked brackets: closing brackets dedent before being written,
+ //      opening brackets indent after being written, when the bracket pair is
+ //      multiline in the source or expansion is forced;
+ //   6. write the token; ';' is followed by a line feed;
+ //   7. inside marked brackets, break the line after a comma that was adjacent
+ //      to a line break in the source.
+ //
+ // The "<EOF>" token only updates the position and is otherwise ignored
+ // (TokenIndex is not advanced for it).
+ void VisitTokenImpl(const TToken& token, bool forceKeyword) {
+     PosFromToken(token);
+     auto str = token.GetValue();
+     if (str == "<EOF>") {
+         return;
+     }
+
+     const auto currentScope = Scopes.back();
+
+     // --- separating space ---------------------------------------------------
+     bool needSpace = false;
+     if (!SkipSpaceAfterUnaryOp && !InMultiTokenOp) {
+         if (AfterLess && str == ">") {
+             needSpace = true;
+         } else if (AfterDigits && str == ".") {
+             needSpace = true;
+         } else {
+             const bool atLineStart = !OutColumn;
+             const bool questionAllowed = (currentScope == EScope::DoubleQuestion) || (str != "?");
+             // Tokens that attach to whatever precedes them.
+             const bool attachesToPrevious =
+                 str == ":" || str == "." || str == "," || str == ";" || str == ")" || str == "]"
+                 || str == "}" || str == "|>" || str == "::";
+             // Previous tokens/contexts that attach to whatever follows them.
+             const bool previousAttaches =
+                 AfterNamespace || AfterBracket || AfterInvokeExpr || AfterDollarOrAt || AfterDot;
+             const bool repeatedQuestion = AfterQuestion && str == "?";
+             const bool angleInsideType = InsideType && (str == "<" || str == ">" || str == "<>");
+             const bool afterLessInsideType = InsideType && AfterLess;
+             const bool bracketAfterKeyExpr = AfterKeyExpr && str == "[";
+
+             needSpace = !atLineStart && questionAllowed && !attachesToPrevious && !previousAttaches
+                 && !repeatedQuestion && !angleInsideType && !afterLessInsideType && !bracketAfterKeyExpr;
+         }
+     }
+
+     if (needSpace) {
+         Out(' ');
+     }
+
+     // --- state for the next token -------------------------------------------
+     // A sign directly following a unary-operator position suppresses the next space.
+     SkipSpaceAfterUnaryOp = AfterUnaryOp && (str == "+" || str == "-" || str == "~");
+     AfterUnaryOp = false;
+
+     AfterInvokeExpr = false;
+     AfterNamespace = (str == "::");
+     AfterBracket = (str == "(" || str == "[" || str == "{" || str == "<|");
+     AfterDot = (str == ".");
+     AfterDigits = !str.empty() && AllOf(str, [](char c) { return c >= '0' && c <= '9'; });
+     AfterQuestion = (str == "?");
+     AfterLess = (str == "<");
+     AfterKeyExpr = false;
+
+     // --- keyword case normalization -------------------------------------------
+     if (forceKeyword) {
+         str = to_upper(str);
+     } else if (currentScope == EScope::Default) {
+         if (auto keywordIt = StaticData.Keywords.find(to_upper(str)); keywordIt != StaticData.Keywords.end()) {
+             str = *keywordIt;
+         }
+     }
+
+     AfterDollarOrAt = (str == "$" || str == "@");
+
+     // --- marked brackets ------------------------------------------------------
+     const auto& bracketInfo = MarkedTokens[TokenIndex];
+     if (bracketInfo.ClosingBracket) {
+         Y_ENSURE(!MarkTokenStack.empty());
+         const auto openingIndex = MarkTokenStack.back();
+         const bool multiline = bracketInfo.BracketForcedExpansion
+             || ParsedTokens[openingIndex].Line != ParsedTokens[TokenIndex].Line;
+         if (multiline) {
+             PopCurrentIndent();
+             NewLine();
+         }
+
+         MarkTokenStack.pop_back();
+     }
+
+     Out(str);
+     if (str == ";") {
+         Out('\n');
+     }
+
+     if (bracketInfo.OpeningBracket) {
+         MarkTokenStack.push_back(TokenIndex);
+         const bool multiline = bracketInfo.BracketForcedExpansion
+             || ParsedTokens[TokenIndex].Line != ParsedTokens[bracketInfo.ClosingBracketIndex].Line;
+         if (multiline) {
+             PushCurrentIndent();
+             NewLine();
+         }
+     }
+
+     // --- comma inside marked brackets -----------------------------------------
+     if (str == "," && !MarkTokenStack.empty()) {
+         const bool breakAfterComma =
+             TokenIndex + 1 < ParsedTokens.size() && ParsedTokens[TokenIndex].Line != ParsedTokens[TokenIndex + 1].Line;
+         const bool breakBeforeComma =
+             TokenIndex > 0 && ParsedTokens[TokenIndex - 1].Line != ParsedTokens[TokenIndex].Line;
+         // add line for trailing comma
+         if (breakAfterComma || breakBeforeComma) {
+             NewLine();
+         }
+     }
+
+     ++TokenIndex;
+ }
+
+ // Formats an into_values_source node.
+ // Alternative 1: an optional pure column list (one column per indented line),
+ // followed by the values source. Alternative 2 is visited generically.
+ // Throws yexception for any other alternative.
+ void VisitIntoValuesSource(const TRule_into_values_source& msg) {
+     const auto altCase = msg.Alt_case();
+     if (altCase == TRule_into_values_source::kAltIntoValuesSource2) {
+         VisitAllFields(TRule_into_values_source::GetDescriptor(), msg);
+         return;
+     }
+
+     if (altCase != TRule_into_values_source::kAltIntoValuesSource1) {
+         ythrow yexception() << "Alt is not supported";
+     }
+
+     const auto& source = msg.GetAlt_into_values_source1();
+     if (source.HasBlock1()) {
+         const auto& columnList = source.GetBlock1().GetRule_pure_column_list1();
+         Visit(columnList.GetToken1());
+         NewLine();
+         PushCurrentIndent();
+         Visit(columnList.GetRule_an_id2());
+         for (const auto& nextColumn : columnList.GetBlock3()) {
+             Visit(nextColumn.GetToken1());
+             NewLine();
+             Visit(nextColumn.GetRule_an_id2());
+         }
+
+         PopCurrentIndent();
+         NewLine();
+         Visit(columnList.GetToken4());
+         NewLine();
+     }
+
+     Visit(source.GetRule_values_source2());
+ }
+
+ // Formats a select_kind node: optional leading block, the mandatory body block,
+ // and an optional trailing block that is moved to its own line.
+ void VisitSelectKind(const TRule_select_kind& msg) {
+     if (msg.HasBlock1()) {
+         Visit(msg.GetBlock1());
+     }
+
+     Visit(msg.GetBlock2());
+
+     if (!msg.HasBlock3()) {
+         return;
+     }
+
+     NewLine();
+     Visit(msg.GetBlock3());
+ }
+
+ // Formats a process_core node. The leading token, optional block, first source
+ // and the repeated sources stay on the current line. The optional trailing
+ // block (token + using_call_expr + optional clauses) starts on a new line, and
+ // each of its optional sub-blocks 4..7 gets a line of its own.
+ void VisitProcessCore(const TRule_process_core& msg) {
+     // Starts a new line and visits the node there.
+     const auto visitOnNewLine = [this](const auto& node) {
+         NewLine();
+         Visit(node);
+     };
+
+     Visit(msg.GetToken1());
+     if (msg.HasBlock2()) {
+         Visit(msg.GetBlock2());
+     }
+
+     Visit(msg.GetRule_named_single_source3());
+     VisitRepeated(msg.GetBlock4());
+     if (!msg.HasBlock5()) {
+         return;
+     }
+
+     NewLine();
+     const auto& usingBlock = msg.GetBlock5();
+     Visit(usingBlock.GetToken1());
+     Visit(usingBlock.GetRule_using_call_expr2());
+     if (usingBlock.HasBlock3()) {
+         Visit(usingBlock.GetBlock3());
+     }
+
+     if (usingBlock.HasBlock4()) {
+         visitOnNewLine(usingBlock.GetBlock4());
+     }
+
+     if (usingBlock.HasBlock5()) {
+         visitOnNewLine(usingBlock.GetBlock5());
+     }
+
+     if (usingBlock.HasBlock6()) {
+         visitOnNewLine(usingBlock.GetBlock6());
+     }
+
+     if (usingBlock.HasBlock7()) {
+         visitOnNewLine(usingBlock.GetBlock7());
+     }
+ }
+
+ // Formats a reduce_core node.
+ // Layout: leading token and sources on one line; optional block 4 on its own
+ // line; token 5 on its own line followed by the column list (one column per
+ // indented line); token 7 with the using-call expression on a new line; the
+ // optional blocks 11..13 each on their own line.
+ void VisitReduceCore(const TRule_reduce_core& msg) {
+     // Starts a new line and visits the node there.
+     const auto visitOnNewLine = [this](const auto& node) {
+         NewLine();
+         Visit(node);
+     };
+
+     Visit(msg.GetToken1());
+     Visit(msg.GetRule_named_single_source2());
+     VisitRepeated(msg.GetBlock3());
+
+     if (msg.HasBlock4()) {
+         visitOnNewLine(msg.GetBlock4());
+     }
+
+     visitOnNewLine(msg.GetToken5());
+
+     // Column list: one column per line, one level deeper.
+     const auto& columnList = msg.GetRule_column_list6();
+     NewLine();
+     PushCurrentIndent();
+     Visit(columnList.GetRule_column_name1());
+     for (const auto& nextColumn : columnList.GetBlock2()) {
+         Visit(nextColumn.GetToken1());
+         NewLine();
+         Visit(nextColumn.GetRule_column_name2());
+     }
+
+     if (columnList.HasBlock3()) {
+         Visit(columnList.GetBlock3());
+     }
+
+     PopCurrentIndent();
+
+     visitOnNewLine(msg.GetToken7());
+     if (msg.HasBlock8()) {
+         Visit(msg.GetBlock8());
+     }
+
+     Visit(msg.GetRule_using_call_expr9());
+     if (msg.HasBlock10()) {
+         Visit(msg.GetBlock10());
+     }
+
+     if (msg.HasBlock11()) {
+         visitOnNewLine(msg.GetBlock11());
+     }
+
+     if (msg.HasBlock12()) {
+         visitOnNewLine(msg.GetBlock12());
+     }
+
+     if (msg.HasBlock13()) {
+         visitOnNewLine(msg.GetBlock13());
+     }
+ }
+
+ // Formats a sort_specification_list: starts a new line, indents, and writes one
+ // sort specification per line with the separating token kept on the previous line.
+ void VisitSortSpecificationList(const TRule_sort_specification_list& msg) {
+     NewLine();
+     PushCurrentIndent();
+
+     Visit(msg.GetRule_sort_specification1());
+     for (const auto& nextSpec : msg.GetBlock2()) {
+         Visit(nextSpec.GetToken1());
+         NewLine();
+         Visit(nextSpec.GetRule_sort_specification2());
+     }
+
+     PopCurrentIndent();
+ }
+
+ // Formats a select_core node.
+ // Layout: optional block 1 on its own line; token 2, optional block 3 and the
+ // set quantifier on one line; result columns one per indented line (optional
+ // block 7 attached to the last column, optional block 8 on a further indented
+ // line); then each present optional block 9..14 on its own line at the outer
+ // indentation level.
+ void VisitSelectCore(const TRule_select_core& msg) {
+     // Starts a new line and visits the node there.
+     const auto visitOnNewLine = [this](const auto& node) {
+         NewLine();
+         Visit(node);
+     };
+
+     if (msg.HasBlock1()) {
+         Visit(msg.GetBlock1());
+         NewLine();
+     }
+
+     Visit(msg.GetToken2());
+     if (msg.HasBlock3()) {
+         Visit(msg.GetBlock3());
+     }
+
+     Visit(msg.GetRule_opt_set_quantifier4());
+
+     // Result columns.
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetRule_result_column5());
+     for (const auto& nextColumn : msg.GetBlock6()) {
+         Visit(nextColumn.GetToken1());
+         NewLine();
+         Visit(nextColumn.GetRule_result_column2());
+     }
+
+     if (msg.HasBlock7()) {
+         Visit(msg.GetBlock7());
+     }
+
+     if (msg.HasBlock8()) {
+         visitOnNewLine(msg.GetBlock8());
+     }
+
+     PopCurrentIndent();
+
+     // Remaining optional clauses, one per line.
+     if (msg.HasBlock9()) {
+         visitOnNewLine(msg.GetBlock9());
+     }
+
+     if (msg.HasBlock10()) {
+         visitOnNewLine(msg.GetBlock10());
+     }
+
+     if (msg.HasBlock11()) {
+         visitOnNewLine(msg.GetBlock11());
+     }
+
+     if (msg.HasBlock12()) {
+         visitOnNewLine(msg.GetBlock12());
+     }
+
+     if (msg.HasBlock13()) {
+         visitOnNewLine(msg.GetBlock13());
+     }
+
+     if (msg.HasBlock14()) {
+         visitOnNewLine(msg.GetBlock14());
+     }
+ }
+
+ // Formats a join_source node: optional leading block and the first flatten
+ // source, then every joined source on a new line (join operator, optional
+ // block, flatten source), with the optional block 4 of each join on a
+ // separate line.
+ void VisitJoinSource(const TRule_join_source& msg) {
+     if (msg.HasBlock1()) {
+         Visit(msg.GetBlock1());
+     }
+
+     Visit(msg.GetRule_flatten_source2());
+
+     for (const auto& join : msg.GetBlock3()) {
+         NewLine();
+         Visit(join.GetRule_join_op1());
+         if (join.HasBlock2()) {
+             Visit(join.GetBlock2());
+         }
+
+         Visit(join.GetRule_flatten_source3());
+         if (!join.HasBlock4()) {
+             continue;
+         }
+
+         NewLine();
+         Visit(join.GetBlock4());
+     }
+ }
+
+ // Formats a single_source node.
+ // Alternative 1 is visited as is. Alternatives 2 (select statement) and 3
+ // (values statement) are wrapped by an opening and a closing token: the inner
+ // statement is visited one indentation level deeper and the closing token is
+ // placed on a new line. Throws yexception for any other alternative.
+ void VisitSingleSource(const TRule_single_source& msg) {
+     // Shared layout of the two wrapped alternatives.
+     const auto visitWrapped = [this](const auto& openToken, const auto& inner, const auto& closeToken) {
+         Visit(openToken);
+         PushCurrentIndent();
+         Visit(inner);
+         PopCurrentIndent();
+         NewLine();
+         Visit(closeToken);
+     };
+
+     switch (msg.Alt_case()) {
+         case TRule_single_source::kAltSingleSource1:
+             Visit(msg.GetAlt_single_source1());
+             break;
+         case TRule_single_source::kAltSingleSource2: {
+             const auto& wrapped = msg.GetAlt_single_source2();
+             visitWrapped(wrapped.GetToken1(), wrapped.GetRule_select_stmt2(), wrapped.GetToken3());
+             break;
+         }
+         case TRule_single_source::kAltSingleSource3: {
+             const auto& wrapped = msg.GetAlt_single_source3();
+             visitWrapped(wrapped.GetToken1(), wrapped.GetRule_values_stmt2(), wrapped.GetToken3());
+             break;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Formats a flatten_source node: the named single source, then the optional
+ // block 2 on a new line, one indentation level deeper.
+ void VisitFlattenSource(const TRule_flatten_source& msg) {
+     Visit(msg.GetRule_named_single_source1());
+     if (!msg.HasBlock2()) {
+         return;
+     }
+
+     PushCurrentIndent();
+     NewLine();
+     Visit(msg.GetBlock2());
+     PopCurrentIndent();
+ }
+
+ // Formats a named_single_source node: the single source, the optional block 2
+ // (visited generically), the optional block 3 on a new indented line with its
+ // optional column list written one column per line, and the optional block 4
+ // on a new indented line.
+ void VisitNamedSingleSource(const TRule_named_single_source& msg) {
+     Visit(msg.GetRule_single_source1());
+
+     if (msg.HasBlock2()) {
+         //TODO handle MATCH_RECOGNIZE block
+         //https://st.yandex-team.ru/YQL-16186
+         Visit(msg.GetBlock2());
+     }
+
+     if (msg.HasBlock3()) {
+         NewLine();
+         PushCurrentIndent();
+         const auto& aliasBlock = msg.GetBlock3();
+         Visit(aliasBlock.GetBlock1());
+         if (aliasBlock.HasBlock2()) {
+             const auto& columnList = aliasBlock.GetBlock2().GetRule_pure_column_list1();
+             Visit(columnList.GetToken1());
+             NewLine();
+             PushCurrentIndent();
+             Visit(columnList.GetRule_an_id2());
+             for (const auto& nextColumn : columnList.GetBlock3()) {
+                 Visit(nextColumn.GetToken1());
+                 NewLine();
+                 Visit(nextColumn.GetRule_an_id2());
+             }
+
+             NewLine();
+             PopCurrentIndent();
+             Visit(columnList.GetToken4());
+         }
+
+         PopCurrentIndent();
+     }
+
+     if (msg.HasBlock4()) {
+         NewLine();
+         PushCurrentIndent();
+         Visit(msg.GetBlock4());
+         PopCurrentIndent();
+     }
+ }
+
+ // Formats a simple_table_ref node: the core reference, then the optional
+ // block 2 on a new line, one indentation level deeper.
+ void VisitSimpleTableRef(const TRule_simple_table_ref& msg) {
+     Visit(msg.GetRule_simple_table_ref_core1());
+     if (!msg.HasBlock2()) {
+         return;
+     }
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetBlock2());
+     PopCurrentIndent();
+ }
+
+ // Formats an into_simple_table_ref node: the table reference, then the
+ // optional block 2 on a new indented line — two tokens followed by a pure
+ // column list written one column per line, one level deeper still.
+ void VisitIntoSimpleTableRef(const TRule_into_simple_table_ref& msg) {
+     Visit(msg.GetRule_simple_table_ref1());
+     if (!msg.HasBlock2()) {
+         return;
+     }
+
+     const auto& suffix = msg.GetBlock2();
+     NewLine();
+     PushCurrentIndent();
+     Visit(suffix.GetToken1());
+     Visit(suffix.GetToken2());
+
+     const auto& columnList = suffix.GetRule_pure_column_list3();
+     Visit(columnList.GetToken1());
+     NewLine();
+     PushCurrentIndent();
+     Visit(columnList.GetRule_an_id2());
+     for (const auto& nextColumn : columnList.GetBlock3()) {
+         Visit(nextColumn.GetToken1());
+         NewLine();
+         Visit(nextColumn.GetRule_an_id2());
+     }
+
+     PopCurrentIndent();
+     NewLine();
+     Visit(columnList.GetToken4());
+     PopCurrentIndent();
+ }
+
+ // Formats a select_kind_partial node: the select kind, then the optional
+ // block 2 on a new line.
+ void VisitSelectKindPartial(const TRule_select_kind_partial& msg) {
+     Visit(msg.GetRule_select_kind1());
+     if (!msg.HasBlock2()) {
+         return;
+     }
+
+     NewLine();
+     Visit(msg.GetBlock2());
+ }
+
+ // Formats a flatten_by_arg node.
+ // Alternative 1: the argument on a new indented line.
+ // Alternative 2: an opening token, the named expressions one per indented
+ // line (optional block 3 attached to the last one), and the closing token on
+ // a new line. Throws yexception for any other alternative.
+ void VisitFlattenByArg(const TRule_flatten_by_arg& msg) {
+     switch (msg.Alt_case()) {
+         case TRule_flatten_by_arg::kAltFlattenByArg1: {
+             NewLine();
+             PushCurrentIndent();
+             Visit(msg.GetAlt_flatten_by_arg1());
+             PopCurrentIndent();
+             break;
+         }
+         case TRule_flatten_by_arg::kAltFlattenByArg2: {
+             const auto& listAlt = msg.GetAlt_flatten_by_arg2();
+             Visit(listAlt.GetToken1());
+             NewLine();
+             PushCurrentIndent();
+
+             const auto& namedExprs = listAlt.GetRule_named_expr_list2();
+             Visit(namedExprs.GetRule_named_expr1());
+             for (const auto& nextExpr : namedExprs.GetBlock2()) {
+                 Visit(nextExpr.GetToken1());
+                 NewLine();
+                 Visit(nextExpr.GetRule_named_expr2());
+             }
+
+             if (listAlt.HasBlock3()) {
+                 Visit(listAlt.GetBlock3());
+             }
+
+             NewLine();
+             PopCurrentIndent();
+             Visit(listAlt.GetToken4());
+             break;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Formats a without_column_list: starts a new line, indents, and writes one
+ // column name per line; the optional block 3 is attached to the last column.
+ void VisitWithoutColumnList(const TRule_without_column_list& msg) {
+     NewLine();
+     PushCurrentIndent();
+
+     Visit(msg.GetRule_without_column_name1());
+     for (const auto& nextColumn : msg.GetBlock2()) {
+         Visit(nextColumn.GetToken1());
+         NewLine();
+         Visit(nextColumn.GetRule_without_column_name2());
+     }
+
+     if (msg.HasBlock3()) {
+         Visit(msg.GetBlock3());
+     }
+
+     PopCurrentIndent();
+ }
+
+ // Formats a table_ref node: optional blocks 1 and 2, then one of three forms
+ // of block 3, then the optional block 4 on a new indented line.
+ //   Alt1: table key, with its optional block 2 on a new indented line.
+ //   Alt2: an_id_expr followed by an opening token (AfterInvokeExpr is set so
+ //         no space is inserted before it), optional arguments, closing token.
+ //   Alt3: bind parameter, optional block 2 (also marked as following an
+ //         invoke expression), optional block 3 on a new indented line.
+ // Throws yexception for any other alternative.
+ void VisitTableRef(const TRule_table_ref& msg) {
+     // Starts a new line and visits the node one indentation level deeper.
+     const auto visitIndentedOnNewLine = [this](const auto& node) {
+         NewLine();
+         PushCurrentIndent();
+         Visit(node);
+         PopCurrentIndent();
+     };
+
+     if (msg.HasBlock1()) {
+         Visit(msg.GetBlock1());
+     }
+
+     if (msg.HasBlock2()) {
+         Visit(msg.GetBlock2());
+     }
+
+     const auto& body = msg.GetBlock3();
+     switch (body.Alt_case()) {
+         case TRule_table_ref::TBlock3::kAlt1: {
+             const auto& tableKey = body.GetAlt1().GetRule_table_key1();
+             Visit(tableKey.GetRule_id_table_or_type1());
+             if (tableKey.HasBlock2()) {
+                 visitIndentedOnNewLine(tableKey.GetBlock2());
+             }
+
+             break;
+         }
+         case TRule_table_ref::TBlock3::kAlt2: {
+             const auto& call = body.GetAlt2();
+             Visit(call.GetRule_an_id_expr1());
+             AfterInvokeExpr = true;
+             Visit(call.GetToken2());
+             if (call.HasBlock3()) {
+                 Visit(call.GetBlock3());
+             }
+
+             Visit(call.GetToken4());
+             break;
+         }
+         case TRule_table_ref::TBlock3::kAlt3: {
+             const auto& bound = body.GetAlt3();
+             Visit(bound.GetRule_bind_parameter1());
+             if (bound.HasBlock2()) {
+                 AfterInvokeExpr = true;
+                 Visit(bound.GetBlock2());
+             }
+
+             if (bound.HasBlock3()) {
+                 visitIndentedOnNewLine(bound.GetBlock3());
+             }
+
+             break;
+         }
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+
+     if (msg.HasBlock4()) {
+         visitIndentedOnNewLine(msg.GetBlock4());
+     }
+ }
+
+ // Formats a grouping_element_list: starts a new line, indents, and writes one
+ // grouping element per line with the separating token kept on the previous line.
+ void VisitGroupingElementList(const TRule_grouping_element_list& msg) {
+     NewLine();
+     PushCurrentIndent();
+
+     Visit(msg.GetRule_grouping_element1());
+     for (const auto& nextElement : msg.GetBlock2()) {
+         Visit(nextElement.GetToken1());
+         NewLine();
+         Visit(nextElement.GetRule_grouping_element2());
+     }
+
+     PopCurrentIndent();
+ }
+
+ // Formats a group_by_clause: token 1, optional block 2, token 3, the set
+ // quantifier and the grouping element list, followed by the optional block 6
+ // on a new indented line.
+ void VisitGroupByClause(const TRule_group_by_clause& msg) {
+     Visit(msg.GetToken1());
+     if (msg.HasBlock2()) {
+         Visit(msg.GetBlock2());
+     }
+
+     Visit(msg.GetToken3());
+     Visit(msg.GetRule_opt_set_quantifier4());
+     Visit(msg.GetRule_grouping_element_list5());
+
+     if (!msg.HasBlock6()) {
+         return;
+     }
+
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetBlock6());
+     PopCurrentIndent();
+ }
+
+ // Formats a window_definition_list: starts a new line, indents, and writes one
+ // window definition per line with the separating token kept on the previous line.
+ void VisitWindowDefinitionList(const TRule_window_definition_list& msg) {
+     NewLine();
+     PushCurrentIndent();
+
+     Visit(msg.GetRule_window_definition1());
+     for (const auto& nextDefinition : msg.GetBlock2()) {
+         Visit(nextDefinition.GetToken1());
+         NewLine();
+         Visit(nextDefinition.GetRule_window_definition2());
+     }
+
+     PopCurrentIndent();
+ }
+
+ // Formats a window_specification: the opening token, the specification details
+ // and the closing token. When at least one of the four optional detail blocks
+ // is present, the details are indented with each block on its own line and the
+ // closing token goes on a new line; otherwise the two tokens are written back
+ // to back.
+ void VisitWindowSpecification(const TRule_window_specification& msg) {
+     // Starts a new line and visits the node there.
+     const auto visitOnNewLine = [this](const auto& node) {
+         NewLine();
+         Visit(node);
+     };
+
+     Visit(msg.GetToken1());
+
+     const auto& details = msg.GetRule_window_specification_details2();
+     const bool hasDetails = details.HasBlock1() || details.HasBlock2() ||
+                             details.HasBlock3() || details.HasBlock4();
+     if (hasDetails) {
+         NewLine();
+         PushCurrentIndent();
+     }
+
+     if (details.HasBlock1()) {
+         visitOnNewLine(details.GetBlock1());
+     }
+
+     if (details.HasBlock2()) {
+         visitOnNewLine(details.GetBlock2());
+     }
+
+     if (details.HasBlock3()) {
+         visitOnNewLine(details.GetBlock3());
+     }
+
+     if (details.HasBlock4()) {
+         visitOnNewLine(details.GetBlock4());
+     }
+
+     if (hasDetails) {
+         NewLine();
+         PopCurrentIndent();
+     }
+
+     Visit(msg.GetToken3());
+ }
+
+ // Formats a window_partition_clause: token 1, optional block 2 and token 3 on
+ // the current line, then the named expressions one per indented line.
+ // NOTE: the method name ("Parition") is referenced from the dispatch table and
+ // therefore kept as is.
+ void VisitWindowParitionClause(const TRule_window_partition_clause& msg) {
+     Visit(msg.GetToken1());
+     if (msg.HasBlock2()) {
+         Visit(msg.GetBlock2());
+     }
+
+     Visit(msg.GetToken3());
+
+     const auto& partitionExprs = msg.GetRule_named_expr_list4();
+     PushCurrentIndent();
+     NewLine();
+     Visit(partitionExprs.GetRule_named_expr1());
+     for (const auto& nextExpr : partitionExprs.GetBlock2()) {
+         Visit(nextExpr.GetToken1());
+         NewLine();
+         Visit(nextExpr.GetRule_named_expr2());
+     }
+
+     PopCurrentIndent();
+ }
+
+ // Formats a lambda_body one indentation level deeper, starting on a new line:
+ // the repeated block 1, every block 2 element followed by a line break, then
+ // token 3 with the result expression and the repeated block 5. A line break is
+ // requested after the indentation is restored.
+ void VisitLambdaBody(const TRule_lambda_body& msg) {
+     PushCurrentIndent();
+     NewLine();
+
+     VisitRepeated(msg.GetBlock1());
+     for (const auto& statement : msg.GetBlock2()) {
+         Visit(statement);
+         NewLine();
+     }
+
+     Visit(msg.GetToken3());
+     Visit(msg.GetRule_expr4());
+     VisitRepeated(msg.GetBlock5());
+
+     PopCurrentIndent();
+     NewLine();
+ }
+
+ // Formats an in_atom_expr node. Alternative 7 (a select statement between two
+ // tokens) is laid out with the statement on its own indented lines and the
+ // closing token on a new line; every other alternative is visited generically.
+ void VisitInAtomExpr(const TRule_in_atom_expr& msg) {
+     if (msg.Alt_case() != TRule_in_atom_expr::kAltInAtomExpr7) {
+         VisitAllFields(TRule_in_atom_expr::GetDescriptor(), msg);
+         return;
+     }
+
+     const auto& subquery = msg.GetAlt_in_atom_expr7();
+     Visit(subquery.GetToken1());
+     NewLine();
+     PushCurrentIndent();
+     Visit(subquery.GetRule_select_stmt2());
+     NewLine();
+     PopCurrentIndent();
+     Visit(subquery.GetToken3());
+ }
+
+ // Formats a select_kind_parenthesis node. Alternative 2 (a select_kind_partial
+ // between two tokens) is laid out with the inner select on its own indented
+ // lines and the closing token on a new line; other alternatives are visited
+ // generically.
+ void VisitSelectKindParenthesis(const TRule_select_kind_parenthesis& msg) {
+     if (msg.Alt_case() != TRule_select_kind_parenthesis::kAltSelectKindParenthesis2) {
+         VisitAllFields(TRule_select_kind_parenthesis::GetDescriptor(), msg);
+         return;
+     }
+
+     const auto& wrapped = msg.GetAlt_select_kind_parenthesis2();
+     Visit(wrapped.GetToken1());
+     NewLine();
+     PushCurrentIndent();
+     Visit(wrapped.GetRule_select_kind_partial2());
+     PopCurrentIndent();
+     NewLine();
+     Visit(wrapped.GetToken3());
+ }
+
+ // Formats a cast_expr node. AfterInvokeExpr is raised right after the first
+ // token so that VisitTokenImpl does not put a space before the second one.
+ void VisitCastExpr(const TRule_cast_expr& msg) {
+     Visit(msg.GetToken1());
+
+     AfterInvokeExpr = true;
+     Visit(msg.GetToken2());
+
+     Visit(msg.GetRule_expr3());
+     Visit(msg.GetToken4());
+     Visit(msg.GetRule_type_name_or_bind5());
+
+     Visit(msg.GetToken6());
+ }
+
+ // Formats a bitcast_expr node. AfterInvokeExpr is raised right after the first
+ // token so that VisitTokenImpl does not put a space before the second one.
+ void VisitBitCastExpr(const TRule_bitcast_expr& msg) {
+     Visit(msg.GetToken1());
+
+     AfterInvokeExpr = true;
+     Visit(msg.GetToken2());
+
+     Visit(msg.GetRule_expr3());
+     Visit(msg.GetToken4());
+     Visit(msg.GetRule_type_name_simple5());
+
+     Visit(msg.GetToken6());
+ }
+
+ // Formats an ext_order_by_clause: the optional prefix block followed by the
+ // order-by clause itself.
+ void VisitExtOrderByClause(const TRule_ext_order_by_clause& msg) {
+     const bool hasPrefix = msg.HasBlock1();
+     if (hasPrefix) {
+         Visit(msg.GetBlock1());
+     }
+
+     Visit(msg.GetRule_order_by_clause2());
+ }
+
+ // Formats a key_expr node generically. AfterKeyExpr is raised first; while it
+ // is set VisitTokenImpl does not insert a space before a "[" token, and it is
+ // cleared again by the first token written.
+ void VisitKeyExpr(const TRule_key_expr& msg) {
+     const auto* descriptor = TRule_key_expr::GetDescriptor();
+     AfterKeyExpr = true;
+     VisitAllFields(descriptor, msg);
+ }
+
+ // Formats an exists_expr node: the first token forced to keyword case, the
+ // opening token, the body (block 3) on its own indented lines, and the closing
+ // token on a new line.
+ void VisitExistsExpr(const TRule_exists_expr& msg) {
+     // Header.
+     VisitKeyword(msg.GetToken1());
+     VisitToken(msg.GetToken2());
+
+     // Body, one level deeper.
+     NewLine();
+     PushCurrentIndent();
+     Visit(msg.GetBlock3());
+     PopCurrentIndent();
+     NewLine();
+
+     // Closing token.
+     VisitToken(msg.GetToken4());
+ }
+
+ // Formats a case_expr node: the first token (forced to keyword case) with the
+ // optional block 2 on the current line; every block 3 element on its own
+ // indented line; the optional block 4 (keyword token + expression) after them;
+ // and the final token on a new line at the outer indentation level.
+ void VisitCaseExpr(const TRule_case_expr& msg) {
+     VisitKeyword(msg.GetToken1());
+     if (msg.HasBlock2()) {
+         Visit(msg.GetBlock2());
+     }
+
+     NewLine();
+     PushCurrentIndent();
+
+     for (const auto& branch : msg.GetBlock3()) {
+         Visit(branch);
+         NewLine();
+     }
+
+     if (msg.HasBlock4()) {
+         const auto& fallback = msg.GetBlock4();
+         VisitKeyword(fallback.GetToken1());
+         Visit(fallback.GetRule_expr2());
+     }
+
+     PopCurrentIndent();
+     NewLine();
+
+     Visit(msg.GetToken5());
+ }
+
+ // Formats a when_expr node: first keyword token and its expression on the
+ // current line, then the second keyword token and its expression on a new
+ // line, one indentation level deeper.
+ void VisitWhenExpr(const TRule_when_expr& msg) {
+     // Condition part.
+     VisitKeyword(msg.GetToken1());
+     Visit(msg.GetRule_expr2());
+
+     // Result part, indented on its own line.
+     NewLine();
+     PushCurrentIndent();
+     VisitKeyword(msg.GetToken3());
+     Visit(msg.GetRule_expr4());
+     PopCurrentIndent();
+ }
+
+ // Formats a with_table_settings node: keyword token, opening token, the
+ // settings and the closing token. A single setting stays inline; two or more
+ // settings are written one per indented line, with the closing token on a new
+ // line.
+ void VisitWithTableSettingsExpr(const TRule_with_table_settings& msg) {
+     VisitKeyword(msg.GetToken1());
+     Visit(msg.GetToken2());
+
+     const bool multipleSettings = msg.Block4Size() > 0; // more than one setting
+     if (!multipleSettings) {
+         Visit(msg.GetRule_table_settings_entry3());
+     } else {
+         NewLine();
+         PushCurrentIndent();
+         Visit(msg.GetRule_table_settings_entry3()); // first setting
+
+         for (const auto& nextSetting : msg.GetBlock4()) {
+             Visit(nextSetting.GetToken1()); // comma
+             NewLine();
+             Visit(nextSetting.GetRule_table_settings_entry2()); // other settings
+         }
+         PopCurrentIndent();
+         NewLine();
+     }
+
+     Visit(msg.GetToken5());
+ }
+
+ // Formats an expr node. Alternative 2 is visited as is; alternative 1 is a
+ // chain of or_subexpr operands joined by single-token operators and is laid
+ // out by VisitBinaryOp.
+ void VisitExpr(const TRule_expr& msg) {
+     if (msg.HasAlt_expr2()) {
+         Visit(msg.GetAlt_expr2());
+     } else {
+         using TTail = TRule_expr::TAlt1::TBlock2;
+         const auto operandOf = [](const TTail& item) -> const TRule_or_subexpr& { return item.GetRule_or_subexpr2(); };
+         const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+         const auto& chain = msg.GetAlt_expr1();
+         const auto& tail = chain.GetBlock2();
+         VisitBinaryOp(chain.GetRule_or_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+     }
+ }
+
+ // Formats an or_subexpr node: a chain of and_subexpr operands joined by
+ // single-token operators, laid out by VisitBinaryOp.
+ void VisitOrSubexpr(const TRule_or_subexpr& msg) {
+     using TTail = TRule_or_subexpr::TBlock2;
+     const auto operandOf = [](const TTail& item) -> const TRule_and_subexpr& { return item.GetRule_and_subexpr2(); };
+     const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+     const auto& tail = msg.GetBlock2();
+     VisitBinaryOp(msg.GetRule_and_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+ }
+
+ // Formats an and_subexpr node: a chain of xor_subexpr operands joined by
+ // single-token operators, laid out by VisitBinaryOp.
+ void VisitAndSubexpr(const TRule_and_subexpr& msg) {
+     using TTail = TRule_and_subexpr::TBlock2;
+     const auto operandOf = [](const TTail& item) -> const TRule_xor_subexpr& { return item.GetRule_xor_subexpr2(); };
+     const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+     const auto& tail = msg.GetBlock2();
+     VisitBinaryOp(msg.GetRule_xor_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+ }
+
+ // Formats an eq_subexpr node: a chain of neq_subexpr operands joined by
+ // single-token operators, laid out by VisitBinaryOp.
+ void VisitEqSubexpr(const TRule_eq_subexpr& msg) {
+     using TTail = TRule_eq_subexpr::TBlock2;
+     const auto operandOf = [](const TTail& item) -> const TRule_neq_subexpr& { return item.GetRule_neq_subexpr2(); };
+     const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+     const auto& tail = msg.GetBlock2();
+     VisitBinaryOp(msg.GetRule_neq_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+ }
+
+ // Entry point for a neq_subexpr node: delegates to VisitNeqSubexprImpl with no
+ // indentation pushed yet.
+ void VisitNeqSubexpr(const TRule_neq_subexpr& msg) {
+     constexpr bool pushedIndent = false;
+     VisitNeqSubexprImpl(msg, pushedIndent);
+ }
+
+ // Formats a neq_subexpr node.
+ //
+ // The leading chain of bit_subexpr operands is laid out by VisitBinaryOp. The
+ // optional block 3 is then either
+ //   Alt1: a double-question operator followed by another neq_subexpr, which is
+ //         handled recursively, or
+ //   Alt2: visited as is.
+ // Throws yexception for any other alternative.
+ //
+ // pushedIndent tells this call whether an enclosing call of the same chain has
+ // already pushed the continuation indent, so that a chain is indented at most
+ // once. Each call pops only the indent it pushed itself: the flag is passed by
+ // value down the recursion, so popping whenever it is set (as was done before)
+ // pops a single push once per nested frame and leaves the indentation
+ // unbalanced for chains with several `??` operators.
+ void VisitNeqSubexprImpl(const TRule_neq_subexpr& msg, bool pushedIndent) {
+     auto getExpr = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); };
+     auto getOp = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_neq_subexpr::TBlock2::TBlock1& { return b.GetBlock1(); };
+     VisitBinaryOp(msg.GetRule_bit_subexpr1(), getOp, getExpr, msg.GetBlock2().begin(), msg.GetBlock2().end());
+
+     if (!msg.HasBlock3()) {
+         return;
+     }
+
+     const auto& b = msg.GetBlock3();
+     switch (b.Alt_case()) {
+         case TRule_neq_subexpr_TBlock3::kAlt1: {
+             const auto& alt = b.GetAlt1();
+             const bool hasFirstNewline = LastLine != ParsedTokens[TokenIndex].Line;
+             // 2 is `??` size in tokens
+             const bool hasSecondNewline = ParsedTokens[TokenIndex].Line != ParsedTokens[TokenIndex + 2].Line;
+             const ui32 currentOutLine = OutLine;
+
+             // True only when this very call pushed the continuation indent.
+             bool pushedHere = false;
+             const auto breakLine = [&] {
+                 NewLine();
+                 if (!pushedIndent) {
+                     PushCurrentIndent();
+                     pushedIndent = true;
+                     pushedHere = true;
+                 }
+             };
+
+             // Break before the operator when flushing comments moved the output
+             // to another line, or when the source had line breaks on both sides.
+             PosFromParsedToken(ParsedTokens[TokenIndex]);
+             if (currentOutLine != OutLine || (hasFirstNewline && hasSecondNewline)) {
+                 breakLine();
+             }
+
+             Visit(alt.GetRule_double_question1());
+
+             // Break after the operator when the source had a line break on either side.
+             PosFromParsedToken(ParsedTokens[TokenIndex]);
+             if (hasFirstNewline || hasSecondNewline) {
+                 breakLine();
+             }
+
+             VisitNeqSubexprImpl(alt.GetRule_neq_subexpr2(), pushedIndent);
+             if (pushedHere) {
+                 PopCurrentIndent();
+             }
+
+             break;
+         }
+         case TRule_neq_subexpr_TBlock3::kAlt2:
+             Visit(b.GetAlt2());
+             break;
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Formats a bit_subexpr node: a chain of add_subexpr operands joined by
+ // single-token operators, laid out by VisitBinaryOp.
+ void VisitBitSubexpr(const TRule_bit_subexpr& msg) {
+     using TTail = TRule_bit_subexpr::TBlock2;
+     const auto operandOf = [](const TTail& item) -> const TRule_add_subexpr& { return item.GetRule_add_subexpr2(); };
+     const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+     const auto& tail = msg.GetBlock2();
+     VisitBinaryOp(msg.GetRule_add_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+ }
+
+ // Formats an add_subexpr node: a chain of mul_subexpr operands joined by
+ // single-token operators, laid out by VisitBinaryOp.
+ void VisitAddSubexpr(const TRule_add_subexpr& msg) {
+     using TTail = TRule_add_subexpr::TBlock2;
+     const auto operandOf = [](const TTail& item) -> const TRule_mul_subexpr& { return item.GetRule_mul_subexpr2(); };
+     const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+     const auto& tail = msg.GetBlock2();
+     VisitBinaryOp(msg.GetRule_mul_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+ }
+
+ // Formats a mul_subexpr node: a chain of con_subexpr operands joined by
+ // single-token operators, laid out by VisitBinaryOp.
+ void VisitMulSubexpr(const TRule_mul_subexpr& msg) {
+     using TTail = TRule_mul_subexpr::TBlock2;
+     const auto operandOf = [](const TTail& item) -> const TRule_con_subexpr& { return item.GetRule_con_subexpr2(); };
+     const auto operatorOf = [](const TTail& item) -> const TToken& { return item.GetToken1(); };
+
+     const auto& tail = msg.GetBlock2();
+     VisitBinaryOp(msg.GetRule_con_subexpr1(), operatorOf, operandOf, tail.begin(), tail.end());
+ }
+
+ // Number of lexer tokens occupied by a binary operator that is represented by
+ // a single grammar token: always one.
+ ui32 BinaryOpTokenSize(const TToken&) {
+     constexpr ui32 singleToken = 1;
+     return singleToken;
+ }
+
+ // Number of lexer tokens occupied by the operator of a neq_subexpr chain.
+ // Alternative 2 spans two tokens and alternative 4 spans three (presumably the
+ // shift-right and rotate-right operators, which are visited as two and three
+ // tokens elsewhere — verify against the grammar); alternatives 1, 3, 5, 6 and 7
+ // are single tokens. Throws yexception for any other alternative.
+ ui32 BinaryOpTokenSize(const TRule_neq_subexpr::TBlock2::TBlock1& block) {
+     using TOp = TRule_neq_subexpr::TBlock2::TBlock1;
+     switch (block.Alt_case()) {
+         case TOp::kAlt2:
+             return 2;
+         case TOp::kAlt4:
+             return 3;
+         case TOp::kAlt1:
+         case TOp::kAlt3:
+         case TOp::kAlt5:
+         case TOp::kAlt6:
+         case TOp::kAlt7:
+             return 1;
+         default:
+             ythrow yexception() << "Alt is not supported";
+     }
+ }
+
+ // Writes the two tokens of a shift_right operator. InMultiTokenOp is raised
+ // for the second token so that VisitTokenImpl does not separate it from the
+ // first one with a space.
+ void VisitShiftRight(const TRule_shift_right& msg) {
+     const auto& head = msg.GetToken1();
+     const auto& tail = msg.GetToken2();
+
+     VisitToken(head);
+     InMultiTokenOp = true;
+     VisitToken(tail);
+     InMultiTokenOp = false;
+ }
+
+ // Writes the three tokens of a rot_right operator. InMultiTokenOp is raised
+ // for the second and third tokens so that VisitTokenImpl does not separate
+ // them from the first one with spaces.
+ void VisitRotRight(const TRule_rot_right& msg) {
+     const auto& head = msg.GetToken1();
+     const auto& middle = msg.GetToken2();
+     const auto& tail = msg.GetToken3();
+
+     VisitToken(head);
+     InMultiTokenOp = true;
+     VisitToken(middle);
+     VisitToken(tail);
+     InMultiTokenOp = false;
+ }
+
+ // Lays out a left-to-right chain `expr (op expr)*`.
+ //
+ //   expr     - the first operand;
+ //   getOp    - extracts the operator node from a chain element;
+ //   getExpr  - extracts the operand node from a chain element;
+ //   begin/end - range of chain elements.
+ //
+ // For every element the source line breaks around the operator are inspected
+ // (via ParsedTokens, using BinaryOpTokenSize to skip over the operator):
+ //   * a line break is emitted before the operator when flushing comments moved
+ //     the output to another line, or when the source had breaks on both sides;
+ //   * a line break is emitted after the operator when the source had a break
+ //     on either side.
+ // The first emitted break pushes one continuation indent, which is popped once
+ // the whole chain has been written.
+ //
+ // The operator is bound by reference: the getters return const references to
+ // protobuf nodes, and `const auto op = ...` would deep-copy the node on every
+ // iteration.
+ template <typename TExpr, typename TGetOp, typename TGetExpr, typename TIter>
+ void VisitBinaryOp(const TExpr& expr, TGetOp getOp, TGetExpr getExpr, TIter begin, TIter end) {
+     Visit(expr);
+     bool pushedIndent = false;
+
+     // Starts a new line and makes sure the continuation indent is pushed exactly once.
+     const auto breakLine = [&] {
+         NewLine();
+         if (!pushedIndent) {
+             PushCurrentIndent();
+             pushedIndent = true;
+         }
+     };
+
+     for (; begin != end; ++begin) {
+         const auto& op = getOp(*begin);
+         const auto opSize = BinaryOpTokenSize(op);
+         const bool hasFirstNewline = LastLine != ParsedTokens[TokenIndex].Line;
+         const bool hasSecondNewline = ParsedTokens[TokenIndex].Line != ParsedTokens[TokenIndex + opSize].Line;
+         const ui32 currentOutLine = OutLine;
+
+         PosFromParsedToken(ParsedTokens[TokenIndex]);
+         if (currentOutLine != OutLine || (hasFirstNewline && hasSecondNewline)) {
+             breakLine();
+         }
+         Visit(op);
+
+         PosFromParsedToken(ParsedTokens[TokenIndex]);
+         if (hasFirstNewline || hasSecondNewline) {
+             breakLine();
+         }
+
+         Visit(getExpr(*begin));
+     }
+
+     if (pushedIndent) {
+         PopCurrentIndent();
+     }
+ }
+
+ // Increases the current indentation by one indent step (OneIndent).
+ void PushCurrentIndent() {
+     CurrentIndent = CurrentIndent + OneIndent;
+ }
+
+ // Decreases the current indentation by one indent step (OneIndent).
+ // Must be paired with an earlier PushCurrentIndent.
+ void PopCurrentIndent() {
+     CurrentIndent = CurrentIndent - OneIndent;
+ }
+
+private:
+ const TStaticData& StaticData;
+ const TParsedTokenList& ParsedTokens;
+ const TParsedTokenList& Comments;
+ TStringBuilder SB;
+ ui32 OutColumn = 0;
+ ui32 OutLine = 1;
+ ui32 LastLine = 0;
+ ui32 LastColumn = 0;
+ ui32 LastComment = 0;
+ i32 CurrentIndent = 0;
+ TVector<EScope> Scopes;
+ TMaybe<bool> AddLine;
+ ui64 InsideType = 0;
+ bool AfterNamespace = false;
+ bool AfterBracket = false;
+ bool AfterInvokeExpr = false;
+ bool AfterUnaryOp = false;
+ bool SkipSpaceAfterUnaryOp = false;
+ bool AfterDollarOrAt = false;
+ bool AfterDot = false;
+ bool AfterDigits = false;
+ bool AfterQuestion = false;
+ bool AfterLess = false;
+ bool AfterKeyExpr = false;
+ bool InMultiTokenOp = false;
+ ui32 ForceExpandedLine = 0;
+ ui32 ForceExpandedColumn = 0;
+
+ ui32 TokenIndex = 0;
+ TMarkTokenStack MarkTokenStack;
+ TVector<TTokenInfo> MarkedTokens;
+ ui64 InsideExpr = 0;
+};
+
+// Wraps a TPrettyVisitor member function that takes a concrete message type T
+// into a type-erased functor taking a generic protobuf message. The message is
+// downcast with dynamic_cast to a reference, so a message of the wrong dynamic
+// type makes the cast throw.
+template <typename T>
+TPrettyFunctor MakePrettyFunctor(void (TPrettyVisitor::*memberPtr)(const T& msg)) {
+    return [memberPtr](TPrettyVisitor& visitor, const NProtoBuf::Message& rawMsg) {
+        const T& typedMsg = dynamic_cast<const T&>(rawMsg);
+        (visitor.*memberPtr)(typedMsg);
+    };
+}
+
+// Wraps a TObfuscatingVisitor member function that takes a concrete message
+// type T into a type-erased functor taking a generic protobuf message. The
+// message is downcast with dynamic_cast to a reference, so a message of the
+// wrong dynamic type makes the cast throw.
+template <typename T>
+TObfuscatingFunctor MakeObfuscatingFunctor(void (TObfuscatingVisitor::*memberPtr)(const T& msg)) {
+    return [memberPtr](TObfuscatingVisitor& visitor, const NProtoBuf::Message& rawMsg) {
+        const T& typedMsg = dynamic_cast<const T&>(rawMsg);
+        (visitor.*memberPtr)(typedMsg);
+    };
+}
+
+TStaticData::TStaticData()
+ : Keywords(GetKeywords())
+ , ScopeDispatch({
+ {TRule_type_name::GetDescriptor(), EScope::TypeName},
+ {TRule_type_name_composite::GetDescriptor(), EScope::TypeName},
+ {TRule_double_question::GetDescriptor(), EScope::DoubleQuestion},
+ {TRule_id::GetDescriptor(), EScope::Identifier},
+ {TRule_id_or_type::GetDescriptor(), EScope::Identifier},
+ {TRule_id_schema::GetDescriptor(), EScope::Identifier},
+ {TRule_id_expr::GetDescriptor(), EScope::Identifier},
+ {TRule_id_expr_in::GetDescriptor(), EScope::Identifier},
+ {TRule_id_window::GetDescriptor(), EScope::Identifier},
+ {TRule_id_table::GetDescriptor(), EScope::Identifier},
+ {TRule_id_without::GetDescriptor(), EScope::Identifier},
+ {TRule_id_hint::GetDescriptor(), EScope::Identifier},
+ {TRule_identifier::GetDescriptor(), EScope::Identifier},
+ {TRule_id_table_or_type::GetDescriptor(), EScope::Identifier},
+ {TRule_bind_parameter::GetDescriptor(), EScope::Identifier},
+ {TRule_an_id_as_compat::GetDescriptor(), EScope::Identifier},
+ })
+ , PrettyVisitDispatch({
+ {TToken::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitToken)},
+ {TRule_into_values_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIntoValuesSource)},
+ {TRule_select_kind::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectKind)},
+ {TRule_process_core::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitProcessCore)},
+ {TRule_reduce_core::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitReduceCore)},
+ {TRule_sort_specification_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSortSpecificationList)},
+ {TRule_select_core::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectCore)},
+ {TRule_join_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitJoinSource)},
+ {TRule_single_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSingleSource)},
+ {TRule_flatten_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitFlattenSource)},
+ {TRule_named_single_source::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitNamedSingleSource)},
+ {TRule_simple_table_ref::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSimpleTableRef)},
+ {TRule_into_simple_table_ref::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIntoSimpleTableRef)},
+ {TRule_select_kind_partial::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectKindPartial)},
+ {TRule_flatten_by_arg::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitFlattenByArg)},
+ {TRule_without_column_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWithoutColumnList)},
+ {TRule_table_ref::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitTableRef)},
+ {TRule_grouping_element_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitGroupingElementList)},
+ {TRule_group_by_clause::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitGroupByClause)},
+ {TRule_window_definition_list::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWindowDefinitionList)},
+ {TRule_window_specification::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWindowSpecification)},
+ {TRule_window_partition_clause::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWindowParitionClause)},
+ {TRule_lambda_body::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitLambdaBody)},
+ {TRule_in_atom_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitInAtomExpr)},
+ {TRule_select_kind_parenthesis::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectKindParenthesis)},
+ {TRule_cast_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCastExpr)},
+ {TRule_bitcast_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBitCastExpr)},
+ {TRule_ext_order_by_clause::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExtOrderByClause)},
+ {TRule_key_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitKeyExpr)},
+ {TRule_define_action_or_subquery_body::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDefineActionOrSubqueryBody)},
+ {TRule_exists_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExistsExpr)},
+ {TRule_case_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCaseExpr)},
+ {TRule_when_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWhenExpr)},
+ {TRule_with_table_settings::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWithTableSettingsExpr)},
+
+ {TRule_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExpr)},
+ {TRule_or_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitOrSubexpr)},
+ {TRule_and_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAndSubexpr)},
+ {TRule_eq_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitEqSubexpr)},
+ {TRule_neq_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitNeqSubexpr)},
+ {TRule_bit_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBitSubexpr)},
+ {TRule_add_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAddSubexpr)},
+ {TRule_mul_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitMulSubexpr)},
+
+ {TRule_rot_right::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRotRight)},
+ {TRule_shift_right::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitShiftRight)},
+
+ {TRule_pragma_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitPragma)},
+ {TRule_select_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelect)},
+ {TRule_select_unparenthesized_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitSelectUnparenthesized)},
+ {TRule_named_nodes_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitNamedNodes)},
+ {TRule_create_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateTable)},
+ {TRule_drop_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropTable)},
+ {TRule_use_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitUse)},
+ {TRule_into_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIntoTable)},
+ {TRule_commit_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCommit)},
+ {TRule_update_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitUpdate)},
+ {TRule_delete_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDelete)},
+ {TRule_rollback_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRollback)},
+ {TRule_declare_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDeclare)},
+ {TRule_import_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitImport)},
+ {TRule_export_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExport)},
+ {TRule_alter_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterTable)},
+ {TRule_alter_external_table_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterExternalTable)},
+ {TRule_do_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDo)},
+ {TRule_define_action_or_subquery_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAction)},
+ {TRule_if_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitIf)},
+ {TRule_for_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitFor)},
+ {TRule_values_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitValues)},
+ {TRule_create_user_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateUser)},
+ {TRule_alter_user_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterUser)},
+ {TRule_create_group_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateGroup)},
+ {TRule_alter_group_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterGroup)},
+ {TRule_drop_role_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropRole)},
+ {TRule_upsert_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitUpsertObject)},
+ {TRule_create_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateObject)},
+ {TRule_alter_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterObject)},
+ {TRule_drop_object_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropObject)},
+ {TRule_create_external_data_source_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateExternalDataSource)},
+ {TRule_alter_external_data_source_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterExternalDataSource)},
+ {TRule_drop_external_data_source_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropExternalDataSource)},
+ {TRule_create_replication_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateAsyncReplication)},
+ {TRule_alter_replication_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterAsyncReplication)},
+ {TRule_drop_replication_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropAsyncReplication)},
+ {TRule_create_topic_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateTopic)},
+ {TRule_alter_topic_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterTopic)},
+ {TRule_drop_topic_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropTopic)},
+ {TRule_grant_permissions_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitGrantPermissions)},
+ {TRule_revoke_permissions_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRevokePermissions)},
+ {TRule_alter_table_store_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterTableStore)},
+ {TRule_create_view_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateView)},
+ {TRule_drop_view_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropView)},
+ {TRule_create_resource_pool_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateResourcePool)},
+ {TRule_alter_resource_pool_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterResourcePool)},
+ {TRule_drop_resource_pool_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropResourcePool)},
+ {TRule_create_backup_collection_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateBackupCollection)},
+ {TRule_alter_backup_collection_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterBackupCollection)},
+ {TRule_drop_backup_collection_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropBackupCollection)},
+ {TRule_analyze_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAnalyze)},
+ {TRule_create_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCreateResourcePoolClassifier)},
+ {TRule_alter_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterResourcePoolClassifier)},
+ {TRule_drop_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropResourcePoolClassifier)},
+ {TRule_backup_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBackup)},
+ {TRule_restore_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRestore)},
+ })
+ , ObfuscatingVisitDispatch({
+ {TToken::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitToken)},
+ {TRule_literal_value::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitLiteralValue)},
+ {TRule_pragma_value::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitPragmaValue)},
+ {TRule_atom_expr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitAtomExpr)},
+ {TRule_in_atom_expr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitInAtomExpr)},
+ {TRule_unary_casual_subexpr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitUnaryCasualSubexpr)},
+ {TRule_in_unary_casual_subexpr::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitInUnaryCasualSubexpr)},
+ })
+{
+ // ensure that all statements have a visitor
+ auto coreDescr = TRule_sql_stmt_core::GetDescriptor();
+ for (int i = 0; i < coreDescr->field_count(); ++i) {
+ const NProtoBuf::FieldDescriptor* fd = coreDescr->field(i);
+ if (fd->cpp_type() != NProtoBuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ continue;
+ }
+
+ auto altDescr = fd->message_type();
+ for (int j = 0; j < altDescr->field_count(); ++j) {
+ auto fd2 = altDescr->field(j);
+ if (fd2->cpp_type() != NProtoBuf::FieldDescriptor::CPPTYPE_MESSAGE) {
+ continue;
+ }
+
+ auto stmtMessage = fd2->message_type();
+ Y_ENSURE(PrettyVisitDispatch.contains(stmtMessage), TStringBuilder() << "Missing visitor for " << stmtMessage->name());
+ }
+ }
+}
+
+ // ISqlFormatter implementation that splits a query into statements, formats each
+ // one with TPrettyVisitor and verifies that formatting did not change the token
+ // stream. Obfuscate mode first rewrites the query with TObfuscatingVisitor and
+ // then pretty-prints the result.
+ class TSqlFormatter : public NSQLFormat::ISqlFormatter {
+ public:
+     explicit TSqlFormatter(const NSQLTranslation::TTranslationSettings& settings)
+         : Settings(settings)
+     {}
+
+     // Formats `query` into `formattedQuery`.
+     //
+     // Returns false when the translation settings cannot be parsed, when lexing
+     // fails, when obfuscation is requested for a query that fails to parse or
+     // that uses the PG parser, or when the formatted statement does not validate
+     // against the original tokens. On those paths `formattedQuery` keeps its
+     // initial value: a copy of `query` in Pretty mode, an empty string in
+     // Obfuscate mode.
+     //
+     // In Pretty mode a statement that fails to parse is not an error: it is
+     // emitted verbatim and the parser issues for it are discarded.
+     bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues, EFormatMode mode) override {
+         formattedQuery = (mode == EFormatMode::Obfuscate) ? "" : query;
+         auto parsedSettings = Settings;
+         if (!NSQLTranslation::ParseTranslationSettings(query, parsedSettings, issues)) {
+             return false;
+         }
+
+         if (parsedSettings.PgParser) {
+             // PG queries are passed through untouched; they cannot be obfuscated.
+             // NOTE(review): the Obfuscate failure here adds nothing to `issues` - confirm this is intended.
+             return mode != EFormatMode::Obfuscate;
+         }
+
+         if (mode == EFormatMode::Obfuscate) {
+             auto message = NSQLTranslationV1::SqlAST(query, "Query", issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena);
+             if (!message) {
+                 return false;
+             }
+
+             // Obfuscate first, then run the obfuscated text through the regular pretty path.
+             TObfuscatingVisitor visitor;
+             return Format(visitor.Process(*message), formattedQuery, issues, EFormatMode::Pretty);
+         }
+
+         auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser);
+         TParsedTokenList allTokens;
+         auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
+             if (token.Name != "EOF") {
+                 // The token is handed over as an rvalue and not used afterwards: move, do not copy.
+                 allTokens.push_back(std::move(token));
+             }
+         };
+
+         if (!lexer->Tokenize(query, "Query", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
+             return false;
+         }
+
+         // statements[i - 1] .. statements[i] delimits the tokens of one statement.
+         TVector<TTokenIterator> statements;
+         SplitByStatements(allTokens.begin(), allTokens.end(), statements);
+         TStringBuilder finalFormattedQuery;
+         for (size_t i = 1; i < statements.size(); ++i) {
+             TStringBuilder currentQueryBuilder;
+             for (auto it = statements[i - 1]; it != statements[i]; ++it) {
+                 currentQueryBuilder << it->Content;
+             }
+
+             TString currentQuery = currentQueryBuilder;
+             currentQuery = StripStringLeft(currentQuery);
+
+             // A chunk made only of semicolons (or nothing at all) produces no output.
+             bool isBlank = true;
+             for (auto c : currentQuery) {
+                 if (c != ';') {
+                     isBlank = false;
+                     break;
+                 }
+             }
+
+             if (isBlank) {
+                 continue;
+             }
+
+             // Re-lex the single statement:
+             //   stmtTokens   - every token, used for validation;
+             //   comments     - COMMENT tokens only;
+             //   parsedTokens - everything except COMMENT, WS and EOF.
+             TVector<NSQLTranslation::TParsedToken> comments;
+             TParsedTokenList parsedTokens, stmtTokens;
+             bool hasTrailingComments = false;
+             auto onNextRawToken = [&](NSQLTranslation::TParsedToken&& token) {
+                 // Must be a copy: the token is moved into one of the lists below.
+                 stmtTokens.push_back(token);
+                 if (token.Name == "COMMENT") {
+                     comments.emplace_back(std::move(token));
+                     hasTrailingComments = true;
+                 } else if (token.Name != "WS" && token.Name != "EOF") {
+                     parsedTokens.emplace_back(std::move(token));
+                     hasTrailingComments = false;
+                 }
+             };
+
+             if (!lexer->Tokenize(currentQuery, "Query", onNextRawToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
+                 return false;
+             }
+
+             // Parser issues are collected locally and dropped: an unparsable
+             // statement is copied to the output as is.
+             NYql::TIssues parserIssues;
+             auto message = NSQLTranslationV1::SqlAST(currentQuery, "Query", parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena);
+             if (!message) {
+                 finalFormattedQuery << currentQuery;
+                 if (!currentQuery.EndsWith("\n")) {
+                     finalFormattedQuery << "\n";
+                 }
+
+                 continue;
+             }
+
+             TPrettyVisitor visitor(parsedTokens, comments);
+             // Initialised so the flag is never read indeterminate, whatever Process does with it.
+             bool addLine = false;
+             auto currentFormattedQuery = visitor.Process(*message, addLine);
+             TParsedTokenList stmtFormattedTokens;
+             auto onNextFormattedToken = [&](NSQLTranslation::TParsedToken&& token) {
+                 stmtFormattedTokens.push_back(std::move(token));
+             };
+
+             if (!lexer->Tokenize(currentFormattedQuery, "Query", onNextFormattedToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
+                 return false;
+             }
+
+             if (!Validate(stmtFormattedTokens, stmtTokens)) {
+                 issues.AddIssue(NYql::TIssue({}, TStringBuilder() << "Validation failed: " << currentQuery.Quote() << " != " << currentFormattedQuery.Quote()));
+                 return false;
+             }
+
+             if (addLine && !finalFormattedQuery.empty()) {
+                 finalFormattedQuery << "\n";
+             }
+
+             finalFormattedQuery << currentFormattedQuery;
+
+             // parsedTokens holds no COMMENT/WS tokens, so it may be empty for a
+             // chunk without significant tokens; never call back() on an empty list.
+             const bool endsWithSemicolon = !parsedTokens.empty() && parsedTokens.back().Name == "SEMICOLON";
+             if (!endsWithSemicolon) {
+                 // A trailing "--" comment without a newline would swallow the
+                 // semicolon appended below, so terminate the comment line first.
+                 if (hasTrailingComments
+                     && !comments.back().Content.EndsWith("\n")
+                     && comments.back().Content.StartsWith("--")) {
+                     finalFormattedQuery << "\n";
+                 }
+                 finalFormattedQuery << ";\n";
+             }
+         }
+
+         formattedQuery = finalFormattedQuery;
+         return true;
+     }
+
+ private:
+     // Base settings; copied and refined from the query text on every Format call.
+     const NSQLTranslation::TTranslationSettings Settings;
+ };
+
+}
+
+ // Factory for the formatter; the returned holder owns a TSqlFormatter built from `settings`.
+ ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings) {
+     ISqlFormatter::TPtr formatter(new TSqlFormatter(settings));
+     return formatter;
+ }
+
+ // Rebuilds `query` from its token stream, writing a numbered " /*N*/ " comment
+ // before every token and one more after the last token. The EOF token is skipped.
+ //
+ // Throws yexception carrying the collected issues when the translation settings
+ // cannot be parsed or the query cannot be tokenized.
+ TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
+     auto parsedSettings = settings;
+     NYql::TIssues issues;
+     if (!NSQLTranslation::ParseTranslationSettings(query, parsedSettings, issues)) {
+         throw yexception() << issues.ToString();
+     }
+
+     auto lexer = NSQLTranslationV1::MakeLexer(parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser);
+     TVector<NSQLTranslation::TParsedToken> allTokens;
+     auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
+         if (token.Name != "EOF") {
+             // The token arrives as an rvalue and is not used afterwards: move, do not copy.
+             allTokens.push_back(std::move(token));
+         }
+     };
+
+     if (!lexer->Tokenize(query, "Query", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
+         throw yexception() << issues.ToString();
+     }
+
+     TStringBuilder newQueryBuilder;
+     ui32 index = 0;
+     for (const auto& token : allTokens) {
+         newQueryBuilder << " /*" << index++ << "*/ ";
+         newQueryBuilder << token.Content;
+     }
+
+     // Closing marker after the last token; the counter is not needed afterwards.
+     newQueryBuilder << " /*" << index << "*/ ";
+     return newQueryBuilder;
+ }
+
+ // Convenience wrapper: formats `query` with default translation settings (backed
+ // by a local protobuf arena) in the default format mode. On failure returns false
+ // and stores the issue text, or the message of a caught std::exception, in `error`.
+ bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error) {
+     try {
+         google::protobuf::Arena localArena;
+         NSQLTranslation::TTranslationSettings translationSettings;
+         translationSettings.Arena = &localArena;
+
+         auto sqlFormatter = MakeSqlFormatter(translationSettings);
+         NYql::TIssues issues;
+         if (sqlFormatter->Format(query, formattedQuery, issues)) {
+             return true;
+         }
+
+         error = issues.ToString();
+         return false;
+     } catch (const std::exception& ex) {
+         error = ex.what();
+         return false;
+     }
+ }
+
+ // Collects keyword names from the embedded "SQLv1.g.in" grammar resource.
+ //
+ // A line contributes a keyword when it is not a "//" comment, contains a ':'
+ // followed later by a ';', and the text between them - with all spaces and
+ // single quotes removed - is identical to the text before the ':'.
+ THashSet<TString> GetKeywords() {
+     TString grammar;
+     // ANTLR4-MIGRATION: just change SQLv1 to SQLv1Antlr4
+     Y_ENSURE(NResource::FindExact("SQLv1.g.in", &grammar));
+
+     TVector<TString> grammarLines;
+     Split(grammar, "\n", grammarLines);
+
+     THashSet<TString> keywords;
+     for (const auto& rawLine : grammarLines) {
+         const TString line = StripString(rawLine);
+         if (line.StartsWith("//")) {
+             continue;
+         }
+
+         const auto colonPos = line.find(':');
+         const auto semicolonPos = line.find(';');
+         // Need "name:<at least one char>;" with the ';' located after the ':'.
+         const bool hasRuleBody = colonPos != TString::npos
+             && semicolonPos != TString::npos
+             && semicolonPos >= colonPos + 2;
+         if (!hasRuleBody) {
+             continue;
+         }
+
+         const auto ruleName = line.substr(0, colonPos);
+         auto ruleBody = line.substr(colonPos + 1, semicolonPos - colonPos - 1);
+         SubstGlobal(ruleBody, " ", "");
+         SubstGlobal(ruleBody, "'", "");
+         if (ruleBody == ruleName) {
+             keywords.insert(ruleName);
+         }
+     }
+
+     return keywords;
+ }
+
+} // namespace NSQLFormat
diff --git a/yql/essentials/sql/v1/format/sql_format.h b/yql/essentials/sql/v1/format/sql_format.h
new file mode 100644
index 00000000000..6944a730710
--- /dev/null
+++ b/yql/essentials/sql/v1/format/sql_format.h
@@ -0,0 +1,35 @@
+ #pragma once
+
+ #include <yql/essentials/public/issue/yql_issue.h>
+ #include <yql/essentials/sql/settings/translation_settings.h>
+
+ #include <util/generic/hash_set.h>
+ #include <util/generic/ptr.h>
+ #include <util/generic/string.h>
+
+ namespace NSQLFormat {
+
+ // Number of spaces in one indentation step of formatted output
+ // (the unit tests expand every expected '\t' to this many spaces).
+ constexpr ui32 OneIndent = 4;
+
+ // Selects what ISqlFormatter::Format produces.
+ enum class EFormatMode {
+     Pretty,   // pretty-print the query
+     Obfuscate // rewrite the query with the obfuscating visitor, then pretty-print it
+ };
+
+ // Interface of a SQL formatter; obtain an instance with MakeSqlFormatter().
+ class ISqlFormatter {
+ public:
+     using TPtr = THolder<ISqlFormatter>;
+
+     // Formats `query` into `formattedQuery` according to `mode`.
+     // Returns false on failure; diagnostics, if any, are added to `issues`.
+     virtual bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues,
+         EFormatMode mode = EFormatMode::Pretty) = 0;
+     virtual ~ISqlFormatter() = default;
+ };
+
+ // Creates a formatter that uses `settings` as the base translation settings.
+ ISqlFormatter::TPtr MakeSqlFormatter(const NSQLTranslation::TTranslationSettings& settings = {});
+
+ // Inserts a numbered comment surrounded by spaces before every token of the query
+ // and after the last one. Throws yexception if the query cannot be tokenized.
+ TString MutateQuery(const TString& query, const NSQLTranslation::TTranslationSettings& settings = {});
+
+ // Formats `query` with default settings; on failure returns false and fills `error`.
+ bool SqlFormatSimple(const TString& query, TString& formattedQuery, TString& error);
+
+ // Returns the keyword names extracted from the SQLv1 grammar resource.
+ THashSet<TString> GetKeywords();
+
+ } // namespace NSQLFormat
diff --git a/yql/essentials/sql/v1/format/sql_format_ut.cpp b/yql/essentials/sql/v1/format/sql_format_ut.cpp
new file mode 100644
index 00000000000..3cfd7031159
--- /dev/null
+++ b/yql/essentials/sql/v1/format/sql_format_ut.cpp
@@ -0,0 +1,51 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include "sql_format.h"
+
+#include <google/protobuf/arena.h>
+#include <util/string/subst.h>
+#include <util/string/join.h>
+
+namespace {
+
+using TCases = TVector<std::pair<TString, TString>>;
+
+ // Test fixture: owns a protobuf arena and a formatter configured to use it.
+ struct TSetup {
+     TSetup() {
+         NSQLTranslation::TTranslationSettings translationSettings;
+         translationSettings.Arena = &Arena;
+         Formatter = NSQLFormat::MakeSqlFormatter(translationSettings);
+     }
+
+     // For every {query, expected} pair checks that:
+     //   1. formatting `query` in `mode` succeeds and equals `expected`
+     //      (each '\t' in `expected` stands for one indentation step);
+     //   2. formatting the result again leaves it unchanged;
+     //   3. in Pretty mode, the query mutated by MutateQuery still formats successfully.
+     void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) {
+         for (const auto& testCase : cases) {
+             const TString& query = testCase.first;
+
+             NYql::TIssues issues;
+             TString formatted;
+             const auto firstPassOk = Formatter->Format(query, formatted, issues, mode);
+             UNIT_ASSERT_C(firstPassOk, issues.ToString());
+
+             TString expected = testCase.second;
+             SubstGlobal(expected, "\t", TString(NSQLFormat::OneIndent, ' '));
+             UNIT_ASSERT_NO_DIFF(formatted, expected);
+
+             // The formatter must be idempotent on its own output.
+             TString reformatted;
+             const auto secondPassOk = Formatter->Format(formatted, reformatted, issues);
+             UNIT_ASSERT_C(secondPassOk, issues.ToString());
+             UNIT_ASSERT_NO_DIFF(formatted, reformatted);
+
+             if (mode != NSQLFormat::EFormatMode::Pretty) {
+                 continue;
+             }
+
+             // Only success is checked for the mutated query; its output is not compared.
+             const auto mutatedQuery = NSQLFormat::MutateQuery(query);
+             const auto mutatedOk = Formatter->Format(mutatedQuery, formatted, issues);
+             UNIT_ASSERT_C(mutatedOk, issues.ToString());
+         }
+     }
+
+     google::protobuf::Arena Arena;
+     NSQLFormat::ISqlFormatter::TPtr Formatter;
+ };
+
+}
+
+ Y_UNIT_TEST_SUITE(CheckSqlFormatter) { // suite body is pulled in from a header
+ #include "sql_format_ut.h" // Y_UNIT_TEST definitions; presumably shared with sql_format_ut_antlr4.cpp - confirm
+ }
diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h
new file mode 100644
index 00000000000..951bf427989
--- /dev/null
+++ b/yql/essentials/sql/v1/format/sql_format_ut.h
@@ -0,0 +1,1650 @@
+ Y_UNIT_TEST(Pragma) { // PRAGMA: keyword upper-cased, spaces around '=', ", " between arguments
+ TCases cases = {
+ {"pragma user = user;","PRAGMA user = user;\n"},
+ {"pragma user = default;","PRAGMA user = default;\n"},
+ {"pragma user.user = user;","PRAGMA user.user = user;\n"},
+ {"pragma user.user(user);","PRAGMA user.user(user);\n"},
+ {"pragma user.user(user, user);","PRAGMA user.user(user, user);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(DotAfterDigits) { // the space between "a.1" and ".b" must survive formatting
+ TCases cases = {
+ {"select a.1 .b from plato.foo;","SELECT\n\ta.1 .b\nFROM plato.foo;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(GrantPermissions) { // GRANT: permission lists, several paths/roles, trailing commas, WITH GRANT OPTION
+ TCases cases {
+ {"use plato;grant connect, modify tables, list on `/Root` to user;", "USE plato;\n\nGRANT CONNECT, MODIFY TABLES, LIST ON `/Root` TO user;\n"},
+ {"use plato;grant select , select tables, select attributes on `/Root` to user;", "USE plato;\n\nGRANT SELECT, SELECT TABLES, SELECT ATTRIBUTES ON `/Root` TO user;\n"},
+ {"use plato;grant insert, modify attributes on `/Root` to user;", "USE plato;\n\nGRANT INSERT, MODIFY ATTRIBUTES ON `/Root` TO user;\n"},
+ {"use plato;grant use legacy, use on `/Root` to user1, user2;", "USE plato;\n\nGRANT USE LEGACY, USE ON `/Root` TO user1, user2;\n"},
+ {"use plato;grant manage, full legacy, full, create on `/Root` to user;", "USE plato;\n\nGRANT MANAGE, FULL LEGACY, FULL, CREATE ON `/Root` TO user;\n"},
+ {"use plato;grant drop, grant, select row, update row on `/Root` to user;", "USE plato;\n\nGRANT DROP, GRANT, SELECT ROW, UPDATE ROW ON `/Root` TO user;\n"},
+ {"use plato;grant erase row, create directory on `/Root` to user;", "USE plato;\n\nGRANT ERASE ROW, CREATE DIRECTORY ON `/Root` TO user;\n"},
+ {"use plato;grant create table, create queue, remove schema on `/Root` to user;", "USE plato;\n\nGRANT CREATE TABLE, CREATE QUEUE, REMOVE SCHEMA ON `/Root` TO user;\n"},
+ {"use plato;grant describe schema, alter schema on `/Root` to user;", "USE plato;\n\nGRANT DESCRIBE SCHEMA, ALTER SCHEMA ON `/Root` TO user;\n"},
+ {"use plato;grant select, on `/Root` to user, with grant option;", "USE plato;\n\nGRANT SELECT, ON `/Root` TO user, WITH GRANT OPTION;\n"},
+ {"use plato;grant all privileges on `/Root` to user;", "USE plato;\n\nGRANT ALL PRIVILEGES ON `/Root` TO user;\n"},
+ {"use plato;grant list on `/Root/db1`, `/Root/db2` to user;", "USE plato;\n\nGRANT LIST ON `/Root/db1`, `/Root/db2` TO user;\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(RevokePermissions) { // REVOKE: mirrors the GRANT cases, plus GRANT OPTION FOR
+ TCases cases {
+ {"use plato;revoke connect, modify tables, list on `/Root` from user;", "USE plato;\n\nREVOKE CONNECT, MODIFY TABLES, LIST ON `/Root` FROM user;\n"},
+ {"use plato;revoke select , select tables, select attributes on `/Root` from user;", "USE plato;\n\nREVOKE SELECT, SELECT TABLES, SELECT ATTRIBUTES ON `/Root` FROM user;\n"},
+ {"use plato;revoke insert, modify attributes on `/Root` from user;", "USE plato;\n\nREVOKE INSERT, MODIFY ATTRIBUTES ON `/Root` FROM user;\n"},
+ {"use plato;revoke use legacy, use on `/Root` from user1, user2;", "USE plato;\n\nREVOKE USE LEGACY, USE ON `/Root` FROM user1, user2;\n"},
+ {"use plato;revoke manage, full legacy, full, create on `/Root` from user;", "USE plato;\n\nREVOKE MANAGE, FULL LEGACY, FULL, CREATE ON `/Root` FROM user;\n"},
+ {"use plato;revoke drop, grant, select row, update row on `/Root` from user;", "USE plato;\n\nREVOKE DROP, GRANT, SELECT ROW, UPDATE ROW ON `/Root` FROM user;\n"},
+ {"use plato;revoke erase row, create directory on `/Root` from user;", "USE plato;\n\nREVOKE ERASE ROW, CREATE DIRECTORY ON `/Root` FROM user;\n"},
+ {"use plato;revoke create table, create queue, remove schema on `/Root` from user;", "USE plato;\n\nREVOKE CREATE TABLE, CREATE QUEUE, REMOVE SCHEMA ON `/Root` FROM user;\n"},
+ {"use plato;revoke describe schema, alter schema on `/Root` from user;", "USE plato;\n\nREVOKE DESCRIBE SCHEMA, ALTER SCHEMA ON `/Root` FROM user;\n"},
+ {"use plato;revoke grant option for insert, on `/Root` from user;", "USE plato;\n\nREVOKE GRANT OPTION FOR INSERT, ON `/Root` FROM user;\n"},
+ {"use plato;revoke all privileges on `/Root` from user;", "USE plato;\n\nREVOKE ALL PRIVILEGES ON `/Root` FROM user;\n"},
+ {"use plato;revoke list on `/Root/db1`, `/Root/db2` from user;", "USE plato;\n\nREVOKE LIST ON `/Root/db1`, `/Root/db2` FROM user;\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(DropRole) { // DROP USER / DROP GROUP, including IF EXISTS and a trailing comma
+ TCases cases = {
+ {"use plato;drop user user,user,user;","USE plato;\n\nDROP USER user, user, user;\n"},
+ {"use plato;drop group if exists user;","USE plato;\n\nDROP GROUP IF EXISTS user;\n"},
+ {"use plato;drop group user,;","USE plato;\n\nDROP GROUP user,;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(CreateUser) { // CREATE USER with and without ENCRYPTED PASSWORD
+ TCases cases = {
+ {"use plato;create user user;","USE plato;\n\nCREATE USER user;\n"},
+ {"use plato;create user user encrypted password 'foo';","USE plato;\n\nCREATE USER user ENCRYPTED PASSWORD 'foo';\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(CreateGroup) { // CREATE GROUP, optionally WITH USER list (trailing comma kept)
+ TCases cases = {
+ {"use plato;create group user;","USE plato;\n\nCREATE GROUP user;\n"},
+ {"use plato;create group user with user user;","USE plato;\n\nCREATE GROUP user WITH USER user;\n"},
+ {"use plato;create group user with user user, user,;","USE plato;\n\nCREATE GROUP user WITH USER user, user,;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(AlterUser) { // ALTER USER: RENAME TO and [WITH] ENCRYPTED PASSWORD
+ TCases cases = {
+ {"use plato;alter user user rename to user;","USE plato;\n\nALTER USER user RENAME TO user;\n"},
+ {"use plato;alter user user encrypted password 'foo';","USE plato;\n\nALTER USER user ENCRYPTED PASSWORD 'foo';\n"},
+ {"use plato;alter user user with encrypted password 'foo';","USE plato;\n\nALTER USER user WITH ENCRYPTED PASSWORD 'foo';\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(AlterGroup) { // ALTER GROUP: ADD USER, DROP USER, RENAME TO
+ TCases cases = {
+ {"use plato;alter group user add user user;","USE plato;\n\nALTER GROUP user ADD USER user;\n"},
+ {"use plato;alter group user drop user user;","USE plato;\n\nALTER GROUP user DROP USER user;\n"},
+ {"use plato;alter group user add user user, user,;","USE plato;\n\nALTER GROUP user ADD USER user, user,;\n"},
+ {"use plato;alter group user rename to user;","USE plato;\n\nALTER GROUP user RENAME TO user;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Use) { // USE: a space is inserted after ':' in "provider:cluster"
+ TCases cases = {
+ {"use user;","USE user;\n"},
+ {"use user:user;","USE user: user;\n"},
+ {"use user:*;","USE user: *;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Commit) { // COMMIT is upper-cased
+ TCases cases = {
+ {"commit;","COMMIT;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Rollback) { // ROLLBACK is upper-cased
+ TCases cases = {
+ {"rollback;","ROLLBACK;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Export) { // EXPORT with one and with several named nodes
+ TCases cases = {
+ {"export $foo;","EXPORT $foo;\n"},
+ {"export $foo, $bar;","EXPORT $foo, $bar;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Import) { // IMPORT ... SYMBOLS: ", " is inserted between symbols
+ TCases cases = {
+ {"import user symbols $foo;","IMPORT user SYMBOLS $foo;\n"},
+ {"import user symbols $foo,$bar;","IMPORT user SYMBOLS $foo, $bar;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Values) { // VALUES: one indented row per line; newlines inside string literals are kept
+ TCases cases = {
+ {"values (1);","VALUES\n\t(1);\n"},
+ {"values (1,2),(3,4);","VALUES\n\t(1, 2),\n\t(3, 4);\n"},
+ {"values ('a\nb');","VALUES\n\t('a\nb');\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(Declare) { // DECLARE: '?' suffixes are glued to the type; a missing ';' is appended
+ TCases cases = {
+ {"declare $foo as int32;","DECLARE $foo AS int32;\n"},
+ {"declare $foo as bool ?","DECLARE $foo AS bool?;\n"},
+ {"declare $foo as bool ? ?","DECLARE $foo AS bool??;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(NamedNode) { // named node assignments, including a multi-line SELECT ... UNION ALL body
+ TCases cases = {
+ {"$x=1","$x = 1;\n"},
+ {"$x,$y=(2,3)","$x, $y = (2, 3);\n"},
+ {"$a = select 1 union all select 2","$a =\n\tSELECT\n\t\t1\n\tUNION ALL\n\tSELECT\n\t\t2;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(DropTable) { // DROP TABLE with and without IF EXISTS
+ TCases cases = {
+ {"drop table user","DROP TABLE user;\n"},
+ {"drop table if exists user","DROP TABLE IF EXISTS user;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(CreateTable) { // CREATE TABLE: columns, keys, indexes, families, changefeeds, WITH/TTL, AS SELECT/VALUES, TEMP
+ TCases cases = {
+ {"create table user(user int32)","CREATE TABLE user (\n\tuser int32\n);\n"},
+ {"create table user(user int32,user bool ?)","CREATE TABLE user (\n\tuser int32,\n\tuser bool?\n);\n"},
+ {"create table user(user int32) with (user=user)","CREATE TABLE user (\n\tuser int32\n)\nWITH (user = user);\n"},
+ {"create table user(primary key (user))","CREATE TABLE user (\n\tPRIMARY KEY (user)\n);\n"},
+ {"create table user(primary key (user,user))","CREATE TABLE user (\n\tPRIMARY KEY (user, user)\n);\n"},
+ {"create table user(partition by (user))","CREATE TABLE user (\n\tPARTITION BY (user)\n);\n"},
+ {"create table user(partition by (user,user))","CREATE TABLE user (\n\tPARTITION BY (user, user)\n);\n"},
+ {"create table user(order by (user asc))","CREATE TABLE user (\n\tORDER BY (user ASC)\n);\n"},
+ {"create table user(order by (user desc,user))","CREATE TABLE user (\n\tORDER BY (user DESC, user)\n);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as seconds)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS SECONDS);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as MilliSeconds)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MILLISECONDS);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as microSeconds)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MICROSECONDS);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as nAnOsEcOnDs)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS NANOSECONDS);\n"},
+ {"create table user(index user global unique sync on (user,user) with (user=user,user=user))",
+ "CREATE TABLE user (\n\tINDEX user GLOBAL UNIQUE SYNC ON (user, user) WITH (user = user, user = user)\n);\n"},
+ {"create table user(index user global async on (user) with (user=user,))",
+ "CREATE TABLE user (\n\tINDEX user GLOBAL ASYNC ON (user) WITH (user = user,)\n);\n"},
+ {"create table user(index user local on (user) cover (user))",
+ "CREATE TABLE user (\n\tINDEX user LOCAL ON (user) COVER (user)\n);\n"},
+ {"create table user(index user local on (user) cover (user,user))",
+ "CREATE TABLE user (\n\tINDEX user LOCAL ON (user) COVER (user, user)\n);\n"},
+ {"create table user(index idx global using subtype on (col) cover (col) with (setting = foo, another_setting = bar));",
+ "CREATE TABLE user (\n\tINDEX idx GLOBAL USING subtype ON (col) COVER (col) WITH (setting = foo, another_setting = bar)\n);\n"},
+ {"create table user(family user (user='foo'))",
+ "CREATE TABLE user (\n\tFAMILY user (user = 'foo')\n);\n"},
+ {"create table user(family user (user='foo',user='bar'))",
+ "CREATE TABLE user (\n\tFAMILY user (user = 'foo', user = 'bar')\n);\n"},
+ {"create table user(changefeed user with (user='foo'))",
+ "CREATE TABLE user (\n\tCHANGEFEED user WITH (user = 'foo')\n);\n"},
+ {"create table user(changefeed user with (user='foo',user='bar'))",
+ "CREATE TABLE user (\n\tCHANGEFEED user WITH (user = 'foo', user = 'bar')\n);\n"},
+ {"create table user(user) AS SELECT 1","CREATE TABLE user (\n\tuser\n)\nAS\nSELECT\n 1;\n"},
+ {"create table user(user) AS VALUES (1), (2)","CREATE TABLE user (\n\tuser\n)\nAS\nVALUES\n (1),\n (2);\n"},
+ {"create table user(foo int32, bar bool ?) inherits (s3:$cluster.xxx) partition by hash(a,b,hash) with (inherits=interval('PT1D') ON logical_time) tablestore tablestore",
+ "CREATE TABLE user (\n"
+ "\tfoo int32,\n"
+ "\tbar bool?\n"
+ ")\n"
+ "INHERITS (s3: $cluster.xxx)\n"
+ "PARTITION BY HASH (a, b, hash)\n"
+ "WITH (inherits = interval('PT1D') ON logical_time)\n"
+ "TABLESTORE tablestore;\n"},
+ {"create table user(foo int32, bar bool ?) partition by hash(a,b,hash) with (tiering='some')",
+ "CREATE TABLE user (\n"
+ "\tfoo int32,\n"
+ "\tbar bool?\n"
+ ")\n"
+ "PARTITION BY HASH (a, b, hash)\n"
+ "WITH (tiering = 'some');\n"},
+ {"create table if not exists user(user int32)", "CREATE TABLE IF NOT EXISTS user (\n\tuser int32\n);\n"},
+ {"create temp table user(user int32)", "CREATE TEMP TABLE user (\n\tuser int32\n);\n"},
+ {"create temporary table user(user int32)", "CREATE TEMPORARY TABLE user (\n\tuser int32\n);\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(ObjectOperations) { // ALTER/CREATE/DROP OBJECT: keywords upper-cased, identifier case preserved
+ TCases cases = {
+ {"alter oBject usEr (TYpe abcde) Set (a = b)",
+ "ALTER OBJECT usEr (TYPE abcde) SET (a = b);\n"},
+ {"creAte oBject usEr (tYpe abcde) With (a = b)",
+ "CREATE OBJECT usEr (TYPE abcde) WITH (a = b);\n"},
+ {"creAte oBject if not exIstS usEr (tYpe abcde) With (a = b)",
+ "CREATE OBJECT IF NOT EXISTS usEr (TYPE abcde) WITH (a = b);\n"},
+ {"creAte oBject usEr (tYpe abcde) With a = b",
+ "CREATE OBJECT usEr (TYPE abcde) WITH a = b;\n"},
+ {"dRop oBject usEr (tYpe abcde) With (aeEE)",
+ "DROP OBJECT usEr (TYPE abcde) WITH (aeEE);\n"},
+ {"dRop oBject If ExistS usEr (tYpe abcde) With (aeEE)",
+ "DROP OBJECT IF EXISTS usEr (TYPE abcde) WITH (aeEE);\n"},
+ {"dRop oBject usEr (tYpe abcde) With aeEE",
+ "DROP OBJECT usEr (TYPE abcde) WITH aeEE;\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(TableStoreOperations) { // ALTER TABLESTORE: ADD COLUMN and DROP COLUMN
+ TCases cases = {
+ {"alter tableStore uSer aDd column usEr int32",
+ "ALTER TABLESTORE uSer ADD COLUMN usEr int32;\n"},
+ {"alter tableStore uSer drOp column usEr",
+ "ALTER TABLESTORE uSer DROP COLUMN usEr;\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+ Y_UNIT_TEST(ExternalDataSourceOperations) { // CREATE/ALTER/DROP EXTERNAL DATA SOURCE; multi-entry WITH and ALTER actions go one per line
+ TCases cases = {
+ {"creAte exTernAl daTa SouRce usEr With (a = \"b\")",
+ "CREATE EXTERNAL DATA SOURCE usEr WITH (a = \"b\");\n"},
+ {"creAte exTernAl daTa SouRce if not exists usEr With (a = \"b\")",
+ "CREATE EXTERNAL DATA SOURCE IF NOT EXISTS usEr WITH (a = \"b\");\n"},
+ {"creAte oR rePlaCe exTernAl daTa SouRce usEr With (a = \"b\")",
+ "CREATE OR REPLACE EXTERNAL DATA SOURCE usEr WITH (a = \"b\");\n"},
+ {"create external data source eds with (a=\"a\",b=\"b\",c = true)",
+ "CREATE EXTERNAL DATA SOURCE eds WITH (\n\ta = \"a\",\n\tb = \"b\",\n\tc = TRUE\n);\n"},
+ {"alter external data source eds set a true, reset (b, c), set (x=y, z=false)",
+ "ALTER EXTERNAL DATA SOURCE eds\n\tSET a TRUE,\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"},
+ {"alter external data source eds reset (a), set (x=y)",
+ "ALTER EXTERNAL DATA SOURCE eds\n\tRESET (a),\n\tSET (x = y);\n"},
+ {"dRop exTerNal Data SouRce usEr",
+ "DROP EXTERNAL DATA SOURCE usEr;\n"},
+ {"dRop exTerNal Data SouRce if exists usEr",
+ "DROP EXTERNAL DATA SOURCE IF EXISTS usEr;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+ }
+
+Y_UNIT_TEST(AsyncReplication) { // Cases for CREATE / ALTER / DROP ASYNC REPLICATION, including DROP ... CASCADE.
+ TCases cases = { // each entry: {input query, expected text}; "user" is used as the object name and as a setting key
+ {"create async replication user for table1 AS table2 with (user='foo')",
+ "CREATE ASYNC REPLICATION user FOR table1 AS table2 WITH (user = 'foo');\n"},
+ {"alter async replication user set (user='foo')",
+ "ALTER ASYNC REPLICATION user SET (user = 'foo');\n"},
+ {"drop async replication user",
+ "DROP ASYNC REPLICATION user;\n"},
+ {"drop async replication user cascade",
+ "DROP ASYNC REPLICATION user CASCADE;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(ExternalTableOperations) { // Cases for CREATE [OR REPLACE] / ALTER / DROP EXTERNAL TABLE.
+ TCases cases = { // each entry: {input query, expected text}
+ {"creAte exTernAl TabLe usEr (a int) With (a = \"b\")", // column list is expanded one column per line; WITH starts a new line
+ "CREATE EXTERNAL TABLE usEr (\n\ta int\n)\nWITH (a = \"b\");\n"},
+ {"creAte oR rePlaCe exTernAl TabLe usEr (a int) With (a = \"b\")",
+ "CREATE OR REPLACE EXTERNAL TABLE usEr (\n\ta int\n)\nWITH (a = \"b\");\n"},
+ {"creAte exTernAl TabLe iF NOt Exists usEr (a int) With (a = \"b\")",
+ "CREATE EXTERNAL TABLE IF NOT EXISTS usEr (\n\ta int\n)\nWITH (a = \"b\");\n"},
+ {"create external table user (a int) with (a=\"b\",c=\"d\")", // two WITH settings: one per indented line
+ "CREATE EXTERNAL TABLE user (\n\ta int\n)\nWITH (\n\ta = \"b\",\n\tc = \"d\"\n);\n"},
+ {"alter external table user add column col1 int32, drop column col2, reset(prop), set (prop2 = 42, x=y), set a true", // each ALTER action goes on its own indented line
+ "ALTER EXTERNAL TABLE user\n\tADD COLUMN col1 int32,\n\tDROP COLUMN col2,\n\tRESET (prop),\n\tSET (prop2 = 42, x = y),\n\tSET a TRUE;\n"},
+ {"dRop exTerNal taBlE usEr",
+ "DROP EXTERNAL TABLE usEr;\n"},
+ {"dRop exTerNal taBlE iF eXiStS usEr",
+ "DROP EXTERNAL TABLE IF EXISTS usEr;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(TypeSelection) { // Single case where the word "tYpe" is used as a table alias and as a column qualifier.
+ TCases cases = { // each entry: {input query, expected text}; "tYpe" keeps its case in the expected text
+ {"Select tYpe.* frOm Table tYpe",
+ "SELECT\n\ttYpe.*\nFROM Table\n\ttYpe;\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(AlterTable) { // Cases for ALTER TABLE actions: columns, families, settings, indexes, rename and changefeeds.
+ TCases cases = { // each entry: {input query, expected text}; every action is expected on its own tab-indented line
+ {"alter table user add user int32",
+ "ALTER TABLE user\n\tADD user int32;\n"},
+ {"alter table user add user int32, add user bool ?", // "bool ?" in the input becomes "bool?" in the expected text
+ "ALTER TABLE user\n\tADD user int32,\n\tADD user bool?;\n"},
+ {"alter table user add column user int32",
+ "ALTER TABLE user\n\tADD COLUMN user int32;\n"},
+ {"alter table user drop user",
+ "ALTER TABLE user\n\tDROP user;\n"},
+ {"alter table user drop column user",
+ "ALTER TABLE user\n\tDROP COLUMN user;\n"},
+ {"alter table user alter column user set family user",
+ "ALTER TABLE user\n\tALTER COLUMN user SET FAMILY user;\n"},
+ {"alter table t alter column c drop not null",
+ "ALTER TABLE t\n\tALTER COLUMN c DROP NOT NULL;\n"},
+ {"alter table user add family user(user='foo')",
+ "ALTER TABLE user\n\tADD FAMILY user (user = 'foo');\n"},
+ {"alter table user alter family user set user 'foo'",
+ "ALTER TABLE user\n\tALTER FAMILY user SET user 'foo';\n"},
+ {"alter table user set user user",
+ "ALTER TABLE user\n\tSET user user;\n"},
+ {"alter table user set (user=user)",
+ "ALTER TABLE user\n\tSET (user = user);\n"},
+ {"alter table user set (user=user,user=user)",
+ "ALTER TABLE user\n\tSET (user = user, user = user);\n"},
+ {"alter table user reset(user)",
+ "ALTER TABLE user\n\tRESET (user);\n"},
+ {"alter table user reset(user, user)",
+ "ALTER TABLE user\n\tRESET (user, user);\n"},
+ {"alter table user add index user local on (user)",
+ "ALTER TABLE user\n\tADD INDEX user LOCAL ON (user);\n"},
+ {"alter table user alter index idx set setting 'foo'",
+ "ALTER TABLE user\n\tALTER INDEX idx SET setting 'foo';\n"},
+ {"alter table user alter index idx set (setting = 'foo', another_setting = 'bar')",
+ "ALTER TABLE user\n\tALTER INDEX idx SET (setting = 'foo', another_setting = 'bar');\n"},
+ {"alter table user alter index idx reset (setting, another_setting)",
+ "ALTER TABLE user\n\tALTER INDEX idx RESET (setting, another_setting);\n"},
+ {"alter table user add index idx global using subtype on (col) cover (col) with (setting = foo, another_setting = 'bar');", // input already ends with ';'
+ "ALTER TABLE user\n\tADD INDEX idx GLOBAL USING subtype ON (col) COVER (col) WITH (setting = foo, another_setting = 'bar');\n"},
+ {"alter table user drop index user",
+ "ALTER TABLE user\n\tDROP INDEX user;\n"},
+ {"alter table user rename to user",
+ "ALTER TABLE user\n\tRENAME TO user;\n"},
+ {"alter table user add changefeed user with (user = 'foo')",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (user = 'foo');\n"},
+ {"alter table user alter changefeed user disable",
+ "ALTER TABLE user\n\tALTER CHANGEFEED user DISABLE;\n"},
+ {"alter table user alter changefeed user set(user='foo')",
+ "ALTER TABLE user\n\tALTER CHANGEFEED user SET (user = 'foo');\n"},
+ {"alter table user drop changefeed user",
+ "ALTER TABLE user\n\tDROP CHANGEFEED user;\n"},
+ {"alter table user add changefeed user with (initial_scan = tRUe)", // mixed-case boolean literals are expected as TRUE / FALSE
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (initial_scan = TRUE);\n"},
+ {"alter table user add changefeed user with (initial_scan = FaLsE)",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (initial_scan = FALSE);\n"},
+ {"alter table user add changefeed user with (retention_period = Interval(\"P1D\"))",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (retention_period = Interval(\"P1D\"));\n"},
+ {"alter table user add changefeed user with (virtual_timestamps = TruE)",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (virtual_timestamps = TRUE);\n"},
+ {"alter table user add changefeed user with (virtual_timestamps = fAlSe)",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (virtual_timestamps = FALSE);\n"},
+ {"alter table user add changefeed user with (resolved_timestamps = Interval(\"PT1S\"))",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (resolved_timestamps = Interval(\"PT1S\"));\n"},
+ {"alter table user add changefeed user with (topic_min_active_partitions = 1)",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (topic_min_active_partitions = 1);\n"},
+ {"alter table user add changefeed user with (topic_auto_partitioning = 'ENABLED', topic_min_active_partitions = 1, topic_max_active_partitions = 7)",
+ "ALTER TABLE user\n\tADD CHANGEFEED user WITH (topic_auto_partitioning = 'ENABLED', topic_min_active_partitions = 1, topic_max_active_partitions = 7);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(CreateTopic) { // Cases for CREATE TOPIC with optional consumer list and WITH settings.
+ TCases cases = { // each entry: {input query, expected text}; consumers are expected one per tab-indented line
+ {"create topic topic1",
+ "CREATE TOPIC topic1;\n"},
+ {"create topic topic1 (consumer c1)",
+ "CREATE TOPIC topic1 (\n\tCONSUMER c1\n);\n"},
+ {"create topic topic1 (consumer c1, consumer c2 with (important = True))", // per-consumer WITH stays inline
+ "CREATE TOPIC topic1 (\n\tCONSUMER c1,\n\tCONSUMER c2 WITH (important = TRUE)\n);\n"},
+ {"create topic topic1 (consumer c1) with (partition_count_limit = 5)", // topic-level WITH is expanded even with a single setting
+ "CREATE TOPIC topic1 (\n\tCONSUMER c1\n) WITH (\n\tpartition_count_limit = 5\n);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(AlterTopic) { // Cases for ALTER TOPIC: consumer alter/add/drop and topic-level SET / RESET.
+ TCases cases = { // each entry: {input query, expected text}; every action is expected on its own tab-indented line
+ {"alter topic topic1 alter consumer c1 set (important = false)",
+ "ALTER TOPIC topic1\n\tALTER CONSUMER c1 SET (important = FALSE);\n"},
+ {"alter topic topic1 alter consumer c1 set (important = false), alter consumer c2 reset (read_from)",
+ "ALTER TOPIC topic1\n\tALTER CONSUMER c1 SET (important = FALSE),\n\tALTER CONSUMER c2 RESET (read_from);\n"},
+ {"alter topic topic1 add consumer c1, drop consumer c2",
+ "ALTER TOPIC topic1\n\tADD CONSUMER c1,\n\tDROP CONSUMER c2;\n"},
+ {"alter topic topic1 set (supported_codecs = 'RAW'), RESET (retention_period)",
+ "ALTER TOPIC topic1\n\tSET (supported_codecs = 'RAW'),\n\tRESET (retention_period);\n"},
+
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(DropTopic) { // Single case for a plain DROP TOPIC statement.
+ TCases cases = { // each entry: {input query, expected text}
+ {"drop topic topic1",
+ "DROP TOPIC topic1;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(TopicExistsStatement) { // Cases for the IF EXISTS / IF NOT EXISTS forms of DROP, CREATE and ALTER TOPIC.
+ TCases cases = { // each entry: {input query, expected text}
+ {"drop topic if exists topic1",
+ "DROP TOPIC IF EXISTS topic1;\n"},
+ {"create topic if not exists topic1 with (partition_count_limit = 5)",
+ "CREATE TOPIC IF NOT EXISTS topic1 WITH (\n\tpartition_count_limit = 5\n);\n"},
+ {"alter topic if exists topic1 alter consumer c1 set (important = false)",
+ "ALTER TOPIC IF EXISTS topic1\n\tALTER CONSUMER c1 SET (important = FALSE);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Do) { // Cases for DO with a named action call and with an inline BEGIN ... END DO body.
+ TCases cases = { // each entry: {input query, expected text}
+ {"do $a(1,2,3)", // call arguments get ", " separators
+ "DO $a(1, 2, 3);\n"},
+ {"do begin values(1); end do;", // the inline body is indented one tab deeper than DO BEGIN / END DO
+ "DO BEGIN\n\tVALUES\n\t\t(1);\nEND DO;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(DefineActionOrSubquery) { // Single case: DEFINE ACTION containing a nested DEFINE ACTION, a DEFINE SUBQUERY, a DO and a PROCESS.
+ TCases cases = { // one entry: {input query, expected text}; both strings are built from adjacent literals
+ {"define action $a() as " // input: the adjacent literals below concatenate into one single-line query
+ "define action $b() as "
+ "values(1); "
+ "end define; "
+ "define subquery $c() as "
+ "select 1; "
+ "end define; "
+ "do $b(); "
+ "process $c(); "
+ "end define",
+ "DEFINE ACTION $a() AS\n\tDEFINE ACTION $b() AS\n\t\t" // expected: nested definitions are indented one tab per nesting level
+ "VALUES\n\t\t\t(1);\n\tEND DEFINE;\n\n\t"
+ "DEFINE SUBQUERY $c() AS\n\t\tSELECT\n\t\t\t1;\n\t"
+ "END DEFINE;\n\tDO $b();\n\n\tPROCESS $c();\nEND DEFINE;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(If) { // Cases for EVALUATE IF with DO / ELSE DO branches, as action calls and as inline BEGIN ... END DO bodies.
+ TCases cases = { // each entry: {input query, expected text}; branches are expected on tab-indented lines, ELSE at column 0
+ {"evaluate if 1=1 do $a()",
+ "EVALUATE IF 1 = 1\n\tDO $a();\n"},
+ {"evaluate if 1=1 do $a() else do $b()",
+ "EVALUATE IF 1 = 1\n\tDO $a()\nELSE\n\tDO $b();\n"},
+ {"evaluate if 1=1 do begin select 1; end do",
+ "EVALUATE IF 1 = 1\n\tDO BEGIN\n\t\tSELECT\n\t\t\t1;\n\tEND DO;\n"},
+ {"evaluate if 1=1 do begin select 1; end do else do begin select 2; end do",
+ "EVALUATE IF 1 = 1\n\tDO BEGIN\n\t\tSELECT\n\t\t\t1;\n\tEND DO\n" // expected text continues in the next literal
+ "ELSE\n\tDO BEGIN\n\t\tSELECT\n\t\t\t2;\n\tEND DO;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(For) { // Cases for EVALUATE [PARALLEL] FOR with DO / ELSE DO branches.
+ TCases cases = { // each entry: {input query, expected text}; branches are expected on tab-indented lines, ELSE at column 0
+ {"evaluate for $x in [] do $a($x)",
+ "EVALUATE FOR $x IN []\n\tDO $a($x);\n"},
+ {"evaluate for $x in [] do $a($x) else do $b()",
+ "EVALUATE FOR $x IN []\n\tDO $a($x)\nELSE\n\tDO $b();\n"},
+ {"evaluate for $x in [] do begin select $x; end do",
+ "EVALUATE FOR $x IN []\n\tDO BEGIN\n\t\tSELECT\n\t\t\t$x;\n\tEND DO;\n"},
+ {"evaluate for $x in [] do begin select $x; end do else do begin select 2; end do",
+ "EVALUATE FOR $x IN []\n\tDO BEGIN\n\t\tSELECT\n\t\t\t$x;\n\tEND DO\nELSE\n\tDO BEGIN\n\t\tSELECT\n\t\t\t2;\n\tEND DO;\n"},
+ {"evaluate parallel for $x in [] do $a($x)",
+ "EVALUATE PARALLEL FOR $x IN []\n\tDO $a($x);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Update) { // Cases for UPDATE ... ON (values / select / default values) and UPDATE ... SET, with optional WHERE.
+ TCases cases = { // each entry: {input query, expected text}
+ {"update user on default values",
+ "UPDATE user\nON DEFAULT VALUES;\n"},
+ {"update user on values (1),(2)", // each VALUES row is expected on its own tab-indented line
+ "UPDATE user\nON\nVALUES\n\t(1),\n\t(2);\n"},
+ {"update user on select 1 as x, 2 as y",
+ "UPDATE user\nON\nSELECT\n\t1 AS x,\n\t2 AS y;\n"},
+ {"update user on (x) values (1),(2),(3)", // the ON column list is expanded one column per line
+ "UPDATE user\nON (\n\tx\n)\nVALUES\n\t(1),\n\t(2),\n\t(3);\n"},
+ {"update user on (x,y) values (1,2),(2,3),(3,4)",
+ "UPDATE user\nON (\n\tx,\n\ty\n)\nVALUES\n\t(1, 2),\n\t(2, 3),\n\t(3, 4);\n"},
+ {"update user on (x) select 1",
+ "UPDATE user\nON (\n\tx\n)\nSELECT\n\t1;\n"},
+ {"update user on (x,y) select 1,2",
+ "UPDATE user\nON (\n\tx,\n\ty\n)\nSELECT\n\t1,\n\t2;\n"},
+ {"update user set x=1",
+ "UPDATE user\nSET\n\tx = 1;\n"},
+ {"update user set (x)=(1)", // tuple assignment: both parenthesized lists are expanded one item per line
+ "UPDATE user\nSET\n(\n\tx\n) = (\n\t1\n);\n"},
+ {"update user set (x,y)=(1,2)",
+ "UPDATE user\nSET\n(\n\tx,\n\ty\n) = (\n\t1,\n\t2\n);\n"},
+ {"update user set (x,y)=(select 1,2)",
+ "UPDATE user\nSET\n(\n\tx,\n\ty\n) = (\n\tSELECT\n\t\t1,\n\t\t2\n);\n"},
+ {"update user set x=1,y=2 where z=3",
+ "UPDATE user\nSET\n\tx = 1,\n\ty = 2\nWHERE z = 3;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Delete) { // Cases for DELETE FROM: plain, with WHERE, and with ON select / ON (columns) values.
+ TCases cases = { // each entry: {input query, expected text}
+ {"delete from user",
+ "DELETE FROM user;\n"},
+ {"delete from user where 1=1",
+ "DELETE FROM user\nWHERE 1 = 1;\n"},
+ {"delete from user on select 1 as x, 2 as y",
+ "DELETE FROM user\nON\nSELECT\n\t1 AS x,\n\t2 AS y;\n"},
+ {"delete from user on (x) values (1)", // the ON column list is expanded one column per line
+ "DELETE FROM user\nON (\n\tx\n)\nVALUES\n\t(1);\n"},
+ {"delete from user on (x,y) values (1,2), (3,4)",
+ "DELETE FROM user\nON (\n\tx,\n\ty\n)\nVALUES\n\t(1, 2),\n\t(3, 4);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Into) { // Cases for INSERT [OR ...] / UPSERT / REPLACE INTO: target forms, column lists, ERASE BY and WITH hints.
+ TCases cases = { // each entry: {input query, expected text}
+ {"insert into user select 1 as x",
+ "INSERT INTO user\nSELECT\n\t1 AS x;\n"},
+ {"insert or abort into user select 1 as x",
+ "INSERT OR ABORT INTO user\nSELECT\n\t1 AS x;\n"},
+ {"insert or revert into user select 1 as x",
+ "INSERT OR REVERT INTO user\nSELECT\n\t1 AS x;\n"},
+ {"insert or ignore into user select 1 as x",
+ "INSERT OR IGNORE INTO user\nSELECT\n\t1 AS x;\n"},
+ {"upsert into user select 1 as x",
+ "UPSERT INTO user\nSELECT\n\t1 AS x;\n"},
+ {"replace into user select 1 as x",
+ "REPLACE INTO user\nSELECT\n\t1 AS x;\n"},
+ {"insert into user(x) values (1)", // the target column list is expanded one column per line
+ "INSERT INTO user (\n\tx\n)\nVALUES\n\t(1);\n"},
+ {"insert into user(x,y) values (1,2)",
+ "INSERT INTO user (\n\tx,\n\ty\n)\nVALUES\n\t(1, 2);\n"},
+ {"insert into plato.user select 1 as x", // target forms below: cluster-qualified, @name, $name, @$name
+ "INSERT INTO plato.user\nSELECT\n\t1 AS x;\n"},
+ {"insert into @user select 1 as x",
+ "INSERT INTO @user\nSELECT\n\t1 AS x;\n"},
+ {"insert into $user select 1 as x",
+ "INSERT INTO $user\nSELECT\n\t1 AS x;\n"},
+ {"insert into @$user select 1 as x",
+ "INSERT INTO @$user\nSELECT\n\t1 AS x;\n"},
+ {"upsert into user erase by (x,y) values (1)", // ERASE BY is expected on an indented line with its own nested column list
+ "UPSERT INTO user\n\tERASE BY (\n\t\tx,\n\t\ty\n\t)\nVALUES\n\t(1);\n"},
+ {"insert into user with truncate select 1 as x", // WITH hints are expected on a tab-indented line under the target
+ "INSERT INTO user\n\tWITH truncate\nSELECT\n\t1 AS x;\n"},
+ {"insert into user with (truncate,inferscheme='1') select 1 as x",
+ "INSERT INTO user\n\tWITH (truncate, inferscheme = '1')\nSELECT\n\t1 AS x;\n"},
+ {"insert into user with schema Struct<user:int32> select 1 as user",
+ "INSERT INTO user\n\tWITH SCHEMA Struct<user: int32>\nSELECT\n\t1 AS user;\n"},
+ {"insert into user with schema (int32 as user) select 1 as user",
+ "INSERT INTO user\n\tWITH SCHEMA (int32 AS user)\nSELECT\n\t1 AS user;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Process) { // Cases for PROCESS ... USING with aliases, several inputs, WHERE/HAVING/ASSUME ORDER BY, UNION ALL, WITH, DISCARD and INTO RESULT.
+ TCases cases = { // each entry: {input query, expected text}; each clause is expected on its own line at column 0
+ {"process user",
+ "PROCESS user;\n"},
+ {"process user using $f() as user",
+ "PROCESS user\nUSING $f() AS user;\n"},
+ {"process user,user using $f()",
+ "PROCESS user, user\nUSING $f();\n"},
+ {"process user using $f() where 1=1 having 1=1 assume order by user",
+ "PROCESS user\nUSING $f()\nWHERE 1 = 1\nHAVING 1 = 1\nASSUME ORDER BY\n\tuser;\n"},
+ {"process user using $f() union all process user using $f()",
+ "PROCESS user\nUSING $f()\nUNION ALL\nPROCESS user\nUSING $f();\n"},
+ {"process user using $f() with foo=bar",
+ "PROCESS user\nUSING $f()\nWITH foo = bar;\n"},
+ {"discard process user using $f()",
+ "DISCARD PROCESS user\nUSING $f();\n"},
+ {"process user using $f() into result user",
+ "PROCESS user\nUSING $f()\nINTO RESULT user;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Reduce) { // Cases for REDUCE ... ON ... USING with PRESORT, USING ALL, aliases, DISCARD, INTO RESULT and trailing clauses.
+ TCases cases = { // each entry: {input query, expected text}; ON / PRESORT keys are expected one per tab-indented line
+ {"reduce user on user using $f()",
+ "REDUCE user\nON\n\tuser\nUSING $f();\n"},
+ {"reduce user on user, using $f()", // the trailing comma after the ON key is kept in the expected text
+ "REDUCE user\nON\n\tuser,\nUSING $f();\n"},
+ {"discard reduce user on user using $f();",
+ "DISCARD REDUCE user\nON\n\tuser\nUSING $f();\n"},
+ {"reduce user on user using $f() into result user",
+ "REDUCE user\nON\n\tuser\nUSING $f()\nINTO RESULT user;\n"},
+ {"reduce user on user using all $f()",
+ "REDUCE user\nON\n\tuser\nUSING ALL $f();\n"},
+ {"reduce user on user using $f() as user",
+ "REDUCE user\nON\n\tuser\nUSING $f() AS user;\n"},
+ {"reduce user,user on user using $f()", // several inputs stay on the REDUCE line
+ "REDUCE user, user\nON\n\tuser\nUSING $f();\n"},
+ {"reduce user on user,user using $f()",
+ "REDUCE user\nON\n\tuser,\n\tuser\nUSING $f();\n"},
+ {"reduce user on user using $f() where 1=1 having 1=1 assume order by user",
+ "REDUCE user\nON\n\tuser\nUSING $f()\nWHERE 1 = 1\nHAVING 1 = 1\nASSUME ORDER BY\n\tuser;\n"},
+ {"reduce user presort user,user on user using $f();",
+ "REDUCE user\nPRESORT\n\tuser,\n\tuser\nON\n\tuser\nUSING $f();\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Select) { // Cases for SELECT: projections, table sources and modifiers, joins, filters, grouping, ordering, windows, LIMIT and UNION ALL.
+ TCases cases = { // each entry: {input query, expected text}; projection items are expected one per tab-indented line
+ {"select 1",
+ "SELECT\n\t1;\n"},
+ {"select 1,", // the trailing comma is kept in the expected text
+ "SELECT\n\t1,;\n"},
+ {"select 1 as x",
+ "SELECT\n\t1 AS x;\n"},
+ {"select *",
+ "SELECT\n\t*;\n"},
+ {"select a.*",
+ "SELECT\n\ta.*;\n"},
+ {"select * without a", // WITHOUT is indented under SELECT and its columns one level deeper
+ "SELECT\n\t*\n\tWITHOUT\n\t\ta;\n"},
+ {"select * without a,b",
+ "SELECT\n\t*\n\tWITHOUT\n\t\ta,\n\t\tb;\n"},
+ {"select * without a,",
+ "SELECT\n\t*\n\tWITHOUT\n\t\ta,;\n"},
+ {"select 1 from user", // table source forms: plain, cluster-qualified, $name, @name, @$name
+ "SELECT\n\t1\nFROM user;\n"},
+ {"select 1 from plato.user",
+ "SELECT\n\t1\nFROM plato.user;\n"},
+ {"select 1 from $user",
+ "SELECT\n\t1\nFROM $user;\n"},
+ {"select 1 from @user",
+ "SELECT\n\t1\nFROM @user;\n"},
+ {"select 1 from @$user",
+ "SELECT\n\t1\nFROM @$user;\n"},
+ {"select 1 from user view user", // source modifiers (VIEW, AS, WITH, SAMPLE, FLATTEN ...) are expected on a tab-indented line under FROM
+ "SELECT\n\t1\nFROM user\n\tVIEW user;\n"},
+ {"select 1 from user as user",
+ "SELECT\n\t1\nFROM user\n\tAS user;\n"},
+ {"select 1 from user as user(user)",
+ "SELECT\n\t1\nFROM user\n\tAS user (\n\t\tuser\n\t);\n"},
+ {"select 1 from user as user(user, user)",
+ "SELECT\n\t1\nFROM user\n\tAS user (\n\t\tuser,\n\t\tuser\n\t);\n"},
+ {"select 1 from user with user=user",
+ "SELECT\n\t1\nFROM user\n\tWITH user = user;\n"},
+ {"select 1 from user with (user=user, user=user)",
+ "SELECT\n\t1\nFROM user\n\tWITH (user = user, user = user);\n"},
+ {"select 1 from user sample 0.1",
+ "SELECT\n\t1\nFROM user\n\tSAMPLE 0.1;\n"},
+ {"select 1 from user tablesample system(0.1)",
+ "SELECT\n\t1\nFROM user\n\tTABLESAMPLE SYSTEM (0.1);\n"},
+ {"select 1 from user tablesample bernoulli(0.1) repeatable(10)",
+ "SELECT\n\t1\nFROM user\n\tTABLESAMPLE BERNOULLI (0.1) REPEATABLE (10);\n"},
+ {"select 1 from user flatten columns",
+ "SELECT\n\t1\nFROM user\n\tFLATTEN COLUMNS;\n"},
+ {"select 1 from user flatten list by user",
+ "SELECT\n\t1\nFROM user\n\tFLATTEN LIST BY\n\t\tuser;\n"},
+ {"select 1 from user flatten list by (user,user)",
+ "SELECT\n\t1\nFROM user\n\tFLATTEN LIST BY (\n\t\tuser,\n\t\tuser\n\t);\n"},
+ {"select 1 from $user(1,2)",
+ "SELECT\n\t1\nFROM $user(1, 2);\n"},
+ {"select 1 from $user(1,2) view user",
+ "SELECT\n\t1\nFROM $user(1, 2)\n\tVIEW user;\n"},
+ {"select 1 from range('a','b')",
+ "SELECT\n\t1\nFROM range('a', 'b');\n"},
+ {"from user select 1", // FROM-first form keeps FROM before SELECT
+ "FROM user\nSELECT\n\t1;\n"},
+ {"select * from user as a join user as b on a.x=b.y", // joins: each JOIN and its ON / USING start at column 0
+ "SELECT\n\t*\nFROM user\n\tAS a\nJOIN user\n\tAS b\nON a.x = b.y;\n"},
+ {"select * from user as a join user as b using(x)",
+ "SELECT\n\t*\nFROM user\n\tAS a\nJOIN user\n\tAS b\nUSING (x);\n"},
+ {"select * from any user as a full join user as b on a.x=b.y",
+ "SELECT\n\t*\nFROM ANY user\n\tAS a\nFULL JOIN user\n\tAS b\nON a.x = b.y;\n"},
+ {"select * from user as a left join any user as b on a.x=b.y",
+ "SELECT\n\t*\nFROM user\n\tAS a\nLEFT JOIN ANY user\n\tAS b\nON a.x = b.y;\n"},
+ {"select * from any user as a right join any user as b on a.x=b.y",
+ "SELECT\n\t*\nFROM ANY user\n\tAS a\nRIGHT JOIN ANY user\n\tAS b\nON a.x = b.y;\n"},
+ {"select * from user as a cross join user as b",
+ "SELECT\n\t*\nFROM user\n\tAS a\nCROSS JOIN user\n\tAS b;\n"},
+ {"select 1 from user where key = 1",
+ "SELECT\n\t1\nFROM user\nWHERE key = 1;\n"},
+ {"select 1 from user having count(*) = 1",
+ "SELECT\n\t1\nFROM user\nHAVING count(*) = 1;\n"},
+ {"select 1 from user group by key", // GROUP BY / ORDER BY keys are expected one per tab-indented line
+ "SELECT\n\t1\nFROM user\nGROUP BY\n\tkey;\n"},
+ {"select 1 from user group compact by key, value as v",
+ "SELECT\n\t1\nFROM user\nGROUP COMPACT BY\n\tkey,\n\tvalue AS v;\n"},
+ {"select 1 from user group by key with combine",
+ "SELECT\n\t1\nFROM user\nGROUP BY\n\tkey\n\tWITH combine;\n"},
+ {"select 1 from user order by key asc",
+ "SELECT\n\t1\nFROM user\nORDER BY\n\tkey ASC;\n"},
+ {"select 1 from user order by key, value desc",
+ "SELECT\n\t1\nFROM user\nORDER BY\n\tkey,\n\tvalue DESC;\n"},
+ {"select 1 from user assume order by key",
+ "SELECT\n\t1\nFROM user\nASSUME ORDER BY\n\tkey;\n"},
+ {"select 1 from user window w1 as (), w2 as ()", // WINDOW: empty specs stay as "()", non-empty specs are expanded over indented lines
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (),\n\tw2 AS ();\n"},
+ {"select 1 from user window w1 as (user)",
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tuser\n\t);\n"},
+ {"select 1 from user window w1 as (partition by user)",
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tPARTITION BY\n\t\t\tuser\n\t);\n"},
+ {"select 1 from user window w1 as (partition by user, user)",
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tPARTITION BY\n\t\t\tuser,\n\t\t\tuser\n\t);\n"},
+ {"select 1 from user window w1 as (order by user asc)",
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tORDER BY\n\t\t\tuser ASC\n\t);\n"},
+ {"select 1 from user window w1 as (order by user, user desc)",
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tORDER BY\n\t\t\tuser,\n\t\t\tuser DESC\n\t);\n"},
+ {"select 1 from user window w1 as (rows between 1 preceding and 1 following)",
+ "SELECT\n\t1\nFROM user\nWINDOW\n\tw1 AS (\n\t\tROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING\n\t);\n"},
+ {"select 1 limit 10",
+ "SELECT\n\t1\nLIMIT 10;\n"},
+ {"select 1 limit 10 offset 5",
+ "SELECT\n\t1\nLIMIT 10 OFFSET 5;\n"},
+ { "select 1 union all select 2", // UNION ALL is expected on its own line between the two SELECTs
+ "SELECT\n\t1\nUNION ALL\nSELECT\n\t2;\n" },
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(CompositeTypesAndQuestions) { // Single case mixing '<' '>' type brackets, the "<>" and "??" operators, and '?' type suffixes.
+ TCases cases = { // one entry: {input query, expected text}; both strings are built from adjacent literals
+ {"declare $_x AS list<int32>??;declare $_y AS int32 ? ? ;select 1<>2, 1??2," // input, first half
+ "formattype(list<int32>), formattype(resource<user>),formattype(tuple<>), formattype(tuple< >), formattype(int32 ? ? )",
+ "DECLARE $_x AS list<int32>??;\nDECLARE $_y AS int32??;\n\nSELECT\n\t1 <> 2,\n\t1 ?? 2,\n\tformattype(list<int32>)," // expected: "int32 ? ?" collapses to "int32??", binary operators get spaces
+ "\n\tformattype(resource<user>),\n\tformattype(tuple<>),\n\tformattype(tuple< >),\n\tformattype(int32??" ");\n" // literal is split so the two '?' are not directly followed by ')' - presumably to avoid a trigraph; verify
+ },
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Lambda) { // Single case with a block-bodied lambda, an expression-bodied lambda with an optional argument, and calls to both.
+ TCases cases = { // one entry: {input query, expected text}; the expected text is built from adjacent literals
+ {"$f=($a,$b)->{$x=$a+$b;return $a*$x};$g=($a,$b?)->($a+$b??0);select $f(10,4),$g(1,2);",
+ "$f = ($a, $b) -> {\n\t$x = $a + $b;\n\tRETURN $a * $x\n};\n" // block body: one statement per tab-indented line
+ "$g = ($a, $b?) -> ($a + $b ?? 0);\n\n" // expression body stays on one line; a blank line precedes SELECT
+ "SELECT\n\t$f(10, 4),\n\t$g(1, 2);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(NestedSelect) { // Cases for SELECT used as a named expression value and inside IN (...).
+ TCases cases = { // each entry: {input query, expected text}; the nested SELECT is indented one tab per nesting level
+ {"$x=select 1",
+ "$x =\n\tSELECT\n\t\t1;\n"},
+ {"$x=(select 1)",
+ "$x = (\n\tSELECT\n\t\t1\n);\n"},
+ {"select 1 in (select 1)",
+ "SELECT\n\t1 IN (\n\t\tSELECT\n\t\t\t1\n\t);\n"},
+ {"select 1 in ((select 1))", // doubled parentheses are kept, each pair on its own indentation level
+ "SELECT\n\t1 IN (\n\t\t(\n\t\t\tSELECT\n\t\t\t\t1\n\t\t)\n\t);\n"},
+ {"select 1 in (\nselect 1)", // a newline after '(' in the input yields the same expected text as the case without it
+ "SELECT\n\t1 IN (\n\t\tSELECT\n\t\t\t1\n\t);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(Cast) { // Cases for CAST and BITCAST expressions.
+ TCases cases = { // each entry: {input query, expected text}; CAST / BITCAST / AS become upper case, the type name keeps its case
+ {"select cast(1 as string)","SELECT\n\tCAST(1 AS string);\n"},
+ {"select bitcast(1 as int32)","SELECT\n\tBITCAST(1 AS int32);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(StructLiteral) { // Cases for single-line <| ... |> struct literals: empty, one member, two members.
+ TCases cases = { // each entry: {input query, expected text}; members get ": " after the name and ", " between them
+ {"select <||>","SELECT\n\t<||>;\n"},
+ {"select <|a:1|>","SELECT\n\t<|a: 1|>;\n"},
+ {"select <|a:1,b:2|>","SELECT\n\t<|a: 1, b: 2|>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(TableHints) { // Cases for table hints after FROM: WITH SCHEMA (...), WITH SCHEMA struct<...>, and a parenthesized hint list.
+ TCases cases = { // each entry: {input query, expected text}; the hint is expected on a tab-indented line under FROM
+ {"select * from plato.T with schema(foo int32, bar list<string>) where key is not null",
+ "SELECT\n\t*\nFROM plato.T\n\tWITH SCHEMA (foo int32, bar list<string>)\nWHERE key IS NOT NULL;\n"},
+ {"select * from plato.T with schema struct<foo:integer, Bar:list<string?>> where key<0", // "key<0" is a comparison here and becomes "key < 0"
+ "SELECT\n\t*\nFROM plato.T\n\tWITH SCHEMA struct<foo: integer, Bar: list<string?>>\nWHERE key < 0;\n"},
+ {"select * from plato.T with (foo=bar, x=$y, a=(a, b, c), u='aaa', schema (foo int32, bar list<string>))",
+ "SELECT\n\t*\nFROM plato.T\n\tWITH (foo = bar, x = $y, a = (a, b, c), u = 'aaa', SCHEMA (foo int32, bar list<string>));\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(BoolAsVariableName) { // Single case where a named expression is called "False" and is assigned the literal True.
+ TCases cases = { // one entry: {input query, expected text}
+ {"$ False = True; select $ False;", // "$ False" becomes "$False" with its case kept, while the literal True becomes TRUE
+ "$False = TRUE;\n\nSELECT\n\t$False;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(WithSchemaEquals) { // Single case for the "schema = (...)" form inside a parenthesized hint list.
+ TCases cases = { // one entry: {input query, expected text}
+ {"select * from plato.T with (format= csv_with_names, schema=(year int32 Null, month String, day String not null, a Utf8, b Uint16));", // Null / not null become NULL / NOT NULL; type names keep their case
+ "SELECT\n\t*\nFROM plato.T\n\tWITH (format = csv_with_names, SCHEMA = (year int32 NULL, month String, day String NOT NULL, a Utf8, b Uint16));\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(SquareBrackets) { // Single case for an index expression a[0].
+ TCases cases = { // one entry: {input query, expected text}; no space is expected before '['
+ {"select a[0]",
+ "SELECT\n\ta[0];\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineList) { // Cases for [ ... ] list literals whose input contains a newline inside the brackets.
+ TCases cases = { // each entry: {input query, expected text}; brackets are expected on their own lines with items indented one tab deeper
+ {"select [\n]",
+ "SELECT\n\t[\n\t];\n"},
+ {"select [1\n]",
+ "SELECT\n\t[\n\t\t1\n\t];\n"},
+ {"select [\n1]",
+ "SELECT\n\t[\n\t\t1\n\t];\n"},
+ {"select [1,\n]", // a trailing comma is kept
+ "SELECT\n\t[\n\t\t1,\n\t];\n"},
+ {"select [1\n,]", // a comma that starts an input line is expected at the end of the previous item
+ "SELECT\n\t[\n\t\t1,\n\t];\n"},
+ {"select [\n1,]",
+ "SELECT\n\t[\n\t\t1,\n\t];\n"},
+ {"select [1,2,\n3,4]", // items that share an input line stay together on one expected line
+ "SELECT\n\t[\n\t\t1, 2,\n\t\t3, 4\n\t];\n"},
+ {"select [1,2,\n3,4,]",
+ "SELECT\n\t[\n\t\t1, 2,\n\t\t3, 4,\n\t];\n"},
+ {"select [1,2\n,3,\n4\n,5]",
+ "SELECT\n\t[\n\t\t1, 2,\n\t\t3,\n\t\t4,\n\t\t5\n\t];\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineTuple) { // Cases for ( ... ) tuple literals whose input contains a newline inside the parentheses.
+ TCases cases = { // each entry: {input query, expected text}; parentheses are expected on their own lines with items indented one tab deeper
+ {"select (\n)",
+ "SELECT\n\t(\n\t);\n"},
+ {"select (1,\n)", // a trailing comma is kept
+ "SELECT\n\t(\n\t\t1,\n\t);\n"},
+ {"select (1\n,)", // a comma that starts an input line is expected at the end of the previous item
+ "SELECT\n\t(\n\t\t1,\n\t);\n"},
+ {"select (\n1,)",
+ "SELECT\n\t(\n\t\t1,\n\t);\n"},
+ {"select (1,2,\n3,4)", // items that share an input line stay together on one expected line
+ "SELECT\n\t(\n\t\t1, 2,\n\t\t3, 4\n\t);\n"},
+ {"select (1,2,\n3,4,)",
+ "SELECT\n\t(\n\t\t1, 2,\n\t\t3, 4,\n\t);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineSet) { // Cases for { ... } literals without key:value pairs whose input contains a newline inside the braces.
+ TCases cases = { // each entry: {input query, expected text}; braces are expected on their own lines with items indented one tab deeper
+ {"select {\n}",
+ "SELECT\n\t{\n\t};\n"},
+ {"select {1\n}",
+ "SELECT\n\t{\n\t\t1\n\t};\n"},
+ {"select {\n1}",
+ "SELECT\n\t{\n\t\t1\n\t};\n"},
+ {"select {1,\n}", // a trailing comma is kept
+ "SELECT\n\t{\n\t\t1,\n\t};\n"},
+ {"select {1\n,}", // a comma that starts an input line is expected at the end of the previous item
+ "SELECT\n\t{\n\t\t1,\n\t};\n"},
+ {"select {\n1,}",
+ "SELECT\n\t{\n\t\t1,\n\t};\n"},
+ {"select {1,2,\n3,4}", // items that share an input line stay together on one expected line
+ "SELECT\n\t{\n\t\t1, 2,\n\t\t3, 4\n\t};\n"},
+ {"select {1,2,\n3,4,}",
+ "SELECT\n\t{\n\t\t1, 2,\n\t\t3, 4,\n\t};\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineDict) { // Cases for { key:value, ... } literals whose input contains a newline inside the braces.
+ TCases cases = { // each entry: {input query, expected text}; pairs are expected as "key: value", indented one tab deeper than the braces
+ {"select {0:1\n}",
+ "SELECT\n\t{\n\t\t0: 1\n\t};\n"},
+ {"select {\n0:1}",
+ "SELECT\n\t{\n\t\t0: 1\n\t};\n"},
+ {"select {0:1,\n}", // a trailing comma is kept
+ "SELECT\n\t{\n\t\t0: 1,\n\t};\n"},
+ {"select {0:1\n,}", // a comma that starts an input line is expected at the end of the previous pair
+ "SELECT\n\t{\n\t\t0: 1,\n\t};\n"},
+ {"select {\n0:1,}",
+ "SELECT\n\t{\n\t\t0: 1,\n\t};\n"},
+ {"select {10:1,20:2,\n30:3,40:4}", // pairs that share an input line stay together on one expected line
+ "SELECT\n\t{\n\t\t10: 1, 20: 2,\n\t\t30: 3, 40: 4\n\t};\n"},
+ {"select {10:1,20:2,\n30:3,40:4,}",
+ "SELECT\n\t{\n\t\t10: 1, 20: 2,\n\t\t30: 3, 40: 4,\n\t};\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineFuncCall) { // Cases for f( ... ) calls whose input contains a newline inside the argument list.
+ TCases cases = { // each entry: {input query, expected text}; "f(" and ")" are expected on their own lines with arguments indented one tab deeper
+ {"select f(\n)",
+ "SELECT\n\tf(\n\t);\n"},
+ {"select f(1\n)",
+ "SELECT\n\tf(\n\t\t1\n\t);\n"},
+ {"select f(\n1)",
+ "SELECT\n\tf(\n\t\t1\n\t);\n"},
+ {"select f(1,\n)", // a trailing comma is kept
+ "SELECT\n\tf(\n\t\t1,\n\t);\n"},
+ {"select f(1\n,)", // a comma that starts an input line is expected at the end of the previous argument
+ "SELECT\n\tf(\n\t\t1,\n\t);\n"},
+ {"select f(\n1,)",
+ "SELECT\n\tf(\n\t\t1,\n\t);\n"},
+ {"select f(1,2,\n3,4)", // arguments that share an input line stay together on one expected line
+ "SELECT\n\tf(\n\t\t1, 2,\n\t\t3, 4\n\t);\n"},
+ {"select f(1,2,\n3,4,)",
+ "SELECT\n\tf(\n\t\t1, 2,\n\t\t3, 4,\n\t);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineStruct) { // Cases for <| ... |> struct literals whose input contains a newline inside the delimiters.
+ TCases cases = { // each entry: {input query, expected text}; "<|" and "|>" are expected on their own lines with members indented one tab deeper
+ {"select <|\n|>",
+ "SELECT\n\t<|\n\t|>;\n"},
+ {"select <|a:1\n|>",
+ "SELECT\n\t<|\n\t\ta: 1\n\t|>;\n"},
+ {"select <|\na:1|>",
+ "SELECT\n\t<|\n\t\ta: 1\n\t|>;\n"},
+ {"select <|a:1,\n|>", // a trailing comma is kept
+ "SELECT\n\t<|\n\t\ta: 1,\n\t|>;\n"},
+ {"select <|a:1\n,|>", // a comma that starts an input line is expected at the end of the previous member
+ "SELECT\n\t<|\n\t\ta: 1,\n\t|>;\n"},
+ {"select <|\na:1,|>",
+ "SELECT\n\t<|\n\t\ta: 1,\n\t|>;\n"},
+ {"select <|a:1,b:2,\nc:3,d:4|>", // members that share an input line stay together on one expected line
+ "SELECT\n\t<|\n\t\ta: 1, b: 2,\n\t\tc: 3, d: 4\n\t|>;\n"},
+ {"select <|a:1,b:2,\nc:3,d:4,|>",
+ "SELECT\n\t<|\n\t\ta: 1, b: 2,\n\t\tc: 3, d: 4,\n\t|>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineListType) { // Cases for list<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; both newline positions yield the same expanded expected text
+ {"select list<int32\n>",
+ "SELECT\n\tlist<\n\t\tint32\n\t>;\n"},
+ {"select list<\nint32>",
+ "SELECT\n\tlist<\n\t\tint32\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineOptionalType) { // Cases for optional<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; both newline positions yield the same expanded expected text
+ {"select optional<int32\n>",
+ "SELECT\n\toptional<\n\t\tint32\n\t>;\n"},
+ {"select optional<\nint32>",
+ "SELECT\n\toptional<\n\t\tint32\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineStreamType) { // Cases for stream<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; both newline positions yield the same expanded expected text
+ {"select stream<int32\n>",
+ "SELECT\n\tstream<\n\t\tint32\n\t>;\n"},
+ {"select stream<\nint32>",
+ "SELECT\n\tstream<\n\t\tint32\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineFlowType) { // Cases for flow<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; both newline positions yield the same expanded expected text
+ {"select flow<int32\n>",
+ "SELECT\n\tflow<\n\t\tint32\n\t>;\n"},
+ {"select flow<\nint32>",
+ "SELECT\n\tflow<\n\t\tint32\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineSetType) { // Cases for set<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; both newline positions yield the same expanded expected text
+ {"select set<int32\n>",
+ "SELECT\n\tset<\n\t\tint32\n\t>;\n"},
+ {"select set<\nint32>",
+ "SELECT\n\tset<\n\t\tint32\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineTupleType) { // Cases for tuple<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; element types are expected indented one tab deeper than "tuple<" and ">"
+ {"select tuple<\n>", // empty tuple: the expected text has a line holding two tabs and a single space
+ "SELECT\n\ttuple<\n\t\t \n\t>;\n"},
+ {"select tuple<int32\n>",
+ "SELECT\n\ttuple<\n\t\tint32\n\t>;\n"},
+ {"select tuple<\nint32>",
+ "SELECT\n\ttuple<\n\t\tint32\n\t>;\n"},
+ {"select tuple<int32,\n>", // a trailing comma is kept
+ "SELECT\n\ttuple<\n\t\tint32,\n\t>;\n"},
+ {"select tuple<int32\n,>", // a comma that starts an input line is expected at the end of the previous element
+ "SELECT\n\ttuple<\n\t\tint32,\n\t>;\n"},
+ {"select tuple<\nint32,>",
+ "SELECT\n\ttuple<\n\t\tint32,\n\t>;\n"},
+ {"select tuple<\nint32,string,\ndouble,bool>", // elements that share an input line stay together on one expected line
+ "SELECT\n\ttuple<\n\t\tint32, string,\n\t\tdouble, bool\n\t>;\n"},
+ {"select tuple<\nint32,string,\ndouble,bool,>",
+ "SELECT\n\ttuple<\n\t\tint32, string,\n\t\tdouble, bool,\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineStructType) { // Cases for struct<...> whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; members are expected as "name: type", indented one tab deeper than "struct<" and ">"
+ {"select struct<\n>", // empty struct: the expected text has a line holding two tabs and a single space
+ "SELECT\n\tstruct<\n\t\t \n\t>;\n"},
+ {"select struct<a:int32\n>",
+ "SELECT\n\tstruct<\n\t\ta: int32\n\t>;\n"},
+ {"select struct<\na:int32>",
+ "SELECT\n\tstruct<\n\t\ta: int32\n\t>;\n"},
+ {"select struct<a:int32,\n>", // a trailing comma is kept
+ "SELECT\n\tstruct<\n\t\ta: int32,\n\t>;\n"},
+ {"select struct<a:int32\n,>", // a comma that starts an input line is expected at the end of the previous member
+ "SELECT\n\tstruct<\n\t\ta: int32,\n\t>;\n"},
+ {"select struct<\na:int32,>",
+ "SELECT\n\tstruct<\n\t\ta: int32,\n\t>;\n"},
+ {"select struct<\na:int32,b:string,\nc:double,d:bool>", // members that share an input line stay together on one expected line
+ "SELECT\n\tstruct<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool\n\t>;\n"},
+ {"select struct<\na:int32,b:string,\nc:double,d:bool,>",
+ "SELECT\n\tstruct<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool,\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineVariantOverTupleType) { // Cases for variant<...> with unnamed alternatives whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; alternatives are expected indented one tab deeper than "variant<" and ">"
+ {"select variant<int32\n>",
+ "SELECT\n\tvariant<\n\t\tint32\n\t>;\n"},
+ {"select variant<\nint32>",
+ "SELECT\n\tvariant<\n\t\tint32\n\t>;\n"},
+ {"select variant<int32,\n>", // a trailing comma is kept
+ "SELECT\n\tvariant<\n\t\tint32,\n\t>;\n"},
+ {"select variant<int32\n,>", // a comma that starts an input line is expected at the end of the previous alternative
+ "SELECT\n\tvariant<\n\t\tint32,\n\t>;\n"},
+ {"select variant<\nint32,>",
+ "SELECT\n\tvariant<\n\t\tint32,\n\t>;\n"},
+ {"select variant<\nint32,string,\ndouble,bool>", // alternatives that share an input line stay together on one expected line
+ "SELECT\n\tvariant<\n\t\tint32, string,\n\t\tdouble, bool\n\t>;\n"},
+ {"select variant<\nint32,string,\ndouble,bool,>",
+ "SELECT\n\tvariant<\n\t\tint32, string,\n\t\tdouble, bool,\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineVariantOverStructType) { // Cases for variant<...> with named alternatives whose input contains a newline inside the angle brackets.
+ TCases cases = { // each entry: {input query, expected text}; alternatives are expected as "name: type", indented one tab deeper than "variant<" and ">"
+ {"select variant<a:int32\n>",
+ "SELECT\n\tvariant<\n\t\ta: int32\n\t>;\n"},
+ {"select variant<\na:int32>",
+ "SELECT\n\tvariant<\n\t\ta: int32\n\t>;\n"},
+ {"select variant<a:int32,\n>", // a trailing comma is kept
+ "SELECT\n\tvariant<\n\t\ta: int32,\n\t>;\n"},
+ {"select variant<a:int32\n,>", // a comma that starts an input line is expected at the end of the previous alternative
+ "SELECT\n\tvariant<\n\t\ta: int32,\n\t>;\n"},
+ {"select variant<\na:int32,>",
+ "SELECT\n\tvariant<\n\t\ta: int32,\n\t>;\n"},
+ {"select variant<\na:int32,b:string,\nc:double,d:bool>", // alternatives that share an input line stay together on one expected line
+ "SELECT\n\tvariant<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool\n\t>;\n"},
+ {"select variant<\na:int32,b:string,\nc:double,d:bool,>",
+ "SELECT\n\tvariant<\n\t\ta: int32, b: string,\n\t\tc: double, d: bool,\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases); // Run is defined outside this chunk; presumably checks each input against its expected text - TODO confirm
+}
+
+Y_UNIT_TEST(MultiLineEnum) { // {input, expected output} pairs for enum<...> types whose source text contains a line break
+ TCases cases = {
+ {"select enum<a\n>",
+ "SELECT\n\tenum<\n\t\ta\n\t>;\n"},
+ {"select enum<\na>",
+ "SELECT\n\tenum<\n\t\ta\n\t>;\n"},
+ {"select enum<a,\n>",
+ "SELECT\n\tenum<\n\t\ta,\n\t>;\n"},
+ {"select enum<a\n,>",
+ "SELECT\n\tenum<\n\t\ta,\n\t>;\n"},
+ {"select enum<\na,>",
+ "SELECT\n\tenum<\n\t\ta,\n\t>;\n"},
+ {"select enum<\na,b,\nc,d>",
+ "SELECT\n\tenum<\n\t\ta, b,\n\t\tc, d\n\t>;\n"},
+ {"select enum<\na,b,\nc,d,>",
+ "SELECT\n\tenum<\n\t\ta, b,\n\t\tc, d,\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(MultiLineResourceType) { // resource<...> with a line break before or after the tag: both inputs yield the same expanded form
+ TCases cases = {
+ {"select resource<foo\n>",
+ "SELECT\n\tresource<\n\t\tfoo\n\t>;\n"},
+ {"select resource<\nfoo>",
+ "SELECT\n\tresource<\n\t\tfoo\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(MultiLineTaggedType) { // tagged<type,tag> with a line break at each possible position
+ TCases cases = {
+ {"select tagged<int32,foo\n>",
+ "SELECT\n\ttagged<\n\t\tint32, foo\n\t>;\n"}, // break next to the angle bracket only: both arguments stay on one line
+ {"select tagged<int32,\nfoo>",
+ "SELECT\n\ttagged<\n\t\tint32,\n\t\tfoo\n\t>;\n"}, // break next to the comma: one argument per line
+ {"select tagged<int32\n,foo>",
+ "SELECT\n\ttagged<\n\t\tint32,\n\t\tfoo\n\t>;\n"},
+ {"select tagged<\nint32,foo>",
+ "SELECT\n\ttagged<\n\t\tint32, foo\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(MultiLineDictType) { // dict<key,value> with a line break at each possible position
+ TCases cases = {
+ {"select dict<int32,string\n>",
+ "SELECT\n\tdict<\n\t\tint32, string\n\t>;\n"}, // break next to the angle bracket only: key and value stay on one line
+ {"select dict<int32,\nstring>",
+ "SELECT\n\tdict<\n\t\tint32,\n\t\tstring\n\t>;\n"}, // break next to the comma: key and value on separate lines
+ {"select dict<int32\n,string>",
+ "SELECT\n\tdict<\n\t\tint32,\n\t\tstring\n\t>;\n"},
+ {"select dict<\nint32,string>",
+ "SELECT\n\tdict<\n\t\tint32, string\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(MultiLineCallableType) { // callable<(args)->ret> with line breaks outside and inside the argument list
+ TCases cases = {
+ {"select callable<()->int32\n>",
+ "SELECT\n\tcallable<\n\t\t() -> int32\n\t>;\n"},
+ {"select callable<\n()->int32>",
+ "SELECT\n\tcallable<\n\t\t() -> int32\n\t>;\n"},
+ {"select callable<\n(int32)->int32>",
+ "SELECT\n\tcallable<\n\t\t(int32) -> int32\n\t>;\n"},
+ {"select callable<\n(int32,\ndouble)->int32>",
+ "SELECT\n\tcallable<\n\t\t(\n\t\t\tint32,\n\t\t\tdouble\n\t\t) -> int32\n\t>;\n"}, // a break inside the parentheses expands the argument list one level deeper
+ {"select callable<\n(int32\n,double)->int32>",
+ "SELECT\n\tcallable<\n\t\t(\n\t\t\tint32,\n\t\t\tdouble\n\t\t) -> int32\n\t>;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(UnaryOp) { // unary -, + and ~ are expected to stay attached to their operand, with no space inserted
+ TCases cases = {
+ {"select -x,+x,~x,-1,-1.0,+1,+1.0,~1u",
+ "SELECT\n\t-x,\n\t+x,\n\t~x,\n\t-1,\n\t-1.0,\n\t+1,\n\t+1.0,\n\t~1u;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(MatchRecognize) { // one case given as two raw strings: the input query, then the expected output with the MATCH_RECOGNIZE clause on a single line
+ TCases cases = {{R"(
+pragma FeatureR010="prototype";
+USE plato;
+SELECT
+ *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ( A )
+ DEFINE A as A
+);
+)",
+R"(PRAGMA FeatureR010 = "prototype";
+USE plato;
+
+SELECT
+ *
+FROM Input MATCH_RECOGNIZE (PATTERN (A) DEFINE A AS A);
+)"
+ }};
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(CreateTableTrailingComma) { // a trailing comma after the last CREATE TABLE entry is accepted and kept in the output
+ TCases cases = {
+ {"CREATE TABLE tableName (Key Uint32, PRIMARY KEY (Key),);",
+ "CREATE TABLE tableName (\n\tKey Uint32,\n\tPRIMARY KEY (Key),\n);\n"},
+ {"CREATE TABLE tableName (Key Uint32,);",
+ "CREATE TABLE tableName (\n\tKey Uint32,\n);\n"},
+ };
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(Union) { // chained UNION / UNION ALL: each operator is expected on its own unindented line between the SELECTs
+ TCases cases = {
+ {"select 1 union all select 2 union select 3 union all select 4 union select 5",
+ "SELECT\n\t1\nUNION ALL\nSELECT\n\t2\nUNION\nSELECT\n\t3\nUNION ALL\nSELECT\n\t4\nUNION\nSELECT\n\t5;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(CommentAfterLastSelect) { // a comment after the final statement is expected directly after the last token, before the added ';'
+ TCases cases = {
+ {"SELECT 1--comment\n",
+ "SELECT\n\t1--comment\n;\n"}, // a line comment pushes the ';' onto the following line
+ {"SELECT 1\n\n--comment\n",
+ "SELECT\n\t1--comment\n;\n"}, // blank lines before the comment are dropped
+ {"SELECT 1\n\n--comment",
+ "SELECT\n\t1--comment\n;\n"},
+ {"SELECT * FROM Input\n\n\n\n/* comment */\n\n\n",
+ "SELECT\n\t*\nFROM Input/* comment */;\n"}, // a block comment lets the ';' stay on the same line
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(WindowFunctionInsideExpr) { // window function calls nested inside CAST(...)
+ TCases cases = {
+ {"SELECT CAST(ROW_NUMBER() OVER () AS String) AS x,\nFROM Input;",
+ "SELECT\n\tCAST(ROW_NUMBER() OVER () AS String) AS x,\nFROM Input;\n"}, // empty OVER (): the whole CAST stays on one line
+ {"SELECT CAST(ROW_NUMBER() OVER (PARTITION BY key) AS String) AS x,\nFROM Input;",
+ "SELECT\n\tCAST(\n\t\tROW_NUMBER() OVER (\n\t\t\tPARTITION BY\n\t\t\t\tkey\n\t\t) AS String\n\t) AS x,\nFROM Input;\n"}, // non-empty window spec: CAST and OVER are both expanded
+ {"SELECT CAST(ROW_NUMBER() OVER (users) AS String) AS x,\nFROM Input;",
+ "SELECT\n\tCAST(\n\t\tROW_NUMBER() OVER (\n\t\t\tusers\n\t\t) AS String\n\t) AS x,\nFROM Input;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(ExistsExpr) { // EXISTS (subquery) at top level and nested in CAST: the subquery is expected on indented lines inside the parentheses
+ TCases cases = {
+ {"SELECT EXISTS (SELECT 1);",
+ "SELECT\n\tEXISTS (\n\t\tSELECT\n\t\t\t1\n\t);\n"},
+ {"SELECT CAST(EXISTS(SELECT 1) AS Int) AS x,\nFROM Input;",
+ "SELECT\n\tCAST(\n\t\tEXISTS (\n\t\t\tSELECT\n\t\t\t\t1\n\t\t) AS Int\n\t) AS x,\nFROM Input;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(LambdaInsideExpr) { // a lambda passed as a call argument: its body is expected on its own indented line inside the braces
+ TCases cases = {
+ {"SELECT ListMap(AsList(1,2),($x)->{return $x+1});",
+ "SELECT\n\tListMap(\n\t\tAsList(1, 2), ($x) -> {\n\t\t\tRETURN $x + 1\n\t\t}\n\t);\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(CaseExpr) { // CASE layout: each WHEN on its own line, THEN one level deeper, ELSE aligned with WHEN
+ TCases cases = {
+ {"SELECT CASE WHEN 1 == 2 THEN 3 WHEN 4 == 5 THEN 6 WHEN 7 == 8 THEN 9 ELSE 10 END;",
+ "SELECT\n\tCASE\n\t\tWHEN 1 == 2\n\t\t\tTHEN 3\n\t\tWHEN 4 == 5\n\t\t\tTHEN 6\n\t\tWHEN 7 == 8\n\t\t\tTHEN 9\n\t\tELSE 10\n\tEND;\n"},
+ {"SELECT CAST(CASE WHEN 1 == 2 THEN 3 WHEN 4 == 5 THEN 6 ELSE 10 END AS String);",
+ "SELECT\n\tCAST(\n\t\tCASE\n\t\t\tWHEN 1 == 2\n\t\t\t\tTHEN 3\n\t\t\tWHEN 4 == 5\n\t\t\t\tTHEN 6\n\t\t\tELSE 10\n\t\tEND AS String\n\t);\n"}, // same layout, shifted one level when nested in CAST
+ {"SELECT CASE x WHEN 1 THEN 2 WHEN 3 THEN 4 WHEN 5 THEN 6 ELSE 10 END;",
+ "SELECT\n\tCASE x\n\t\tWHEN 1\n\t\t\tTHEN 2\n\t\tWHEN 3\n\t\t\tTHEN 4\n\t\tWHEN 5\n\t\t\tTHEN 6\n\t\tELSE 10\n\tEND;\n"}, // form with an operand after CASE
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(MultiTokenOperations) { // operators spelled with several characters (>>|, >>, ??) are expected to come out unchanged
+ TCases cases = {
+ {"$x = 1 >>| 2;",
+ "$x = 1 >>| 2;\n"},
+ {"$x = 1 >> 2;",
+ "$x = 1 >> 2;\n"},
+ {"$x = 1 ?? 2;",
+ "$x = 1 ?? 2;\n"},
+ {"$x = 1 > /*comment*/ > /*comment*/ | 2;",
+ "$x = 1 >/*comment*/>/*comment*/| 2;\n"}, // comments between the operator's parts are kept; the spaces around them are removed
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(OperatorNewlines) { // how line breaks written around binary operators in the input are reflected in the output
+ TCases cases = {
+ {"$x = TRUE\nOR\nFALSE;",
+ "$x = TRUE\n\tOR\n\tFALSE;\n"}, // breaks on both sides of the operator: the operator gets its own line
+ {"$x = TRUE OR\nFALSE;",
+ "$x = TRUE OR\n\tFALSE;\n"},
+ {"$x = TRUE\nOR FALSE;",
+ "$x = TRUE OR\n\tFALSE;\n"}, // a break only before the operator is moved to after it
+ {"$x = 1\n+2\n*3;",
+ "$x = 1 +\n\t2 *\n\t\t3;\n"}, // the * operand is indented one level deeper than the + operands
+ {"$x = 1\n+\n2\n*3\n*5\n+\n4;",
+ "$x = 1\n\t+\n\t2 *\n\t\t3 *\n\t\t5\n\t+\n\t4;\n"},
+ {"$x = 1\n+2+3+4\n+5+6+7+\n\n8+9+10;",
+ "$x = 1 +\n\t2 + 3 + 4 +\n\t5 + 6 + 7 +\n\t8 + 9 + 10;\n"}, // the blank line in the input does not appear in the output
+ {"$x = TRUE\nAND\nTRUE OR\nFALSE\nAND TRUE\nOR FALSE\nAND TRUE\nOR FALSE;",
+ "$x = TRUE\n\tAND\n\tTRUE OR\n\tFALSE AND\n\t\tTRUE OR\n\tFALSE AND\n\t\tTRUE OR\n\tFALSE;\n"},
+ {"$x = 1 -- comment\n+ 2;",
+ "$x = 1-- comment\n\t+\n\t2;\n"}, // cases with line comments next to the operator
+ {"$x = 1 -- comment\n+ -- comment\n2;",
+ "$x = 1-- comment\n\t+-- comment\n\t2;\n"},
+ {"$x = 1 + -- comment\n2;",
+ "$x = 1 +-- comment\n\t2;\n"},
+ {"$x = 1\n>\n>\n|\n2;",
+ "$x = 1\n\t>>|\n\t2;\n"}, // operator parts split across lines are joined back into >>|
+ {"$x = 1\n?? 2 ??\n3\n??\n4 +\n5\n*\n6 +\n7 ??\n8;",
+ "$x = 1 ??\n\t2 ??\n\t3\n\t??\n\t4 +\n\t\t5\n\t\t\t*\n\t\t\t6 +\n\t\t7 ??\n\t8;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(ObfuscateSelect) { // Obfuscate mode on SELECT/DECLARE: literals and identifiers are replaced by fixed placeholders
+ TCases cases = {
+ {"select 1;",
+ "SELECT\n\t0;\n"}, // integer literal -> 0
+ {"select true;",
+ "SELECT\n\tFALSE;\n"}, // boolean literal -> FALSE
+ {"select 'foo';",
+ "SELECT\n\t'str';\n"}, // string literal -> 'str'
+ {"select 3.0;",
+ "SELECT\n\t0.0;\n"}, // real literal -> 0.0
+ {"select col;",
+ "SELECT\n\tid;\n"}, // column name -> id
+ {"select * from tab;",
+ "SELECT\n\t*\nFROM id;\n"},
+ {"select cast(col as int32);",
+ "SELECT\n\tCAST(id AS int32);\n"}, // the type name is kept
+ {"select func(col);",
+ "SELECT\n\tfunc(id);\n"}, // the function name is kept
+ {"select mod::func(col);",
+ "SELECT\n\tmod::func(id);\n"},
+ {"declare $a as int32;",
+ "DECLARE $id AS int32;\n"}, // named node -> $id
+ {"select * from `logs/of/bob` where pwd='foo';",
+ "SELECT\n\t*\nFROM id\nWHERE id = 'str';\n"}, // a back-quoted table path is replaced too
+ {"select $f();",
+ "SELECT\n\t$id();\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases, NSQLFormat::EFormatMode::Obfuscate);
+}
+
+Y_UNIT_TEST(ObfuscatePragma) { // Obfuscate mode on PRAGMA: both the pragma name and its value are replaced by placeholders
+ TCases cases = {
+ {"pragma a=1",
+ "PRAGMA id = 0;\n"},
+ {"pragma a='foo';",
+ "PRAGMA id = 'str';\n"},
+ {"pragma a=true;",
+ "PRAGMA id = FALSE;\n"},
+ {"pragma a=$foo;",
+ "PRAGMA id = $id;\n"},
+ {"pragma a=foo;",
+ "PRAGMA id = id;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases, NSQLFormat::EFormatMode::Obfuscate);
+}
+
+Y_UNIT_TEST(CreateView) { // mixed-case CREATE VIEW: keywords are expected upper-cased, the view name keeps its case, the SELECT starts on a new line
+ TCases cases = {
+ {"creAte vIEw TheView wiTh (security_invoker = trUE) As SELect 1",
+ "CREATE VIEW TheView WITH (security_invoker = TRUE) AS\nSELECT\n\t1;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(DropView) { // mixed-case DROP VIEW: keywords are expected upper-cased, the view name keeps its case
+ TCases cases = {
+ {"dRop viEW theVIEW",
+ "DROP VIEW theVIEW;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(ResourcePoolOperations) { // CREATE / ALTER / DROP RESOURCE POOL
+ TCases cases = {
+ {"creAte reSourCe poOl naMe With (a = \"b\")",
+ "CREATE RESOURCE POOL naMe WITH (a = \"b\");\n"}, // a single WITH setting stays on one line
+ {"create resource pool eds with (a=\"a\",b=\"b\",c = true)",
+ "CREATE RESOURCE POOL eds WITH (\n\ta = \"a\",\n\tb = \"b\",\n\tc = TRUE\n);\n"}, // several settings: one per line
+ {"alTer reSOurcE poOl naMe resEt (b, c), seT (x=y, z=false)",
+ "ALTER RESOURCE POOL naMe\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"}, // each ALTER action on its own indented line
+ {"alter resource pool eds reset (a), set (x=y)",
+ "ALTER RESOURCE POOL eds\n\tRESET (a),\n\tSET (x = y);\n"},
+ {"dRop reSourCe poOl naMe",
+ "DROP RESOURCE POOL naMe;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(BackupCollectionOperations) { // CREATE / ALTER / DROP BACKUP COLLECTION
+ TCases cases = {
+ {"creAte BackuP colLection `-naMe` wIth (a = \"b\")",
+ "CREATE BACKUP COLLECTION `-naMe` WITH (a = \"b\");\n"},
+ {"creAte BackuP colLection `-naMe` DATabase wIth (a = \"b\")",
+ "CREATE BACKUP COLLECTION `-naMe` DATABASE WITH (a = \"b\");\n"},
+ {"creAte BackuP colLection `-naMe` ( tabLe `tbl1` , TablE `tbl2`) wIth (a = \"b\")",
+ "CREATE BACKUP COLLECTION `-naMe` (TABLE `tbl1`, TABLE `tbl2`) WITH (a = \"b\");\n"},
+ {"alTer bACKuP coLLECTION naMe resEt (b, c), seT (x=y, z=false)",
+ "ALTER BACKUP COLLECTION naMe\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"}, // each ALTER action on its own indented line
+ {"alTer bACKuP coLLECTION naMe aDD DATAbase",
+ "ALTER BACKUP COLLECTION naMe\n\tADD DATABASE;\n"},
+ {"alTer bACKuP coLLECTION naMe DRoP \n\n DaTAbase",
+ "ALTER BACKUP COLLECTION naMe\n\tDROP DATABASE;\n"}, // line breaks inside an action are collapsed
+ {"alTer bACKuP coLLECTION naMe add \n\n tablE\n\tsometable,drOp TABle `other`",
+ "ALTER BACKUP COLLECTION naMe\n\tADD TABLE sometable,\n\tDROP TABLE `other`;\n"},
+ {"DROP backup collectiOn `/some/path`",
+ "DROP BACKUP COLLECTION `/some/path`;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(Analyze) { // ANALYZE with and without a column list; the table here is literally named "table" and keeps its lower case
+ TCases cases = {
+ {"analyze table (col1, col2, col3)",
+ "ANALYZE table (col1, col2, col3);\n"},
+ {"analyze table",
+ "ANALYZE table;\n"}
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(ResourcePoolClassifierOperations) { // CREATE / ALTER / DROP RESOURCE POOL CLASSIFIER; same layouts as the RESOURCE POOL cases
+ TCases cases = {
+ {"creAte reSourCe poOl ClaSsiFIer naMe With (a = \"b\")",
+ "CREATE RESOURCE POOL CLASSIFIER naMe WITH (a = \"b\");\n"},
+ {"create resource pool classifier eds with (a=\"a\",b=\"b\",c = true)",
+ "CREATE RESOURCE POOL CLASSIFIER eds WITH (\n\ta = \"a\",\n\tb = \"b\",\n\tc = TRUE\n);\n"},
+ {"alTer reSOurcE poOl ClaSsiFIer naMe resEt (b, c), seT (x=y, z=false)",
+ "ALTER RESOURCE POOL CLASSIFIER naMe\n\tRESET (b, c),\n\tSET (x = y, z = FALSE);\n"},
+ {"alter resource pool classifier eds reset (a), set (x=y)",
+ "ALTER RESOURCE POOL CLASSIFIER eds\n\tRESET (a),\n\tSET (x = y);\n"},
+ {"dRop reSourCe poOl ClaSsiFIer naMe",
+ "DROP RESOURCE POOL CLASSIFIER naMe;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(Backup) { // BACKUP ... INCREMENTAL: leading tab and blank lines in the input collapse into a single output line
+ TCases cases = {
+ {"\tBaCKup\n\n TestCollection incremENTAl",
+ "BACKUP TestCollection INCREMENTAL;\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
+
+Y_UNIT_TEST(Restore) { // RESTORE ... AT '<timestamp>': whitespace runs in the input collapse into a single output line
+ TCases cases = {
+ {"resToRe\n\n\n TestCollection aT\n \t \n '2024-06-16_20-14-02'",
+ "RESTORE TestCollection AT '2024-06-16_20-14-02';\n"},
+ };
+
+ TSetup setup;
+ setup.Run(cases);
+}
diff --git a/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp
new file mode 100644
index 00000000000..63c08bb8b7e
--- /dev/null
+++ b/yql/essentials/sql/v1/format/sql_format_ut_antlr4.cpp
@@ -0,0 +1,52 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include "sql_format.h"
+
+#include <google/protobuf/arena.h>
+#include <util/string/subst.h>
+#include <util/string/join.h>
+
+namespace {
+
+using TCases = TVector<std::pair<TString, TString>>;
+
+struct TSetup { // Test fixture: owns a formatter created with Antlr4Parser enabled and runs {input, expected} case tables through it
+ TSetup() {
+ NSQLTranslation::TTranslationSettings settings;
+ settings.Arena = &Arena; // the arena is a member of this fixture
+ settings.Antlr4Parser = true;
+ Formatter = NSQLFormat::MakeSqlFormatter(settings);
+ }
+
+ void Run(const TCases& cases, NSQLFormat::EFormatMode mode = NSQLFormat::EFormatMode::Pretty) { // asserts three properties per case, see below
+ for (const auto& c : cases) {
+ NYql::TIssues issues;
+ TString formatted;
+ auto res = Formatter->Format(c.first, formatted, issues, mode); // 1) the input must format successfully in the requested mode
+ UNIT_ASSERT_C(res, issues.ToString());
+ auto expected = c.second;
+ SubstGlobal(expected, "\t", TString(NSQLFormat::OneIndent, ' ')); // case tables spell one indent level as "\t"; expand it to OneIndent spaces
+ UNIT_ASSERT_NO_DIFF(formatted, expected);
+
+ TString formatted2;
+ auto res2 = Formatter->Format(formatted, formatted2, issues); // 2) formatting the output again (no mode argument passed) must reproduce it
+ UNIT_ASSERT_C(res2, issues.ToString());
+ UNIT_ASSERT_NO_DIFF(formatted, formatted2);
+
+ if (mode == NSQLFormat::EFormatMode::Pretty) {
+ auto mutatedQuery = NSQLFormat::MutateQuery(c.first);
+ auto res3 = Formatter->Format(mutatedQuery, formatted, issues); // 3) the mutated input only has to format successfully; its text is not compared
+ UNIT_ASSERT_C(res3, issues.ToString());
+ }
+ }
+ }
+
+ google::protobuf::Arena Arena;
+ NSQLFormat::ISqlFormatter::TPtr Formatter;
+};
+
+}
+
+Y_UNIT_TEST_SUITE(CheckSqlFormatter) { // the test bodies come from the included header and use the TSetup defined in this file
+ #include "sql_format_ut.h"
+}
diff --git a/yql/essentials/sql/v1/format/ut/ya.make b/yql/essentials/sql/v1/format/ut/ya.make
new file mode 100644
index 00000000000..4c3ef65f965
--- /dev/null
+++ b/yql/essentials/sql/v1/format/ut/ya.make
@@ -0,0 +1,7 @@
+UNITTEST_FOR(yql/essentials/sql/v1/format)
+
+SRCS(
+ sql_format_ut.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/format/ut_antlr4/ya.make b/yql/essentials/sql/v1/format/ut_antlr4/ya.make
new file mode 100644
index 00000000000..a0f9d710a8c
--- /dev/null
+++ b/yql/essentials/sql/v1/format/ut_antlr4/ya.make
@@ -0,0 +1,7 @@
+UNITTEST_FOR(yql/essentials/sql/v1/format)
+
+SRCS(
+ sql_format_ut_antlr4.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/format/ya.make b/yql/essentials/sql/v1/format/ya.make
new file mode 100644
index 00000000000..642addcb7b1
--- /dev/null
+++ b/yql/essentials/sql/v1/format/ya.make
@@ -0,0 +1,26 @@
+LIBRARY()
+
+
+SRCS(
+ sql_format.cpp
+)
+
+RESOURCE(DONT_PARSE yql/essentials/sql/v1/SQLv1.g.in SQLv1.g.in)
+RESOURCE(DONT_PARSE yql/essentials/sql/v1/SQLv1Antlr4.g.in SQLv1Antlr4.g.in)
+
+PEERDIR(
+ yql/essentials/parser/lexer_common
+ yql/essentials/sql/settings
+ yql/essentials/sql/v1/lexer
+ yql/essentials/sql/v1/proto_parser
+ yql/essentials/core/sql_types
+ library/cpp/protobuf/util
+ library/cpp/resource
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+ ut_antlr4
+)
diff --git a/yql/essentials/sql/v1/insert.cpp b/yql/essentials/sql/v1/insert.cpp
new file mode 100644
index 00000000000..181137457f1
--- /dev/null
+++ b/yql/essentials/sql/v1/insert.cpp
@@ -0,0 +1,443 @@
+#include "source.h"
+#include "context.h"
+
+#include <yql/essentials/utils/yql_panic.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+static const TMap<ESQLWriteColumnMode, EWriteColumnMode> sqlIntoMode2WriteColumn = { // lookup table behind ToWriteColumnsMode(): SQL-level write kind -> EWriteColumnMode
+ {ESQLWriteColumnMode::InsertInto, EWriteColumnMode::Insert},
+ {ESQLWriteColumnMode::InsertOrAbortInto, EWriteColumnMode::InsertOrAbort},
+ {ESQLWriteColumnMode::InsertOrIgnoreInto, EWriteColumnMode::InsertOrIgnore},
+ {ESQLWriteColumnMode::InsertOrRevertInto, EWriteColumnMode::InsertOrRevert},
+ {ESQLWriteColumnMode::UpsertInto, EWriteColumnMode::Upsert},
+ {ESQLWriteColumnMode::ReplaceInto, EWriteColumnMode::Replace},
+ {ESQLWriteColumnMode::InsertIntoWithTruncate, EWriteColumnMode::Renew}, // the one entry whose target name is not derived from the source name
+ {ESQLWriteColumnMode::Update, EWriteColumnMode::Update},
+ {ESQLWriteColumnMode::Delete, EWriteColumnMode::Delete},
+};
+
+class TModifySourceBase: public ISource { // Shared base of the value sources in this file: stores the column list and rejects filtering, grouping and aggregation
+public:
+ TModifySourceBase(TPosition pos, const TVector<TString>& columnsHint)
+ : ISource(pos)
+ , ColumnsHint(columnsHint)
+ {
+ }
+
+ bool AddFilter(TContext& ctx, TNodePtr filter) override { // always fails, reporting the error at this source's position
+ Y_UNUSED(filter);
+ ctx.Error(Pos) << "Source does not allow filtering";
+ return false;
+ }
+
+ bool AddGroupKey(TContext& ctx, const TString& column) override { // always fails, reporting the error at this source's position
+ Y_UNUSED(column);
+ ctx.Error(Pos) << "Source does not allow grouping";
+ return false;
+ }
+
+ bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override { // always fails; here the error is reported at the aggregation's own position
+ YQL_ENSURE(aggr);
+ ctx.Error(aggr->GetPos()) << "Source does not allow aggregation";
+ return false;
+ }
+
+ TNodePtr BuildFilter(TContext& ctx, const TString& label) override { // no filter node is ever produced
+ Y_UNUSED(ctx);
+ Y_UNUSED(label);
+ return nullptr;
+ }
+
+ std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx) override { // always returns a null node paired with true
+ Y_UNUSED(label);
+ Y_UNUSED(ctx);
+ return { nullptr, true };
+ }
+
+protected:
+ TVector<TString> ColumnsHint; // column names supplied by the statement, in order; may be empty
+ TString OperationHumanName; // NOTE(review): never set in this class and hidden by the same-named private member of each derived class below — looks removable
+};
+
+class TUpdateByValues: public TModifySourceBase { // One value expression per hinted column; Build() wraps them into a lambda over "row"
+public:
+ TUpdateByValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values)
+ : TModifySourceBase(pos, columnsHint)
+ , OperationHumanName(operationHumanName)
+ , Values(values)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override { // checks the value count against the column count, then initialises each value against src
+ if (ColumnsHint.size() != Values.size()) {
+ ctx.Error(Pos) << "VALUES have " << Values.size() << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size();
+ return false;
+ }
+ for (auto& value: Values) {
+ if (!value->Init(ctx, src)) { // stops at the first value that fails to initialise
+ return false;
+ }
+ }
+ return true;
+ }
+
+ TNodePtr Build(TContext& ctx) override { // returns a lambda taking "row" whose body is AsStruct of (column, value) pairs
+ Y_UNUSED(ctx);
+ YQL_ENSURE(Values.size() == ColumnsHint.size()); // same condition DoInit() checks
+
+ auto structObj = Y("AsStruct");
+ for (size_t i = 0; i < Values.size(); ++i) {
+ TString column = ColumnsHint[i];
+ TNodePtr value = Values[i];
+
+ structObj = L(structObj, Q(Y(Q(column), value))); // append one quoted (name, value) pair
+ }
+
+ auto updateRow = BuildLambda(Pos, Y("row"), structObj);
+ return updateRow;
+ }
+
+ TNodePtr DoClone() const final {
+ return new TUpdateByValues(Pos, OperationHumanName, ColumnsHint, CloneContainer(Values)); // values are cloned via CloneContainer; the other arguments are passed as they are
+ }
+private:
+ TString OperationHumanName; // statement name used in error messages; hides the base-class member of the same name
+
+protected:
+ TVector<TNodePtr> Values;
+};
+
+class TModifyByValues: public TModifySourceBase { // Rows of VALUES for a write statement; Build() emits one AsStruct per row
+public:
+ TModifyByValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values)
+ : TModifySourceBase(pos, columnsHint)
+ , OperationHumanName(operationHumanName)
+ , Values(values)
+ {
+ FakeSource = BuildFakeSource(pos); // value expressions are initialised against this source, not the caller's (see DoInit)
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override { // validates every row and keeps going after an error so that all problems are reported
+ Y_UNUSED(src);
+ bool hasError = false;
+ for (const auto& row: Values) {
+ if (ColumnsHint.empty()) { // an explicit column list is mandatory for VALUES
+ ctx.Error(Pos) << OperationHumanName << " ... VALUES requires specification of table columns";
+ hasError = true;
+ continue;
+ }
+ if (ColumnsHint.size() != row.size()) { // each row must supply exactly one value per hinted column
+ ctx.Error(Pos) << "VALUES have " << row.size() << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size();
+ hasError = true;
+ continue;
+ }
+ for (auto& value: row) {
+ if (!value->Init(ctx, FakeSource.Get())) {
+ hasError = true;
+ continue;
+ }
+ }
+ }
+ return !hasError;
+ }
+
+ TNodePtr Build(TContext& ctx) override { // returns PersistableRepr over a quoted list with one AsStruct per row
+ Y_UNUSED(ctx);
+ auto tuple = Y();
+ for (const auto& row: Values) {
+ auto rowValues = Y("AsStruct"); // ordered struct
+ auto column = ColumnsHint.begin(); // advanced in step with the row's values; DoInit() checked that the sizes match
+ for (auto value: row) {
+ rowValues = L(rowValues, Q(Y(BuildQuotedAtom(Pos, *column), value)));
+ ++column;
+ }
+ tuple = L(tuple, rowValues);
+ }
+ return Y("PersistableRepr", Q(tuple));
+ }
+
+ TNodePtr DoClone() const final {
+ TVector<TVector<TNodePtr>> clonedValues;
+ clonedValues.reserve(Values.size());
+ for (const auto& cur: Values) { // bind by reference: CloneContainer already produces the copy, so copying the row first is wasted work
+ clonedValues.push_back(CloneContainer(cur));
+ }
+ return new TModifyByValues(Pos, OperationHumanName, ColumnsHint, clonedValues);
+ }
+
+private:
+ TString OperationHumanName; // statement name used in error messages; hides the base-class member of the same name
+ TVector<TVector<TNodePtr>> Values; // outer vector: rows; inner vector: one expression per column
+ TSourcePtr FakeSource;
+};
+
+class TModifyBySource: public TModifySourceBase { // Values for a write statement taken from another source (a SELECT, per the messages below), optionally renamed to the hinted columns
+public:
+ TModifyBySource(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, TSourcePtr source)
+ : TModifySourceBase(pos, columnsHint)
+ , OperationHumanName(operationHumanName)
+ , Source(std::move(source))
+ {}
+
+ void GetInputTables(TTableList& tableList) const override { // forwards to the wrapped source when there is one
+ if (Source) {
+ return Source->GetInputTables(tableList);
+ }
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override { // initialises the wrapped source, then checks its columns against the hinted ones
+ if (!Source->Init(ctx, src)) {
+ return false;
+ }
+ const size_t numColumns = ColumnsHint.size();
+ if (numColumns) {
+ const auto sourceColumns = Source->GetColumns();
+ if (!sourceColumns || sourceColumns->All || sourceColumns->QualifiedAll) { // column set not known here, so nothing to compare
+ return true;
+ }
+
+ if (numColumns != sourceColumns->List.size()) {
+ ctx.Error(Pos) << "SELECT have " << sourceColumns->List.size() << " columns, " << OperationHumanName << " expects: " << ColumnsHint.size(); // first number is the SELECT's width, second the hinted width
+ return false;
+ }
+
+ TStringStream str;
+ bool mismatchFound = false;
+ for (size_t i = 0; i < numColumns; ++i) {
+ bool hasName = sourceColumns->NamedColumns[i];
+ if (hasName) { // only columns flagged in NamedColumns are compared by name
+ const auto& hintColumn = ColumnsHint[i];
+ const auto& sourceColumn = sourceColumns->List[i];
+ if (hintColumn != sourceColumn) {
+ if (!mismatchFound) {
+ str << "Column names in SELECT don't match column specification in parenthesis";
+ mismatchFound = true;
+ }
+ str << ". \"" << hintColumn << "\" doesn't match \"" << sourceColumn << "\"";
+ }
+ }
+ }
+ if (mismatchFound) { // a name mismatch is a single warning listing every mismatching pair, not an error
+ ctx.Warning(Pos, TIssuesIds::YQL_SOURCE_SELECT_COLUMN_MISMATCH) << str.Str();
+ }
+ }
+ return true;
+ }
+
+ TNodePtr Build(TContext& ctx) override { // returns the source's output, renamed to the hinted columns when a hint is present
+ auto input = Source->Build(ctx);
+ if (ColumnsHint.empty()) {
+ return input;
+ }
+ auto columns = Y();
+ for (auto column: ColumnsHint) {
+ columns = L(columns, BuildQuotedAtom(Pos, column));
+ }
+ const auto sourceColumns = Source->GetColumns();
+ if (!sourceColumns || sourceColumns->All || sourceColumns->QualifiedAll || sourceColumns->HasUnnamed) {
+ // will try to resolve column mapping on type annotation stage
+ return Y("OrderedSqlRename", input, Q(columns));
+ }
+
+ YQL_ENSURE(sourceColumns->List.size() == ColumnsHint.size());
+ auto srcColumn = Source->GetColumns()->List.begin(); // advanced in step with ColumnsHint; sizes checked just above
+ auto structObj = Y("AsStruct"); // ordered struct
+ for (auto column: ColumnsHint) {
+ structObj = L(structObj, Q(Y(BuildQuotedAtom(Pos, column),
+ Y("Member", "row", BuildQuotedAtom(Pos, *srcColumn))
+ )));
+ ++srcColumn;
+ }
+ return Y("AssumeColumnOrder", Y("OrderedMap", input, BuildLambda(Pos, Y("row"), structObj)), Q(columns));
+ }
+
+ TNodePtr DoClone() const final {
+ return new TModifyBySource(Pos, OperationHumanName, ColumnsHint, Source->CloneSource());
+ }
+
+ EOrderKind GetOrderKind() const final { // ordering is whatever the wrapped source reports
+ return Source->GetOrderKind();
+ }
+
+private:
+ TString OperationHumanName; // statement name used in error messages; hides the base-class member of the same name
+ TSourcePtr Source;
+};
+
+TSourcePtr BuildWriteValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values) { // overload for rows of value expressions
+ return new TModifyByValues(pos, operationHumanName, columnsHint, values);
+}
+
+TSourcePtr BuildWriteValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, TSourcePtr source) { // overload for values produced by another source
+ return new TModifyBySource(pos, operationHumanName, columnsHint, std::move(source));
+}
+
+TSourcePtr BuildUpdateValues(TPosition pos, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values) { // single row of values; the operation name is fixed to "UPDATE"
+ return new TUpdateByValues(pos, "UPDATE", columnsHint, values);
+}
+
+class TWriteColumnsNode: public TAstListNode { // Node created by BuildWriteColumns / BuildUpdateColumns / BuildDelete; DoInit() assembles a "block" that binds "values", writes the table and returns "world"
+public:
+ TWriteColumnsNode(TPosition pos, TScopedStatePtr scoped,
+ const TTableRef& table, EWriteColumnMode mode, TSourcePtr values = nullptr, TNodePtr options = nullptr)
+ : TAstListNode(pos)
+ , Scoped(scoped)
+ , Table(table)
+ , Mode(mode)
+ , Values(std::move(values))
+ , Options(std::move(options))
+ {
+ FakeSource = BuildFakeSource(pos); // the write node is initialised against this source (see DoInit)
+ }
+
+ void ResetSource(TSourcePtr source) { // sets the source whose filters become the "filter" option
+ TableSource = std::move(source);
+ }
+
+ void ResetUpdate(TSourcePtr update) { // sets the source whose Build() result becomes the "update" option
+ Update = std::move(update);
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override { // returns false as soon as any sub-node fails to initialise
+ TTableList tableList;
+ TNodePtr values;
+ auto options = Y();
+ if (Options) {
+ if (!Options->Init(ctx, src)) {
+ return false;
+ }
+ options = L(Options); // start from the caller-supplied options instead of the empty list
+ }
+
+ ISource* underlyingSrc = src;
+
+ if (TableSource) {
+ if (!TableSource->Init(ctx, src) || !TableSource->InitFilters(ctx)) {
+ return false;
+ }
+ options = L(options, Q(Y(Q("filter"), TableSource->BuildFilterLambda())));
+ }
+
+ bool unordered = false;
+ if (Values) {
+ if (!Values->Init(ctx, TableSource.Get())) { // TableSource is null unless ResetSource() was called
+ return false;
+ }
+
+ Values->GetInputTables(tableList);
+ underlyingSrc = Values.Get(); // the input-tables node below is initialised against the values source
+ values = Values->Build(ctx);
+ if (!values) {
+ return false;
+ }
+ unordered = (EOrderKind::None == Values->GetOrderKind());
+ }
+
+ TNodePtr node(BuildInputTables(Pos, tableList, false, Scoped));
+ if (!node->Init(ctx, underlyingSrc)) {
+ return false;
+ }
+
+ if (Update) {
+ if (!Update->Init(ctx, TableSource.Get()) || !Update->InitFilters(ctx)) {
+ return false;
+ }
+ options = L(options, Q(Y(Q("update"), Update->Build(ctx))));
+ }
+
+ auto write = BuildWriteTable(Pos, "values", Table, Mode, std::move(options), Scoped); // "values" is the name bound by the let statements below
+ if (!write->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+ if (values) {
+ node = L(node, Y("let", "values", values));
+ if (unordered && ctx.UseUnordered(Table)) { // wrap in Unordered only when the source reports no ordering and the context allows it for this table
+ node = L(node, Y("let", "values", Y("Unordered", "values")));
+ }
+ } else {
+ node = L(node, Y("let", "values", Y("Void"))); // no values source (BuildUpdateColumns and BuildDelete pass nullptr)
+ }
+ node = L(node, Y("let", "world", write));
+ node = L(node, Y("return", "world"));
+
+ Add("block", Q(node));
+ return true;
+ }
+
+ TNodePtr DoClone() const final { // NOTE(review): returns an empty pointer rather than a copy of this node
+ return {};
+ }
+
+protected:
+ TScopedStatePtr Scoped;
+ TTableRef Table;
+ TSourcePtr TableSource; // set only via ResetSource()
+ EWriteColumnMode Mode;
+ TSourcePtr Values; // may be null
+ TSourcePtr Update; // set only via ResetUpdate()
+ TSourcePtr FakeSource;
+ TNodePtr Options; // may be null
+};
+
+EWriteColumnMode ToWriteColumnsMode(ESQLWriteColumnMode sqlWriteColumnMode) { // table lookup in sqlIntoMode2WriteColumn
+ return sqlIntoMode2WriteColumn.at(sqlWriteColumnMode); // at() rather than operator[]: presumably a value missing from the table throws, as with std::map — confirm for TMap
+}
+
+TNodePtr BuildWriteColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, EWriteColumnMode mode, TSourcePtr values, TNodePtr options) { // write node for a caller-chosen mode; a values source is required
+ YQL_ENSURE(values, "Invalid values node");
+ return new TWriteColumnsNode(pos, scoped, table, mode, std::move(values), std::move(options));
+}
+
+TNodePtr BuildUpdateColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr values, TSourcePtr source, TNodePtr options) { // write node in Update mode: `source` supplies the filter, `values` the update expressions
+ YQL_ENSURE(values, "Invalid values node");
+ TIntrusivePtr<TWriteColumnsNode> writeNode = new TWriteColumnsNode(pos, scoped, table, EWriteColumnMode::Update, nullptr, std::move(options)); // options is a by-value sink: move it, as BuildWriteColumns does
+ writeNode->ResetSource(std::move(source));
+ writeNode->ResetUpdate(std::move(values)); // the node's own values source stays null; the values go in as the update source
+ return writeNode;
+}
+
+TNodePtr BuildDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options) { // write node in Delete mode: no values, `source` supplies the filter
+ TIntrusivePtr<TWriteColumnsNode> writeNode = new TWriteColumnsNode(pos, scoped, table, EWriteColumnMode::Delete, nullptr, std::move(options)); // options is a by-value sink: move it, as BuildWriteColumns does
+ writeNode->ResetSource(std::move(source));
+ return writeNode;
+}
+
+
+class TEraseColumnsNode: public TAstListNode { // List node that, once initialised, holds a single quoted ("erase_columns", <column names>) pair
+public:
+ TEraseColumnsNode(TPosition pos, const TVector<TString>& columns)
+ : TAstListNode(pos)
+ , Columns(columns)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override { // neither argument is consulted; always succeeds
+ Y_UNUSED(src);
+ Y_UNUSED(ctx);
+
+ auto quotedNames = Y(); // empty list that receives one quoted atom per column
+ for (const TString& name : Columns) {
+ quotedNames->Add(Q(name));
+ }
+
+ Add(Q(Y(Q("erase_columns"), Q(quotedNames))));
+
+ return true;
+ }
+
+ TNodePtr DoClone() const final { // a fresh, uninitialised node with the same position and column names
+ return new TEraseColumnsNode(GetPos(), Columns);
+ }
+
+private:
+ TVector<TString> Columns; // names of the columns to erase, in the order given to the constructor
+};
+
+
+TNodePtr BuildEraseColumns(TPosition pos, const TVector<TString>& columns) { // factory for the file-local TEraseColumnsNode
+ return new TEraseColumnsNode(pos, columns);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/join.cpp b/yql/essentials/sql/v1/join.cpp
new file mode 100644
index 00000000000..de789569c76
--- /dev/null
+++ b/yql/essentials/sql/v1/join.cpp
@@ -0,0 +1,670 @@
+#include "source.h"
+#include "context.h"
+
+#include <yql/essentials/utils/yql_panic.h>
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/generic/hash_set.h>
+#include <util/string/cast.h>
+#include <util/string/split.h>
+#include <util/string/join.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+// Splits `joinOp` on spaces, applies to_title() to every word and concatenates the words without a separator.
+TString NormalizeJoinOp(const TString& joinOp) {
+ TVector<TString> words;
+ Split(joinOp, " ", words);
+ for (size_t i = 0; i < words.size(); ++i) {
+ words[i].to_title();
+ }
+
+ return JoinSeq("", words);
+}
+
+struct TJoinDescr { // Description of one join link: operator, link settings and the list of key pairs.
+ TString Op; // join operator name, set from the constructor argument
+ TJoinLinkSettings LinkSettings;
+
+ struct TFullColumn { // One side of a key pair.
+ ui32 Source; // index of the join input (position in TJoinBase::Sources)
+ TNodePtr Column; // key expression; null when the link has no ON predicate (see InitKeysOrFilters)
+ };
+
+ TVector<std::pair<TFullColumn, TFullColumn>> Keys; // (left, right) pairs appended by InitKeysOrFilters
+
+ explicit TJoinDescr(const TString& op)
+ : Op(op)
+ {}
+};
+
+class TJoinBase: public IJoin {
+public:
+ TJoinBase(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags) // Takes ownership of the join inputs and their per-input flags.
+ : IJoin(pos)
+ , Sources(std::move(sources))
+ , AnyFlags(std::move(anyFlags))
+ {
+ YQL_ENSURE(Sources.size() == AnyFlags.size()); // exactly one flag per input
+ }
+
+ // Forwards the request to every join input.
+ void AllColumns() override {
+ for (size_t i = 0; i < Sources.size(); ++i) {
+ Sources[i]->AllColumns();
+ }
+ }
+
+ TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { // Result: true = artificial, asterisk or bound to a labelled input; false = accepted by some input without a label match; empty = error.
+ ISource* srcByName = nullptr;
+ if (column.IsArtificial()) {
+ return true;
+ }
+ if (const auto sourceName = *column.GetSourceName()) { // column carries a qualifier: look for the input with that label
+ for (auto& source: Sources) {
+ if (sourceName == source->GetLabel()) {
+ srcByName = source.Get();
+ break;
+ }
+ }
+ if (!srcByName) { // the qualifier is not a known input label
+ if (column.IsAsterisk()) {
+ ctx.Error(column.GetPos()) << "Unknown correlation name for asterisk: " << sourceName;
+ return {};
+ }
+ // TODO: add a warning; this is either a mistyped correlation name or a column reference
+ column.ResetColumn("", sourceName);
+ column.SetUseSourceAsColumn();
+ column.SetAsNotReliable();
+ }
+ }
+
+ if (column.IsAsterisk()) {
+ if (!column.GetCountHint()) { // inputs are asked for all columns only when no count hint is set
+ if (srcByName) {
+ srcByName->AllColumns();
+ } else {
+ for (auto& source: Sources) {
+ source->AllColumns();
+ }
+ }
+ }
+ return true;
+ }
+ if (srcByName) {
+ column.ResetAsReliable();
+ if (!srcByName->AddColumn(ctx, column)) {
+ return {};
+ }
+ if (!KeysInitializing && !column.IsAsterisk()) { // IsAsterisk() is always false here: asterisks returned above
+ column.SetUseSource();
+ }
+ return true;
+ } else {
+ unsigned acceptedColumns = 0;
+ TIntrusivePtr<TColumnNode> tryColumn = static_cast<TColumnNode*>(column.Clone().Get()); // probe the inputs with a clone so `column` itself stays untouched
+ tryColumn->SetAsNotReliable();
+ TString lastAcceptedColumnSource; // written below but not read anywhere in this method
+ for (auto& source: Sources) {
+ if (source->AddColumn(ctx, *tryColumn)) {
+ ++acceptedColumns;
+ lastAcceptedColumnSource = source->GetLabel();
+ }
+ }
+ if (!acceptedColumns) {
+ TStringBuilder sb;
+ const auto& fullColumnName = FullColumnName(column);
+ sb << "Column " << fullColumnName << " is not fit to any source";
+ for (auto& source: Sources) {
+ if (const auto mistype = source->FindColumnMistype(fullColumnName)) { // only the first suggestion found is reported
+ sb << ". Did you mean " << mistype.GetRef() << "?";
+ break;
+ }
+ }
+ ctx.Error(column.GetPos()) << sb;
+ return {};
+ } else {
+ column.SetAsNotReliable();
+ }
+ return false;
+ }
+ }
+
+ // Gives access to the merged column set; only legal once FinishColumns() has completed.
+ const TColumns* GetColumns() const override {
+ YQL_ENSURE(IsColumnDone, "Unable to GetColumns while it's not finished");
+ const TColumns* joined = &JoinedColumns;
+ return joined;
+ }
+
+ // Collects the input tables of every join input first, then lets the base class add its own.
+ void GetInputTables(TTableList& tableList) const override {
+ for (size_t i = 0; i < Sources.size(); ++i) {
+ Sources[i]->GetInputTables(tableList);
+ }
+ ISource::GetInputTables(tableList);
+ }
+
+ TNodePtr BuildJoinKeys(TContext& ctx, const TVector<TDeferredAtom>& names) override { // Builds `what.name == with.name` for every name, chained with And; returns nullptr after reporting an error.
+ const size_t n = JoinOps.size(); // Sources[n] and Sources[n + 1] are used as the two inputs; NOTE(review): no bounds check on n + 1 here
+ TString what(Sources[n]->GetLabel());
+ static const TSet<TString> noRightSourceJoinOps = {"LeftOnly", "LeftSemi"};
+ for (size_t nn = n; nn > 0 && noRightSourceJoinOps.contains(JoinOps[nn-1]); --nn) { // step back over preceding LeftOnly/LeftSemi links to pick the left label
+ what = Sources[nn-1]->GetLabel();
+ }
+ const TString with(Sources[n + 1]->GetLabel());
+
+ for (auto index = n; index <= n + 1; ++index) {
+ const auto& label = Sources[index]->GetLabel();
+ if (label.Contains('.')) {
+ ctx.Error(Sources[index]->GetPos()) << "Invalid label: " << label << ", unable to use name with dot symbol, you should use AS <simple alias name>";
+ return nullptr;
+ }
+ }
+ if (what.empty() && with.empty()) {
+ ctx.Error() << "At least one correlation name is required in join";
+ return nullptr;
+ }
+ if (what == with) {
+ ctx.Error() << "Self joins are not supporting ON syntax";
+ return nullptr;
+ }
+ TPosition pos(ctx.Pos()); // the operators use the context position, the column nodes use this node's Pos
+ TNodePtr expr;
+ for (auto& name: names) {
+ auto lhs = BuildColumn(Pos, name, what);
+ auto rhs = BuildColumn(Pos, name, with);
+ if (!lhs || !rhs) {
+ return nullptr;
+ }
+ TNodePtr eq(BuildBinaryOp(ctx, pos, "==", lhs, rhs));
+ if (expr) {
+ expr = BuildBinaryOp(ctx, pos, "And", expr, eq);
+ } else {
+ expr = eq;
+ }
+ }
+ if (expr && Sources.size() > 2) { // checked only after the expression is built; an empty `names` list skips this error
+ ctx.Error() << "Multi-way JOINs should be connected with ON clause instead of USING clause";
+ return nullptr;
+ }
+ return expr;
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ // Records one join link; the three vectors are kept index-aligned (DoInit verifies their sizes match).
+ void SetupJoin(const TString& opName, TNodePtr expr, const TJoinLinkSettings& linkSettings) override {
+ JoinOps.emplace_back(opName);
+ JoinExprs.emplace_back(expr);
+ JoinLinkSettings.emplace_back(linkSettings);
+ }
+
+ // The join counts as a stream as soon as one of its inputs does.
+ bool IsStream() const override {
+ for (const TSourcePtr& input : Sources) {
+ if (input->IsStream()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+protected:
+ // Returns "<source>.<column>" (via DotJoin) when the column has a non-empty source name, otherwise the bare column name.
+ static TString FullColumnName(const TColumnNode& column) {
+ auto qualifier = *column.GetSourceName();
+ auto name = *column.GetColumnName();
+ if (qualifier) {
+ return DotJoin(qualifier, name);
+ }
+ return name;
+ }
+
+ // Processes one leaf of a JOIN condition for the join link number `joinIdx`.
+ // `expr` is either a single `==` predicate (one conjunct of the ON expression) or null when the link has no
+ // condition. The method validates correlation names, decides which predicate argument belongs to the left
+ // and which to the right input, initializes the arguments and appends the resulting key pair to the
+ // TJoinDescr of this link (creating the descriptor on first use).
+ // Returns false after reporting an error to `ctx`.
+ bool InitKeysOrFilters(TContext& ctx, ui32 joinIdx, TNodePtr expr) {
+ const TString joinOp(JoinOps[joinIdx]);
+ const TJoinLinkSettings linkSettings(JoinLinkSettings[joinIdx]);
+ const TCallNode* op = nullptr;
+ if (expr) {
+ const TString opName(expr->GetOpName());
+ if (opName != "==") {
+ ctx.Error(expr->GetPos()) << "JOIN ON expression must be a conjunction of equality predicates";
+ return false;
+ }
+
+ op = expr->GetCallNode();
+ YQL_ENSURE(op, "Invalid JOIN equal operation node");
+ YQL_ENSURE(op->GetArgs().size() == 2, "Invalid JOIN equal operation arguments");
+ }
+
+ // Map every input label to its position; duplicates collapse, which is detected right after the loop.
+ ui32 idx = 0;
+ THashMap<TString, ui32> sources;
+ for (auto& source: Sources) {
+ auto label = source->GetLabel();
+ if (!label) {
+ ctx.Error(source->GetPos()) << "JOIN: missing correlation name for source";
+ return false;
+ }
+ sources.insert({ source->GetLabel(), idx });
+ ++idx;
+ }
+ if (sources.size() != Sources.size()) {
+ ctx.Error(expr ? expr->GetPos() : Pos) << "JOIN: all correlation names must be different";
+ return false;
+ }
+
+ ui32 pos = 0;
+ ui32 leftArg = 0;
+ ui32 rightArg = 0;
+ ui32 leftSourceIdx = 0;
+ ui32 rightSourceIdx = 0;
+ const TString* leftSource = nullptr;
+ const TString* rightSource = nullptr;
+ const TString* sameColumnNamePtr = nullptr;
+ TSet<TString> joinedSources;
+ if (op) {
+ // Classify both arguments of the equality: the input at joinIdx + 1 is the right side,
+ // any earlier input is the left side, later inputs are not visible yet.
+ const TString* columnNamePtr = nullptr;
+ for (auto& arg : op->GetArgs()) {
+ const auto sourceNamePtr = arg->GetSourceName();
+ if (!sourceNamePtr) {
+ ctx.Error(expr->GetPos()) << "JOIN: each equality predicate argument must depend on exactly one JOIN input";
+ return false;
+ }
+ const auto sourceName = *sourceNamePtr;
+ if (sourceName.empty()) {
+ ctx.Error(expr->GetPos()) << "JOIN: column requires correlation name";
+ return false;
+ }
+ auto it = sources.find(sourceName);
+ if (it != sources.end()) {
+ joinedSources.insert(sourceName);
+ if (it->second == joinIdx + 1) {
+ rightArg = pos;
+ rightSource = sourceNamePtr;
+ rightSourceIdx = it->second;
+ }
+ else if (it->second > joinIdx + 1) {
+ ctx.Error(expr->GetPos()) << "JOIN: can not use source: " << sourceName << " in equality predicate, it is out of current join scope";
+ return false;
+ }
+ else {
+ leftArg = pos;
+ leftSource = sourceNamePtr;
+ leftSourceIdx = it->second;
+ }
+ }
+ else {
+ ctx.Error(expr->GetPos()) << "JOIN: unknown corellation name: " << sourceName;
+ return false;
+ }
+ // Remember when both arguments refer to a column with the same name.
+ if (!columnNamePtr) {
+ columnNamePtr = arg->GetColumnName();
+ } else {
+ auto curColumnNamePtr = arg->GetColumnName();
+ if (curColumnNamePtr && *curColumnNamePtr == *columnNamePtr) {
+ sameColumnNamePtr = columnNamePtr;
+ }
+ }
+ ++pos;
+ }
+ } else {
+ // No predicate: the link connects the inputs at joinIdx and joinIdx + 1.
+ // Iterating by const reference keeps the label map read-only; the previous code used `=`
+ // instead of `==` here, which overwrote the stored indices and matched every remaining input.
+ for (const auto& x : sources) {
+ if (x.second == joinIdx) {
+ leftArg = pos;
+ leftSourceIdx = x.second;
+ joinedSources.insert(x.first);
+ }
+ else if (x.second == joinIdx + 1) {
+ rightArg = pos;
+ rightSourceIdx = x.second;
+ joinedSources.insert(x.first);
+ }
+ }
+ }
+
+ if (joinedSources.size() == 1) {
+ ctx.Error(expr ? expr->GetPos() : Pos) << "JOIN: different correlation names are required for joined tables";
+ return false;
+ }
+
+ if (op) {
+ if (joinedSources.size() != 2) {
+ ctx.Error(expr->GetPos()) << "JOIN ON expression must be a conjunction of equality predicates over at most two sources";
+ return false;
+ }
+ if (!rightSource) {
+ ctx.Error(expr->GetPos()) << "JOIN ON equality predicate must have one of its arguments from the rightmost source";
+ return false;
+ }
+ }
+
+ // AddColumn consults KeysInitializing, so the flag brackets the initialization of the key arguments.
+ KeysInitializing = true;
+ if (op) {
+ for (auto& arg : op->GetArgs()) {
+ if (!arg->Init(ctx, this)) {
+ return false;
+ }
+ }
+
+ Y_DEBUG_ABORT_UNLESS(leftSource);
+ if (sameColumnNamePtr) {
+ SameKeyMap[*sameColumnNamePtr].insert(*leftSource);
+ SameKeyMap[*sameColumnNamePtr].insert(*rightSource);
+ }
+ }
+
+ // The first leaf seen for a link creates its descriptor; later leaves append to the last one.
+ if (joinIdx == JoinDescrs.size()) {
+ TJoinDescr newDescr(joinOp);
+ newDescr.LinkSettings = linkSettings;
+ JoinDescrs.push_back(std::move(newDescr));
+ }
+
+ JoinDescrs.back().Keys.push_back({ { leftSourceIdx, op ? op->GetArgs()[leftArg] : nullptr},
+ { rightSourceIdx, op ? op->GetArgs()[rightArg] : nullptr } });
+ KeysInitializing = false;
+ return true;
+ }
+
+ bool IsJoinKeysInitializing() const override { // true only while InitKeysOrFilters is initializing key arguments
+ return KeysInitializing;
+ }
+
+protected:
+ TVector<TString> JoinOps; // operator name per join link, appended by SetupJoin
+ TVector<TNodePtr> JoinExprs; // condition per join link (may be null), appended by SetupJoin
+ TVector<TJoinLinkSettings> JoinLinkSettings; // settings per join link, appended by SetupJoin
+ TVector<TJoinDescr> JoinDescrs; // filled by InitKeysOrFilters
+ THashMap<TString, THashSet<TString>> SameKeyMap; // column name -> labels of inputs joined on an identically named key column
+ const TVector<TSourcePtr> Sources; // join inputs in declaration order
+ const TVector<bool> AnyFlags; // one flag per input (size checked in the constructor)
+ TColumns JoinedColumns; // filled by FinishColumns
+ bool KeysInitializing = false; // set while InitKeysOrFilters initializes key arguments
+ bool IsColumnDone = false; // set once FinishColumns has run
+
+ void FinishColumns() override { // Computes JoinedColumns once: merges the label-prefixed columns of every input that the join operators keep.
+ if (IsColumnDone) {
+ return;
+ }
+ YQL_ENSURE(JoinOps.size()+1 == Sources.size());
+ bool excludeNextSource = false;
+ decltype(JoinOps)::const_iterator opIter = JoinOps.begin(); // operator whose left input is the current source
+ for (auto& src: Sources) {
+ if (excludeNextSource) { // right input of a LeftSemi/LeftOnly link: skipped
+ excludeNextSource = false;
+ if (opIter != JoinOps.end()) {
+ ++opIter;
+ }
+ continue;
+ }
+ if (opIter != JoinOps.end()) {
+ auto joinOper = *opIter;
+ ++opIter;
+ if (joinOper == "LeftSemi" || joinOper == "LeftOnly") {
+ excludeNextSource = true;
+ }
+ if (joinOper == "RightSemi" || joinOper == "RightOnly") { // the current (left) input is skipped
+ continue;
+ }
+ }
+ auto columnsPtr = src->GetColumns();
+ if (!columnsPtr) {
+ continue;
+ }
+ TColumns upColumns;
+ upColumns.Merge(*columnsPtr);
+ upColumns.SetPrefix(src->GetLabel());
+ JoinedColumns.Merge(upColumns);
+ }
+ IsColumnDone = true;
+ }
+};
+
+bool TJoinBase::DoInit(TContext& ctx, ISource* initSrc) { // Initializes the inputs, validates the join operators and turns every join condition into key pairs.
+ for (auto& source: Sources) {
+ if (!source->Init(ctx, initSrc)) {
+ return false;
+ }
+
+ auto src = source.Get();
+ if (src->IsFlattenByExprs()) { // flatten-by expressions are initialized against their own input
+ for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) {
+ if (!expr->Init(ctx, src)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ YQL_ENSURE(JoinOps.size() == JoinExprs.size(), "Invalid join exprs number");
+ YQL_ENSURE(JoinOps.size() == JoinLinkSettings.size());
+
+ const TSet<TString> allowedJoinOps = {"Inner", "Left", "Right", "Full", "LeftOnly", "RightOnly", "Exclusion", "LeftSemi", "RightSemi", "Cross"};
+ for (auto& opName: JoinOps) {
+ if (!allowedJoinOps.contains(opName)) {
+ ctx.Error(Pos) << "Invalid join op: " << opName;
+ return false;
+ }
+ }
+
+ ui32 idx = 0; // index of the join link being processed
+ for (auto expr: JoinExprs) {
+ if (expr) {
+ TDeque<TNodePtr> conjQueue; // work list used to flatten nested And nodes
+ conjQueue.push_back(expr);
+ while (!conjQueue.empty()) {
+ TNodePtr cur = conjQueue.front();
+ conjQueue.pop_front();
+ if (cur->GetOpName() == "And") {
+ auto conj = cur->GetCallNode();
+ YQL_ENSURE(conj, "Invalid And operation node");
+ conjQueue.insert(conjQueue.begin(), conj->GetArgs().begin(), conj->GetArgs().end()); // operands go to the front, so leaves are visited left to right
+ } else if (!InitKeysOrFilters(ctx, idx, cur)) {
+ return false;
+ }
+ }
+ } else {
+ if (!InitKeysOrFilters(ctx, idx, nullptr)) { // link without a condition
+ return false;
+ }
+ }
+ ++idx;
+ }
+
+ TSet<ui32> joinedSources; // indices of inputs that appear in at least one key pair
+ for (auto& descr: JoinDescrs) {
+ for (auto& key : descr.Keys) {
+ joinedSources.insert(key.first.Source);
+ joinedSources.insert(key.second.Source);
+ }
+ }
+ for (idx = 0; idx < Sources.size(); ++idx) {
+ if (!joinedSources.contains(idx)) {
+ ctx.Error(Sources[idx]->GetPos()) << "Source: " << Sources[idx]->GetLabel() << " was not used in join expressions";
+ return false;
+ }
+ }
+
+ return ISource::DoInit(ctx, initSrc);
+}
+
+class TEquiJoin: public TJoinBase {
+public:
+ TEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes) // strictJoinKeyTypes makes Build emit the "strict_keys" option
+ : TJoinBase(pos, std::move(sources), std::move(anyFlags))
+ , StrictJoinKeyTypes(strictJoinKeyTypes)
+ {
+ }
+
+ TNodePtr Build(TContext& ctx) override { // Emits the EquiJoin call: one (input, label) pair per source, the join tree and the options; nullptr on failure.
+ TMap<std::pair<TString, TString>, TNodePtr> extraColumns; // (label, generated name) -> key expression that is not a plain column
+ TNodePtr joinTree;
+ for (auto& descr: JoinDescrs) { // each descriptor wraps the tree built so far as its left branch
+ auto leftBranch = joinTree;
+ bool leftAny = false;
+ if (!leftBranch) { // first link: the left branch is the label of its left input
+ leftBranch = BuildQuotedAtom(Pos, Sources[descr.Keys[0].first.Source]->GetLabel());
+ leftAny = AnyFlags[descr.Keys[0].first.Source];
+ }
+ bool rightAny = AnyFlags[descr.Keys[0].second.Source];
+ auto leftKeys = GetColumnNames(ctx, extraColumns, descr.Keys, true);
+ auto rightKeys = GetColumnNames(ctx, extraColumns, descr.Keys, false);
+ if (!leftKeys || !rightKeys) {
+ return nullptr;
+ }
+
+ TNodePtr linkOptions = Y();
+ if (TJoinLinkSettings::EStrategy::SortedMerge == descr.LinkSettings.Strategy) {
+ linkOptions = L(linkOptions, Q(Y(Q("forceSortedMerge"))));
+ } else if (TJoinLinkSettings::EStrategy::StreamLookup == descr.LinkSettings.Strategy) {
+ linkOptions = L(linkOptions, Q(Y(Q("forceStreamLookup"))));
+ } else if (TJoinLinkSettings::EStrategy::ForceMap == descr.LinkSettings.Strategy) {
+ linkOptions = L(linkOptions, Q(Y(Q("join_algo"), Q("MapJoin"))));
+ } else if (TJoinLinkSettings::EStrategy::ForceGrace == descr.LinkSettings.Strategy) {
+ linkOptions = L(linkOptions, Q(Y(Q("join_algo"), Q("GraceJoin"))));
+ }
+ if (leftAny) {
+ linkOptions = L(linkOptions, Q(Y(Q("left"), Q("any"))));
+ }
+ if (rightAny) {
+ linkOptions = L(linkOptions, Q(Y(Q("right"), Q("any"))));
+ }
+
+ if (descr.LinkSettings.Compact) {
+ linkOptions = L(linkOptions, Q(Y(Q("compact"))));
+ }
+
+ joinTree = Q(Y(
+ Q(descr.Op),
+ leftBranch,
+ BuildQuotedAtom(Pos, Sources[descr.Keys[0].second.Source]->GetLabel()),
+ leftKeys,
+ rightKeys,
+ Q(linkOptions)
+ ));
+ }
+
+ TNodePtr equiJoin(Y("EquiJoin"));
+ bool ordered = false;
+ for (size_t i = 0; i < Sources.size(); ++i) {
+ auto& source = Sources[i];
+ auto sourceNode = source->Build(ctx);
+ if (!sourceNode) {
+ return nullptr;
+ }
+ const bool useOrderedForSource = ctx.UseUnordered(*source); // NOTE(review): an "ordered" flag is taken from UseUnordered(); confirm the intended polarity
+ ordered = ordered || useOrderedForSource;
+ if (source->IsFlattenByColumns() || source->IsFlattenColumns()) {
+ auto flatten = source->IsFlattenByColumns() ?
+ source->BuildFlattenByColumns("row") :
+ source->BuildFlattenColumns("row");
+
+ if (!flatten) {
+ return nullptr;
+ }
+ auto block = Y(Y("let", "flatten", sourceNode));
+
+ if (source->IsFlattenByExprs()) {
+ auto premap = source->BuildPreFlattenMap(ctx);
+ if (!premap) {
+ return nullptr;
+ }
+
+ block = L(block, Y("let", "flatten", Y(useOrderedForSource ? "OrderedFlatMap" : "FlatMap", "flatten", BuildLambda(Pos, Y("row"), premap))));
+ }
+
+ block = L(block, Y("let", "flatten", Y(useOrderedForSource ? "OrderedFlatMap" : "FlatMap", "flatten", BuildLambda(Pos, Y("row"), flatten, "res"))));
+ sourceNode = Y("block", Q(L(block, Y("return", "flatten"))));
+ }
+ TNodePtr extraMembers;
+ for (auto it = extraColumns.lower_bound({ source->GetLabel(), "" }); it != extraColumns.end(); ++it) { // entries for this label are contiguous in the ordered map
+ if (it->first.first != source->GetLabel()) {
+ break;
+ }
+ if (!extraMembers) {
+ extraMembers = Y();
+ }
+ extraMembers = L(
+ extraMembers,
+ Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(it->second->GetPos(), it->first.second), it->second))
+ );
+ }
+ if (extraMembers) { // add the generated key columns to every row of this input
+ sourceNode = Y(useOrderedForSource ? "OrderedMap" : "Map", sourceNode, BuildLambda(Pos, Y("row"), extraMembers, "row"));
+ }
+ sourceNode = Y("RemoveSystemMembers", sourceNode);
+ equiJoin = L(equiJoin, Q(Y(sourceNode, BuildQuotedAtom(source->GetPos(), source->GetLabel()))));
+ }
+ TNodePtr removeMembers;
+ for(auto it: extraColumns) { // NOTE(review): iterates by value, copying each map entry
+ if (!removeMembers) {
+ removeMembers = Y();
+ }
+ removeMembers = L(
+ removeMembers,
+ Y("let", "row", Y("ForceRemoveMember", "row", BuildQuotedAtom(Pos, DotJoin(it.first.first, it.first.second))))
+ );
+ }
+ auto options = Y();
+ if (StrictJoinKeyTypes) {
+ options = L(options, Q(Y(Q("strict_keys"))));
+ }
+ equiJoin = L(equiJoin, joinTree, Q(options));
+ if (removeMembers) { // strip the generated key columns from the join result
+ equiJoin = Y(ordered ? "OrderedMap" : "Map", equiJoin, BuildLambda(Pos, Y("row"), removeMembers, "row"));
+ }
+ return equiJoin;
+ }
+
+ const THashMap<TString, THashSet<TString>>& GetSameKeysMap() const override { // exposes the map filled by InitKeysOrFilters
+ return SameKeyMap;
+ }
+
+ // Labels of all join inputs, in input order; every input is required to have a non-empty label.
+ TVector<TString> GetJoinLabels() const override {
+ TVector<TString> result;
+ for (size_t i = 0; i < Sources.size(); ++i) {
+ const auto name = Sources[i]->GetLabel();
+ YQL_ENSURE(name);
+ result.push_back(name);
+ }
+ return result;
+ }
+
+ // Clones the inputs and the recorded join links; the clone carries no JoinDescrs, SameKeyMap or JoinedColumns.
+ TPtr DoClone() const final {
+ TVector<TSourcePtr> inputsCopy;
+ for (const auto& input : Sources) {
+ inputsCopy.push_back(input->CloneSource());
+ }
+ auto clone = MakeIntrusive<TEquiJoin>(Pos, std::move(inputsCopy), TVector<bool>(AnyFlags), StrictJoinKeyTypes);
+ clone->JoinLinkSettings = JoinLinkSettings;
+ clone->JoinExprs = CloneContainer(JoinExprs);
+ clone->JoinOps = JoinOps;
+ return clone;
+ }
+
+private:
+ // Produces the quoted flat list (label, column, label, column, ...) for one side of the key pairs.
+ // Key expressions without a column name get a generated "_equijoin_column_<N>" name and are
+ // registered in `extraColumns` under (label, generated name). Pairs without a key expression are skipped.
+ TNodePtr GetColumnNames(
+ TContext& ctx,
+ TMap<std::pair<TString, TString>, TNodePtr>& extraColumns,
+ const TVector<std::pair<TJoinDescr::TFullColumn, TJoinDescr::TFullColumn>>& keys,
+ bool left
+ ) {
+ Y_UNUSED(ctx);
+ auto flatList = Y();
+ for (const auto& keyPair : keys) {
+ const auto& side = left ? keyPair.first : keyPair.second;
+ auto label = Sources[side.Source]->GetLabel();
+ auto keyExpr = side.Column;
+ if (!keyExpr) {
+ continue;
+ }
+
+ TString keyName;
+ if (!keyExpr->GetColumnName()) {
+ TStringStream generated;
+ generated << "_equijoin_column_" << extraColumns.size();
+ keyName = generated.Str();
+ extraColumns.insert({ std::make_pair(label, keyName), keyExpr });
+ } else {
+ keyName = *keyExpr->GetColumnName();
+ }
+
+ flatList = L(flatList, BuildQuotedAtom(Pos, label));
+ flatList = L(flatList, BuildQuotedAtom(Pos, keyName));
+ }
+
+ return Q(flatList);
+ }
+
+ const bool StrictJoinKeyTypes;
+};
+
+// Factory for TEquiJoin; takes ownership of the inputs and their flags.
+TSourcePtr BuildEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes) {
+ TSourcePtr join = new TEquiJoin(pos, std::move(sources), std::move(anyFlags), strictJoinKeyTypes);
+ return join;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/lexer.cpp b/yql/essentials/sql/v1/lexer/lexer.cpp
new file mode 100644
index 00000000000..b6d2362f21f
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/lexer.cpp
@@ -0,0 +1,77 @@
+#include "lexer.h"
+
+#include <yql/essentials/public/issue/yql_issue.h>
+#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
+#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h>
+#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h>
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+
+#if defined(_tsan_enabled_)
+#include <util/system/mutex.h>
+#endif
+
+namespace NALPDefault {
+extern ANTLR_UINT8 *SQLv1ParserTokenNames[];
+}
+
+namespace NALPAnsi {
+extern ANTLR_UINT8 *SQLv1ParserTokenNames[];
+}
+
+
+namespace NSQLTranslationV1 {
+
+namespace {
+
+#if defined(_tsan_enabled_)
+TMutex SanitizerSQLTranslationMutex;
+#endif
+
+using NSQLTranslation::ILexer;
+
+// ILexer implementation that picks one of four generated lexers:
+// {default, ANSI} grammar x {ANTLR3, ANTLR4} runtime.
+class TV1Lexer : public ILexer {
+public:
+ explicit TV1Lexer(bool ansi, bool antlr4)
+ : Ansi(ansi), Antlr4(antlr4)
+ {
+ }
+
+ // Tokenizes `query`, passing tokens to `onNextToken`. Issues found during this call are appended to `issues`.
+ // Returns false when at least one of the new issues has error severity.
+ bool Tokenize(const TString& query, const TString& queryName, const TTokenCallback& onNextToken, NYql::TIssues& issues, size_t maxErrors) override {
+ NYql::TIssues newIssues;
+#if defined(_tsan_enabled_)
+ // Under thread sanitizer all tokenization is serialized through one global mutex.
+ TGuard<TMutex> grd(SanitizerSQLTranslationMutex);
+#endif
+ NSQLTranslation::TErrorCollectorOverIssues collector(newIssues, maxErrors, "");
+ if (Ansi && !Antlr4) {
+ NProtoAST::TLexerTokensCollector3<NALPAnsi::SQLv1Lexer> tokensCollector(query, (const char**)NALPAnsi::SQLv1ParserTokenNames, queryName);
+ tokensCollector.CollectTokens(collector, onNextToken);
+ } else if (!Ansi && !Antlr4) {
+ NProtoAST::TLexerTokensCollector3<NALPDefault::SQLv1Lexer> tokensCollector(query, (const char**)NALPDefault::SQLv1ParserTokenNames, queryName);
+ tokensCollector.CollectTokens(collector, onNextToken);
+ } else if (Ansi && Antlr4) {
+ NProtoAST::TLexerTokensCollector4<NALPAnsiAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName);
+ tokensCollector.CollectTokens(collector, onNextToken);
+ } else {
+ NProtoAST::TLexerTokensCollector4<NALPDefaultAntlr4::SQLv1Antlr4Lexer> tokensCollector(query, queryName);
+ tokensCollector.CollectTokens(collector, onNextToken);
+ }
+
+ issues.AddIssues(newIssues);
+ // The predicate takes the issue by const reference: scanning must not copy every issue.
+ return !AnyOf(newIssues.begin(), newIssues.end(), [](const auto& issue) { return issue.GetSeverity() == NYql::ESeverity::TSeverityIds_ESeverityId_S_ERROR; });
+ }
+
+private:
+ const bool Ansi;
+ const bool Antlr4;
+};
+
+} // namespace
+
+// Creates the v1 lexer for the requested grammar flavour (`ansi`) and ANTLR runtime (`antlr4`).
+NSQLTranslation::ILexer::TPtr MakeLexer(bool ansi, bool antlr4) {
+ auto* lexer = new TV1Lexer(ansi, antlr4);
+ return NSQLTranslation::ILexer::TPtr(lexer);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/lexer/lexer.h b/yql/essentials/sql/v1/lexer/lexer.h
new file mode 100644
index 00000000000..fe0102be79c
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/lexer.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <yql/essentials/parser/lexer_common/lexer.h>
+
+namespace NSQLTranslationV1 {
+
+NSQLTranslation::ILexer::TPtr MakeLexer(bool ansi, bool antlr4); // Creates the v1 SQL lexer; `ansi` selects the ANSI grammar, `antlr4` the ANTLR4-based lexer.
+
+}
diff --git a/yql/essentials/sql/v1/lexer/tsan.supp b/yql/essentials/sql/v1/lexer/tsan.supp
new file mode 100644
index 00000000000..d8a9765b09e
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/tsan.supp
@@ -0,0 +1 @@
+race:NALPDefault::SQLv1LexerCyclicDFA33::specialStateTransition
diff --git a/yql/essentials/sql/v1/lexer/ya.make b/yql/essentials/sql/v1/lexer/ya.make
new file mode 100644
index 00000000000..5174f6f595b
--- /dev/null
+++ b/yql/essentials/sql/v1/lexer/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/core/issue/protos
+ yql/essentials/parser/proto_ast/gen/v1
+ yql/essentials/parser/proto_ast/gen/v1_ansi
+ yql/essentials/parser/proto_ast/gen/v1_antlr4
+ yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4
+)
+
+SRCS(
+ lexer.cpp
+)
+
+SUPPRESSIONS(
+ tsan.supp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/list_builtin.cpp b/yql/essentials/sql/v1/list_builtin.cpp
new file mode 100644
index 00000000000..c059768cb4d
--- /dev/null
+++ b/yql/essentials/sql/v1/list_builtin.cpp
@@ -0,0 +1,142 @@
+#include "list_builtin.h"
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+TAstNode* TListBuiltin::Translate(TContext& ctx) const { // Delegates to the node assembled by DoInit.
+ Y_DEBUG_ABORT_UNLESS(Node); // Node is assigned by the DoInit of the derived classes
+ return Node->Translate(ctx);
+}
+
+TNodePtr TListBuiltin::GetIdentityLambda() { // One-argument lambda with an empty body list and "arg" as its result.
+ return BuildLambda(Pos, Y("arg"), Y(), "arg");
+}
+
+// Accepts (list) or (list, key lambda); a missing key lambda is replaced by the identity lambda.
+// The sort direction is emitted as a Bool literal between the two arguments.
+bool TListSortBuiltin::DoInit(TContext& ctx, ISource* src) {
+ const size_t argCount = Args.size();
+ if (argCount != 1 && argCount != 2) {
+ ctx.Error(Pos) << OpName << " requires one or two parameters.";
+ return false;
+ }
+ if (!Args[0]->Init(ctx, src)) {
+ return false;
+ }
+ if (argCount == 1) {
+ Args.push_back(GetIdentityLambda());
+ } else if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+ auto direction = Y("Bool", Q(Asc ? "true" : "false"));
+ Node = Y(OpName, Args[0], direction, Args[1]);
+ return true;
+}
+
+// Requires exactly two arguments; the second one is converted into an atom before the call node is built.
+bool TListExtractBuiltin::DoInit(TContext& ctx, ISource* src) {
+ if (Args.size() != 2) {
+ ctx.Error(Pos) << OpName << " requires exactly two parameters.";
+ return false;
+ }
+
+ for (size_t i = 0; i < Args.size(); ++i) {
+ if (!Args[i]->Init(ctx, src)) {
+ return false;
+ }
+ }
+
+ auto atom = MakeAtomFromExpression(Pos, ctx, Args[1]).Build();
+ Args[1] = atom;
+ Node = Y(OpName, Args[0], Args[1]);
+ return true;
+}
+
+// Shared validation for the two-argument list builtins: checks the arity and initializes both arguments.
+bool TListProcessBuiltin::CheckArgs(TContext& ctx, ISource* src) {
+ if (Args.size() != 2) {
+ ctx.Error(Pos) << OpName << " requires exactly two parameters";
+ return false;
+ }
+
+ for (size_t i = 0; i < Args.size(); ++i) {
+ if (!Args[i]->Init(ctx, src)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Emits (OpName list lambda) once both arguments are validated.
+bool TListMapBuiltin::DoInit(TContext& ctx, ISource* src) {
+ const bool argsOk = CheckArgs(ctx, src);
+ if (argsOk) {
+ Node = Y(OpName, Args[0], Args[1]);
+ }
+ return argsOk;
+}
+
+// Emits (OpName list <filter lambda>) once both arguments are validated.
+bool TListFilterBuiltin::DoInit(TContext& ctx, ISource* src) {
+ const bool argsOk = CheckArgs(ctx, src);
+ if (argsOk) {
+ Node = Y(OpName, Args[0], GetFilterLambda());
+ }
+ return argsOk;
+}
+
+// Wraps the user predicate: lambda(item) -> Coalesce(Apply(predicate, item), Bool false).
+TNodePtr TListFilterBuiltin::GetFilterLambda() {
+ auto predicateCall = Y("Apply", Args[1], "item");
+ auto fallback = Y("Bool", Q("false"));
+ auto body = Y("Coalesce", predicateCall, fallback);
+ return BuildLambda(Pos, Y("item"), body);
+}
+
+// Takes one argument and emits (List (ListType <arg>)).
+bool TListCreateBuiltin::DoInit(TContext& ctx, ISource* src) {
+ if (Args.size() != 1) {
+ ctx.Error(Pos) << OpName << " requires only one parameter";
+ return false;
+ }
+ const bool initialized = Args[0]->Init(ctx, src);
+ if (!initialized) {
+ return false;
+ }
+ auto listType = Y("ListType", Args[0]);
+ Node = Y("List", listType);
+ return true;
+}
+
+void TListCreateBuiltin::DoUpdateState() const { // Marks the node with the Const state flag.
+ State.Set(ENodeState::Const);
+}
+
+// Takes two arguments and emits (Dict (DictType <arg0> <arg1>)).
+bool TDictCreateBuiltin::DoInit(TContext& ctx, ISource* src) {
+ if (Args.size() != 2) {
+ ctx.Error(Pos) << OpName << " requires two parameters";
+ return false;
+ }
+
+ // Exactly two arguments at this point, so this visits Args[0] and then Args[1].
+ for (const auto& arg : Args) {
+ if (!arg->Init(ctx, src)) {
+ return false;
+ }
+ }
+
+ auto dictType = Y("DictType", Args[0], Args[1]);
+ Node = Y("Dict", dictType);
+ return true;
+}
+
+void TDictCreateBuiltin::DoUpdateState() const { // Marks the node with the Const state flag.
+ State.Set(ENodeState::Const);
+}
+
+// Takes one argument and emits (Dict (DictType <arg> (VoidType))): a dict with a Void payload.
+bool TSetCreateBuiltin::DoInit(TContext& ctx, ISource* src) {
+ if (Args.size() != 1) {
+ ctx.Error(Pos) << OpName << " requires one parameter";
+ return false;
+ }
+
+ const bool initialized = Args[0]->Init(ctx, src);
+ if (!initialized) {
+ return false;
+ }
+
+ auto setType = Y("DictType", Args[0], Y("VoidType"));
+ Node = Y("Dict", setType);
+ return true;
+}
+
+void TSetCreateBuiltin::DoUpdateState() const { // Marks the node with the Const state flag.
+ State.Set(ENodeState::Const);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/list_builtin.h b/yql/essentials/sql/v1/list_builtin.h
new file mode 100644
index 00000000000..61646498b7b
--- /dev/null
+++ b/yql/essentials/sql/v1/list_builtin.h
@@ -0,0 +1,160 @@
+#pragma once
+
+#include "node.h"
+#include "context.h"
+
+#include <yql/essentials/ast/yql_type_string.h>
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/util.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+// Common base of the list/dict/set builtins declared in this header.
+// A derived DoInit validates the arguments and assembles `Node`; Translate() then delegates to that node.
+class TListBuiltin: public TCallNode {
+public:
+ TListBuiltin(TPosition pos,
+ const TString& opName,
+ const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, args.size(), args.size(), args)
+ , OpName(opName)
+ , Args(args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override = 0;
+
+ TAstNode* Translate(TContext& ctx) const override;
+
+protected:
+ const TString OpName;
+ TVector<TNodePtr> Args;
+ TNodePtr Node; // assigned by the DoInit of the derived classes
+
+ // Defined in list_builtin.cpp. Deliberately not declared `inline`: an inline function needs a
+ // definition in every translation unit that uses it, and this header does not provide one.
+ TNodePtr GetIdentityLambda();
+};
+
+class TListSortBuiltin final: public TListBuiltin { // "ListSort" call; the direction flag is fixed at construction.
+public:
+ TListSortBuiltin(TPosition pos, const TVector<TNodePtr>& args, bool asc)
+ : TListBuiltin(pos, "ListSort", args)
+ , Asc(asc)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ TNodePtr DoClone() const final { // clones the arguments and keeps the direction
+ return new TListSortBuiltin(Pos, CloneContainer(Args), Asc);
+ }
+
+private:
+ const bool Asc; // emitted by DoInit as the Bool "true"/"false" argument
+};
+
+class TListExtractBuiltin final: public TListBuiltin { // "ListExtract" call; DoInit requires exactly two arguments.
+public:
+ TListExtractBuiltin(TPosition pos, const TVector<TNodePtr>& args)
+ : TListBuiltin(pos, "ListExtract", args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ TNodePtr DoClone() const final { // clones the arguments
+ return new TListExtractBuiltin(Pos, CloneContainer(Args));
+ }
+};
+
+class TListProcessBuiltin: public TListBuiltin { // Base for the two-argument builtins (map/filter); only constructible by derived classes.
+protected:
+ TListProcessBuiltin(TPosition pos,
+ const TString& opName,
+ const TVector<TNodePtr>& args)
+ : TListBuiltin(pos, opName, args)
+ {}
+
+ bool CheckArgs(TContext& ctx, ISource* src); // checks for exactly two arguments and initializes them
+};
+
+class TListMapBuiltin final: public TListProcessBuiltin { // "ListMap" or "ListFlatMap" call, chosen by `flat`.
+public:
+ TListMapBuiltin(TPosition pos,
+ const TVector<TNodePtr>& args,
+ bool flat)
+ : TListProcessBuiltin(pos, flat ? "ListFlatMap" : "ListMap", args)
+ , Flat(flat)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ TNodePtr DoClone() const final { // clones the arguments and keeps the flat/non-flat choice
+ return new TListMapBuiltin(Pos, CloneContainer(Args), Flat);
+ }
+private:
+ bool Flat; // in this header only read by DoClone; the operation name already encodes the choice
+};
+
+class TListFilterBuiltin final: public TListProcessBuiltin { // Filter-style builtin; the operation name is supplied by the caller.
+public:
+ TListFilterBuiltin(TPosition pos, const TString& opName,
+ const TVector<TNodePtr>& args)
+ : TListProcessBuiltin(pos, opName, args)
+ {}
+
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ TNodePtr DoClone() const final { // clones the arguments and keeps the operation name
+ return new TListFilterBuiltin(Pos, OpName, CloneContainer(Args));
+ }
+protected:
+ virtual TNodePtr GetFilterLambda(); // NOTE(review): the class is final, so `virtual` cannot be overridden here
+};
+
+class TListCreateBuiltin final: public TListBuiltin { // "ListCreate" call; DoInit requires one argument.
+public:
+ TListCreateBuiltin(TPosition pos,
+ const TVector<TNodePtr>& args)
+ : TListBuiltin(pos, "ListCreate", args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+ void DoUpdateState() const override; // sets the Const state flag (see list_builtin.cpp)
+
+ TNodePtr DoClone() const final { // clones the arguments
+ return new TListCreateBuiltin(Pos, CloneContainer(Args));
+ }
+};
+
+class TDictCreateBuiltin final: public TListBuiltin { // "DictCreate" call; DoInit requires two arguments.
+public:
+ TDictCreateBuiltin(TPosition pos,
+ const TVector<TNodePtr>& args)
+ : TListBuiltin(pos, "DictCreate", args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+ void DoUpdateState() const override; // sets the Const state flag (see list_builtin.cpp)
+
+ TNodePtr DoClone() const final { // clones the arguments
+ return new TDictCreateBuiltin(Pos, CloneContainer(Args));
+ }
+};
+
+class TSetCreateBuiltin final: public TListBuiltin { // "SetCreate" call; DoInit requires one argument.
+public:
+ TSetCreateBuiltin(TPosition pos,
+ const TVector<TNodePtr>& args)
+ : TListBuiltin(pos, "SetCreate", args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+ void DoUpdateState() const override; // sets the Const state flag (see list_builtin.cpp)
+
+ TNodePtr DoClone() const final { // clones the arguments
+ return new TSetCreateBuiltin(Pos, CloneContainer(Args));
+ }
+};
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/match_recognize.cpp b/yql/essentials/sql/v1/match_recognize.cpp
new file mode 100644
index 00000000000..47055e2f3d7
--- /dev/null
+++ b/yql/essentials/sql/v1/match_recognize.cpp
@@ -0,0 +1,254 @@
+#include "match_recognize.h"
+#include "source.h"
+#include "context.h"
+
+namespace NSQLTranslationV1 {
+
+namespace {
+
+// Names of the lambda arguments used by the generated MEASURES / DEFINE lambdas
+// and referenced again as atoms by TMatchRecognizeNavigate::DoInit.
+const auto VarDataName = "data";
+const auto VarMatchedVarsName = "vars";
+const auto VarLastRowIndexName = "lri";
+
+} // namespace
+
+// AST list node for a MATCH_RECOGNIZE clause.
+// The node is a list whose first element is the atom "block" and whose second
+// element is the quoted statement list produced by BuildBlockStatements.
+class TMatchRecognize: public TAstListNode {
+public:
+ // Every clause arrives as a (position, payload) pair and is forwarded as-is
+ // to BuildBlockStatements; the result is appended to this list node.
+ TMatchRecognize(
+ TPosition pos,
+ ISource* source,
+ const TString& inputTable,
+ std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
+ std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
+ std::pair<TPosition, TVector<TNamedFunction>>&& measures,
+ std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
+ std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
+ std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
+ std::pair<TPosition, TNodePtr>&& subset,
+ std::pair<TPosition, TVector<TNamedFunction>>&& definitions
+ ): TAstListNode(pos, {BuildAtom(pos, "block")})
+ {
+ Add(BuildBlockStatements(
+ pos,
+ source,
+ inputTable,
+ std::move(partitioners),
+ std::move(sortSpecs),
+ std::move(measures),
+ std::move(rowsPerMatch),
+ std::move(skipTo),
+ std::move(pattern),
+ std::move(subset),
+ std::move(definitions)
+ ));
+ }
+private:
+ // Copy constructor used only by DoClone: keeps the position and clones the child nodes.
+ TMatchRecognize(const TMatchRecognize& other)
+ : TAstListNode(other.Pos)
+ {
+ Nodes = CloneContainer(other.Nodes);
+ }
+
+ // Builds the quoted list of "let" statements that bind every part of the clause
+ // and finally returns the "MatchRecognize" callable applied to them.
+ // `pos` is intentionally unused.
+ TNodePtr BuildBlockStatements(
+ TPosition pos,
+ ISource* source,
+ const TString& inputTable,
+ std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
+ std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
+ std::pair<TPosition, TVector<TNamedFunction>>&& measures,
+ std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
+ std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
+ std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
+ std::pair<TPosition, TNodePtr>&& subset,
+ std::pair<TPosition, TVector<TNamedFunction>>&& definitions
+ ) {
+ Y_UNUSED(pos);
+
+ auto inputRowType = Y("ListItemType",Y("TypeOf", inputTable));
+
+ auto patternNode = Pattern(pattern.first, pattern.second);
+
+ // Partitioning: a quoted list of column names plus a lambda over "row"
+ // that returns the quoted list of partitioner callables.
+ auto partitionColumns = Y();
+ for (const auto& p: partitioners.second){
+ partitionColumns->Add(BuildQuotedAtom(p.callable->GetPos(), p.name));
+ }
+ partitionColumns = Q(partitionColumns);
+ auto partitionKeySelector = Y();
+ for (const auto& p: partitioners.second){
+ partitionKeySelector->Add(p.callable);
+ }
+ partitionKeySelector = BuildLambda(partitioners.first, Y("row"), Q(partitionKeySelector));
+
+ // Measures: names first, then one lambda per measure taking (data, vars).
+ auto measureNames = Y();
+ for (const auto& m: measures.second){
+ measureNames->Add(BuildQuotedAtom(m.callable->GetPos(), m.name));
+ }
+ TNodePtr measuresNode = Y("MatchRecognizeMeasures", inputRowType, patternNode, Q(measureNames));
+ for (const auto& m: measures.second){
+ measuresNode->Add(BuildLambda(m.callable->GetPos(), Y(VarDataName, VarMatchedVarsName), m.callable));
+ }
+ // Definitions: names first, then one lambda per definition taking (data, vars, lri).
+ auto defineNames = Y();
+ for (const auto& d: definitions.second) {
+ defineNames->Add(BuildQuotedAtom(d.callable->GetPos(), d.name));
+ }
+
+ TNodePtr defineNode = Y("MatchRecognizeDefines", inputRowType, patternNode, Q(defineNames));
+ for (const auto& d: definitions.second) {
+ defineNode->Add(BuildLambda(d.callable->GetPos(), Y(VarDataName, VarMatchedVarsName, VarLastRowIndexName), d.callable));
+ }
+
+ // NOTE(review): "subset" is bound with let below but is not passed to
+ // MatchRecognizeParams -- confirm whether that is intentional.
+ return Q(Y(
+ Y("let", "input", inputTable),
+ Y("let", "partitionKeySelector", partitionKeySelector),
+ Y("let", "partitionColumns", partitionColumns),
+ Y("let", "sortTraits", sortSpecs.second.empty()? Y("Void") : source->BuildSortSpec(sortSpecs.second, inputTable, true, false)),
+ Y("let", "measures", measuresNode),
+ Y("let", "rowsPerMatch", BuildQuotedAtom(rowsPerMatch.first, "RowsPerMatch_" + ToString(rowsPerMatch.second))),
+ Y("let", "skipTo", BuildTuple(skipTo.first, {Q("AfterMatchSkip_" + ToString(skipTo.second.To)), Q(ToString(skipTo.second.Var))})),
+ Y("let", "pattern", patternNode),
+ Y("let", "subset", subset.second ? subset.second : Q("")),
+ Y("let", "define", defineNode),
+ Y("let", "res", Y("MatchRecognize",
+ "input",
+ "partitionKeySelector",
+ "partitionColumns",
+ "sortTraits",
+ Y("MatchRecognizeParams",
+ "measures",
+ "rowsPerMatch",
+ "skipTo",
+ "pattern",
+ "define"
+ )
+ )),
+ Y("return", "res")
+ ));
+ }
+
+ // Serializes one pattern factor as a tuple: the primary (a quoted variable name when
+ // the variant holds alternative 0, otherwise a nested pattern) followed by the
+ // stringified QuantityMin, QuantityMax, Greedy, Output and Unused fields.
+ TPtr PatternFactor(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternFactor& factor) {
+ return BuildTuple(pos, {
+ factor.Primary.index() == 0 ?
+ BuildQuotedAtom(pos, std::get<0>(factor.Primary)) :
+ Pattern(pos, std::get<1>(factor.Primary)),
+ BuildQuotedAtom(pos, ToString(factor.QuantityMin)),
+ BuildQuotedAtom(pos, ToString(factor.QuantityMax)),
+ BuildQuotedAtom(pos, ToString(factor.Greedy)),
+ BuildQuotedAtom(pos, ToString(factor.Output)),
+ BuildQuotedAtom(pos, ToString(factor.Unused))
+ });
+ }
+
+
+ // Serializes a pattern term as the quoted list of its factors.
+ TPtr PatternTerm(const TPosition& pos, const NYql::NMatchRecognize::TRowPatternTerm& term) {
+ auto factors = Y();
+ for (const auto& f: term)
+ factors->Add(PatternFactor(pos, f));
+ return Q(std::move(factors));
+ }
+
+ // Serializes a whole pattern as a "MatchRecognizePattern" call with one argument per term.
+ TPtr Pattern(const TPosition& pos, const NYql::NMatchRecognize::TRowPattern& pattern) {
+ TNodePtr patternNode = Y("MatchRecognizePattern");
+ for (const auto& t: pattern) {
+ patternNode->Add(PatternTerm(pos, t));
+ }
+ return patternNode;
+ }
+
+ // Cloning goes through the private copy constructor above.
+ TPtr DoClone() const final{
+ return new TMatchRecognize(*this);
+ }
+};
+
+// Creates the MATCH_RECOGNIZE node from the clauses stored in the builder and
+// initializes it. The stored clauses are moved out, so the builder is left in a
+// moved-from state afterwards. Returns nullptr when initialization fails.
+TNodePtr TMatchRecognizeBuilder::Build(TContext& ctx, TString&& inputTable, ISource* source) {
+    TNodePtr matchRecognize = new TMatchRecognize(
+        Pos,
+        source,
+        std::move(inputTable),
+        std::move(Partitioners),
+        std::move(SortSpecs),
+        std::move(Measures),
+        std::move(RowsPerMatch),
+        std::move(SkipTo),
+        std::move(Pattern),
+        std::move(Subset),
+        std::move(Definitions)
+    );
+    if (matchRecognize->Init(ctx, source)) {
+        return matchRecognize;
+    }
+    return nullptr;
+}
+
+namespace {
+// Navigation name used when a row pattern variable is accessed without an explicit
+// navigation function; TMatchRecognizeNavigate::DoInit compares Name against it.
+const auto DefaultNavigatingFunction = "MatchRecognizeDefaultNavigating";
+}
+
+// Wraps a clone of this access node into a default navigation node and initializes it.
+// (A reference to the variable currently being defined denotes the last row in a partition.)
+bool TMatchRecognizeVarAccessNode::DoInit(TContext& ctx, ISource* src) {
+    const TVector<TNodePtr> navigateArgs{this->Clone()};
+    Node = new TMatchRecognizeNavigate(ctx.Pos(), DefaultNavigatingFunction, navigateArgs);
+    return Node->Init(ctx, src);
+}
+
+// Expands a row pattern navigation call into
+//   (Member (Lookup (ToIndexDict data) <rowIndex>) '<column>)
+// where <rowIndex> depends on the navigation function:
+//   default navigation - the last row index argument (same-var references only);
+//   PREV               - last row index minus one (same-var references only);
+//   FIRST              - "From" of the first matched range of the variable;
+//   LAST               - "To" of the last matched range of the variable.
+// Reports an error through ctx and returns false for a wrong argument count, a
+// non-variable argument, an unsupported combination or an unknown function name.
+bool TMatchRecognizeNavigate::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    if (Args.size() != 1) {
+        ctx.Error(Pos) << "Exactly one argument is required in MATCH_RECOGNIZE navigation function";
+        return false;
+    }
+    const auto varColumn = dynamic_cast<TMatchRecognizeVarAccessNode *>(Args[0].Get());
+    if (!varColumn) {
+        ctx.Error(Pos) << "Row pattern navigation operations are applicable to row pattern variable only";
+        return false;
+    }
+    const auto varData = BuildAtom(ctx.Pos(), VarDataName);
+    const auto varMatchedVars = BuildAtom(ctx.Pos(), VarMatchedVarsName);
+    const auto varLastRowIndex = BuildAtom(ctx.Pos(), VarLastRowIndexName);
+
+    const auto matchedRanges = Y("Member", varMatchedVars, Q(varColumn->GetVar()));
+    TNodePtr navigatedRowIndex;
+    if (DefaultNavigatingFunction == Name) {
+        if (!varColumn->IsTheSameVar()) {
+            ctx.Error(Pos) << "Row pattern navigation function is required";
+            return false;
+        }
+        navigatedRowIndex = varLastRowIndex;
+    } else if ("PREV" == Name) {
+        if (!varColumn->IsTheSameVar()) {
+            ctx.Error(Pos) << "PREV relative to matched vars is not implemented yet";
+            return false;
+        }
+        navigatedRowIndex = Y(
+            "-",
+            varLastRowIndex,
+            Y("Uint64", Q("1"))
+        );
+    } else if ("FIRST" == Name) {
+        navigatedRowIndex = Y(
+            "Member",
+            Y("Head", matchedRanges),
+            Q("From")
+        );
+    } else if ("LAST" == Name) {
+        navigatedRowIndex = Y(
+            "Member",
+            Y("Last", matchedRanges),
+            Q("To")
+        );
+    } else {
+        ctx.Error(Pos) << "Internal logic error";
+        return false;
+    }
+    Add("Member");
+    // Each Add is its own statement: the original chained the last two calls
+    // with an accidental comma operator.
+    Add(
+        Y(
+            "Lookup",
+            Y("ToIndexDict", varData),
+            navigatedRowIndex
+        )
+    );
+    Add(Q(varColumn->GetColumn()));
+    return true;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/match_recognize.h b/yql/essentials/sql/v1/match_recognize.h
new file mode 100644
index 00000000000..b78c0faf65e
--- /dev/null
+++ b/yql/essentials/sql/v1/match_recognize.h
@@ -0,0 +1,130 @@
+#pragma once
+#include "node.h"
+#include <yql/essentials/core/sql_types/match_recognize.h>
+#include <util/generic/ptr.h>
+
+namespace NSQLTranslationV1 {
+
+// A callable node paired with the name it is exposed under
+// (used for partitioners, measures and definitions of MATCH_RECOGNIZE).
+struct TNamedFunction {
+ TNodePtr callable; //Callable with some free args
+ TString name;
+};
+
+// Rows-per-match mode of a MATCH_RECOGNIZE clause. The value is stringified with
+// ToString and appended to "RowsPerMatch_" when the clause is translated.
+enum class ERowsPerMatch {
+ OneRow,
+ AllRows
+};
+
+// Collects the parsed clauses of MATCH_RECOGNIZE, each as a (position, payload) pair,
+// until the input table and source are known; Build() then creates the AST node.
+// Build() moves the stored clauses out, so it is meant to be called once per builder.
+class TMatchRecognizeBuilder: public TSimpleRefCount<TMatchRecognizeBuilder> {
+public:
+    // Takes ownership of every clause; all of them are moved into the builder.
+    TMatchRecognizeBuilder(
+        TPosition clausePos,
+        std::pair<TPosition, TVector<TNamedFunction>>&& partitioners,
+        std::pair<TPosition, TVector<TSortSpecificationPtr>>&& sortSpecs,
+        std::pair<TPosition, TVector<TNamedFunction>>&& measures,
+        std::pair<TPosition, ERowsPerMatch>&& rowsPerMatch,
+        std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo>&& skipTo,
+        std::pair<TPosition, NYql::NMatchRecognize::TRowPattern>&& pattern,
+        std::pair<TPosition, TNodePtr>&& subset,
+        std::pair<TPosition, TVector<TNamedFunction>>&& definitions
+    )
+        : Pos(clausePos)
+        , Partitioners(std::move(partitioners))
+        , SortSpecs(std::move(sortSpecs))
+        , Measures(std::move(measures))
+        , RowsPerMatch(std::move(rowsPerMatch))
+        , SkipTo(std::move(skipTo))
+        , Pattern(std::move(pattern))
+        , Subset(std::move(subset))
+        // A named rvalue reference is an lvalue: without std::move the vector was copied.
+        , Definitions(std::move(definitions))
+    {}
+
+    // Creates and initializes the MATCH_RECOGNIZE node; returns nullptr on failure.
+    TNodePtr Build(TContext& ctx, TString&& inputTable, ISource* source);
+private:
+    TPosition Pos;
+    std::pair<TPosition, TVector<TNamedFunction>> Partitioners;
+    std::pair<TPosition, TVector<TSortSpecificationPtr>> SortSpecs;
+    std::pair<TPosition, TVector<TNamedFunction>> Measures;
+    std::pair<TPosition, ERowsPerMatch> RowsPerMatch;
+    std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> SkipTo;
+    std::pair<TPosition, NYql::NMatchRecognize::TRowPattern> Pattern;
+    std::pair<TPosition, TNodePtr> Subset;
+    std::pair<TPosition, TVector<TNamedFunction>> Definitions;
+};
+
+using TMatchRecognizeBuilderPtr = TIntrusivePtr<TMatchRecognizeBuilder>;
+
+// Access to a column of a row pattern variable ("var.column") inside MATCH_RECOGNIZE.
+// DoInit creates the wrapped navigation node `Node`; Translate and the visitor
+// delegate to it, so the node must be initialized before either is used.
+class TMatchRecognizeVarAccessNode: public INode {
+public:
+    TMatchRecognizeVarAccessNode(TPosition pos, const TString& var, const TString& column, bool theSameVar)
+        : INode(pos)
+        , Var(var)
+        , TheSameVar(theSameVar)
+        , Column(column)
+    {}
+
+    TString GetVar() const { return Var; }
+
+    // True when the access refers to the variable being defined by the enclosing expression.
+    bool IsTheSameVar() const { return TheSameVar; }
+
+    TString GetColumn() const { return Column; }
+
+    bool DoInit(TContext& ctx, ISource* src) override;
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Node->Translate(ctx);
+    }
+
+    // Only an uninitialized node may be cloned: the wrapped node is not copied.
+    TPtr DoClone() const override {
+        YQL_ENSURE(!Node, "TMatchRecognizeVarAccessNode::Clone: Node must not be initialized");
+        return new TMatchRecognizeVarAccessNode(Pos, Var, Column, TheSameVar);
+    }
+
+protected:
+    void DoUpdateState() const override {
+        YQL_ENSURE(Node);
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+
+private:
+    TNodePtr Node; // created by DoInit
+    const TString Var;
+    const bool TheSameVar; // references the same var as being defined by this expression
+    const TString Column;
+};
+
+// Row pattern navigation call identified by `Name`, applied to `Args`;
+// the list content is produced by DoInit.
+class TMatchRecognizeNavigate: public TAstListNode {
+public:
+    TMatchRecognizeNavigate(TPosition pos, const TString& name, const TVector<TNodePtr>& args)
+        : TAstListNode(pos)
+        , Name(name)
+        , Args(args)
+    {}
+
+private:
+    // Clones the arguments; name and position are carried over unchanged.
+    TNodePtr DoClone() const override {
+        const auto clonedArgs = CloneContainer(Args);
+        return new TMatchRecognizeNavigate(GetPos(), Name, clonedArgs);
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override;
+
+private:
+    const TString Name;
+    const TVector<TNodePtr> Args;
+};
+
+} // namespace NSQLTranslationV1
+
diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp
new file mode 100644
index 00000000000..c7cafda7a52
--- /dev/null
+++ b/yql/essentials/sql/v1/node.cpp
@@ -0,0 +1,3477 @@
+#include "node.h"
+#include "source.h"
+#include "context.h"
+
+#include <yql/essentials/ast/yql_ast_escaping.h>
+#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/core/sql_types/simple_types.h>
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/parser/pg_catalog/catalog.h>
+#include <yql/essentials/utils/yql_panic.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/charset/ci_string.h>
+#include <util/generic/hash_set.h>
+#include <util/stream/str.h>
+#include <util/string/cast.h>
+#include <util/string/escape.h>
+#include <util/string/subst.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+TString ErrorDistinctWithoutCorrelation(const TString& column) {
+ return TStringBuilder() << "DISTINCT columns for JOIN in SELECT should have table aliases (correlation name),"
+ " add it if necessary to FROM section over 'AS <alias>' keyword and put it like '<alias>." << column << "'";
+}
+
+TString ErrorDistinctByGroupKey(const TString& column) {
+ return TStringBuilder() << "Unable to use DISTINCT by grouping column: " << column << ". You should leave one of them.";
+}
+
+// Stores the reference name, cluster and keys node of a topic reference as given.
+TTopicRef::TTopicRef(const TString& refName, const TDeferredAtom& cluster, TNodePtr keys)
+ : RefName(refName)
+ , Cluster(cluster)
+ , Keys(keys)
+{
+}
+
+// Stores the default-value expression and the nullability flag of a column.
+TColumnConstraints::TColumnConstraints(TNodePtr defaultExpr, bool nullable)
+ : DefaultExpr(defaultExpr)
+ , Nullable(nullable)
+{
+}
+
+
+// Stores the description of a single column. `families` is taken by value and
+// moved into the member, so callers passing a temporary avoid a second vector copy.
+TColumnSchema::TColumnSchema(TPosition pos, const TString& name, const TNodePtr& type, bool nullable,
+    TVector<TIdentifier> families, bool serial, TNodePtr defaultExpr, ETypeOfChange typeOfChange)
+    : Pos(pos)
+    , Name(name)
+    , Type(type)
+    , Nullable(nullable)
+    , Families(std::move(families))
+    , Serial(serial)
+    , DefaultExpr(defaultExpr)
+    , TypeOfChange(typeOfChange)
+{
+}
+
+// Base node construction: only the source position is stored.
+INode::INode(TPosition pos)
+ : Pos(pos)
+{
+}
+
+INode::~INode()
+{
+}
+
+// Source position given at construction.
+TPosition INode::GetPos() const {
+ return Pos;
+}
+
+// Label accessors; the label position is optional and stored alongside the label.
+const TString& INode::GetLabel() const {
+ return Label;
+}
+
+TMaybe<TPosition> INode::GetLabelPos() const {
+ return LabelPos;
+}
+
+// Overwrites both the label and its position.
+void INode::SetLabel(const TString& label, TMaybe<TPosition> pos) {
+ Label = label;
+ LabelPos = pos;
+}
+
+bool INode::IsImplicitLabel() const {
+ return ImplicitLabel;
+}
+
+void INode::MarkImplicitLabel(bool isImplicitLabel) {
+ ImplicitLabel = isImplicitLabel;
+}
+
+// The count hint is read and written directly on the State bit set.
+void INode::SetCountHint(bool isCount) {
+ State.Set(ENodeState::CountHint, isCount);
+}
+
+bool INode::GetCountHint() const {
+ return State.Test(ENodeState::CountHint);
+}
+
+// The remaining state queries go through HasState rather than reading State directly.
+bool INode::IsConstant() const {
+ return HasState(ENodeState::Const);
+}
+
+bool INode::MaybeConstant() const {
+ return HasState(ENodeState::MaybeConst);
+}
+
+bool INode::IsAggregated() const {
+ return HasState(ENodeState::Aggregated);
+}
+
+bool INode::IsAggregationKey() const {
+ return HasState(ENodeState::AggregationKey);
+}
+
+bool INode::IsOverWindow() const {
+ return HasState(ENodeState::OverWindow);
+}
+
+bool INode::IsOverWindowDistinct() const {
+ return HasState(ENodeState::OverWindowDistinct);
+}
+
+// Base-class defaults for the literal-related queries: a plain node is neither
+// a null nor a literal and reports empty literal type and value.
+bool INode::IsNull() const {
+ return false;
+}
+
+bool INode::IsLiteral() const {
+ return false;
+}
+
+TString INode::GetLiteralType() const {
+ return "";
+}
+
+TString INode::GetLiteralValue() const {
+ return "";
+}
+
+bool INode::IsIntegerLiteral() const {
+ return false;
+}
+
+// Applies a unary operation to this node: a null node yields a null literal,
+// any other node is cloned and wrapped into a call of `opName`. `ctx` is unused here.
+INode::TPtr INode::ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const {
+    Y_UNUSED(ctx);
+    if (!IsNull()) {
+        return new TCallNodeImpl(pos, opName, { Clone() });
+    }
+    return BuildLiteralNull(pos);
+}
+
+// Base-class defaults for optional node capabilities: each query below reports
+// "not supported" (false, nullptr, empty string or zero) and AssumeColumn does nothing.
+bool INode::IsAsterisk() const {
+ return false;
+}
+
+const TString* INode::SubqueryAlias() const {
+ return nullptr;
+}
+
+TString INode::GetOpName() const {
+ return TString();
+}
+
+const TString* INode::GetLiteral(const TString& type) const {
+ Y_UNUSED(type);
+ return nullptr;
+}
+
+const TString* INode::GetColumnName() const {
+ return nullptr;
+}
+
+void INode::AssumeColumn() {
+}
+
+const TString* INode::GetSourceName() const {
+ return nullptr;
+}
+
+const TString* INode::GetAtomContent() const {
+ return nullptr;
+}
+
+bool INode::IsOptionalArg() const {
+ return false;
+}
+
+size_t INode::GetTupleSize() const {
+ return 0;
+}
+
+INode::TPtr INode::GetTupleElement(size_t index) const {
+ Y_UNUSED(index);
+ return nullptr;
+}
+
+ITableKeys* INode::GetTableKeys() {
+ return nullptr;
+}
+
+ISource* INode::GetSource() {
+ return nullptr;
+}
+
+TVector<TNodePtr>* INode::ContentListPtr() {
+ return nullptr;
+}
+
+// Initializes the node at most once. A node that failed earlier keeps failing;
+// an already initialized node succeeds without running DoInit again.
+bool INode::Init(TContext& ctx, ISource* src) {
+    if (State.Test(ENodeState::Failed)) {
+        return false;
+    }
+    if (State.Test(ENodeState::Initialized)) {
+        return true;
+    }
+
+    if (DoInit(ctx, src)) {
+        State.Set(ENodeState::Initialized);
+        return true;
+    }
+    // Remember the failure so later calls do not retry.
+    State.Set(ENodeState::Failed);
+    return false;
+}
+
+// Default: nothing to initialize for a reference.
+bool INode::InitReference(TContext& ctx) {
+ Y_UNUSED(ctx);
+ return true;
+}
+
+// Default initialization hook: succeeds without doing anything.
+bool INode::DoInit(TContext& ctx, ISource* src) {
+ Y_UNUSED(ctx);
+ Y_UNUSED(src);
+ return true;
+}
+
+// AstNode overloads convert a value into a node:
+// no argument -> a new empty list node at this node's position.
+TNodePtr INode::AstNode() const {
+ return new TAstListNodeImpl(Pos);
+}
+
+// An existing node is returned as-is.
+TNodePtr INode::AstNode(TNodePtr node) const {
+ return node;
+}
+
+// A string becomes an atom with default flags at this node's position.
+TNodePtr INode::AstNode(const TString& str) const {
+ return new TAstAtomNodeImpl(Pos, str, TNodeFlags::Default);
+}
+
+// A raw TAstNode is wrapped into a direct node.
+TNodePtr INode::AstNode(TAstNode* node) const {
+ return new TAstDirectNode(node);
+}
+
+// Returns a copy of the node. When DoClone yields nothing the node itself is shared;
+// otherwise the copy inherits the label, its position and the implicit-label flag.
+// Real copies are only allowed for nodes that have not been initialized.
+TNodePtr INode::Clone() const {
+    TNodePtr copy = DoClone();
+    if (copy) {
+        YQL_ENSURE(!State.Test(ENodeState::Initialized), "Clone should be for uninitialized or persistent node");
+        copy->SetLabel(Label, LabelPos);
+        copy->MarkImplicitLabel(ImplicitLabel);
+        return copy;
+    }
+    copy = const_cast<INode*>(this);
+    return copy;
+}
+
+// Default: the node carries no aggregation.
+TAggregationPtr INode::GetAggregation() const {
+ return {};
+}
+
+// Default: nothing to collect.
+void INode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+ Y_UNUSED(ctx);
+ Y_UNUSED(src);
+ Y_UNUSED(exprs);
+}
+
+// Default: no window specification function.
+INode::TPtr INode::WindowSpecFunc(const TPtr& type) const {
+ Y_UNUSED(type);
+ return {};
+}
+
+// Default: views are not supported. The error is reported through ctx.Error()
+// without a position argument; `pos` and `view` are unused.
+bool INode::SetViewName(TContext& ctx, TPosition pos, const TString& view) {
+ Y_UNUSED(pos);
+ Y_UNUSED(view);
+ ctx.Error() << "Node not support views";
+ return false;
+}
+
+// Default: primary views are not supported; `pos` is unused.
+bool INode::SetPrimaryView(TContext& ctx, TPosition pos) {
+ Y_UNUSED(pos);
+ ctx.Error() << "Node not support primary views";
+ return false;
+}
+
+// Sets the AsInner flag.
+void INode::UseAsInner() {
+ AsInner = true;
+}
+
+// Sets the DisableSort_ flag.
+void INode::DisableSort() {
+ DisableSort_ = true;
+}
+
+// Base-class defaults: the boolean queries answer false and the name queries
+// return nullptr.
+bool INode::UsedSubquery() const {
+ return false;
+}
+
+bool INode::IsSelect() const {
+ return false;
+}
+
+bool INode::HasSelectResult() const {
+ return false;
+}
+
+const TString* INode::FuncName() const {
+ return nullptr;
+}
+
+const TString* INode::ModuleName() const {
+ return nullptr;
+}
+
+bool INode::HasSkip() const {
+ return false;
+}
+
+// Typed-node accessors: each returns nullptr in the base class, in both the
+// const and the non-const flavour.
+TColumnNode* INode::GetColumnNode() {
+ return nullptr;
+}
+
+const TColumnNode* INode::GetColumnNode() const {
+ return nullptr;
+}
+
+TTupleNode* INode::GetTupleNode() {
+ return nullptr;
+}
+
+const TTupleNode* INode::GetTupleNode() const {
+ return nullptr;
+}
+
+TCallNode* INode::GetCallNode() {
+ return nullptr;
+}
+
+const TCallNode* INode::GetCallNode() const {
+ return nullptr;
+}
+
+TStructNode* INode::GetStructNode() {
+ return nullptr;
+}
+
+const TStructNode* INode::GetStructNode() const {
+ return nullptr;
+}
+
+TAccessNode* INode::GetAccessNode() {
+ return nullptr;
+}
+
+const TAccessNode* INode::GetAccessNode() const {
+ return nullptr;
+}
+
+TLambdaNode* INode::GetLambdaNode() {
+ return nullptr;
+}
+
+const TLambdaNode* INode::GetLambdaNode() const {
+ return nullptr;
+}
+
+TUdfNode* INode::GetUdfNode() {
+ return nullptr;
+}
+
+const TUdfNode* INode::GetUdfNode() const {
+ return nullptr;
+}
+
+// Entry point of a tree visit: starts with an empty set of visited nodes.
+void INode::VisitTree(const TVisitFunc& func) const {
+    TVisitNodeSet seen;
+    VisitTree(func, seen);
+}
+
+// Visits this node once per traversal. Children are visited only when the node
+// is initialized and `func` returns true for it.
+void INode::VisitTree(const TVisitFunc& func, TVisitNodeSet& visited) const {
+    const bool firstVisit = visited.emplace(this).second;
+    if (!firstVisit) {
+        return;
+    }
+    if (!HasState(ENodeState::Initialized)) {
+        return;
+    }
+    if (func(*this)) {
+        DoVisitChildren(func, visited);
+    }
+}
+
+// Default: a node cannot be shallow-copied; the debug check always fires and
+// nullptr is returned.
+TNodePtr INode::ShallowCopy() const {
+ Y_DEBUG_ABORT_UNLESS(false, "Node is not copyable");
+ return nullptr;
+}
+
+// Default state update hook: nothing to compute.
+void INode::DoUpdateState() const {
+}
+
+// Runs DoUpdateState once and marks the node as precached.
+// Failed and already precached nodes are left untouched.
+void INode::PrecacheState() const {
+    /// Does not work right now! It is better to use Init first, because some kinds of update depend on it
+    /// \todo turn on and remove all issues
+    //Y_DEBUG_ABORT_UNLESS(State.Test(ENodeState::Initialized));
+    if (State.Test(ENodeState::Failed) || State.Test(ENodeState::Precached)) {
+        return;
+    }
+    DoUpdateState();
+    State.Set(ENodeState::Precached);
+}
+
+// Default: a node has no children to visit.
+void INode::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const {
+ Y_UNUSED(func);
+ Y_UNUSED(visited);
+}
+
+// Default: a node cannot take children; the debug check always fires.
+void INode::DoAdd(TNodePtr node) {
+ Y_UNUSED(node);
+ Y_DEBUG_ABORT_UNLESS(false, "Node is not expandable");
+}
+
+// Forwards the null query to the wrapped node.
+bool IProxyNode::IsNull() const {
+ return Inner->IsNull();
+}
+
+// Forwards the literal query to the wrapped node.
+// (The previous body asked Inner->IsNull(), so a proxy around a non-null literal
+// reported "not a literal" and a proxy around a null reported "literal".)
+bool IProxyNode::IsLiteral() const {
+    return Inner->IsLiteral();
+}
+
+// Every method in this group forwards the call, with unchanged arguments,
+// to the wrapped node `Inner` and returns its result.
+TString IProxyNode::GetLiteralType() const {
+ return Inner->GetLiteralType();
+}
+
+TString IProxyNode::GetLiteralValue() const {
+ return Inner->GetLiteralValue();
+}
+
+bool IProxyNode::IsIntegerLiteral() const {
+ return Inner->IsIntegerLiteral();
+}
+
+INode::TPtr IProxyNode::ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const {
+ return Inner->ApplyUnaryOp(ctx, pos, opName);
+}
+
+bool IProxyNode::IsAsterisk() const {
+ return Inner->IsAsterisk();
+}
+
+const TString* IProxyNode::SubqueryAlias() const {
+ return Inner->SubqueryAlias();
+}
+
+TString IProxyNode::GetOpName() const {
+ return Inner->GetOpName();
+}
+
+const TString* IProxyNode::GetLiteral(const TString& type) const {
+ return Inner->GetLiteral(type);
+}
+
+const TString* IProxyNode::GetColumnName() const {
+ return Inner->GetColumnName();
+}
+
+void IProxyNode::AssumeColumn() {
+ Inner->AssumeColumn();
+}
+
+const TString* IProxyNode::GetSourceName() const {
+ return Inner->GetSourceName();
+}
+
+const TString* IProxyNode::GetAtomContent() const {
+ return Inner->GetAtomContent();
+}
+
+bool IProxyNode::IsOptionalArg() const {
+ return Inner->IsOptionalArg();
+}
+
+size_t IProxyNode::GetTupleSize() const {
+ return Inner->GetTupleSize();
+}
+
+INode::TPtr IProxyNode::GetTupleElement(size_t index) const {
+ return Inner->GetTupleElement(index);
+}
+
+ITableKeys* IProxyNode::GetTableKeys() {
+ return Inner->GetTableKeys();
+}
+
+ISource* IProxyNode::GetSource() {
+ return Inner->GetSource();
+}
+
+TVector<INode::TPtr>* IProxyNode::ContentListPtr() {
+ return Inner->ContentListPtr();
+}
+
+TAggregationPtr IProxyNode::GetAggregation() const {
+ return Inner->GetAggregation();
+}
+
+void IProxyNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+ Inner->CollectPreaggregateExprs(ctx, src, exprs);
+}
+
+INode::TPtr IProxyNode::WindowSpecFunc(const TPtr& type) const {
+ return Inner->WindowSpecFunc(type);
+}
+
+bool IProxyNode::SetViewName(TContext& ctx, TPosition pos, const TString& view) {
+ return Inner->SetViewName(ctx, pos, view);
+}
+
+bool IProxyNode::SetPrimaryView(TContext& ctx, TPosition pos) {
+ return Inner->SetPrimaryView(ctx, pos);
+}
+
+bool IProxyNode::UsedSubquery() const {
+ return Inner->UsedSubquery();
+}
+
+bool IProxyNode::IsSelect() const {
+ return Inner->IsSelect();
+}
+
+bool IProxyNode::HasSelectResult() const {
+ return Inner->HasSelectResult();
+}
+
+const TString* IProxyNode::FuncName() const {
+ return Inner->FuncName();
+}
+
+const TString* IProxyNode::ModuleName() const {
+ return Inner->ModuleName();
+}
+
+bool IProxyNode::HasSkip() const {
+ return Inner->HasSkip();
+}
+
+// Typed-node accessors forwarded to the wrapped node. The const overloads cast
+// the raw pointer to `const INode*` first so that the const overload of the
+// accessor is selected on the inner node.
+TColumnNode* IProxyNode::GetColumnNode() {
+ return Inner->GetColumnNode();
+}
+
+const TColumnNode* IProxyNode::GetColumnNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetColumnNode();
+}
+
+TTupleNode* IProxyNode::GetTupleNode() {
+ return Inner->GetTupleNode();
+}
+
+const TTupleNode* IProxyNode::GetTupleNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetTupleNode();
+}
+
+TCallNode* IProxyNode::GetCallNode() {
+ return Inner->GetCallNode();
+}
+
+const TCallNode* IProxyNode::GetCallNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetCallNode();
+}
+
+TStructNode* IProxyNode::GetStructNode() {
+ return Inner->GetStructNode();
+}
+
+const TStructNode* IProxyNode::GetStructNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetStructNode();
+}
+
+TAccessNode* IProxyNode::GetAccessNode() {
+ return Inner->GetAccessNode();
+}
+
+const TAccessNode* IProxyNode::GetAccessNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetAccessNode();
+}
+
+TLambdaNode* IProxyNode::GetLambdaNode() {
+ return Inner->GetLambdaNode();
+}
+
+const TLambdaNode* IProxyNode::GetLambdaNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetLambdaNode();
+}
+
+TUdfNode* IProxyNode::GetUdfNode() {
+ return Inner->GetUdfNode();
+}
+
+const TUdfNode* IProxyNode::GetUdfNode() const {
+ return static_cast<const INode*>(Inner.Get())->GetUdfNode();
+}
+
+// Mirrors the state flags of the wrapped node onto the proxy.
+// The static_assert forces a review of this list whenever ENodeState grows.
+void IProxyNode::DoUpdateState() const {
+    static_assert(static_cast<int>(ENodeState::End) == 10, "Need to support new states here");
+    const INode& wrapped = *Inner;
+    State.Set(ENodeState::CountHint, wrapped.GetCountHint());
+    State.Set(ENodeState::Const, wrapped.IsConstant());
+    State.Set(ENodeState::MaybeConst, wrapped.MaybeConstant());
+    State.Set(ENodeState::Aggregated, wrapped.IsAggregated());
+    State.Set(ENodeState::AggregationKey, wrapped.IsAggregationKey());
+    State.Set(ENodeState::OverWindow, wrapped.IsOverWindow());
+    State.Set(ENodeState::OverWindowDistinct, wrapped.IsOverWindowDistinct());
+}
+
+// The wrapped node is the only child of a proxy.
+void IProxyNode::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const {
+ Inner->VisitTree(func, visited);
+}
+
+bool IProxyNode::InitReference(TContext& ctx) {
+ return Inner->InitReference(ctx);
+}
+
+// Initializing the proxy initializes the wrapped node (via its public Init).
+bool IProxyNode::DoInit(TContext& ctx, ISource* src) {
+ return Inner->Init(ctx, src);
+}
+
+// Children added to the proxy are added to the wrapped node.
+void IProxyNode::DoAdd(TPtr node) {
+ Inner->Add(node);
+}
+
+// Copies every hint from `overrides` into `base`, replacing hints with the same name.
+void MergeHints(TTableHints& base, const TTableHints& overrides) {
+    for (const auto& [hintName, hintValues] : overrides) {
+        base[hintName] = hintValues;
+    }
+}
+
+// Returns a copy of the hints in which the node list of every hint is cloned.
+TTableHints CloneContainer(const TTableHints& hints) {
+    TTableHints cloned;
+    for (const auto& [hintName, hintNodes] : hints) {
+        cloned.emplace(hintName, CloneContainer(hintNodes));
+    }
+    return cloned;
+}
+
+// Atom node: stores the content, the atom flags and whether the atom stands for
+// an optional argument.
+TAstAtomNode::TAstAtomNode(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg)
+ : INode(pos)
+ , Content(content)
+ , Flags(flags)
+ , IsOptionalArg_(isOptionalArg)
+{
+}
+
+TAstAtomNode::~TAstAtomNode()
+{
+}
+
+// An atom is always marked constant.
+void TAstAtomNode::DoUpdateState() const {
+ State.Set(ENodeState::Const);
+}
+
+// Creates the TAstNode atom in the pool of the context.
+TAstNode* TAstAtomNode::Translate(TContext& ctx) const {
+ return TAstNode::NewAtom(Pos, Content, *ctx.Pool, Flags);
+}
+
+// Returns a pointer to the stored content (valid while the node is alive).
+const TString* TAstAtomNode::GetAtomContent() const {
+ return &Content;
+}
+
+bool TAstAtomNode::IsOptionalArg() const {
+ return IsOptionalArg_;
+}
+
+// Wraps an already built TAstNode; the position is taken from that node,
+// so `node` must not be null.
+TAstDirectNode::TAstDirectNode(TAstNode* node)
+ : INode(node->GetPosition())
+ , Node(node)
+{
+}
+
+// Returns the wrapped node unchanged; the context is not used.
+TAstNode* TAstDirectNode::Translate(TContext& ctx) const {
+ Y_UNUSED(ctx);
+ return Node;
+}
+
+// Factory for atom nodes.
+TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg) {
+ return new TAstAtomNodeImpl(pos, content, flags, isOptionalArg);
+}
+
+// Creates an empty list node at the given position.
+TAstListNode::TAstListNode(TPosition pos)
+ : INode(pos)
+{
+}
+
+TAstListNode::~TAstListNode()
+{
+}
+
+// Initializes the children in order and stops at the first one that fails.
+bool TAstListNode::DoInit(TContext& ctx, ISource* src) {
+    return std::all_of(Nodes.begin(), Nodes.end(), [&ctx, src](auto& child) {
+        return child->Init(ctx, src);
+    });
+}
+
+// Translates every child and packs the results into a TAstNode list allocated in
+// the context pool. Returns nullptr when a child fails to translate; an empty
+// child pointer is additionally reported as an error.
+TAstNode* TAstListNode::Translate(TContext& ctx) const {
+    TSmallVec<TAstNode*> translated;
+    translated.reserve(Nodes.size());
+    for (const auto& child : Nodes) {
+        if (!child) {
+            ctx.Error(Pos) << "Translation error: encountered empty TNodePtr";
+            return nullptr;
+        }
+        TAstNode* const childAst = child->Translate(ctx);
+        if (!childAst) {
+            return nullptr;
+        }
+        translated.push_back(childAst);
+    }
+
+    return TAstNode::NewList(Pos, translated.data(), translated.size(), *ctx.Pool);
+}
+
+// Derives the state of a list node from the states of `nodes`:
+//  - Const: every node is constant;
+//  - Aggregated / AggregationKey / OverWindow: at least one node has the state and
+//    every node without it is constant or maybe-constant. A state is written only
+//    when some node influenced it, otherwise the stored value is left as it was;
+//  - MaybeConst: the list is not constant but every node is constant or maybe-constant.
+void TAstListNode::UpdateStateByListNodes(const TVector<TNodePtr>& nodes) const {
+    struct TAttributesFlags {
+        bool has = false;
+        bool all = true;
+    };
+    const std::array<ENodeState, 3> checkStates = {{ENodeState::Aggregated, ENodeState::AggregationKey, ENodeState::OverWindow}};
+    std::map<ENodeState, TAttributesFlags> flags;
+    bool isConst = true;
+    for (const auto& node: nodes) {
+        const bool isNodeConst = node->IsConstant();
+        const bool isNodeMaybeConst = node->MaybeConstant();
+        // Does not depend on the checked state, so it is evaluated once per node
+        // instead of once per (node, state) pair.
+        if (!isNodeConst) {
+            isConst = false;
+        }
+        for (const auto state: checkStates) {
+            if (node->HasState(state)) {
+                flags[state].has = true;
+            } else if (!isNodeConst && !isNodeMaybeConst) {
+                flags[state].all = false;
+            }
+        }
+    }
+    State.Set(ENodeState::Const, isConst);
+    for (const auto& flag: flags) {
+        State.Set(flag.first, flag.second.has && flag.second.all);
+    }
+    State.Set(ENodeState::MaybeConst, !isConst && AllOf(nodes, [](const auto& node) { return node->IsConstant() || node->MaybeConstant(); }));
+}
+
+// The state of a list node is derived from its own children.
+void TAstListNode::DoUpdateState() const {
+ UpdateStateByListNodes(Nodes);
+}
+
+// Visits every child in order; children are dereferenced without a null check.
+void TAstListNode::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const {
+ for (auto& node : Nodes) {
+ node->VisitTree(func, visited);
+ }
+}
+
+// Copy constructor: shares the child pointers (no deep clone) and copies the
+// label and the state. The label position and the implicit-label flag are not
+// copied here.
+TAstListNode::TAstListNode(const TAstListNode& node)
+ : INode(node.Pos)
+ , Nodes(node.Nodes)
+{
+ Label = node.Label;
+ State = node.State;
+}
+
+// Takes ownership of the given children; every element must be non-null.
+TAstListNode::TAstListNode(TPosition pos, TVector<TNodePtr>&& nodes)
+ : INode(pos)
+ , Nodes(std::move(nodes))
+{
+ for (const auto& node: Nodes) {
+ YQL_ENSURE(node, "Null ptr passed as list element");
+ }
+}
+
+// New list node at the same position that shares the same children.
+TNodePtr TAstListNode::ShallowCopy() const {
+ return new TAstListNodeImpl(Pos, Nodes);
+}
+
+// Appends a child; debug builds reject a null child and adding the node to itself.
+void TAstListNode::DoAdd(TNodePtr node) {
+ Y_DEBUG_ABORT_UNLESS(node);
+ Y_DEBUG_ABORT_UNLESS(node.Get() != this);
+ Nodes.push_back(node);
+}
+
+// Empty list node.
+TAstListNodeImpl::TAstListNodeImpl(TPosition pos)
+ : TAstListNode(pos)
+{}
+
+// List node over the given children; every element must be non-null.
+// The by-value vector is swapped into the member after validation.
+TAstListNodeImpl::TAstListNodeImpl(TPosition pos, TVector<TNodePtr> nodes)
+ : TAstListNode(pos)
+{
+ for (const auto& node: nodes) {
+ YQL_ENSURE(node, "Null ptr passed as list element");
+ }
+ Nodes.swap(nodes);
+}
+
+// Delegates the collection to every child in order.
+void TAstListNodeImpl::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+ for (auto& node : Nodes) {
+ node->CollectPreaggregateExprs(ctx, src, exprs);
+ }
+}
+
+// Clone with cloned children at the same position.
+TNodePtr TAstListNodeImpl::DoClone() const {
+ return new TAstListNodeImpl(Pos, CloneContainer(Nodes));
+}
+
+// Call node: stores the operation name, the allowed argument count range and
+// the arguments. Every argument must be non-null. The list content (Nodes) is
+// filled later by DoInit.
+TCallNode::TCallNode(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+ : TAstListNode(pos)
+ , OpName(opName)
+ , MinArgs(minArgs)
+ , MaxArgs(maxArgs)
+ , Args(args)
+{
+ for (const auto& arg: Args) {
+ YQL_ENSURE(arg, "Null ptr passed as call argument");
+ }
+}
+
+// Returns the stored operation name.
+TString TCallNode::GetOpName() const {
+ return OpName;
+}
+
+// Returns the source name shared by all nodes that report one.
+// Nodes without a source name are ignored; nullptr is returned when no node has
+// a source name or when two nodes disagree.
+const TString* DeriveCommonSourceName(const TVector<TNodePtr> &nodes) {
+    const TString* common = nullptr;
+    for (const auto& node: nodes) {
+        const TString* const current = node->GetSourceName();
+        if (current) {
+            if (common && *current != *common) {
+                return nullptr;
+            }
+            common = current;
+        }
+    }
+    return common;
+}
+
+
+// The source name of a call is the one shared by its arguments, if any.
+const TString* TCallNode::GetSourceName() const {
+ return DeriveCommonSourceName(Args);
+}
+
+const TVector<TNodePtr>& TCallNode::GetArgs() const {
+ return Args;
+}
+
+// The state of a call is derived from its arguments (not from Nodes).
+void TCallNode::DoUpdateState() const {
+ UpdateStateByListNodes(Args);
+}
+
+// Human-readable call name for error messages: "<name>()", followed by
+// ", converted to <OpName>()" when GetOpName() returns something other than the
+// stored OpName.
+TString TCallNode::GetCallExplain() const {
+ auto derivedName = GetOpName();
+ TStringBuilder sb;
+ sb << derivedName << "()";
+ if (derivedName != OpName) {
+ sb << ", converted to " << OpName << "()";
+ }
+ return std::move(sb);
+}
+
+// Delegates the collection to every argument in order.
+void TCallNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+ for (auto& arg : Args) {
+ arg->CollectPreaggregateExprs(ctx, src, exprs);
+ }
+}
+
+// Checks the number of arguments against [MinArgs, MaxArgs]; a negative bound
+// disables the corresponding check. Reports the first violated rule and returns false.
+bool TCallNode::ValidateArguments(TContext& ctx) const {
+    const auto given = static_cast<i32>(Args.size());
+    const auto reject = [&](const char* requirement, i32 limit) {
+        ctx.Error(Pos) << GetCallExplain() << requirement << limit << " arguments, given: " << Args.size();
+        return false;
+    };
+
+    const bool hasMin = MinArgs >= 0;
+    const bool hasMax = MaxArgs >= 0;
+    if (hasMin && MaxArgs == MinArgs && given != MinArgs) {
+        return reject(" requires exactly ", MinArgs);
+    }
+    if (hasMin && given < MinArgs) {
+        return reject(" requires at least ", MinArgs);
+    }
+    if (hasMax && given > MaxArgs) {
+        return reject(" requires at most ", MaxArgs);
+    }
+    return true;
+}
+
+// Initializes the call: validates the argument count, initializes every
+// argument, then appends the callable atom followed by the arguments to Nodes.
+// Returns false if validation fails or any argument fails to initialize.
+bool TCallNode::DoInit(TContext& ctx, ISource* src) {
+    if (!ValidateArguments(ctx)) {
+        return false;
+    }
+
+    // Keep going after a failure so that all arguments are initialized.
+    bool hasError = false;
+    for (auto& arg : Args) {
+        if (!arg->Init(ctx, src)) {
+            hasError = true;
+        }
+    }
+    if (hasError) {
+        return false;
+    }
+
+    // std::isalnum has undefined behaviour for negative arguments other than
+    // EOF, so a plain char has to be converted to unsigned char first.
+    const bool isPlainName = std::all_of(OpName.cbegin(), OpName.cend(), [](char c) {
+        return std::isalnum(static_cast<unsigned char>(c)) != 0;
+    });
+    // Names with any non-alphanumeric character are emitted as arbitrary content.
+    Nodes.push_back(BuildAtom(Pos, OpName, isPlainName ? TNodeFlags::Default : TNodeFlags::ArbitraryContent));
+    Nodes.insert(Nodes.end(), Args.begin(), Args.end());
+    return true;
+}
+
+// A call node is its own call node (mutable access).
+TCallNode* TCallNode::GetCallNode() {
+    return this;
+}
+
+// A call node is its own call node (read-only access).
+const TCallNode* TCallNode::GetCallNode() const {
+    return this;
+}
+
+// Call with an explicit allowed argument-count range.
+TCallNodeImpl::TCallNodeImpl(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, minArgs, maxArgs, args)
+{
+}
+
+// Call whose allowed argument count is exactly the number of arguments given.
+TCallNodeImpl::TCallNodeImpl(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, args.size(), args.size(), args)
+{
+}
+
+// Clone keeps position, name and argument-count range; arguments go through CloneContainer.
+TCallNode::TPtr TCallNodeImpl::DoClone() const {
+    auto clonedArgs = CloneContainer(Args);
+    return new TCallNodeImpl(GetPos(), OpName, MinArgs, MaxArgs, clonedArgs);
+}
+
+// A function reference is a call node with no arguments (count range 0..0).
+TFuncNodeImpl::TFuncNodeImpl(TPosition pos, const TString& opName)
+    : TCallNode(pos, opName, 0, 0, {})
+{
+}
+
+// Clone is rebuilt from the position and the name only.
+TCallNode::TPtr TFuncNodeImpl::DoClone() const {
+    return new TFuncNodeImpl(GetPos(), OpName);
+}
+
+// Returns a pointer to the stored name; it stays valid while this node is alive.
+const TString* TFuncNodeImpl::FuncName() const {
+    return &OpName;
+}
+
+// Call with an explicit argument-count range; reqArgsCount is the number of
+// leading arguments that DoInit leaves unwrapped.
+TCallNodeDepArgs::TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, minArgs, maxArgs, args)
+    , ReqArgsCount(reqArgsCount)
+{
+}
+
+// Same, with the allowed argument count fixed to the number of arguments given.
+TCallNodeDepArgs::TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, args.size(), args.size(), args)
+    , ReqArgsCount(reqArgsCount)
+{
+}
+
+// Clone keeps ReqArgsCount, position, name and argument-count range.
+TCallNode::TPtr TCallNodeDepArgs::DoClone() const {
+    auto clonedArgs = CloneContainer(Args);
+    return new TCallNodeDepArgs(ReqArgsCount, GetPos(), OpName, MinArgs, MaxArgs, clonedArgs);
+}
+
+// Runs the base initialization, then wraps every emitted node after the first
+// 1 + ReqArgsCount entries into (DependsOn <node>).
+bool TCallNodeDepArgs::DoInit(TContext& ctx, ISource* src) {
+    const bool baseOk = TCallNode::DoInit(ctx, src);
+    if (!baseOk) {
+        return false;
+    }
+
+    // TCallNode::DoInit appends the callable atom before the arguments, hence the +1.
+    const ui32 firstWrapped = 1 + ReqArgsCount;
+    for (size_t idx = firstWrapped; idx < Nodes.size(); ++idx) {
+        Nodes[idx] = Y("DependsOn", Nodes[idx]);
+    }
+    return true;
+}
+
+// Clones the node together with its argument-count range.
+// The three-argument constructor always sets the range to 0..0, so cloning
+// through it would drop the MinArgs/MaxArgs of a node that was created with an
+// explicit range; the clone would then reject its own arguments in
+// ValidateArguments.
+TCallDirectRow::TPtr TCallDirectRow::DoClone() const {
+    return new TCallDirectRow(Pos, OpName, MinArgs, MaxArgs, CloneContainer(Args));
+}
+
+// Call with an explicit allowed argument-count range.
+TCallDirectRow::TCallDirectRow(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, minArgs, maxArgs, args)
+{
+}
+
+// Call whose allowed argument count is fixed to zero.
+TCallDirectRow::TCallDirectRow(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, 0, 0, args)
+{
+}
+
+// Initializes a call that needs direct access to the current row.
+// Requires a source (and, with CompactNamedExprs, a non-fake one) that is not
+// composite, joined, aggregated, flattened by columns or windowed. On success
+// appends (DependsOn row) after the regular call nodes.
+bool TCallDirectRow::DoInit(TContext& ctx, ISource* src) {
+    const bool noUsableSource = !src || (ctx.CompactNamedExprs && src->IsFake());
+    if (noUsableSource) {
+        ctx.Error(Pos) << "Unable to use function: " << OpName << " without source";
+        return false;
+    }
+
+    const bool unsupportedSource = src->IsCompositeSource()
+        || src->GetJoin()
+        || src->HasAggregations()
+        || src->IsFlattenByColumns()
+        || src->IsOverWindowSource();
+    if (unsupportedSource) {
+        ctx.Error(Pos) << "Failed to use function: " << OpName << " with aggregation, join, flatten by or window functions";
+        return false;
+    }
+
+    if (!TCallNode::DoInit(ctx, src)) {
+        return false;
+    }
+    Nodes.push_back(Y("DependsOn", "row"));
+    return true;
+}
+
+// Marks the node as non-constant.
+void TCallDirectRow::DoUpdateState() const {
+    State.Set(ENodeState::Const, false);
+}
+
+// Marks the node as an over-window expression.
+void TWinAggrEmulation::DoUpdateState() const {
+    State.Set(ENodeState::OverWindow, true);
+}
+
+// Registers the window function with the source and replaces the node content
+// with (Member row '<alias>), where the alias is a generated temporary column.
+// Fails when there is no source, the source is not an over-window source, the
+// source refuses the function, or the base call initialization fails.
+bool TWinAggrEmulation::DoInit(TContext& ctx, ISource* src) {
+    if (!src) {
+        ctx.Error(Pos) << "Unable to use window function " << OpName << " without source";
+        return false;
+    }
+    if (!src->IsOverWindowSource()) {
+        ctx.Error(Pos) << "Failed to use window function " << OpName << " without window specification";
+        return false;
+    }
+    if (!src->AddFuncOverWindow(ctx, this)) {
+        ctx.Error(Pos) << "Failed to use window function " << OpName << " without window specification or in wrong place";
+        return false;
+    }
+
+    FuncAlias = "_yql_" + src->MakeLocalName(OpName);
+    src->AddTmpWindowColumn(FuncAlias);
+
+    const bool baseOk = TCallNode::DoInit(ctx, src);
+    if (!baseOk) {
+        return false;
+    }
+
+    // Drop the nodes produced by the base call; only the column reference stays.
+    Nodes.clear();
+    Add("Member", "row", Q(FuncAlias));
+    return true;
+}
+
+// Builds the quoted pair ('<alias> (<OpName> <type> <args>...)).
+INode::TPtr TWinAggrEmulation::WindowSpecFunc(const TPtr& type) const {
+    auto call = Y(OpName, type);
+    for (const auto& argument : Args) {
+        call = L(call, argument);
+    }
+    return Q(Y(Q(FuncAlias), call));
+}
+
+// FuncAlias starts out equal to the operation name; DoInit later overwrites it.
+TWinAggrEmulation::TWinAggrEmulation(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TCallNode(pos, opName, minArgs, maxArgs, args)
+    , FuncAlias(opName)
+{
+}
+
+// Forwards everything to TWinAggrEmulation.
+TWinRowNumber::TWinRowNumber(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
+{
+}
+
+// Forwards everything to TWinAggrEmulation.
+TWinCumeDist::TWinCumeDist(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
+{
+}
+
+// Validates the user-supplied arguments (none are expected at this point),
+// then appends a single options tuple and runs the common window-function
+// initialization with the argument count fixed to one. The tuple contains
+// ('ansi) when ctx.AnsiCurrentRow is set and is empty otherwise.
+bool TWinCumeDist::DoInit(TContext& ctx, ISource* src) {
+    if (!ValidateArguments(ctx)) {
+        return false;
+    }
+    YQL_ENSURE(Args.size() == 0);
+
+    TVector<TNodePtr> options;
+    if (ctx.AnsiCurrentRow) {
+        options.push_back(BuildTuple(Pos, { BuildQuotedAtom(Pos, "ansi", NYql::TNodeFlags::Default) }));
+    }
+    Args.push_back(BuildTuple(Pos, options));
+
+    // The options tuple is now the one and only argument.
+    MinArgs = MaxArgs = 1;
+    const bool baseOk = TWinAggrEmulation::DoInit(ctx, src);
+    if (!baseOk) {
+        return false;
+    }
+
+    YQL_ENSURE(Args.size() == 1);
+    return true;
+}
+
+// Besides the common window-function state, keeps a fake source that DoInit
+// uses to initialize the first argument.
+TWinNTile::TWinNTile(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
+{
+    FakeSource = BuildFakeSource(pos);
+}
+
+// Initializes the first argument (if any) against the fake source, then
+// performs the common window-function initialization.
+bool TWinNTile::DoInit(TContext& ctx, ISource* src) {
+    if (!Args.empty() && !Args[0]->Init(ctx, FakeSource.Get())) {
+        return false;
+    }
+    return TWinAggrEmulation::DoInit(ctx, src);
+}
+
+// Forwards everything to TWinAggrEmulation.
+TWinLeadLag::TWinLeadLag(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
+{
+}
+
+// Requires the second argument, when present, to be an integer literal, runs
+// the common window-function initialization and finally turns the first
+// argument into a lambda over `row`.
+bool TWinLeadLag::DoInit(TContext& ctx, ISource* src) {
+    const bool hasOffset = Args.size() >= 2;
+    if (hasOffset && !Args[1]->IsIntegerLiteral()) {
+        ctx.Error(Args[1]->GetPos()) << "Expected integer literal as second parameter of " << OpName << "( ) function";
+        return false;
+    }
+
+    if (!TWinAggrEmulation::DoInit(ctx, src)) {
+        return false;
+    }
+
+    if (!Args.empty()) {
+        Args[0] = BuildLambda(Pos, Y("row"), Args[0]);
+    }
+    return true;
+}
+
+// Forwards everything to TWinAggrEmulation.
+TWinRank::TWinRank(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
+    : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
+{
+}
+
+// Initializes every configured parameter value and appends a quoted
+// ('<name> <value>) entry to Nodes for each. Stops at the first value that
+// fails to initialize.
+bool TExternalFunctionConfig::DoInit(TContext& ctx, ISource* src) {
+    for (auto& [name, value] : Config) {
+        auto nameNode = Y(BuildQuotedAtom(Pos, name));
+        if (!value->Init(ctx, src)) {
+            return false;
+        }
+        Nodes.push_back(Q(L(nameNode, value)));
+    }
+    return true;
+}
+
+// Clone holds the same parameter names with values copied through SafeClone.
+INode::TPtr TExternalFunctionConfig::DoClone() const {
+    TFunctionConfig copy;
+    for (auto& entry : Config) {
+        copy[entry.first] = SafeClone(entry.second);
+    }
+    return new TExternalFunctionConfig(GetPos(), copy);
+}
+
+// Initializes a ranking window function.
+// Steps: validate the user arguments, locate the window specification of the
+// source, warn when the window has no ORDER BY, derive the ranking key from
+// ORDER BY when no explicit argument was given, append the options tuple and
+// run the common window-function initialization with exactly two arguments.
+// Finally the key expression is turned into a lambda over `row`.
+bool TWinRank::DoInit(TContext& ctx, ISource* src) {
+    if (!ValidateArguments(ctx)) {
+        return false;
+    }
+
+    if (!src) {
+        ctx.Error(Pos) << "Unable to use window function: " << OpName << " without source";
+        return false;
+    }
+
+    auto windowName = src->GetWindowName();
+    if (!windowName) {
+        ctx.Error(Pos) << "Failed to use window function: " << OpName << " without window";
+        return false;
+    }
+
+    auto windowSpec = src->FindWindowSpecification(ctx, *windowName);
+    if (!windowSpec) {
+        return false;
+    }
+
+    const auto& orderSpec = windowSpec->OrderBy;
+    const bool noExplicitKey = Args.empty();
+    if (orderSpec.empty()) {
+        if (!noExplicitKey) {
+            ctx.Warning(GetPos(), TIssuesIds::YQL_RANK_WITHOUT_ORDER_BY) <<
+                OpName << "(<expression>) is used with unordered window - the result is likely to be undefined";
+        } else {
+            ctx.Warning(GetPos(), TIssuesIds::YQL_RANK_WITHOUT_ORDER_BY) <<
+                OpName << "() is used with unordered window - all rows will be considered equal to each other";
+        }
+    }
+
+    if (noExplicitKey) {
+        // The ranking key defaults to the ORDER BY expressions of the window.
+        for (const auto& sortItem : orderSpec) {
+            Args.push_back(sortItem->Clone()->OrderExpr);
+        }
+
+        // Anything other than a single expression is packed into one tuple.
+        if (Args.size() != 1) {
+            Args = {BuildTuple(GetPos(), Args)};
+        }
+    }
+
+    YQL_ENSURE(Args.size() == 1);
+
+    // Options: 'warnNoAnsi when the mode is not configured, 'ansi when it is
+    // configured and enabled, nothing when it is configured and disabled.
+    TVector<TNodePtr> options;
+    if (!ctx.AnsiRankForNullableKeys.Defined()) {
+        options.push_back(BuildTuple(Pos, { BuildQuotedAtom(Pos, "warnNoAnsi", NYql::TNodeFlags::Default) }));
+    } else if (*ctx.AnsiRankForNullableKeys) {
+        options.push_back(BuildTuple(Pos, { BuildQuotedAtom(Pos, "ansi", NYql::TNodeFlags::Default) }));
+    }
+    Args.push_back(BuildTuple(Pos, options));
+
+    MinArgs = MaxArgs = 2;
+    if (!TWinAggrEmulation::DoInit(ctx, src)) {
+        return false;
+    }
+
+    YQL_ENSURE(Args.size() == 2);
+    Args[0] = BuildLambda(Pos, Y("row"), Args[0]);
+    return true;
+}
+
+// List node of the form (quote <atom>) around a single atom.
+class TQuotedAtomNode: public TAstListNode {
+public:
+    TQuotedAtomNode(TPosition pos, const TString& content, ui32 flags)
+        : TAstListNode(pos)
+    {
+        Add("quote", BuildAtom(pos, content, flags));
+    }
+
+protected:
+    // Copy used by DoClone(): same position, children copied via CloneContainer.
+    TQuotedAtomNode(const TQuotedAtomNode& other)
+        : TAstListNode(other.Pos)
+    {
+        Nodes = CloneContainer(other.Nodes);
+    }
+
+    TPtr DoClone() const final {
+        return new TQuotedAtomNode(*this);
+    }
+};
+
+// Factory for a quoted atom node with the given content and node flags.
+TNodePtr BuildQuotedAtom(TPosition pos, const TString& content, ui32 flags) {
+    return new TQuotedAtomNode(pos, content, flags);
+}
+
+
+// Appends the view part to a table key: '('primary_view) for the primary view,
+// '('view (String '<name>)) for a named view, nothing for an empty description.
+TNodePtr ITableKeys::AddView(TNodePtr key, const TViewDescription& view) {
+    if (view.PrimaryFlag) {
+        return L(key, Q(Y(Q("primary_view"))));
+    }
+    if (view.empty()) {
+        return key;
+    }
+    return L(key, Q(Y(Q("view"), Y("String", BuildQuotedAtom(Pos, view.ViewName)))));
+}
+
+// Returns the generated name "column<N>", where N is the current list size.
+// Unless the set already means "all columns", also records an unnamed entry.
+TString TColumns::AddUnnamed() {
+    TString generatedName = TStringBuilder() << "column" << List.size();
+    if (All) {
+        return generatedName;
+    }
+    HasUnnamed = true;
+    List.emplace_back();
+    NamedColumns.push_back(false);
+    return generatedName;
+}
+
+// Registers a column.
+// A null pointer or "*" switches the set to "all columns" unless countHint is
+// set; in that case, and whenever the set already is "all columns", the All
+// flag is returned. Otherwise the name goes into the artificial or real set
+// and the return value tells whether it was new to that set.
+bool TColumns::Add(const TString* column, bool countHint, bool isArtificial, bool isReliable) {
+    const bool isWildcard = !column || *column == "*";
+    if (isWildcard) {
+        if (!countHint) {
+            SetAll();
+        }
+        return All;
+    }
+    if (All) {
+        return All;
+    }
+
+    // A name ending with '*' is a qualified "all columns" reference.
+    if (column->EndsWith('*')) {
+        QualifiedAll = true;
+    }
+
+    const bool inserted = isArtificial
+        ? Artificial.insert(*column).second
+        : Real.insert(*column).second;
+    if (!isReliable) {
+        HasUnreliable = true;
+    }
+
+    // The ordered list holds each name only once.
+    const bool alreadyListed = std::find(List.begin(), List.end(), *column) != List.end();
+    if (!alreadyListed) {
+        List.push_back(*column);
+        NamedColumns.push_back(true);
+    }
+    return inserted;
+}
+
+// Merges another column set into this one.
+// An "all columns" set makes this set "all columns" as well. Otherwise named
+// columns are re-added as real and/or artificial, and unnamed entries are
+// added only as far as the other set has more of them than this one.
+void TColumns::Merge(const TColumns& columns) {
+    if (columns.All) {
+        SetAll();
+        return;
+    }
+
+    YQL_ENSURE(columns.List.size() == columns.NamedColumns.size());
+    const size_t ownNamed = std::accumulate(NamedColumns.begin(), NamedColumns.end(), 0);
+    size_t ownUnnamed = NamedColumns.size() - ownNamed;
+    size_t seenUnnamed = 0;
+    for (size_t idx = 0; idx < columns.List.size(); ++idx) {
+        auto& name = columns.List[idx];
+        if (!columns.NamedColumns[idx]) {
+            if (++seenUnnamed > ownUnnamed) {
+                AddUnnamed();
+                ++ownUnnamed;
+            }
+            continue;
+        }
+        if (columns.Real.contains(name)) {
+            Add(&name, false, false);
+        }
+        if (columns.Artificial.contains(name)) {
+            Add(&name, false, true);
+        }
+    }
+    HasUnreliable |= columns.HasUnreliable;
+    HasUnnamed |= columns.HasUnnamed;
+}
+
+// Rewrites every stored name (real, artificial and the ordered list) as
+// "<prefix>.<name>". The prefix is expected to be non-empty (debug check only).
+void TColumns::SetPrefix(const TString& prefix) {
+    Y_DEBUG_ABORT_UNLESS(!prefix.empty());
+    const auto qualify = [&prefix](const TString& name) {
+        return prefix + "." + name;
+    };
+
+    TSet<TString> prefixedReal;
+    std::transform(Real.begin(), Real.end(), std::inserter(prefixedReal, prefixedReal.begin()), qualify);
+
+    TSet<TString> prefixedArtificial;
+    std::transform(Artificial.begin(), Artificial.end(), std::inserter(prefixedArtificial, prefixedArtificial.begin()), qualify);
+
+    TVector<TString> prefixedList;
+    std::transform(List.begin(), List.end(), std::back_inserter(prefixedList), qualify);
+
+    Real.swap(prefixedReal);
+    Artificial.swap(prefixedArtificial);
+    List.swap(prefixedList);
+}
+
+// Switches the set to "all columns" and drops everything tracked per column.
+void TColumns::SetAll() {
+    All = true;
+    QualifiedAll = false;
+    HasUnnamed = false;
+    HasUnreliable = false;
+
+    Real.clear();
+    Artificial.clear();
+    List.clear();
+    NamedColumns.clear();
+}
+
+namespace {
+
+// Tells whether a name has the shape "column<digits>" with at least one digit,
+// i.e. the shape of the names produced by TColumns::AddUnnamed().
+bool MaybeAutogenerated(const TString& name) {
+    TStringBuf prefix = "column";
+    if (!name.StartsWith(prefix)) {
+        return false;
+    }
+
+    TString suffix = name.substr(prefix.size());
+    // std::isdigit has undefined behaviour for negative arguments other than
+    // EOF, so a plain char has to be converted to unsigned char first.
+    return !suffix.empty() && AllOf(suffix, [](char c) {
+        return std::isdigit(static_cast<unsigned char>(c)) != 0;
+    });
+}
+
+// Tells whether some stored name, cut after its first '.', equals `column`.
+// Names without a dot never match.
+bool MatchDotSuffix(const TSet<TString>& columns, const TString& column) {
+    for (const auto& candidate : columns) {
+        const auto dotPos = candidate.find('.');
+        if (dotPos != TString::npos && column == candidate.substr(dotPos + 1)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+}
+
+// Tells whether a column with the given name may exist in this set.
+// Accepts: any name for an "all columns" set; exact real/artificial matches;
+// with SimpleColumns, an unqualified name matching the part after the first
+// dot of a stored name; with a qualified wildcard, any name under
+// SimpleColumns, autogenerated-looking names when unnamed columns exist, and
+// names starting with the text before '*' of a stored real name; without a
+// qualified wildcard, autogenerated-looking names when unnamed columns exist.
+bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) const {
+    if (All || Real.contains(name) || Artificial.contains(name)) {
+        return true;
+    }
+
+    if (ctx.SimpleColumns && !name.Contains('.') && (MatchDotSuffix(Real, name) || MatchDotSuffix(Artificial, name))) {
+        return true;
+    }
+
+    if (!QualifiedAll) {
+        return HasUnnamed && MaybeAutogenerated(name);
+    }
+
+    if (ctx.SimpleColumns) {
+        return true;
+    }
+
+    if (HasUnnamed) {
+        // Only the part after the first dot (or the whole name) is inspected.
+        const auto dotPos = name.find_first_of(".");
+        TString tail = (dotPos == TString::npos) ? name : name.substr(dotPos + 1);
+        if (MaybeAutogenerated(tail)) {
+            return true;
+        }
+    }
+
+    for (const auto& real : Real) {
+        const auto starPos = real.find_first_of("*");
+        if (starPos == TString::npos) {
+            continue;
+        }
+        if (name.StartsWith(real.substr(0, starPos))) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Keeps two independent clones of the expression: OrderExpr and CleanOrderExpr
+// (the latter is what Clone() copies from).
+TSortSpecification::TSortSpecification(const TNodePtr& orderExpr, bool ascending)
+    : OrderExpr(orderExpr->Clone())
+    , Ascending(ascending)
+    , CleanOrderExpr(orderExpr->Clone())
+{}
+
+// New specification built from CleanOrderExpr and the same direction.
+TSortSpecificationPtr TSortSpecification::Clone() const {
+    auto copy = MakeIntrusive<TSortSpecification>(CleanOrderExpr, Ascending);
+    return copy;
+}
+
+// Copies position and settings; the bound expression goes through SafeClone.
+TFrameBoundPtr TFrameBound::Clone() const {
+    auto copy = MakeIntrusive<TFrameBound>();
+    copy->Pos = Pos;
+    copy->Settings = Settings;
+    copy->Bound = SafeClone(Bound);
+    return copy;
+}
+
+// Copies the frame type and exclusion and clones both bounds, which must be set.
+TFrameSpecificationPtr TFrameSpecification::Clone() const {
+    YQL_ENSURE(FrameBegin);
+    YQL_ENSURE(FrameEnd);
+
+    auto copy = MakeIntrusive<TFrameSpecification>();
+    copy->FrameType = FrameType;
+    copy->FrameExclusion = FrameExclusion;
+    copy->FrameBegin = FrameBegin->Clone();
+    copy->FrameEnd = FrameEnd->Clone();
+    return copy;
+}
+
+// Copies the plain fields and clones partitions, ordering, session and frame.
+// The frame must be set.
+TWindowSpecificationPtr TWindowSpecification::Clone() const {
+    YQL_ENSURE(Frame);
+
+    auto copy = MakeIntrusive<TWindowSpecification>();
+    copy->ExistingWindowName = ExistingWindowName;
+    copy->IsCompact = IsCompact;
+    copy->Partitions = CloneContainer(Partitions);
+    copy->OrderBy = CloneContainer(OrderBy);
+    copy->Session = SafeClone(Session);
+    copy->Frame = Frame->Clone();
+    return copy;
+}
+
+// Returns a container with the same keys whose window specifications are clones.
+TWinSpecs CloneContainer(const TWinSpecs& specs) {
+    TWinSpecs newSpecs;
+    // Bind by const reference: iterating by value would copy the key and the
+    // pointer of every entry before the clone is made.
+    for (const auto& [name, spec] : specs) {
+        newSpecs.emplace(name, spec->Clone());
+    }
+    return newSpecs;
+}
+
+// Clones all four expressions and copies the DataWatermarks value.
+// NOTE(review): the expressions are dereferenced without a null check, so all
+// of them are assumed to be set here - confirm against the callers.
+TLegacyHoppingWindowSpecPtr TLegacyHoppingWindowSpec::Clone() const {
+    auto copy = MakeIntrusive<TLegacyHoppingWindowSpec>();
+    copy->TimeExtractor = TimeExtractor->Clone();
+    copy->Hop = Hop->Clone();
+    copy->Interval = Interval->Clone();
+    copy->Delay = Delay->Clone();
+    copy->DataWatermarks = DataWatermarks;
+    return copy;
+}
+
+// Column referenced by a literal name.
+TColumnNode::TColumnNode(TPosition pos, const TString& column, const TString& source, bool maybeType)
+    : INode(pos)
+    , ColumnName(column)
+    , Source(source)
+    , MaybeType(maybeType)
+{}
+
+// Column whose name is given by an expression node.
+TColumnNode::TColumnNode(TPosition pos, const TNodePtr& column, const TString& source)
+    : INode(pos)
+    , ColumnExpr(column)
+    , Source(source)
+{}
+
+// Nothing to release explicitly.
+TColumnNode::~TColumnNode() {
+}
+
+// True when the column name is the "*" wildcard.
+bool TColumnNode::IsAsterisk() const {
+    return ColumnName == "*";
+}
+
+// Returns the Artificial flag (set in DoInit for expression aliases).
+bool TColumnNode::IsArtificial() const {
+    return Artificial;
+}
+
+// Effective column name: the source name when it is used as the column,
+// nullptr when the name comes from an expression, the literal name otherwise.
+const TString* TColumnNode::GetColumnName() const {
+    if (UseSourceAsColumn) {
+        return &Source;
+    }
+    if (ColumnExpr) {
+        return nullptr;
+    }
+    return &ColumnName;
+}
+
+// Effective source name: the shared Empty string when the source name is used
+// as the column name, the stored source otherwise. Never returns nullptr.
+const TString* TColumnNode::GetSourceName() const {
+    if (UseSourceAsColumn) {
+        return &Empty;
+    }
+    return &Source;
+}
+
+// A column node is its own column node (mutable access).
+TColumnNode* TColumnNode::GetColumnNode() {
+    return this;
+}
+
+// A column node is its own column node (read-only access).
+const TColumnNode* TColumnNode::GetColumnNode() const {
+    return this;
+}
+
+// Resolves the column against the source (when there is one) and builds the
+// node that Translate() later delegates to.
+// With a source: optionally switches to "source name is the column", applies a
+// GROUP BY alias, registers the column with the source and computes GroupKey.
+// The resulting node is `row` for "*", otherwise a member/column callable over
+// `row` with the column name (or the evaluated name expression) and, when
+// UseSource is set, the source name as an extra argument.
+bool TColumnNode::DoInit(TContext& ctx, ISource* src) {
+    if (src) {
+        // Must not be initialized yet, otherwise the Aggregated state is already invalid.
+        YQL_ENSURE(!State.Test(ENodeState::Initialized));
+        if (src->ShouldUseSourceAsColumn(*GetSourceName())) {
+            if (!IsAsterisk() && IsReliable()) {
+                SetUseSourceAsColumn();
+            }
+        }
+
+        if (GetColumnName()) {
+            auto qualifiedName = Source ? DotJoin(Source, *GetColumnName()) : *GetColumnName();
+            auto alias = src->GetGroupByColumnAlias(qualifiedName);
+            if (alias) {
+                ResetColumn(alias, {});
+            }
+            // Evaluated after a possible reset, so it sees the aliased name.
+            Artificial = !Source && src->IsExprAlias(*GetColumnName());
+        }
+
+        if (!src->AddColumn(ctx, *this)) {
+            return false;
+        }
+
+        if (GetColumnName()) {
+            if (src->GetJoin() && Source) {
+                GroupKey = src->IsGroupByColumn(DotJoin(Source, *GetColumnName()));
+            } else {
+                GroupKey = src->IsGroupByColumn(*GetColumnName()) || src->IsAlias(EExprSeat::GroupBy, *GetColumnName());
+            }
+        }
+    }
+
+    if (IsAsterisk()) {
+        Node = AstNode("row");
+        return Node->Init(ctx, src);
+    }
+
+    // The "plain" callables are used only for reliable columns without an explicit source.
+    const bool plainAccess = Reliable && !UseSource;
+    TString callable;
+    if (MaybeType) {
+        callable = plainAccess ? "SqlPlainColumnOrType" : "SqlColumnOrType";
+    } else {
+        // TODO: consider replacing Member -> SqlPlainColumn
+        callable = plainAccess ? "Member" : "SqlColumn";
+    }
+
+    Node = Y(callable, "row", ColumnExpr ? Y("EvaluateAtom", ColumnExpr) : BuildQuotedAtom(Pos, *GetColumnName()));
+    if (UseSource) {
+        YQL_ENSURE(Source);
+        Node = L(Node, BuildQuotedAtom(Pos, Source));
+    }
+    return Node->Init(ctx, src);
+}
+
+// Makes the source name act as the column name. Only allowed before the node
+// is initialized and never for the "*" wildcard.
+void TColumnNode::SetUseSourceAsColumn() {
+    // Must not be initialized yet, otherwise the Aggregated state is already invalid.
+    YQL_ENSURE(!State.Test(ENodeState::Initialized));
+    YQL_ENSURE(!IsAsterisk());
+    UseSourceAsColumn = true;
+}
+
+// Sets the Reliable flag.
+void TColumnNode::ResetAsReliable() {
+    Reliable = true;
+}
+
+// Clears the Reliable flag.
+void TColumnNode::SetAsNotReliable() {
+    Reliable = false;
+}
+
+// Sets the UseSource flag (DoInit then emits the source name as an extra argument).
+void TColumnNode::SetUseSource() {
+    UseSource = true;
+}
+
+// Returns the UseSourceAsColumn flag.
+bool TColumnNode::IsUseSourceAsColumn() const {
+    return UseSourceAsColumn;
+}
+
+// Returns the UseSource flag.
+bool TColumnNode::IsUseSource() const {
+    return UseSource;
+}
+
+// Returns the Reliable flag.
+bool TColumnNode::IsReliable() const {
+    return Reliable;
+}
+
+// Returns the MaybeType flag given at construction.
+bool TColumnNode::CanBeType() const {
+    return MaybeType;
+}
+
+// Clones a column that has not been initialized yet (Node must be unset).
+// The copy is built from the name expression when there is one, from the
+// literal name otherwise, and then receives all flags.
+TNodePtr TColumnNode::DoClone() const {
+    YQL_ENSURE(!Node, "TColumnNode::Clone: Node should not be initialized");
+
+    TColumnNode* copy = nullptr;
+    if (ColumnExpr) {
+        copy = new TColumnNode(Pos, ColumnExpr, Source);
+    } else {
+        copy = new TColumnNode(Pos, ColumnName, Source, MaybeType);
+    }
+
+    copy->GroupKey = GroupKey;
+    copy->Artificial = Artificial;
+    copy->Reliable = Reliable;
+    copy->UseSource = UseSource;
+    copy->UseSourceAsColumn = UseSourceAsColumn;
+    return copy;
+}
+
+// A column is never Const; MaybeConst mirrors MaybeType, and both Aggregated
+// and AggregationKey mirror GroupKey.
+void TColumnNode::DoUpdateState() const {
+    State.Set(ENodeState::Const, false);
+    State.Set(ENodeState::MaybeConst, MaybeType);
+
+    State.Set(ENodeState::Aggregated, GroupKey);
+    State.Set(ENodeState::AggregationKey, GroupKey);
+}
+
+// Delegates to the node built by DoInit.
+TAstNode* TColumnNode::Translate(TContext& ctx) const {
+    return Node->Translate(ctx);
+}
+
+// Re-targets the node to a literal column name and resets the flags to their
+// "reliable, no source usage" values. Only allowed before initialization.
+void TColumnNode::ResetColumn(const TString& column, const TString& source) {
+    YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// must not be initialized yet
+    ColumnName = column;
+    ColumnExpr = nullptr;
+    Source = source;
+    Reliable = true;
+    UseSource = false;
+    UseSourceAsColumn = false;
+}
+
+// Re-targets the node to a name expression and resets the flags to their
+// "reliable, no source usage" values. Only allowed before initialization.
+void TColumnNode::ResetColumn(const TNodePtr& column, const TString& source) {
+    YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// must not be initialized yet
+    ColumnName = "";
+    ColumnExpr = column;
+    Source = source;
+    Reliable = true;
+    UseSource = false;
+    UseSourceAsColumn = false;
+}
+
+// Shared empty string; GetSourceName() returns its address when the source name is used as the column.
+const TString TColumnNode::Empty;
+
+// Column with a literal name; such a column is never treated as a possible type.
+TNodePtr BuildColumn(TPosition pos, const TString& column, const TString& source) {
+    const bool maybeType = false;
+    return new TColumnNode(pos, column, source, maybeType);
+}
+
+// Column whose name is produced by an expression node.
+TNodePtr BuildColumn(TPosition pos, const TNodePtr& column, const TString& source) {
+    return new TColumnNode(pos, column, source);
+}
+
+// Column from a deferred atom: uses the literal when there is one, otherwise
+// the node built from the atom.
+TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source) {
+    if (column.GetLiteral()) {
+        return BuildColumn(pos, *column.GetLiteral(), source);
+    }
+    return BuildColumn(pos, column.Build(), source);
+}
+
+// Column without a source that may also turn out to be a type name.
+TNodePtr BuildColumnOrType(TPosition pos, const TString& column) {
+    const TString noSource = "";
+    const bool maybeType = true;
+    return new TColumnNode(pos, column, noSource, maybeType);
+}
+
+// Only records the position.
+ITableKeys::ITableKeys(TPosition pos)
+    : INode(pos)
+{}
+
+// Base implementation: no table name is available.
+const TString* ITableKeys::GetTableName() const {
+    return nullptr;
+}
+
+// A table-keys node is its own table-keys node.
+ITableKeys* ITableKeys::GetTableKeys() {
+    return this;
+}
+
+// Not meant to be called: aborts in debug builds, yields nullptr otherwise.
+TAstNode* ITableKeys::Translate(TContext& ctx) const {
+    Y_UNUSED(ctx);
+    Y_DEBUG_ABORT_UNLESS(false);
+    return nullptr;
+}
+
+// An aggregation is distinct when it has a non-empty distinct key.
+bool IAggregation::IsDistinct() const {
+    const bool hasDistinctKey = !DistinctKey.empty();
+    return hasDistinctKey;
+}
+
+// Maps the aggregation mode onto the corresponding node state flags.
+void IAggregation::DoUpdateState() const {
+    const bool normal = AggMode == EAggregateMode::Normal;
+    const bool overWindow = AggMode == EAggregateMode::OverWindow;
+    const bool overWindowDistinct = AggMode == EAggregateMode::OverWindowDistinct;
+
+    State.Set(ENodeState::Aggregated, normal);
+    State.Set(ENodeState::OverWindow, overWindow);
+    State.Set(ENodeState::OverWindowDistinct, overWindowDistinct);
+}
+
+// Base implementation: there is no generic key.
+const TString* IAggregation::GetGenericKey() const {
+    return nullptr;
+}
+
+// Base implementation must never be reached; it always fails the check.
+void IAggregation::Join(IAggregation*) {
+    YQL_ENSURE(false, "Should not be called");
+}
+
+// Name given at construction.
+const TString& IAggregation::GetName() const {
+    return Name;
+}
+
+// Aggregation mode given at construction.
+EAggregateMode IAggregation::GetAggregationMode() const {
+    return AggMode;
+}
+
+// Sets the IsGeneratedKeyColumn flag.
+void IAggregation::MarkKeyColumnAsGenerated() {
+    IsGeneratedKeyColumn = true;
+}
+
+// Stores the result name, the function name and the aggregation mode.
+IAggregation::IAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode)
+    : INode(pos)
+    , Name(name)
+    , Func(func)
+    , AggMode(aggMode)
+{
+}
+
+// Not meant to be called: aborts in debug builds, yields nullptr otherwise.
+TAstNode* IAggregation::Translate(TContext& ctx) const {
+    Y_UNUSED(ctx);
+    Y_DEBUG_ABORT_UNLESS(false);
+    return nullptr;
+}
+
+// Builds the quoted traits entry ('<name> <apply> ['<distinct key>]).
+// For DISTINCT aggregations the apply node is built over the list type of the
+// distinct key member, and the key is appended to the entry.
+// Returns {nullptr, false} when the apply node or its over-state wrapper
+// cannot be built.
+std::pair<TNodePtr, bool> IAggregation::AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const {
+    const bool distinct = AggMode == EAggregateMode::Distinct;
+    const auto listType = distinct ? Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type;
+
+    auto apply = GetApply(listType, many, allowAggApply, ctx);
+    if (!apply) {
+        return { nullptr, false };
+    }
+
+    auto wrapped = WrapIfOverState(apply, overState, many, ctx);
+    if (!wrapped) {
+        return { nullptr, false };
+    }
+
+    if (distinct) {
+        return { Q(Y(Q(Name), wrapped, BuildQuotedAtom(Pos, DistinctKey))), true };
+    }
+    return { Q(Y(Q(Name), wrapped)), true };
+}
+
+// Without overState the input is returned untouched. Otherwise it is wrapped
+// as (AggOverState <extractor> <lambda returning input>); nullptr is returned
+// when the extractor cannot be built.
+TNodePtr IAggregation::WrapIfOverState(const TNodePtr& input, bool overState, bool many, TContext& ctx) const {
+    if (overState) {
+        auto extractor = GetExtractor(many, ctx);
+        if (!extractor) {
+            return nullptr;
+        }
+        return Y(ToString("AggOverState"), extractor, BuildLambda(Pos, Y(), input));
+    }
+    return input;
+}
+
+// Base implementation adds nothing.
+void IAggregation::AddFactoryArguments(TNodePtr& apply) const {
+    Y_UNUSED(apply);
+}
+
+// Base implementation: a single index, 0.
+std::vector<ui32> IAggregation::GetFactoryColumnIndices() const {
+    return std::vector<ui32>{0u};
+}
+
+// Builds the quoted window traits entry ('<name> <apply> ['<distinct key>]).
+// Only valid for the OverWindow and OverWindowDistinct modes.
+// NOTE(review): unlike AggregationTraits, the result of GetApply is used
+// without a null check - confirm that it cannot fail on this path.
+TNodePtr IAggregation::WindowTraits(const TNodePtr& type, TContext& ctx) const {
+    YQL_ENSURE(AggMode == EAggregateMode::OverWindow || AggMode == EAggregateMode::OverWindowDistinct, "Windows traits is unavailable");
+
+    const bool distinct = AggMode == EAggregateMode::OverWindowDistinct;
+    const auto listType = distinct ? Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type;
+
+    auto traits = Y(Q(Name), GetApply(listType, false, false, ctx));
+    if (distinct) {
+        traits->Add(BuildQuotedAtom(Pos, DistinctKey));
+    }
+    return Q(traits);
+}
+
+namespace {
+// Unescapes a quoted literal: skips the first character of `str`, then lets
+// UnescapeArbitraryAtom write the content up to `quoteChar` into `result`.
+// On failure `error` receives the textual reason and a TTextWalker bound to
+// `pos` is advanced over the bytes read so far (presumably moving `pos` to the
+// failure location - confirm against TTextWalker).
+bool UnescapeQuoted(const TString& str, TPosition& pos, char quoteChar, TString& result, TString& error, bool utf8Aware) {
+    result = error = {};
+    result.reserve(str.size());
+
+    TStringBuf atom(str);
+    atom.Skip(1);
+
+    size_t readBytes = 0;
+    TStringOutput sout(result);
+    const auto status = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes);
+    if (status == EUnescapeResult::OK) {
+        return true;
+    }
+
+    TTextWalker walker(pos, utf8Aware);
+    walker.Advance(atom.Trunc(readBytes));
+    error = UnescapeResultToString(status);
+    return false;
+}
+
+// Strips the surrounding quotes (' or ") and collapses every doubled quote
+// character inside into a single one. The input must be at least two
+// characters long and start and end with the same quote character.
+TString UnescapeAnsiQuoted(const TString& str) {
+    YQL_ENSURE(str.length() >= 2);
+    YQL_ENSURE(str[0] == str[str.length() - 1]);
+    YQL_ENSURE(str[0] == '\'' || str[0] == '"');
+
+    const char quoteChar = str[0];
+    TString single(1, quoteChar);
+    TString doubled(2, quoteChar);
+
+    TString content = str.substr(1, str.length() - 2);
+    SubstGlobal(content, doubled, single);
+    return content;
+}
+
+// Selects how StringContentInternal interprets its input.
+enum class EStringContentMode : int {
+    Default = 0,        // plain literal; a type suffix is rejected
+    AnsiIdent,          // double quoted identifier with doubled-quote escaping
+    TypedStringLiteral, // literal that may carry a type suffix
+};
+
+// Parses the text of a string-like token into content, node flags and type.
+//  - AnsiIdent: the input must be a double quoted identifier; it is unescaped
+//    with UnescapeAnsiQuoted and returned right away.
+//  - TypedStringLiteral: a trailing type suffix (y, j, p, pt, pb, pv, s, u,
+//    case-insensitive) is removed and mapped to a data slot or a Pg type;
+//    without a suffix an optional warning is issued and UnicodeLiterals
+//    selects Utf8.
+//  - Default: any type other than String, or any Pg type, is an error.
+// The remaining text must be a '...' / "..." literal or an @@...@@ multiline
+// literal. Returns an empty TMaybe after reporting an error.
+TMaybe<TStringContent>
+StringContentInternal(TContext& ctx, TPosition pos, const TString& input, EStringContentMode mode) {
+    TStringContent result;
+    if (mode == EStringContentMode::AnsiIdent) {
+        const bool isQuotedIdent = input.size() >= 2 && input.StartsWith('"') && input.EndsWith('"');
+        if (!isQuotedIdent) {
+            ctx.Error(pos) << "Expected double quoted identifier, got string literal";
+            return {};
+        }
+
+        result.Flags = NYql::TNodeFlags::ArbitraryContent;
+        result.Content = UnescapeAnsiQuoted(input);
+        return result;
+    }
+
+    TString str = input;
+    if (mode == EStringContentMode::TypedStringLiteral) {
+        auto lower = to_lower(str);
+        // Removes the last `len` characters (the type suffix) from the literal text.
+        const auto dropSuffix = [&str](size_t len) {
+            str = str.substr(0, str.size() - len);
+        };
+
+        if (lower.EndsWith("y")) {
+            dropSuffix(1);
+            result.Type = NKikimr::NUdf::EDataSlot::Yson;
+        } else if (lower.EndsWith("j")) {
+            dropSuffix(1);
+            result.Type = NKikimr::NUdf::EDataSlot::Json;
+        } else if (lower.EndsWith("p")) {
+            dropSuffix(1);
+            result.PgType = "PgText";
+        } else if (lower.EndsWith("pt")) {
+            dropSuffix(2);
+            result.PgType = "PgText";
+        } else if (lower.EndsWith("pb")) {
+            dropSuffix(2);
+            result.PgType = "PgBytea";
+        } else if (lower.EndsWith("pv")) {
+            dropSuffix(2);
+            result.PgType = "PgVarchar";
+        } else if (lower.EndsWith("s")) {
+            dropSuffix(1);
+            result.Type = NKikimr::NUdf::EDataSlot::String;
+        } else if (lower.EndsWith("u")) {
+            dropSuffix(1);
+            result.Type = NKikimr::NUdf::EDataSlot::Utf8;
+        } else {
+            if (ctx.Scoped->WarnUntypedStringLiterals) {
+                ctx.Warning(pos, TIssuesIds::YQL_UNTYPED_STRING_LITERALS)
+                    << "Please add suffix u for Utf8 strings or s for arbitrary binary strings";
+            }
+
+            if (ctx.Scoped->UnicodeLiterals) {
+                result.Type = NKikimr::NUdf::EDataSlot::Utf8;
+            }
+        }
+    }
+
+    if (mode == EStringContentMode::Default && (result.Type != NKikimr::NUdf::EDataSlot::String || result.PgType)) {
+        ctx.Error(pos) << "Type suffix is not allowed here";
+        return {};
+    }
+
+    const bool doubleQuoted = (str.StartsWith('"') && str.EndsWith('"'));
+    const bool singleQuoted = !doubleQuoted && (str.StartsWith('\'') && str.EndsWith('\''));
+    const bool isQuoted = str.size() >= 2 && (doubleQuoted || singleQuoted);
+    const bool isMultiline = str.size() >= 4 && str.StartsWith("@@") && str.EndsWith("@@");
+
+    if (isQuoted) {
+        result.Flags = NYql::TNodeFlags::ArbitraryContent;
+        if (ctx.Settings.AnsiLexer) {
+            YQL_ENSURE(singleQuoted);
+            result.Content = UnescapeAnsiQuoted(str);
+        } else {
+            TString error;
+            if (!UnescapeQuoted(str, pos, str[0], result.Content, error, ctx.Settings.Antlr4Parser)) {
+                ctx.Error(pos) << "Failed to parse string literal: " << error;
+                return {};
+            }
+        }
+    } else if (isMultiline) {
+        result.Flags = TNodeFlags::MultilineContent;
+        // Inside @@...@@ a doubled marker stands for a single one.
+        TString body = str.substr(2, str.length() - 4);
+        SubstGlobal(body, "@@@@", "@@");
+        result.Content.swap(body);
+    } else {
+        ctx.Error(pos) << "Invalid string literal: " << EscapeC(str);
+        return {};
+    }
+
+    if (!result.PgType.Defined() && !NKikimr::NMiniKQL::IsValidStringValue(result.Type, result.Content)) {
+        ctx.Error() << "Invalid value " << result.Content.Quote() << " for type " << result.Type;
+        return {};
+    }
+
+    return result;
+}
+} // namespace
+
+// Parses a string literal in Default mode. With ANSI quoted identifiers
+// enabled, a token starting with a double quote is rejected because it is an
+// identifier rather than a string.
+TMaybe<TStringContent> StringContent(TContext& ctx, TPosition pos, const TString& input) {
+    const bool looksLikeIdentifier = ctx.AnsiQuotedIdentifiers && input.StartsWith('"');
+    if (looksLikeIdentifier) {
+        ctx.Error() << "Expected string literal, got quoted identifier";
+        return {};
+    }
+
+    return StringContentInternal(ctx, pos, input, EStringContentMode::Default);
+}
+
+// Parses the token as an ANSI quoted identifier when that mode is enabled and
+// the token starts with a double quote; as a plain string literal otherwise.
+TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, const TString& input) {
+    const bool asIdentifier = ctx.AnsiQuotedIdentifiers && input.StartsWith('"');
+    const auto mode = asIdentifier ? EStringContentMode::AnsiIdent : EStringContentMode::Default;
+    return StringContentInternal(ctx, pos, input, mode);
+}
+
+// Stores the TTL column, the expression and the optional column unit as given.
+TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit)
+    : ColumnName(columnName)
+    , Expr(expr)
+    , ColumnUnit(columnUnit)
+{}
+
+// Returns the content of an identifier token.
+// A token that does not start with a backtick is returned unchanged. A
+// backtick-quoted token must also end with a backtick; its content is
+// unescaped with UnescapeArbitraryAtom. On any error a message is reported and
+// an empty string is returned.
+TString IdContent(TContext& ctx, const TString& s) {
+    YQL_ENSURE(!s.empty(), "Empty identifier not expected");
+    if (!s.StartsWith('`')) {
+        return s;
+    }
+
+    const auto endSym = '`';
+    const bool properlyClosed = s.size() >= 2 && s.EndsWith(endSym);
+    if (!properlyClosed) {
+        ctx.Error() << "The identifier that starts with: '" << s[0] << "' should ends with: '" << endSym << "'";
+        return {};
+    }
+
+    // The atom starts after the opening quote and still includes the closing one.
+    const size_t skipSymbols = 1;
+    TStringBuf atom(s.data() + skipSymbols, s.size() - 2 * skipSymbols + 1);
+
+    TString unescaped;
+    unescaped.reserve(s.size());
+    TStringOutput sout(unescaped);
+
+    TPosition pos = ctx.Pos();
+    pos.Column += skipSymbols - 1;
+
+    size_t readBytes = 0;
+    const auto status = UnescapeArbitraryAtom(atom, endSym, &sout, &readBytes);
+    if (status != EUnescapeResult::OK) {
+        TTextWalker walker(pos, ctx.Settings.Antlr4Parser);
+        walker.Advance(atom.Trunc(readBytes));
+        ctx.Error(pos) << "Cannot parse broken identifier: " << UnescapeResultToString(status);
+        return {};
+    }
+
+    if (readBytes != atom.size()) {
+        ctx.Error() << "The identifier not parsed completely";
+        return {};
+    }
+
+    return unescaped;
+}
+
+// Interprets a string token as an ANSI quoted identifier. Allowed only when
+// ANSI quoted identifiers are enabled; returns an empty string after an error.
+TString IdContentFromString(TContext& ctx, const TString& str) {
+    if (!ctx.AnsiQuotedIdentifiers) {
+        ctx.Error() << "String literal can not be used here";
+        return {};
+    }
+
+    auto parsed = StringContentInternal(ctx, ctx.Pos(), str, EStringContentMode::AnsiIdent);
+    if (parsed) {
+        return parsed->Content;
+    }
+    return {};
+}
+
+
+namespace {
+// Placeholder node for a literal that could not be built: initialization
+// always fails and translation yields nothing.
+class TInvalidLiteralNode final: public INode {
+public:
+    TInvalidLiteralNode(TPosition pos)
+        : INode(pos)
+    {}
+
+    // Always fails.
+    bool DoInit(TContext& ctx, ISource* source) override {
+        Y_UNUSED(ctx);
+        Y_UNUSED(source);
+        return false;
+    }
+
+    // There is nothing to translate.
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_UNUSED(ctx);
+        return nullptr;
+    }
+
+    TPtr DoClone() const override {
+        return new TInvalidLiteralNode(GetPos());
+    }
+};
+
+}
+
+TLiteralNode::TLiteralNode(TPosition pos, bool isNull)
+ : TAstListNode(pos)
+ , Null(isNull)
+ , Void(!isNull)
+{
+ Add(isNull ? "Null" : "Void");
+}
+
+TLiteralNode::TLiteralNode(TPosition pos, const TString& type, const TString& value)
+ : TAstListNode(pos)
+ , Null(false)
+ , Void(false)
+ , Type(type)
+ , Value(value)
+{
+ if (Type.StartsWith("Pg")) {
+ Add("PgConst", BuildQuotedAtom(Pos, Value), Y("PgType", Q(to_lower(Type.substr(2)))));
+ } else {
+ Add(Type, BuildQuotedAtom(Pos, Value));
+ }
+}
+
+// Builds a "String" literal whose value atom carries the given node flags.
+TLiteralNode::TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags)
+    : TAstListNode(pos)
+    , Null(false)
+    , Void(false)
+    , Type("String")
+    , Value(value)
+{
+    auto valueAtom = BuildQuotedAtom(pos, Value, nodeFlags);
+    Add(Type, valueAtom);
+}
+
+// Builds a typed literal whose value atom carries explicit node flags.
+// "Pg"-prefixed types are wrapped into PgConst/PgType, other types are emitted as (<Type> value).
+TLiteralNode::TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags, const TString& type)
+    : TAstListNode(pos)
+    , Null(false)
+    , Void(false)
+    , Type(type)
+    , Value(value)
+{
+    const bool isPg = Type.StartsWith("Pg");
+    if (!isPg) {
+        Add(Type, BuildQuotedAtom(pos, Value, nodeFlags));
+        return;
+    }
+    Add("PgConst", BuildQuotedAtom(Pos, Value, nodeFlags), Y("PgType", Q(to_lower(Type.substr(2)))));
+}
+
+// True only for the literal created with isNull == true.
+bool TLiteralNode::IsNull() const {
+    return Null;
+}
+
+// Returns the stored value when the literal has exactly the requested type, nullptr otherwise.
+const TString* TLiteralNode::GetLiteral(const TString& type) const {
+    if (type == Type) {
+        return &Value;
+    }
+    return nullptr;
+}
+
+bool TLiteralNode::IsLiteral() const {
+    return true;
+}
+
+// Type name given at construction (empty for Null/Void literals).
+TString TLiteralNode::GetLiteralType() const {
+    return Type;
+}
+
+// Textual value given at construction (empty for Null/Void literals).
+TString TLiteralNode::GetLiteralValue() const {
+    return Value;
+}
+
+// A literal is always marked as a constant.
+void TLiteralNode::DoUpdateState() const {
+    State.Set(ENodeState::Const);
+}
+
+// Re-creates the literal through the matching constructor and then copies
+// the already built child node list into the clone.
+TNodePtr TLiteralNode::DoClone() const {
+    const bool untyped = Null || Void;
+    auto copy = untyped ? MakeIntrusive<TLiteralNode>(Pos, Null) : MakeIntrusive<TLiteralNode>(Pos, Type, Value);
+    copy->Nodes = Nodes;
+    return copy;
+}
+
+// Numeric literal of C++ type T; implicitType is remembered for ApplyUnaryOp.
+template<typename T>
+TLiteralNumberNode<T>::TLiteralNumberNode(TPosition pos, const TString& type, const TString& value, bool implicitType)
+    : TLiteralNode(pos, type, value)
+    , ImplicitType(implicitType)
+{
+}
+
+// The clone keeps position, type name, textual value and the implicit-type flag.
+template<typename T>
+TNodePtr TLiteralNumberNode<T>::DoClone() const {
+    TNodePtr copy = new TLiteralNumberNode<T>(Pos, Type, Value, ImplicitType);
+    return copy;
+}
+
+// Checks that the textual value can be parsed as T.
+// On failure an error is reported at the literal position and false is returned.
+// The template is also instantiated for float and double, so the diagnostic names
+// the literal kind according to T instead of always saying "integer".
+template<typename T>
+bool TLiteralNumberNode<T>::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    T val;
+    if (!TryFromString(Value, val)) {
+        // For integer T the message is unchanged; only the float/double wording differs.
+        const char* literalKind = std::numeric_limits<T>::is_integer ? "integer" : "floating point";
+        ctx.Error(Pos) << "Failed to parse " << Value << " as " << literalKind << " literal of " << Type << " type: value out of range for " << Type;
+        return false;
+    }
+    return true;
+}
+
+// True when T is an integer type according to std::numeric_limits.
+template<typename T>
+bool TLiteralNumberNode<T>::IsIntegerLiteral() const {
+    constexpr bool isInteger = std::numeric_limits<T>::is_integer;
+    return isInteger;
+}
+
+// Folds unary "Minus" applied to a non-negative integer literal into a new literal.
+//  - implicitly typed literal: the negated value becomes Int32 (PgInt4) if it fits,
+//    otherwise Int64 (PgInt8); if it fits neither, an error is reported and an empty pointer returned;
+//  - explicitly typed signed literal: same type, value prefixed with '-';
+//  - everything else is delegated to INode::ApplyUnaryOp.
+template<typename T>
+TNodePtr TLiteralNumberNode<T>::ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const {
+    YQL_ENSURE(!Value.empty());
+    const bool foldable = opName == "Minus" && IsIntegerLiteral() && Value[0] != '-';
+    if (!foldable) {
+        return INode::ApplyUnaryOp(ctx, pos, opName);
+    }
+
+    if (!ImplicitType) {
+        if (std::numeric_limits<T>::is_signed) {
+            return new TLiteralNumberNode<T>(pos, Type, "-" + Value);
+        }
+        return INode::ApplyUnaryOp(ctx, pos, opName);
+    }
+
+    const ui64 magnitude = FromString<ui64>(Value);
+    const TString negated = "-" + Value;
+    const bool isPg = Type.StartsWith("Pg");
+
+    // |min(i32)| == max(i32) + 1, hence the "+ 1" in both range checks.
+    if (magnitude <= ui64(std::numeric_limits<i32>::max()) + 1) {
+        i32 parsed32;
+        YQL_ENSURE(TryFromString(negated, parsed32));
+        return new TLiteralNumberNode<i32>(pos, isPg ? "PgInt4" : "Int32", negated);
+    }
+    if (magnitude <= ui64(std::numeric_limits<i64>::max()) + 1) {
+        i64 parsed64;
+        YQL_ENSURE(TryFromString(negated, parsed64));
+        return new TLiteralNumberNode<i64>(pos, isPg ? "PgInt8" : "Int64", negated);
+    }
+
+    ctx.Error(pos) << "Failed to parse negative integer: " << negated << ", number limit overflow";
+    return {};
+}
+
+
+// Explicit instantiations of TLiteralNumberNode for every supported numeric type.
+// Signed integers:
+template class TLiteralNumberNode<i8>;
+template class TLiteralNumberNode<i16>;
+template class TLiteralNumberNode<i32>;
+template class TLiteralNumberNode<i64>;
+// Unsigned integers:
+template class TLiteralNumberNode<ui8>;
+template class TLiteralNumberNode<ui16>;
+template class TLiteralNumberNode<ui32>;
+template class TLiteralNumberNode<ui64>;
+// Floating point:
+template class TLiteralNumberNode<float>;
+template class TLiteralNumberNode<double>;
+
+// Creates the Null literal node.
+TNodePtr BuildLiteralNull(TPosition pos) {
+    const bool isNull = true;
+    return new TLiteralNode(pos, isNull);
+}
+
+// Creates the Void literal node.
+TNodePtr BuildLiteralVoid(TPosition pos) {
+    const bool isNull = false;
+    return new TLiteralNode(pos, isNull);
+}
+
+// Builds a String literal from a raw string token.
+// When the token can not be unescaped, a TInvalidLiteralNode is returned instead.
+TNodePtr BuildLiteralSmartString(TContext& ctx, const TString& value) {
+    auto content = StringContent(ctx, ctx.Pos(), value);
+    if (!content) {
+        return new TInvalidLiteralNode(ctx.Pos());
+    }
+
+    // This entry point only supports literals of the String data slot.
+    YQL_ENSURE(content->Type == NKikimr::NUdf::EDataSlot::String);
+    TNodePtr literal = new TLiteralNode(ctx.Pos(), content->Content, content->Flags, "String");
+    return literal;
+}
+
+// Turns a string token either into an identifier (a double-quoted token while
+// ctx.AnsiQuotedIdentifiers is on) or into a typed literal expression.
+// Returns an empty TMaybe when the token can not be parsed.
+TMaybe<TExprOrIdent> BuildLiteralTypedSmartStringOrId(TContext& ctx, const TString& value) {
+    const bool asIdent = ctx.AnsiQuotedIdentifiers && value.StartsWith('"');
+    const auto mode = asIdent ? EStringContentMode::AnsiIdent : EStringContentMode::TypedStringLiteral;
+
+    auto content = StringContentInternal(ctx, ctx.Pos(), value, mode);
+    if (!content) {
+        return {};
+    }
+
+    TExprOrIdent result;
+    if (asIdent) {
+        result.Ident = content->Content;
+        return result;
+    }
+
+    // A pg type name, when present, takes precedence over the data slot name.
+    TString type = content->PgType ? *content->PgType : ToString(content->Type);
+    result.Expr = new TLiteralNode(ctx.Pos(), content->Content, content->Flags, type);
+    return result;
+}
+
+
+// Creates a literal of type Utf8 or String holding `value`.
+TNodePtr BuildLiteralRawString(TPosition pos, const TString& value, bool isUtf8) {
+    const char* typeName = isUtf8 ? "Utf8" : "String";
+    return new TLiteralNode(pos, typeName, value);
+}
+
+// Creates a Bool literal with the textual value "true" or "false".
+TNodePtr BuildLiteralBool(TPosition pos, bool value) {
+    const char* text = value ? "true" : "false";
+    return new TLiteralNode(pos, "Bool", text);
+}
+
+// Node representing a bare `*`.
+TAsteriskNode::TAsteriskNode(TPosition pos)
+    : INode(pos)
+{}
+
+// The stray ';' that followed this definition (an empty declaration) has been removed.
+bool TAsteriskNode::IsAsterisk() const {
+    return true;
+}
+
+TNodePtr TAsteriskNode::DoClone() const {
+    return new TAsteriskNode(Pos);
+}
+
+// An asterisk has no translation of its own: reports an error and returns nullptr.
+TAstNode* TAsteriskNode::Translate(TContext& ctx) const {
+    ctx.Error(Pos) << "* is not allowed here";
+    return nullptr;
+}
+
+// Builds a lambda with a single parameter "x" whose body is that same parameter.
+TNodePtr BuildEmptyAction(TPosition pos) {
+    TNodePtr argument = new TAstAtomNodeImpl(pos, "x", TNodeFlags::Default);
+    TNodePtr paramList = new TAstListNodeImpl(pos);
+    paramList->Add(argument);
+    return BuildLambda(pos, paramList, argument);
+}
+
+// Empty atom: no node, no explicit value, empty representation.
+TDeferredAtom::TDeferredAtom()
+{
+}
+
+// Atom with a value known up front: it is stored both as the explicit literal
+// and as the representation, and a quoted atom node is built for it.
+TDeferredAtom::TDeferredAtom(TPosition pos, const TString& str)
+{
+    Explicit = str;
+    Repr = str;
+    Node = BuildQuotedAtom(pos, str);
+}
+
+// Atom backed by an arbitrary node: there is no explicit literal and the
+// representation is a name obtained from ctx.MakeName.
+TDeferredAtom::TDeferredAtom(TNodePtr node, TContext& ctx)
+{
+    Repr = ctx.MakeName("DeferredAtom");
+    Node = node;
+}
+
+// Pointer to the explicit literal value, as returned by Explicit.Get().
+const TString* TDeferredAtom::GetLiteral() const {
+    return Explicit.Get();
+}
+
+// Copies the explicit literal into `value`. When there is none, reports
+// "Expected literal value" at the node position (or at ctx.Pos() without a node)
+// and returns false.
+bool TDeferredAtom::GetLiteral(TString& value, TContext& ctx) const {
+    if (!Explicit) {
+        ctx.Error(Node ? Node->GetPos() : ctx.Pos()) << "Expected literal value";
+        return false;
+    }
+
+    value = *Explicit;
+    return true;
+}
+
+// Returns the underlying node (may be empty).
+TNodePtr TDeferredAtom::Build() const {
+    return Node;
+}
+
+// Returns the textual representation of the atom.
+TString TDeferredAtom::GetRepr() const {
+    return Repr;
+}
+
+// The atom is empty when it has no node or its representation is empty.
+bool TDeferredAtom::Empty() const {
+    if (!Node) {
+        return true;
+    }
+    return Repr.empty();
+}
+
+bool TDeferredAtom::HasNode() const {
+    return Node.Get() != nullptr;
+}
+
+// Tuple expression node holding its element expressions.
+TTupleNode::TTupleNode(TPosition pos, const TVector<TNodePtr>& exprs)
+    : TAstListNode(pos)
+    , Exprs(exprs)
+{
+}
+
+// True when the tuple has no elements.
+bool TTupleNode::IsEmpty() const {
+    return Exprs.empty();
+}
+
+// Read-only access to the element expressions.
+const TVector<TNodePtr>& TTupleNode::Elements() const {
+    return Exprs;
+}
+
+// Self-casts (mutable and const flavours).
+TTupleNode* TTupleNode::GetTupleNode() {
+    return this;
+}
+
+const TTupleNode* TTupleNode::GetTupleNode() const {
+    return this;
+}
+
+// Emits (quote (<elements...>)) and then runs the base list initialization.
+// Fails with an error as soon as an element carrying a label is met.
+bool TTupleNode::DoInit(TContext& ctx, ISource* src) {
+    auto items = Y();
+    for (auto& element : Exprs) {
+        if (element->GetLabel()) {
+            ctx.Error(element->GetPos()) << "Tuple does not allow named members";
+            return false;
+        }
+        items = L(items, element);
+    }
+
+    Add("quote", items);
+    return TAstListNode::DoInit(ctx, src);
+}
+
+// Number of element expressions.
+size_t TTupleNode::GetTupleSize() const {
+    return Exprs.size();
+}
+
+// Element at `index`; the index is not range-checked here.
+TNodePtr TTupleNode::GetTupleElement(size_t index) const {
+    return Exprs[index];
+}
+
+// The clone owns cloned copies of all elements.
+TNodePtr TTupleNode::DoClone() const {
+    auto clonedExprs = CloneContainer(Exprs);
+    return new TTupleNode(Pos, clonedExprs);
+}
+
+// Forwards the collection request to every element.
+void TTupleNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+    for (auto& element : Exprs) {
+        element->CollectPreaggregateExprs(ctx, src, exprs);
+    }
+}
+
+// Source name as derived by DeriveCommonSourceName from the elements.
+const TString* TTupleNode::GetSourceName() const {
+    return DeriveCommonSourceName(Exprs);
+}
+
+// Factory for TTupleNode.
+TNodePtr BuildTuple(TPosition pos, const TVector<TNodePtr>& exprs) {
+    TNodePtr tuple = new TTupleNode(pos, exprs);
+    return tuple;
+}
+
+// Struct expression node. `labels` is either empty (member names are then taken from
+// the expressions' own labels in DoInit) or has exactly one entry per expression.
+TStructNode::TStructNode(TPosition pos, const TVector<TNodePtr>& exprs, const TVector<TNodePtr>& labels, bool ordered)
+    : TAstListNode(pos)
+    , Exprs(exprs)
+    , Labels(labels)
+    , Ordered(ordered)
+{
+    const bool labelsMatch = Labels.empty() || Labels.size() == Exprs.size();
+    YQL_ENSURE(labelsMatch);
+}
+
+// Emits (AsStruct '(label expr) ...). "AsStructUnordered" is used instead when the node
+// is not ordered and has at least two members. Without explicit labels every expression
+// must carry its own label, otherwise an error is reported and false returned.
+bool TStructNode::DoInit(TContext& ctx, ISource* src) {
+    const bool useOrderedCallable = Ordered || Exprs.size() < 2;
+    Nodes.push_back(BuildAtom(Pos, useOrderedCallable ? "AsStruct" : "AsStructUnordered", TNodeFlags::Default));
+
+    const bool labelsFromExprs = Labels.empty();
+    for (size_t idx = 0; idx < Exprs.size(); ++idx) {
+        const auto& member = Exprs[idx];
+        TNodePtr label;
+        if (labelsFromExprs) {
+            if (!member->GetLabel()) {
+                ctx.Error(member->GetPos()) << "Structure does not allow anonymous members";
+                return false;
+            }
+            label = BuildQuotedAtom(member->GetPos(), member->GetLabel());
+        } else {
+            label = Labels[idx];
+        }
+        Nodes.push_back(Q(Y(label, member)));
+    }
+    return TAstListNode::DoInit(ctx, src);
+}
+
+// The clone owns cloned expressions and labels and keeps the ordering flag.
+TNodePtr TStructNode::DoClone() const {
+    auto clonedExprs = CloneContainer(Exprs);
+    auto clonedLabels = CloneContainer(Labels);
+    return new TStructNode(Pos, clonedExprs, clonedLabels, Ordered);
+}
+
+// Self-casts (mutable and const flavours).
+TStructNode* TStructNode::GetStructNode() {
+    return this;
+}
+
+const TStructNode* TStructNode::GetStructNode() const {
+    return this;
+}
+
+// Forwards the collection request to every member expression.
+void TStructNode::CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) {
+    for (auto& member : Exprs) {
+        member->CollectPreaggregateExprs(ctx, src, exprs);
+    }
+}
+
+// Source name as derived by DeriveCommonSourceName from the member expressions.
+const TString* TStructNode::GetSourceName() const {
+    return DeriveCommonSourceName(Exprs);
+}
+
+// Unordered struct whose member names come from the expressions' own labels.
+TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprs) {
+    return new TStructNode(pos, exprs, {}, /* ordered = */ false);
+}
+
+// Unordered struct with explicitly supplied labels.
+TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels) {
+    return new TStructNode(pos, exprsUnlabeled, labels, /* ordered = */ false);
+}
+
+// Ordered struct with explicitly supplied labels.
+TNodePtr BuildOrderedStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels) {
+    return new TStructNode(pos, exprsUnlabeled, labels, /* ordered = */ true);
+}
+
+// Plain container node for a list of expressions; it takes ownership of `exprs`.
+TListOfNamedNodes::TListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs)
+    : INode(pos)
+    , Exprs(std::move(exprs))
+{
+}
+
+// Gives direct, mutable access to the stored expressions.
+TVector<TNodePtr>* TListOfNamedNodes::ContentListPtr() {
+    return &Exprs;
+}
+
+// This node must never be translated: the YQL_ENSURE below always fails.
+TAstNode* TListOfNamedNodes::Translate(TContext& ctx) const {
+    YQL_ENSURE(!"Unexpected usage");
+    Y_UNUSED(ctx);
+    return nullptr;
+}
+
+TNodePtr TListOfNamedNodes::DoClone() const {
+    return new TListOfNamedNodes(GetPos(), CloneContainer(Exprs));
+}
+
+// Visits every stored expression tree.
+void TListOfNamedNodes::DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const {
+    for (const auto& item : Exprs) {
+        item->VisitTree(func, visited);
+    }
+}
+
+// Factory for TListOfNamedNodes.
+TNodePtr BuildListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs) {
+    return new TListOfNamedNodes(pos, std::move(exprs));
+}
+
+// Named placeholder node that is not allowed inside an expression (see DoInit).
+TArgPlaceholderNode::TArgPlaceholderNode(TPosition pos, const TString &name)
+    : INode(pos)
+    , Name(name)
+{
+}
+
+// Initialization always fails with an error that mentions the placeholder name.
+bool TArgPlaceholderNode::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    ctx.Error(Pos) << Name << " can't be used as a part of expression.";
+    return false;
+}
+
+// There is nothing to translate for a placeholder.
+TAstNode* TArgPlaceholderNode::Translate(TContext& ctx) const {
+    Y_UNUSED(ctx);
+    return nullptr;
+}
+
+TString TArgPlaceholderNode::GetName() const {
+    return Name;
+}
+
+TNodePtr TArgPlaceholderNode::DoClone() const {
+    return new TArgPlaceholderNode(GetPos(), Name);
+}
+
+// Factory for TArgPlaceholderNode.
+TNodePtr BuildArgPlaceholder(TPosition pos, const TString& name) {
+    TNodePtr placeholder = new TArgPlaceholderNode(pos, name);
+    return placeholder;
+}
+
+// Expression node for an access chain. Ids[0] holds the base expression; the following
+// parts are either consumed as column / correlation names of the base column node or
+// turned into SqlAccess calls ("struct" for named parts, "dict" for expression parts).
+class TAccessNode: public INode {
+public:
+    TAccessNode(TPosition pos, const TVector<TIdPart>& ids, bool isLookup)
+        : INode(pos)
+        , Ids(ids)
+        , IsLookup(isLookup)
+        , ColumnOnly(false)
+        , IsColumnRequired(false)
+        , AccessOpName("AccessNode")
+    {
+        Y_DEBUG_ABORT_UNLESS(Ids.size() > 1);
+        Y_DEBUG_ABORT_UNLESS(Ids[0].Expr);
+        auto baseColumn = Ids[0].Expr->GetColumnNode();
+        if (!baseColumn) {
+            return;
+        }
+
+        // With more than two parts, the part right after the base is used as the source name.
+        TString source;
+        ui32 columnIdx = 1;
+        if (Ids.size() > 2) {
+            source = Ids[columnIdx].Name;
+            ++columnIdx;
+        }
+
+        ColumnOnly = !IsLookup && Ids.size() < 4;
+        const auto& columnPart = Ids[columnIdx];
+        if (ColumnOnly && columnPart.Expr) {
+            baseColumn->ResetColumn(columnPart.Expr, source);
+        } else {
+            baseColumn->ResetColumn(columnPart.Name, source);
+        }
+    }
+
+    // Remembers that the node has to denote a plain column; verified in DoInit.
+    void AssumeColumn() override {
+        IsColumnRequired = true;
+    }
+
+    // Name of the second part when the chain is column-only and has at most two parts.
+    TMaybe<TString> TryMakeTable() {
+        const bool singleName = ColumnOnly && Ids.size() <= 2;
+        if (!singleName) {
+            return Nothing();
+        }
+        return Ids[1].Name;
+    }
+
+    const TString* GetColumnName() const override {
+        if (!ColumnOnly) {
+            return nullptr;
+        }
+        return Ids[0].Expr->GetColumnName();
+    }
+
+    const TString* GetSourceName() const override {
+        return Ids[0].Expr->GetSourceName();
+    }
+
+    TAccessNode* GetAccessNode() override {
+        return this;
+    }
+
+    const TAccessNode* GetAccessNode() const override {
+        return this;
+    }
+
+    // Initializes all part expressions, validates the parts consumed by the base column
+    // and wraps the base expression into one SqlAccess call per remaining part.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto current = Ids[0].Expr;
+        const TPosition pos(current->GetPos());
+        if (current->IsAsterisk()) {
+            ctx.Error(pos) << "Asterisk column does not allow any access";
+            return false;
+        }
+        if (!current->Init(ctx, src)) {
+            return false;
+        }
+        for (auto& part: Ids) {
+            if (part.Expr && !part.Expr->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        // Index of the first part that is not consumed by the base column.
+        ui32 next = 1;
+        auto baseColumn = current->GetColumnNode();
+        if (baseColumn) {
+            const bool useSourceAsColumn = baseColumn->IsUseSourceAsColumn();
+            ColumnOnly &= !useSourceAsColumn;
+            if (IsColumnRequired && !ColumnOnly) {
+                ctx.Error(pos) << "Please use a full form (corellation.struct.field) or an alias (struct.field as alias) to access struct's field in the GROUP BY";
+                return false;
+            }
+
+            if (Ids.size() > 2) {
+                if (!CheckColumnId(pos, ctx, Ids[next], ColumnOnly ? "Correlation" : "Column", true)) {
+                    return false;
+                }
+                ++next;
+            }
+            if (!useSourceAsColumn) {
+                if (!IsLookup && !CheckColumnId(pos, ctx, Ids[next], ColumnOnly ? "Column" : "Member", false)) {
+                    return false;
+                }
+                ++next;
+            }
+        }
+
+        for (; next < Ids.size(); ++next) {
+            const auto& part = Ids[next];
+            if (part.Name.empty() && !part.Expr) {
+                // Nothing to access for a part with neither a name nor an expression.
+                continue;
+            }
+
+            if (!part.Name.empty()) {
+                current = Y("SqlAccess", Q("struct"), current, part.Expr ? Y("EvaluateAtom", part.Expr) : BuildQuotedAtom(Pos, part.Name));
+                AccessOpName = "AccessStructMember";
+            } else {
+                current = Y("SqlAccess", Q("dict"), current, part.Expr);
+                AccessOpName = "AccessDictMember";
+            }
+
+            // Attach the active Yson pragmas as an options tuple to the freshly built call.
+            if (ctx.PragmaYsonAutoConvert || ctx.PragmaYsonStrict || ctx.PragmaYsonFast) {
+                auto ysonOptions = Y();
+                if (ctx.PragmaYsonAutoConvert) {
+                    ysonOptions->Add(BuildQuotedAtom(Pos, "yson_auto_convert"));
+                }
+                if (ctx.PragmaYsonStrict) {
+                    ysonOptions->Add(BuildQuotedAtom(Pos, "yson_strict"));
+                }
+                if (ctx.PragmaYsonFast) {
+                    ysonOptions->Add(BuildQuotedAtom(Pos, "yson_fast"));
+                }
+                current->Add(Q(ysonOptions));
+            }
+        }
+        Node = current;
+        return true;
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    // Cloning is only allowed before DoInit has produced Node.
+    TPtr DoClone() const override {
+        YQL_ENSURE(!Node, "TAccessNode::Clone: Node should not be initialized");
+        TVector<TIdPart> clonedParts;
+        clonedParts.reserve(Ids.size());
+        for (const auto& part: Ids) {
+            clonedParts.emplace_back(part.Clone());
+        }
+        auto clone = new TAccessNode(Pos, clonedParts, IsLookup);
+        clone->ColumnOnly = ColumnOnly;
+        return clone;
+    }
+
+    const TVector<TIdPart>& GetParts() const {
+        return Ids;
+    }
+
+protected:
+    // Mirrors the state flags of the node built in DoInit.
+    void DoUpdateState() const override {
+        YQL_ENSURE(Node);
+        State.Set(ENodeState::Const, Node->IsConstant());
+        State.Set(ENodeState::MaybeConst, Node->MaybeConstant());
+        State.Set(ENodeState::Aggregated, Node->IsAggregated());
+        State.Set(ENodeState::AggregationKey, Node->HasState(ENodeState::AggregationKey));
+        State.Set(ENodeState::OverWindow, Node->IsOverWindow());
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+
+    // Reports an error and returns false when the part has an empty name or,
+    // with checkLookup set, when it carries an expression.
+    bool CheckColumnId(TPosition pos, TContext& ctx, const TIdPart& id, const TString& where, bool checkLookup) {
+        if (id.Name.empty()) {
+            ctx.Error(pos) << where << " name can not be empty";
+            return false;
+        }
+        const bool forbiddenLookup = checkLookup && id.Expr;
+        if (forbiddenLookup) {
+            ctx.Error(pos) << where << " name does not allow dict lookup";
+            return false;
+        }
+        return true;
+    }
+
+    TString GetOpName() const override {
+        return AccessOpName;
+    }
+
+    // Forwards the collection request to every part that has an expression.
+    void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override {
+        for (auto& part : Ids) {
+            if (!part.Expr) {
+                continue;
+            }
+            part.Expr->CollectPreaggregateExprs(ctx, src, exprs);
+        }
+    }
+
+private:
+    TNodePtr Node;
+    TVector<TIdPart> Ids;
+    bool IsLookup;
+    bool ColumnOnly;
+    bool IsColumnRequired;
+    TString AccessOpName;
+};
+
+// Factory for TAccessNode.
+TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup) {
+    TNodePtr access = new TAccessNode(pos, ids, isLookup);
+    return access;
+}
+
+// Factory for TMatchRecognizeVarAccessNode.
+TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar) {
+    TNodePtr access = new TMatchRecognizeVarAccessNode(pos, var, column, theSameVar);
+    return access;
+}
+
+// Emits a pair of warnings when a GROUP BY term refers, without a source name, to a column
+// whose name equals the alias of a non-trivial, non-aggregating, non-window SELECT term.
+// Only the first such reference is reported.
+void WarnIfAliasFromSelectIsUsedInGroupBy(TContext& ctx, const TVector<TNodePtr>& selectTerms, const TVector<TNodePtr>& groupByTerms,
+    const TVector<TNodePtr>& groupByExprTerms)
+{
+    // Step 1: collect the SELECT aliases that are worth warning about.
+    THashMap<TString, TNodePtr> aliasedTerms;
+    for (auto& selectTerm : selectTerms) {
+        auto alias = selectTerm->GetLabel();
+        if (!alias || selectTerm->IsOverWindow()) {
+            continue;
+        }
+
+        // do not warn for trivial renaming such as '[X.]foo AS foo'
+        auto columnName = selectTerm->GetColumnName();
+        if (columnName && *columnName == alias) {
+            continue;
+        }
+
+        // skip terms with aggregation functions inside
+        bool usesAggregation = false;
+        auto aggregationFinder = [&](const INode& current) {
+            if (!usesAggregation && current.GetAggregation()) {
+                usesAggregation = true;
+            }
+            return !usesAggregation;
+        };
+        selectTerm->VisitTree(aggregationFinder);
+
+        if (usesAggregation) {
+            continue;
+        }
+        aliasedTerms[alias] = selectTerm;
+    }
+
+    if (aliasedTerms.empty()) {
+        return;
+    }
+
+    // Step 2: visitor that reports the first column reference matching a collected alias.
+    bool reported = false;
+    auto aliasUsageFinder = [&](const INode& current) {
+        if (reported) {
+            return false;
+        }
+
+        auto columnName = current.GetColumnName();
+        if (!columnName) {
+            return true;
+        }
+
+        // do not warn if source name is set
+        auto sourceName = current.GetSourceName();
+        if (sourceName && *sourceName) {
+            return true;
+        }
+
+        auto match = aliasedTerms.find(*columnName);
+        if (match == aliasedTerms.end()) {
+            return true;
+        }
+
+        reported = true;
+        ctx.Warning(current.GetPos(), TIssuesIds::YQL_PROJECTION_ALIAS_IS_REFERENCED_IN_GROUP_BY)
+            << "GROUP BY will aggregate by column `" << *columnName << "` instead of aggregating by SELECT expression with same alias";
+        ctx.Warning(match->second->GetPos(), TIssuesIds::YQL_PROJECTION_ALIAS_IS_REFERENCED_IN_GROUP_BY)
+            << "You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details";
+        return false;
+    };
+
+    // Step 3: build the list of terms to inspect. Plain column terms that merely refer to a
+    // GROUP BY expression label are dropped and the expression terms themselves are appended.
+    THashSet<TString> exprLabels;
+    for (auto& exprTerm : groupByExprTerms) {
+        auto exprLabel = exprTerm->GetLabel();
+        YQL_ENSURE(exprLabel);
+        exprLabels.insert(exprLabel);
+    }
+
+    TVector<TNodePtr> termsToCheck = groupByTerms;
+    EraseIf(termsToCheck, [&](const TNodePtr& node) {
+        auto columnName = node->GetColumnName();
+        auto sourceName = node->GetSourceName();
+
+        return (!sourceName || sourceName->empty()) && columnName && exprLabels.contains(*columnName);
+    });
+    termsToCheck.insert(termsToCheck.end(), groupByExprTerms.begin(), groupByExprTerms.end());
+
+    // Step 4: scan the terms until the first warning has been issued.
+    for (auto& termToCheck : termsToCheck) {
+        termToCheck->VisitTree(aliasUsageFinder);
+        if (reported) {
+            return;
+        }
+    }
+}
+
+// Checks that every initialized, non-constant node is aggregated or computed over a window.
+// For the first offending node an error is reported (pointing at the offending column or
+// access sub-node when one can be located) and false is returned.
+bool ValidateAllNodesForAggregation(TContext& ctx, const TVector<TNodePtr>& nodes) {
+    for (auto& node: nodes) {
+        if (!node->HasState(ENodeState::Initialized) || node->IsConstant() || node->MaybeConstant()) {
+            continue;
+        }
+        // TODO: "!node->IsOverWindow()" doesn't look right here
+        if (node->IsAggregated() || node->IsOverWindow() || node->IsOverWindowDistinct()) {
+            continue;
+        }
+
+        // locate column which is not a key column and not aggregated
+        const INode* offender = nullptr;
+        auto offenderFinder = [&offender](const INode& current) {
+            if (offender || current.IsAggregated() || current.IsOverWindow() || current.IsOverWindowDistinct()) {
+                return false;
+            }
+
+            if (current.GetColumnNode() || current.GetAccessNode()) {
+                offender = &current;
+                return false;
+            }
+            return true;
+        };
+        node->VisitTree(offenderFinder);
+
+        if (!offender) {
+            ctx.Error(node->GetPos()) << "Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery";
+            return false;
+        }
+
+        // Quoted "source.column " (or "column ") prefix; left empty when the name is unknown.
+        TString quotedName;
+        if (auto col = offender->GetColumnName(); col && *col) {
+            quotedName = "`";
+            if (auto src = offender->GetSourceName(); src && *src) {
+                quotedName += DotJoin(*src, *col);
+            } else {
+                quotedName += *col;
+            }
+            quotedName += "` ";
+        }
+        ctx.Error(offender->GetPos()) << "Column " << quotedName << "must either be a key column in GROUP BY or it should be used in aggregation function";
+        return false;
+    }
+    return true;
+}
+
+// List node of the form (bind <module> '<alias>).
+class TBindNode: public TAstListNode {
+public:
+    TBindNode(TPosition pos, const TString& module, const TString& alias)
+        : TAstListNode(pos)
+    {
+        auto moduleNode = AstNode(module);
+        auto aliasAtom = BuildQuotedAtom(pos, alias);
+        Add("bind", moduleNode, aliasAtom);
+    }
+
+private:
+    // Copy constructor used by DoClone only: clones every child node.
+    TBindNode(const TBindNode& other)
+        : TAstListNode(other.GetPos())
+    {
+        Nodes = CloneContainer(other.Nodes);
+    }
+
+    TPtr DoClone() const final {
+        return new TBindNode(*this);
+    }
+};
+
+// Factory for TBindNode.
+TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias) {
+    TNodePtr bind = new TBindNode(pos, module, alias);
+    return bind;
+}
+
+// List node of the form (lambda '(params) body...).
+class TLambdaNode: public TAstListNode {
+public:
+    // Single-body lambda. With a non-empty resName the body is wrapped into
+    // a block that ends with (return resName).
+    TLambdaNode(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName)
+        : TAstListNode(pos)
+    {
+        const bool wrapIntoBlock = !resName.empty();
+        if (wrapIntoBlock) {
+            body = Y("block", Q(L(body, Y("return", resName))));
+        }
+        Add("lambda", Q(params), body);
+    }
+
+    // Lambda with several bodies, appended in the given order.
+    TLambdaNode(TPosition pos, TNodePtr params, TVector<TNodePtr> bodies)
+        : TAstListNode(pos)
+    {
+        Add("lambda", Q(params));
+        for (const auto& body : bodies) {
+            Add(body);
+        }
+    }
+
+    TLambdaNode* GetLambdaNode() override {
+        return this;
+    }
+
+    const TLambdaNode* GetLambdaNode() const override {
+        return this;
+    }
+
+private:
+    // Copy constructor used by DoClone only: clones every child node.
+    TLambdaNode(const TLambdaNode& other)
+        : TAstListNode(other.GetPos())
+    {
+        Nodes = CloneContainer(other.Nodes);
+    }
+
+    TPtr DoClone() const final {
+        return new TLambdaNode(*this);
+    }
+
+    // A lambda node is always marked as a constant.
+    void DoUpdateState() const final {
+        State.Set(ENodeState::Const);
+    }
+};
+
+// Factory for a single-body lambda (see TLambdaNode for the meaning of resName).
+TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName) {
+    TNodePtr lambda = new TLambdaNode(pos, params, body, resName);
+    return lambda;
+}
+
+// Factory for a lambda with several bodies.
+TNodePtr BuildLambda(TPosition pos, TNodePtr params, const TVector<TNodePtr>& bodies) {
+    TNodePtr lambda = new TLambdaNode(pos, params, bodies);
+    return lambda;
+}
+
+// Builds the call (DataType '<typeName>).
+TNodePtr BuildDataType(TPosition pos, const TString& typeName) {
+    auto typeAtom = BuildQuotedAtom(pos, typeName, TNodeFlags::Default);
+    return new TCallNodeImpl(pos, "DataType", {typeAtom});
+}
+
+// Resolves a type alias to its canonical simple type name.
+//  - isPgType: the alias is a pg type name with an optional "pg" / "_pg" prefix; the result
+//    is the lower-cased name carrying that prefix, or nothing when NPg does not know the type.
+//  - otherwise: SQL aliases are tried first; a "pg" / "_pg" prefixed alias is accepted
+//    (lower-cased) when NPg knows the type named by the remainder.
+TMaybe<TString> LookupSimpleType(const TStringBuf& alias, bool flexibleTypes, bool isPgType) {
+    const TString normalized = to_lower(TString(alias));
+    const bool hasPgPrefix = normalized.StartsWith("pg");
+    const bool hasArrayPgPrefix = normalized.StartsWith("_pg");
+
+    if (isPgType) {
+        // expecting original pg type (like _int4 or varchar) with optional pg suffix (i.e. _pgint4, pgvarchar)
+        TString pgName = normalized;
+        if (hasPgPrefix) {
+            pgName = normalized.substr(2);
+        } else if (hasArrayPgPrefix) {
+            pgName = "_" + normalized.substr(3);
+        }
+
+        if (!NPg::HasType(pgName)) {
+            return {};
+        }
+
+        if (pgName.StartsWith("_")) {
+            return "_pg" + pgName.substr(1);
+        }
+        return "pg" + pgName;
+    }
+
+    if (auto sqlAlias = LookupSimpleTypeBySqlAlias(alias, flexibleTypes)) {
+        return TString(*sqlAlias);
+    }
+
+    if (!hasArrayPgPrefix && !hasPgPrefix) {
+        return {};
+    }
+
+    const TString pgType = normalized.substr(hasArrayPgPrefix ? 3 : 2);
+    if (!NPg::HasType(pgType)) {
+        return {};
+    }
+    return normalized;
+}
+
+// Builds the type node for a simple type name:
+//  - Void / Unit / Generic / EmptyList / EmptyDict become a "<Name>Type" call
+//    (rejected with an error when dataOnly is set);
+//  - pg types become (PgType '<name>), with "_pg" mapped to a leading "_";
+//  - everything else becomes (DataType '<name>).
+// Reports an error and returns an empty pointer for unknown names.
+TNodePtr BuildSimpleType(TContext& ctx, TPosition pos, const TString& typeName, bool dataOnly) {
+    const bool explicitPgType = ctx.GetColumnReferenceState() == EColumnRefState::AsPgType;
+    auto resolved = LookupSimpleType(typeName, ctx.FlexibleTypes, explicitPgType);
+    if (!resolved) {
+        ctx.Error(pos) << "Unknown " << (explicitPgType ? "pg" : "simple") << " type '" << typeName << "'";
+        return {};
+    }
+
+    auto type = *resolved;
+    const bool isNonDataType =
+        type == "Void" ||
+        type == "Unit" ||
+        type == "Generic" ||
+        type == "EmptyList" ||
+        type == "EmptyDict";
+    if (isNonDataType) {
+        if (dataOnly) {
+            ctx.Error(pos) << "Only data types are allowed here, but got: '" << typeName << "'";
+            return {};
+        }
+        type += "Type";
+        return new TCallNodeImpl(pos, type, {});
+    }
+
+    const bool isArrayPg = type.StartsWith("_pg");
+    if (isArrayPg || type.StartsWith("pg")) {
+        TString pgType;
+        if (isArrayPg) {
+            pgType = "_" + type.substr(3);
+        } else {
+            pgType = type.substr(2);
+        }
+        return new TCallNodeImpl(pos, "PgType", { BuildQuotedAtom(pos, pgType, TNodeFlags::Default) });
+    }
+
+    return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, type, TNodeFlags::Default) });
+}
+
+// Maps a few SQL type aliases (matched through TCiString) to YQL type names; any other
+// alias is returned unchanged. With `normalize` set the result additionally goes
+// through NormalizeTypeString.
+TString TypeByAlias(const TString& alias, bool normalize) {
+    const TCiString ciAlias(alias);
+
+    TString type;
+    if (ciAlias.StartsWith("varchar")) {
+        type = "String";
+    } else if (ciAlias == "tinyint") {
+        type = "Int8";
+    } else if (ciAlias == "byte") {
+        type = "Uint8";
+    } else if (ciAlias == "smallint") {
+        type = "Int16";
+    } else if (ciAlias == "int" || ciAlias == "integer") {
+        type = "Int32";
+    } else if (ciAlias == "bigint") {
+        type = "Int64";
+    } else {
+        type = alias;
+    }
+
+    if (normalize) {
+        return NormalizeTypeString(type);
+    }
+    return type;
+}
+
+// Builds the "is null" check for `a`: a missing operand yields nullptr, a Null literal
+// folds to the Bool literal true, anything else becomes (Not (Exists a)).
+TNodePtr BuildIsNullOp(TPosition pos, TNodePtr a) {
+    if (!a) {
+        return nullptr;
+    }
+    if (a->IsNull()) {
+        return BuildLiteralBool(pos, true);
+    }
+
+    TNodePtr exists = new TCallNodeImpl(pos, "Exists", {a});
+    return new TCallNodeImpl(pos, "Not", {exists});
+}
+
+
+
+TUdfNode::TUdfNode(TPosition pos, const TVector<TNodePtr>& args)
+    : INode(pos)
+    , Args(args)
+{
+    // Without named args the arguments arrive as a plain vector of positional args;
+    // otherwise Args has length 2: a tuple of positional args and a struct of named args.
+    // Normalize the first form into a single tuple here. The remaining checks
+    // are performed in DoInit.
+    if (!Args.empty() && !Args[0]->GetTupleNode()) {
+        Args = {BuildTuple(pos, args)};
+    }
+}
+
+// Validates the Udf arguments and extracts:
+//  - the function and module names from the first positional argument;
+//  - the tuple of external types from the remaining positional arguments;
+//  - the optional TypeConfig / RunConfig entries from the named arguments.
+bool TUdfNode::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    if (Args.empty()) {
+        ctx.Error(Pos) << "Udf: expected at least one argument";
+        return false;
+    }
+
+    TTupleNode* positional = Args[0]->GetTupleNode();
+    if (!positional || positional->GetTupleSize() < 1) {
+        ctx.Error(Pos) << "Udf: first argument must be a callable, like Foo::Bar";
+        return false;
+    }
+
+    TNodePtr callable = positional->GetTupleElement(0);
+    if (!callable || !callable->FuncName()) {
+        ctx.Error(Pos) << "Udf: first argument must be a callable, like Foo::Bar";
+        return false;
+    }
+
+    FunctionName = callable->FuncName();
+    ModuleName = callable->ModuleName();
+
+    const size_t positionalCount = positional->GetTupleSize();
+    TVector<TNodePtr> externalTypes;
+    externalTypes.reserve(positionalCount - 1);
+    for (size_t idx = 1; idx < positionalCount; ++idx) {
+        // TODO(): support named args in GetFunctionArgColumnStatus
+        TNodePtr arg = positional->GetTupleElement(idx);
+        TAccessNode* access = arg->GetAccessNode();
+        if (access) {
+            // For an access node the name of its second part is used as a data type name.
+            externalTypes.push_back(Y("DataType", Q(access->GetParts()[1].Name)));
+        } else {
+            externalTypes.push_back(arg);
+        }
+    }
+    ExternalTypesTuple = new TCallNodeImpl(Pos, "TupleType", externalTypes);
+
+    if (Args.size() > 1) {
+        if (TStructNode* namedArgs = Args[1]->GetStructNode()) {
+            for (const auto& namedArg : namedArgs->GetExprs()) {
+                if (namedArg->GetLabel() == "TypeConfig") {
+                    TypeConfig = MakeAtomFromExpression(Pos, ctx, namedArg);
+                } else if (namedArg->GetLabel() == "RunConfig") {
+                    RunConfig = namedArg;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+// TupleType call built in DoInit from the positional arguments after the callable.
+const TNodePtr TUdfNode::GetExternalTypes() const {
+    return ExternalTypesTuple;
+}
+
+// Function name assigned in DoInit; dereferences FunctionName unconditionally.
+const TString& TUdfNode::GetFunction() const {
+    return *FunctionName;
+}
+
+// Module name assigned in DoInit; dereferences ModuleName unconditionally.
+const TString& TUdfNode::GetModule() const {
+    return *ModuleName;
+}
+
+// Named argument "RunConfig", if one was given.
+TNodePtr TUdfNode::GetRunConfig() const {
+    return RunConfig;
+}
+
+// Atom made from the named argument "TypeConfig", if one was given.
+const TDeferredAtom& TUdfNode::GetTypeConfig() const {
+    return TypeConfig;
+}
+
+// Self-casts (mutable and const flavours).
+TUdfNode* TUdfNode::GetUdfNode() {
+    return this;
+}
+
+const TUdfNode* TUdfNode::GetUdfNode() const {
+    return this;
+}
+
+// The node has no translation of its own: reports an error and returns nullptr.
+TAstNode* TUdfNode::Translate(TContext& ctx) const {
+    ctx.Error(Pos) << "Abstract Udf Node can't be used as a part of expression.";
+    return nullptr;
+}
+
+TNodePtr TUdfNode::DoClone() const {
+    auto clonedArgs = CloneContainer(Args);
+    return new TUdfNode(Pos, clonedArgs);
+}
+
+
+// Call node with exactly two arguments.
+class TBinaryOpNode final: public TCallNode {
+public:
+    TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b);
+
+    // The clone owns cloned copies of both operands.
+    TNodePtr DoClone() const final {
+        YQL_ENSURE(Args.size() == 2);
+        auto lhs = Args[0]->Clone();
+        auto rhs = Args[1]->Clone();
+        return new TBinaryOpNode(Pos, OpName, lhs, rhs);
+    }
+};
+
+// Passes 2 as both argument-count bounds of TCallNode.
+TBinaryOpNode::TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b)
+    : TCallNode(pos, opName, 2, 2, { a, b })
+{}
+
+// Builds a binary operation node; returns nullptr when either operand is missing.
+// Unless the operation is IsDistinctFrom / IsNotDistinctFrom, a warning is issued when both
+// operands are NULL literals, or when one of them is and the operation is neither Or nor And.
+TNodePtr BuildBinaryOp(TContext& ctx, TPosition pos, const TString& opName, TNodePtr a, TNodePtr b) {
+    if (!a || !b) {
+        return nullptr;
+    }
+
+    static const THashSet<TStringBuf> nullSafeOps = {"IsDistinctFrom", "IsNotDistinctFrom"};
+    if (!nullSafeOps.contains(opName)) {
+        const bool leftIsNull = a->IsNull();
+        const bool rightIsNull = b->IsNull();
+        const bool isLogicalOp = opName == "Or" || opName == "And";
+
+        const bool willReturnNull = (leftIsNull && rightIsNull) || ((leftIsNull || rightIsNull) && !isLogicalOp);
+        if (willReturnNull) {
+            // The 7 character long "MayWarn" suffix is not shown to the user.
+            const size_t suffixLength = opName.EndsWith("MayWarn") ? 7 : 0;
+            ctx.Warning(pos, TIssuesIds::YQL_OPERATION_WILL_RETURN_NULL) << "Binary operation "
+                << opName.substr(0, opName.size() - suffixLength)
+                << " will return NULL here";
+        }
+    }
+
+    return new TBinaryOpNode(pos, opName, a, b);
+}
+
+// Same as BuildBinaryOp but without any NULL-operand warning.
+TNodePtr BuildBinaryOpRaw(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b) {
+    const bool hasBothOperands = a && b;
+    if (!hasBothOperands) {
+        return nullptr;
+    }
+
+    return new TBinaryOpNode(pos, opName, a, b);
+}
+
+// Wraps a function node so that it is initialized against an over-window source
+// built for the window named WindowName.
+class TCalcOverWindow final: public INode {
+public:
+    TCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr node)
+        : INode(pos)
+        , WindowName(windowName)
+        , FuncNode(node)
+    {
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return FuncNode->Translate(ctx);
+    }
+
+    // Requires a source; the wrapped node is initialized with the over-window source.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        YQL_ENSURE(src);
+        TSourcePtr windowSource = BuildOverWindowSource(ctx.Pos(), WindowName, src);
+        return FuncNode->Init(ctx, windowSource.Get());
+    }
+
+    TPtr DoClone() const final {
+        return new TCalcOverWindow(Pos, WindowName, SafeClone(FuncNode));
+    }
+
+    // Const / MaybeConst / Aggregated mirror the wrapped node; OverWindow is always set.
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, FuncNode->IsConstant());
+        State.Set(ENodeState::MaybeConst, FuncNode->MaybeConstant());
+        State.Set(ENodeState::Aggregated, FuncNode->IsAggregated());
+        State.Set(ENodeState::OverWindow, true);
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(FuncNode);
+        FuncNode->VisitTree(func, visited);
+    }
+
+    // With ctx.DistinctOverWindow the request goes to the wrapped node,
+    // otherwise the INode implementation is used.
+    void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override {
+        if (!ctx.DistinctOverWindow) {
+            INode::CollectPreaggregateExprs(ctx, src, exprs);
+            return;
+        }
+        FuncNode->CollectPreaggregateExprs(ctx, src, exprs);
+    }
+
+protected:
+    const TString WindowName;
+    TNodePtr FuncNode;
+};
+
+// Factory for TCalcOverWindow.
+TNodePtr BuildCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr call) {
+    TNodePtr node = new TCalcOverWindow(pos, windowName, call);
+    return node;
+}
+
+// Node that applies the Yson options udf ("Yson2.Options" when Fast, "Yson.Options" otherwise)
+// through NamedApply with the AutoConvert and Strict flags as named arguments.
+template<bool Fast>
+class TYsonOptionsNode final: public INode {
+public:
+    TYsonOptionsNode(TPosition pos, bool autoConvert, bool strict)
+        : INode(pos)
+        , AutoConvert(autoConvert)
+        , Strict(strict)
+    {
+        auto autoConvertArg = BuildLiteralBool(pos, autoConvert);
+        autoConvertArg->SetLabel("AutoConvert");
+
+        auto strictArg = BuildLiteralBool(pos, strict);
+        strictArg->SetLabel("Strict");
+
+        auto optionsUdf = Y("Udf", Q(Fast ? "Yson2.Options" : "Yson.Options"));
+        Node = Y("NamedApply", optionsUdf, Q(Y()), BuildStructure(pos, { autoConvertArg, strictArg }));
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Node->Translate(ctx);
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        return Node->Init(ctx, src);
+    }
+
+    TPtr DoClone() const final {
+        return new TYsonOptionsNode(Pos, AutoConvert, Strict);
+    }
+
+    // The options node is always marked as a constant.
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, true);
+    }
+
+protected:
+    TNodePtr Node;
+    const bool AutoConvert;
+    const bool Strict;
+};
+
+// Factory choosing the TYsonOptionsNode instantiation according to fastYson.
+TNodePtr BuildYsonOptionsNode(TPosition pos, bool autoConvert, bool strict, bool fastYson) {
+    if (!fastYson) {
+        return new TYsonOptionsNode<false>(pos, autoConvert, strict);
+    }
+    return new TYsonOptionsNode<true>(pos, autoConvert, strict);
+}
+
+// Wrapper that initializes the wrapped node against its own fake source
+// and otherwise forwards translation and visiting to that node.
+class TDoCall final : public INode {
+public:
+    TDoCall(TPosition pos, const TNodePtr& node)
+        : INode(pos)
+        , Node(node)
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+    ISource* GetSource() final {
+        return FakeSource.Get();
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        // The caller-provided source is ignored; the private fake source is used instead.
+        Y_UNUSED(src);
+        return Node->Init(ctx, FakeSource.Get());
+    }
+
+    TAstNode* Translate(TContext& ctx) const final {
+        return Node->Translate(ctx);
+    }
+
+    // The clone wraps a clone of the inner node and gets a fresh fake source.
+    TPtr DoClone() const final {
+        return new TDoCall(Pos, Node->Clone());
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+private:
+    TNodePtr Node;
+    TSourcePtr FakeSource;
+};
+
+// Factory: wraps `node` into a TDoCall positioned at `pos`.
+TNodePtr BuildDoCall(TPosition pos, const TNodePtr& node) {
+ return new TDoCall(pos, node);
+}
+
+// Extracts an ui32 from an "Int32" or "Uint32" literal node.
+// Returns false when `from` is neither of those literals or when its text
+// cannot be parsed into ui32; `to` receives the value on success.
+bool Parseui32(TNodePtr from, ui32& to) {
+    const TString* literal = from->GetLiteral("Int32");
+    if (!literal) {
+        // Not a signed literal: fall back to the unsigned one.
+        literal = from->GetLiteral("Uint32");
+    }
+    if (!literal) {
+        return false;
+    }
+    return TryFromString(*literal, to);
+}
+
+// Returns `expr` unchanged when there is no `ground`; otherwise builds a quoted
+// "block" made of `ground` with a trailing ("return" expr) statement appended.
+TNodePtr GroundWithExpr(const TNodePtr& ground, const TNodePtr& expr) {
+    if (!ground) {
+        return expr;
+    }
+    return expr->Y("block", expr->Q(expr->L(ground, expr->Y("return", expr))));
+}
+
+// Builds a table source whose table name is given by the expression `node`.
+// A "String" literal is used as the name directly; any other non-lambda
+// expression is wrapped into an (EvaluateAtom node) list. Reports an error via
+// ctx and returns nullptr when no cluster is selected or when `node` is a lambda.
+TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster,
+    TNodePtr node, const TString& view) {
+    if (currCluster.Empty()) {
+        ctx.Error() << "No cluster name given and no default cluster is selected";
+        return nullptr;
+    }
+
+    // Common tail of both successful paths: key -> table reference -> source.
+    const auto makeSource = [&](const TDeferredAtom& tableName) -> TSourcePtr {
+        TNodePtr tableKey = BuildTableKey(node->GetPos(), currService, currCluster, tableName, {view});
+        TTableRef table(ctx.MakeName("table"), currService, currCluster, tableKey);
+        table.Options = BuildInputOptions(node->GetPos(), GetContextHints(ctx));
+        return BuildTableSource(node->GetPos(), table);
+    };
+
+    if (auto literal = node->GetLiteral("String")) {
+        return makeSource(TDeferredAtom(node->GetPos(), *literal));
+    }
+
+    if (node->GetLambdaNode()) {
+        ctx.Error() << "Lambda is not allowed to be used as source. Did you forget to call a subquery template?";
+        return nullptr;
+    }
+
+    auto wrappedNode = new TAstListNodeImpl(pos, {
+        new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default),
+        node
+    });
+
+    return makeSource(TDeferredAtom(wrappedNode, ctx));
+}
+
+// Fills `table` with an atom for the table name described by `node`, prepending `prefix`.
+// Tried in order: a "String" literal, an access node whose TryMakeTable() yields a value,
+// and finally an (EvaluateAtom expr) wrapper where a non-empty prefix is joined via Concat.
+void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix) {
+    if (auto literal = node->GetLiteral("String")) {
+        table = TDeferredAtom(node->GetPos(), prefix + *literal);
+        return;
+    }
+
+    if (auto access = node->GetAccessNode()) {
+        if (auto ret = access->TryMakeTable(); ret) {
+            table = TDeferredAtom(node->GetPos(), prefix + *ret);
+            return;
+        }
+    }
+
+    TNodePtr nameExpr = node;
+    if (!prefix.empty()) {
+        nameExpr = node->Y("Concat", node->Y("String", node->Q(prefix)), node);
+    }
+
+    auto wrappedNode = new TAstListNodeImpl(pos, {
+        new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default),
+        nameExpr
+    });
+
+    table = TDeferredAtom(wrappedNode, ctx);
+}
+
+// Builds a deferred atom from `node` with `prefix` prepended: a "String" literal is
+// concatenated immediately, anything else becomes an (EvaluateAtom expr) wrapper,
+// with a non-empty prefix joined to the expression through Concat.
+TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node, const TString& prefix) {
+    if (auto literal = node->GetLiteral("String")) {
+        return TDeferredAtom(node->GetPos(), prefix + *literal);
+    }
+
+    TNodePtr valueExpr = node;
+    if (!prefix.empty()) {
+        valueExpr = node->Y("Concat", node->Y("String", node->Q(prefix)), node);
+    }
+
+    auto wrappedNode = new TAstListNodeImpl(pos, {
+        new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default),
+        valueExpr
+    });
+
+    return TDeferredAtom(wrappedNode, ctx);
+}
+
+// Wraps a tuple expression; after a successful init the inner node is replaced
+// by an ("EnsureTupleSize" node size) call with the expected tuple size.
+class TTupleResultNode: public INode {
+public:
+    TTupleResultNode(TNodePtr&& tuple, size_t ensureTupleSize)
+        : INode(tuple->GetPos())
+        , Node(std::move(tuple))
+        , EnsureTupleSize(ensureTupleSize)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) override {
+        const bool initialized = Node->Init(ctx, src);
+        if (initialized) {
+            // Wrap only after the inner tuple has been initialized.
+            Node = Y("EnsureTupleSize", Node, Q(ToString(EnsureTupleSize)));
+        }
+        return initialized;
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Node->Translate(ctx);
+    }
+
+    TPtr DoClone() const final {
+        return new TTupleResultNode(Node->Clone(), EnsureTupleSize);
+    }
+
+    void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        Node->VisitTree(func, visited);
+    }
+protected:
+    TNodePtr Node;
+    const size_t EnsureTupleSize;
+};
+
+// Factory: wraps `tuple` into a TTupleResultNode expecting `ensureTupleSize` elements.
+TNodePtr BuildTupleResult(TNodePtr tuple, size_t ensureTupleSize) {
+ return new TTupleResultNode(std::move(tuple), ensureTupleSize);
+}
+
+// Reference to a named expression by name: translates to the atom `Name`,
+// or to ("Nth" Name index) when a tuple index is given.
+class TNamedExprReferenceNode: public IProxyNode {
+public:
+    TNamedExprReferenceNode(TNodePtr parent, const TString& name, TMaybe<size_t> tupleIndex)
+        : IProxyNode(parent->GetPos(), parent)
+        , Name(name)
+        , TupleIndex(tupleIndex)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        Y_UNUSED(src);
+        if (!IProxyNode::DoInit(ctx, nullptr)) {
+            return false;
+        }
+        if (!IProxyNode::InitReference(ctx)) {
+            return false;
+        }
+
+        TNodePtr reference = BuildAtom(GetPos(), Name, TNodeFlags::Default);
+        if (TupleIndex.Defined()) {
+            reference = Y("Nth", reference, Q(ToString(*TupleIndex)));
+        }
+        Node = reference;
+
+        return true;
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        YQL_ENSURE(Node, "Init() should be done before Translate()");
+        return Node->Translate(ctx);
+    }
+
+    TPtr DoClone() const final {
+        // Inner is intentionally shared with the clone, not cloned.
+        return new TNamedExprReferenceNode(Inner, Name, TupleIndex);
+    }
+
+private:
+    const TString Name;
+    const TMaybe<size_t> TupleIndex;
+    TNodePtr Node;
+};
+
+// Factory for TNamedExprReferenceNode; `parent` must be non-null (enforced by YQL_ENSURE).
+TNodePtr BuildNamedExprReference(TNodePtr parent, const TString& name, TMaybe<size_t> tupleIndex) {
+ YQL_ENSURE(parent);
+ return new TNamedExprReferenceNode(parent, name, tupleIndex);
+}
+
+// Proxy for a named expression. Unless ctx.ValidateUnusedExprs is set, the
+// wrapped expression is initialized lazily, on the first InitReference() call;
+// an expression that was never referenced translates to the quoted atom "unused".
+class TNamedExprNode: public IProxyNode {
+public:
+    TNamedExprNode(TNodePtr parent)
+        : IProxyNode(parent->GetPos(), parent)
+        , FakeSource(BuildFakeSource(parent->GetPos()))
+        , Referenced(false)
+    {
+    }
+
+    bool DoInit(TContext& ctx, ISource* src) final {
+        YQL_ENSURE(!Referenced, "Reference is initialized before named expr itself");
+        Y_UNUSED(src);
+        if (ctx.ValidateUnusedExprs) {
+            // Validation mode: initialize eagerly even if nobody references the expression.
+            return IProxyNode::DoInit(ctx, FakeSource.Get());
+        }
+        // do actual init in InitReference()
+        return true;
+    }
+
+    // Marks the expression as used and performs the (possibly deferred) init.
+    bool InitReference(TContext& ctx) final {
+        Referenced = true;
+        return IProxyNode::DoInit(ctx, FakeSource.Get());
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        if (ctx.ValidateUnusedExprs || Referenced) {
+            return Inner->Translate(ctx);
+        }
+        // Never initialized: emit a placeholder instead of the real expression.
+        auto unused = BuildQuotedAtom(GetPos(), "unused", TNodeFlags::Default);
+        return unused->Translate(ctx);
+    }
+
+    TPtr DoClone() const final {
+        return new TNamedExprNode(Inner->Clone());
+    }
+
+private:
+    const TSourcePtr FakeSource;
+    bool Referenced;
+};
+
+// Factory for TNamedExprNode; `parent` must be non-null (enforced by YQL_ENSURE).
+TNodePtr BuildNamedExpr(TNodePtr parent) {
+ YQL_ENSURE(parent);
+ return new TNamedExprNode(parent);
+}
+
+// Checks that the mandatory vector index settings are present: at least one of
+// Distance/Similarity, plus VectorType and VectorDimension. The first missing
+// setting is reported through ctx.Error() and false is returned.
+bool TVectorIndexSettings::Validate(TContext& ctx) const {
+    const auto fail = [&ctx](const char* message) {
+        ctx.Error() << message;
+        return false;
+    };
+
+    if (!Distance && !Similarity) {
+        return fail("either distance or similarity should be set");
+    }
+    if (!VectorType) {
+        return fail("vector_type should be set");
+    }
+    if (!VectorDimension) {
+        return fail("vector_dimension should be set");
+    }
+    return true;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h
new file mode 100644
index 00000000000..d9eb154031e
--- /dev/null
+++ b/yql/essentials/sql/v1/node.h
@@ -0,0 +1,1567 @@
+#pragma once
+
+#include <google/protobuf/message.h>
+#include <yql/essentials/utils/resetable_setting.h>
+#include <yql/essentials/parser/proto_ast/common.h>
+#include <yql/essentials/public/udf/udf_data_type.h>
+#include <yql/essentials/ast/yql_ast.h>
+#include <yql/essentials/ast/yql_expr.h>
+#include <util/generic/vector.h>
+#include <util/generic/set.h>
+#include <util/generic/map.h>
+#include <util/generic/hash.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/maybe.h>
+#include <util/string/builder.h>
+
+#include <library/cpp/enumbitset/enumbitset.h>
+
+#include <array>
+#include <functional>
+#include <variant>
+
+namespace NSQLTranslationV1 {
+ // Length limit for inline scripts (the unit is not visible here — presumably characters; confirm at use sites).
+ constexpr const size_t SQL_MAX_INLINE_SCRIPT_LEN = 24;
+
+ // Shorthands for the NYql AST types used throughout this header.
+ using NYql::TPosition;
+ using NYql::TAstNode;
+
+ // Per-node state flags stored in a TNodeState bitset.
+ // Precached shares the value of Begin; End is the upper bound of the bitset range.
+ enum class ENodeState {
+ Begin,
+ Precached = Begin,
+ Initialized,
+ CountHint,
+ Const,
+ MaybeConst,
+ Aggregated,
+ AggregationKey,
+ OverWindow,
+ OverWindowDistinct,
+ Failed,
+ End,
+ };
+ // Bitset covering the [Begin, End) range of ENodeState.
+ typedef TEnumBitSet<ENodeState, static_cast<int>(ENodeState::Begin), static_cast<int>(ENodeState::End)> TNodeState;
+
+ // Write modes named after the SQL statement forms (INSERT INTO, UPSERT INTO, ...).
+ enum class ESQLWriteColumnMode {
+ InsertInto,
+ InsertOrAbortInto,
+ InsertOrIgnoreInto,
+ InsertOrRevertInto,
+ UpsertInto,
+ ReplaceInto,
+ InsertIntoWithTruncate,
+ Update,
+ Delete,
+ };
+
+ // Write modes; NOTE(review): presumably the internal counterpart of
+ // ESQLWriteColumnMode — the mapping between the two is not visible here.
+ enum class EWriteColumnMode {
+ Default,
+ Insert,
+ InsertOrAbort,
+ InsertOrIgnore,
+ InsertOrRevert,
+ Upsert,
+ Replace,
+ Renew,
+ Update,
+ UpdateOn,
+ Delete,
+ DeleteOn,
+ };
+
+ // Kind of column change requested by ALTER TABLE.
+ // NOTE(review): the type name is misspelled ("Intentnt"); it is part of the
+ // public interface, so renaming requires updating every user.
+ enum class EAlterTableIntentnt {
+ AddColumn,
+ DropColumn
+ };
+
+ // Kind of table object.
+ enum class ETableType {
+ Table,
+ TableStore,
+ ExternalTable
+ };
+
+ // Forward declarations of types referenced by the node interfaces below.
+ class TContext;
+ class ITableKeys;
+ class ISource;
+ class IAggregation;
+ class TObjectOperatorContext;
+ typedef TIntrusivePtr<IAggregation> TAggregationPtr;
+ class TColumnNode;
+ class TTupleNode;
+ class TCallNode;
+ class TStructNode;
+ class TAccessNode;
+ class TLambdaNode;
+ class TUdfNode;
+ typedef TIntrusivePtr<ISource> TSourcePtr;
+
+ struct TScopedState;
+ typedef TIntrusivePtr<TScopedState> TScopedStatePtr;
+
+ // Joins two strings with a dot: DotJoin("a", "b") gives "a.b".
+ inline TString DotJoin(const TString& lhs, const TString& rhs) {
+     return TStringBuilder() << lhs << "." << rhs;
+ }
+
+ // Builders of error message texts mentioning `column` (defined elsewhere).
+ TString ErrorDistinctByGroupKey(const TString& column);
+ TString ErrorDistinctWithoutCorrelation(const TString& column);
+
+ // Base class of the SQL translation tree nodes. A node is ref-counted
+ // (TSimpleRefCount), carries a position, an optional label and a cached
+ // TNodeState, and is converted to a TAstNode by the pure virtual Translate().
+ // Derived classes must also implement DoClone().
+ class INode: public TSimpleRefCount<INode> {
+ public:
+ typedef TIntrusivePtr<INode> TPtr;
+
+ // Part of an identifier: constructed either from a name or from an expression.
+ struct TIdPart {
+ TString Name;
+ TPtr Expr;
+
+ TIdPart(const TString& name)
+ : Name(name)
+ {
+ }
+ TIdPart(TPtr expr)
+ : Expr(expr)
+ {
+ }
+ // Copies Name and clones Expr when it is set.
+ TIdPart Clone() const {
+ TIdPart res(Name);
+ res.Expr = Expr ? Expr->Clone() : nullptr;
+ return res;
+ }
+ };
+
+ public:
+ INode(TPosition pos);
+ virtual ~INode();
+
+ TPosition GetPos() const;
+ const TString& GetLabel() const;
+ TMaybe<TPosition> GetLabelPos() const;
+ void SetLabel(const TString& label, TMaybe<TPosition> pos = {});
+ bool IsImplicitLabel() const;
+ void MarkImplicitLabel(bool isImplicitLabel);
+
+ void SetCountHint(bool isCount);
+ bool GetCountHint() const;
+ bool Init(TContext& ctx, ISource* src);
+ virtual bool InitReference(TContext& ctx);
+
+ bool IsConstant() const;
+ bool MaybeConstant() const;
+ bool IsAggregated() const;
+ bool IsAggregationKey() const;
+ bool IsOverWindow() const;
+ bool IsOverWindowDistinct() const;
+ // Calls PrecacheState() and then tests the requested flag.
+ bool HasState(ENodeState state) const {
+ PrecacheState();
+ return State.Test(state);
+ }
+
+ virtual bool IsNull() const;
+ virtual bool IsLiteral() const;
+ virtual TString GetLiteralType() const;
+ virtual TString GetLiteralValue() const;
+ virtual bool IsIntegerLiteral() const;
+ virtual TPtr ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const;
+ virtual bool IsAsterisk() const;
+ virtual const TString* SubqueryAlias() const;
+ virtual TString GetOpName() const;
+ virtual const TString* GetLiteral(const TString& type) const;
+ virtual const TString* GetColumnName() const;
+ virtual void AssumeColumn();
+ virtual const TString* GetSourceName() const;
+ virtual const TString* GetAtomContent() const;
+ virtual bool IsOptionalArg() const;
+ virtual size_t GetTupleSize() const;
+ virtual TPtr GetTupleElement(size_t index) const;
+ virtual ITableKeys* GetTableKeys();
+ virtual ISource* GetSource();
+ virtual TVector<INode::TPtr>* ContentListPtr();
+ virtual TAstNode* Translate(TContext& ctx) const = 0;
+ virtual TAggregationPtr GetAggregation() const;
+ virtual void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs);
+ virtual TPtr WindowSpecFunc(const TPtr& type) const;
+ virtual bool SetViewName(TContext& ctx, TPosition pos, const TString& view);
+ virtual bool SetPrimaryView(TContext& ctx, TPosition pos);
+ void UseAsInner();
+ void DisableSort();
+ virtual bool UsedSubquery() const;
+ virtual bool IsSelect() const;
+ virtual bool HasSelectResult() const;
+ virtual const TString* FuncName() const;
+ virtual const TString* ModuleName() const;
+ virtual bool HasSkip() const;
+
+ // Typed accessors, one mutable/const pair per concrete node kind.
+ virtual TColumnNode* GetColumnNode();
+ virtual const TColumnNode* GetColumnNode() const;
+
+ virtual TTupleNode* GetTupleNode();
+ virtual const TTupleNode* GetTupleNode() const;
+
+ virtual TCallNode* GetCallNode();
+ virtual const TCallNode* GetCallNode() const;
+
+ virtual TStructNode* GetStructNode();
+ virtual const TStructNode* GetStructNode() const;
+
+ virtual TAccessNode* GetAccessNode();
+ virtual const TAccessNode* GetAccessNode() const;
+
+ virtual TLambdaNode* GetLambdaNode();
+ virtual const TLambdaNode* GetLambdaNode() const;
+
+ virtual TUdfNode* GetUdfNode();
+ virtual const TUdfNode* GetUdfNode() const;
+
+ // Tree traversal: the callback receives a node; `visited` holds the nodes seen so far.
+ using TVisitFunc = std::function<bool (const INode&)>;
+ using TVisitNodeSet = std::unordered_set<const INode*>;
+
+ void VisitTree(const TVisitFunc& func) const;
+ void VisitTree(const TVisitFunc& func, TVisitNodeSet& visited) const;
+
+ TPtr AstNode() const;
+ TPtr AstNode(TAstNode* node) const;
+ TPtr AstNode(TPtr node) const;
+ TPtr AstNode(const TString& str) const;
+
+ // Appends every argument in order: each one is converted with AstNode()
+ // and passed to DoAdd().
+ template <typename TVal, typename... TVals>
+ void Add(TVal val, TVals... vals) {
+ DoAdd(AstNode(val));
+ Add(vals...);
+ }
+
+ // Recursion terminator for the variadic Add().
+ void Add() {}
+
+ // Y() Q() L()
+ // Y(): a node created by AstNode() with no arguments.
+ TPtr Y() const {
+ return AstNode();
+ }
+
+ // Y(vals...): a node created by AstNode() with the given values added to it.
+ template <typename... TVals>
+ TPtr Y(TVals... vals) const {
+ TPtr node(AstNode());
+ node->Add(vals...);
+ return node;
+ }
+
+ // Q(a): shorthand for Y("quote", a).
+ template <typename T>
+ TPtr Q(T a) const {
+ return Y("quote", a);
+ }
+
+ // L(list, vals...): list->ShallowCopy() with vals added to the copy.
+ template <typename... TVals>
+ TPtr L(TPtr list, TVals... vals) const {
+ Y_DEBUG_ABORT_UNLESS(list);
+ auto copy = list->ShallowCopy();
+ copy->Add(vals...);
+ return copy;
+ }
+
+ TPtr Clone() const;
+ protected:
+ virtual TPtr ShallowCopy() const;
+ virtual void DoUpdateState() const;
+ virtual TPtr DoClone() const = 0;
+ void PrecacheState() const;
+
+ virtual void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const;
+ private:
+ virtual bool DoInit(TContext& ctx, ISource* src);
+ virtual void DoAdd(TPtr node);
+
+ protected:
+ TPosition Pos;
+ TString Label;
+ TMaybe<TPosition> LabelPos;
+ bool ImplicitLabel = false;
+ // Mutable so that const accessors (e.g. HasState) can refresh the cached flags.
+ mutable TNodeState State;
+ bool AsInner = false;
+ bool DisableSort_ = false;
+ };
+ typedef INode::TPtr TNodePtr;
+
+ // Node that holds another node (Inner) and overrides the INode virtual
+ // interface; NOTE(review): the overrides presumably forward to Inner — the
+ // definitions are not in this header, confirm in node.cpp.
+ class IProxyNode : public INode {
+ public:
+ IProxyNode(TPosition pos, const TNodePtr& parent)
+ : INode(pos)
+ , Inner(parent)
+ {}
+
+ protected:
+ virtual bool IsNull() const override;
+ virtual bool IsLiteral() const override;
+ virtual TString GetLiteralType() const override;
+ virtual TString GetLiteralValue() const override;
+ virtual bool IsIntegerLiteral() const override;
+ virtual TPtr ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const override;
+ virtual bool IsAsterisk() const override;
+ virtual const TString* SubqueryAlias() const override;
+ virtual TString GetOpName() const override;
+ virtual const TString* GetLiteral(const TString &type) const override;
+ virtual const TString* GetColumnName() const override;
+ virtual void AssumeColumn() override;
+ virtual const TString* GetSourceName() const override;
+ virtual const TString* GetAtomContent() const override;
+ virtual bool IsOptionalArg() const override;
+ virtual size_t GetTupleSize() const override;
+ virtual TPtr GetTupleElement(size_t index) const override;
+ virtual ITableKeys* GetTableKeys() override;
+ virtual ISource* GetSource() override;
+ virtual TVector<INode::TPtr>* ContentListPtr() override;
+ virtual TAggregationPtr GetAggregation() const override;
+ virtual void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override;
+ virtual TPtr WindowSpecFunc(const TPtr& type) const override;
+ virtual bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override;
+ virtual bool SetPrimaryView(TContext& ctx, TPosition pos) override;
+ virtual bool UsedSubquery() const override;
+ virtual bool IsSelect() const override;
+ virtual bool HasSelectResult() const override;
+ virtual const TString* FuncName() const override;
+ virtual const TString* ModuleName() const override;
+ virtual bool HasSkip() const override;
+
+ virtual TColumnNode* GetColumnNode() override;
+ virtual const TColumnNode* GetColumnNode() const override;
+
+ virtual TTupleNode* GetTupleNode() override;
+ virtual const TTupleNode* GetTupleNode() const override;
+
+ virtual TCallNode* GetCallNode() override;
+ virtual const TCallNode* GetCallNode() const override;
+
+ virtual TStructNode* GetStructNode() override;
+ virtual const TStructNode* GetStructNode() const override;
+
+ virtual TAccessNode* GetAccessNode() override;
+ virtual const TAccessNode* GetAccessNode() const override;
+
+ virtual TLambdaNode* GetLambdaNode() override;
+ virtual const TLambdaNode* GetLambdaNode() const override;
+
+ virtual TUdfNode* GetUdfNode() override;
+ virtual const TUdfNode* GetUdfNode() const override;
+
+ protected:
+ virtual void DoUpdateState() const override;
+ virtual void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const override;
+ virtual bool InitReference(TContext& ctx) override;
+ virtual bool DoInit(TContext& ctx, ISource* src) override;
+
+ private:
+ virtual void DoAdd(TPtr node) override;
+
+ protected:
+ // The wrapped node; the pointer itself cannot be reseated after construction.
+ const TNodePtr Inner;
+ };
+
+ // Table hints: hint name -> list of value nodes.
+ using TTableHints = TMap<TString, TVector<TNodePtr>>;
+ // Merges `overrides` into `base` (exact merge policy is defined in the implementation).
+ void MergeHints(TTableHints& base, const TTableHints& overrides);
+
+ // Null-tolerant clone: returns node->Clone() for a set pointer, nullptr otherwise.
+ template<class T>
+ inline T SafeClone(const T& node) {
+     if (!node) {
+         return nullptr;
+     }
+     return node->Clone();
+ }
+
+ // Returns a vector of the same length whose elements are SafeClone() copies
+ // of the elements of `args` (null elements stay null).
+ template<class T>
+ inline TVector<T> CloneContainer(const TVector<T>& args) {
+     const size_t count = args.size();
+     TVector<T> result;
+     result.reserve(count);
+     for (size_t i = 0; i < count; ++i) {
+         result.emplace_back(SafeClone(args[i]));
+     }
+     return result;
+ }
+
+ // CloneContainer overload for table hints (defined elsewhere).
+ TTableHints CloneContainer(const TTableHints& hints);
+
+ // Atom node: stores string content, flags and an "optional argument" marker.
+ class TAstAtomNode: public INode {
+ public:
+ TAstAtomNode(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg);
+
+ ~TAstAtomNode() override;
+
+ TAstNode* Translate(TContext& ctx) const override;
+ const TString& GetContent() const {
+ return Content;
+ }
+
+ const TString* GetAtomContent() const override;
+ bool IsOptionalArg() const override;
+
+ protected:
+ TString Content;
+ ui32 Flags;
+ bool IsOptionalArg_;
+
+ void DoUpdateState() const override;
+ };
+
+ // Concrete atom node: adds DoClone(), which copies position, content, flags
+ // and the optional-argument marker.
+ class TAstAtomNodeImpl final: public TAstAtomNode {
+ public:
+ TAstAtomNodeImpl(TPosition pos, const TString& content, ui32 flags, bool isOptionalArg = false)
+ : TAstAtomNode(pos, content, flags, isOptionalArg)
+ {}
+
+ TNodePtr DoClone() const final {
+ return new TAstAtomNodeImpl(Pos, Content, Flags, IsOptionalArg_);
+ }
+ };
+
+ // Node wrapping an already built TAstNode held as a raw pointer.
+ // DoClone() creates a new wrapper around the same TAstNode pointer.
+ class TAstDirectNode final: public INode {
+ public:
+ TAstDirectNode(TAstNode* node);
+
+ TAstNode* Translate(TContext& ctx) const override;
+
+ TPtr DoClone() const final {
+ return new TAstDirectNode(Node);
+ }
+ protected:
+ TAstNode* Node;
+ };
+
+ // List node: holds child nodes in Nodes; children are appended through DoAdd().
+ class TAstListNode: public INode {
+ public:
+ TAstListNode(TPosition pos);
+ virtual ~TAstListNode();
+
+ TAstNode* Translate(TContext& ctx) const override;
+
+ protected:
+ explicit TAstListNode(const TAstListNode& node);
+ explicit TAstListNode(TPosition pos, TVector<TNodePtr>&& nodes);
+ TPtr ShallowCopy() const override;
+ bool DoInit(TContext& ctx, ISource* src) override;
+ void DoAdd(TNodePtr node) override;
+ void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const override;
+
+ void DoUpdateState() const override;
+
+ void UpdateStateByListNodes(const TVector<TNodePtr>& Nodes) const;
+
+ protected:
+ TVector<TNodePtr> Nodes;
+ mutable TMaybe<bool> CacheGroupKey;
+ };
+
+ // Concrete list node: adds public constructors, DoClone() and its own
+ // CollectPreaggregateExprs() override.
+ class TAstListNodeImpl final: public TAstListNode {
+ public:
+ TAstListNodeImpl(TPosition pos);
+ TAstListNodeImpl(TPosition pos, TVector<TNodePtr> nodes);
+ void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override;
+
+ protected:
+ TNodePtr DoClone() const final;
+ };
+
+ // Call of an operation `opName` with arguments and an allowed argument-count
+ // range [minArgs, maxArgs]. The short constructor sets both bounds to args.size().
+ class TCallNode: public TAstListNode {
+ public:
+ TCallNode(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ TCallNode(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, args.size(), args.size(), args)
+ {}
+
+ TString GetOpName() const override;
+ const TString* GetSourceName() const override;
+
+ const TVector<TNodePtr>& GetArgs() const;
+ TCallNode* GetCallNode() override;
+ const TCallNode* GetCallNode() const override;
+
+ protected:
+ bool DoInit(TContext& ctx, ISource* src) override;
+ bool ValidateArguments(TContext& ctx) const;
+ TString GetCallExplain() const;
+ void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override;
+
+ protected:
+ TString OpName;
+ i32 MinArgs;
+ i32 MaxArgs;
+ TVector<TNodePtr> Args;
+ // NOTE(review): TAstListNode already declares a member named CacheGroupKey;
+ // this declaration hides it — confirm that two separate caches are intended.
+ mutable TMaybe<bool> CacheGroupKey;
+
+ void DoUpdateState() const override;
+ };
+
+ // Concrete TCallNode that only adds DoClone().
+ class TCallNodeImpl final: public TCallNode {
+ TPtr DoClone() const final;
+ public:
+ TCallNodeImpl(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ TCallNodeImpl(TPosition pos, const TString& opName, const TVector<TNodePtr>& args);
+ };
+
+ // Call node constructed from an operation name only; overrides FuncName().
+ class TFuncNodeImpl final : public TCallNode {
+ TPtr DoClone() const final;
+ public:
+ TFuncNodeImpl(TPosition pos, const TString& opName);
+ const TString* FuncName() const override;
+ };
+
+ // Call node that additionally stores ReqArgsCount and has its own DoInit();
+ // how the count is used is defined in the implementation.
+ class TCallNodeDepArgs final : public TCallNode {
+ TPtr DoClone() const final;
+ public:
+ TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, const TVector<TNodePtr>& args);
+ protected:
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ private:
+ const ui32 ReqArgsCount;
+ };
+
+ // Call node with its own DoInit() and DoUpdateState() overrides.
+ class TCallDirectRow final : public TCallNode {
+ TPtr DoClone() const final;
+ public:
+ TCallDirectRow(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ TCallDirectRow(TPosition pos, const TString& opName, const TVector<TNodePtr>& args);
+ protected:
+ bool DoInit(TContext& ctx, ISource* src) override;
+ void DoUpdateState() const override;
+ };
+
+ // Common base of the TWin* call nodes declared below.
+ class TWinAggrEmulation: public TCallNode {
+ protected:
+ void DoUpdateState() const override;
+ bool DoInit(TContext& ctx, ISource* src) override;
+ TPtr WindowSpecFunc(const TNodePtr& type) const override;
+ public:
+ TWinAggrEmulation(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ protected:
+ // Clone helper for derived classes: builds a TNodeType from this node's
+ // position, OpName, MinArgs, MaxArgs and cloned Args.
+ template<class TNodeType>
+ TPtr CallNodeClone() const {
+ return new TNodeType(GetPos(), OpName, MinArgs, MaxArgs, CloneContainer(Args));
+ }
+ TString FuncAlias;
+ };
+
+ // Function configuration: setting name -> value node.
+ using TFunctionConfig = TMap<TString, TNodePtr>;
+
+ // List node that keeps a copy of a TFunctionConfig map.
+ class TExternalFunctionConfig final: public TAstListNode {
+ public:
+ TExternalFunctionConfig(TPosition pos, const TFunctionConfig& config)
+ : TAstListNode(pos)
+ , Config(config)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+ TPtr DoClone() const final;
+
+ private:
+ TFunctionConfig Config;
+ };
+
+ // TWinAggrEmulation subclass; cloned via CallNodeClone<TWinRowNumber>().
+ class TWinRowNumber final: public TWinAggrEmulation {
+ TPtr DoClone() const final {
+ return CallNodeClone<TWinRowNumber>();
+ }
+ public:
+ TWinRowNumber(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ };
+
+ // TWinAggrEmulation subclass with its own DoInit(); cloned via CallNodeClone<TWinCumeDist>().
+ class TWinCumeDist final: public TWinAggrEmulation {
+ TPtr DoClone() const final {
+ return CallNodeClone<TWinCumeDist>();
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+ public:
+ TWinCumeDist(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ };
+
+ // TWinAggrEmulation subclass with its own DoInit() and a private FakeSource;
+ // cloned via CallNodeClone<TWinNTile>().
+ class TWinNTile final: public TWinAggrEmulation {
+ TPtr DoClone() const final {
+ return CallNodeClone<TWinNTile>();
+ }
+ bool DoInit(TContext& ctx, ISource* src) override;
+ public:
+ TWinNTile(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+
+ private:
+ TSourcePtr FakeSource;
+ };
+
+ // TWinAggrEmulation subclass with its own DoInit(); cloned via CallNodeClone<TWinLeadLag>().
+ class TWinLeadLag final: public TWinAggrEmulation {
+ TPtr DoClone() const final {
+ return CallNodeClone<TWinLeadLag>();
+ }
+ bool DoInit(TContext& ctx, ISource* src) override;
+ public:
+ TWinLeadLag(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ };
+
+ // TWinAggrEmulation subclass with its own DoInit(); cloned via CallNodeClone<TWinRank>().
+ class TWinRank final: public TWinAggrEmulation {
+ TPtr DoClone() const final {
+ return CallNodeClone<TWinRank>();
+ }
+ bool DoInit(TContext& ctx, ISource* src) override;
+ public:
+ TWinRank(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args);
+ };
+
+ // Describes a table view: a view name and/or a "primary" flag.
+ struct TViewDescription {
+ TString ViewName = "";
+ bool PrimaryFlag = false;
+
+ // True when the description equals a default-constructed one
+ // (empty name and PrimaryFlag == false).
+ bool empty() const { return *this == TViewDescription(); }
+ bool operator == (const TViewDescription&) const = default;
+ };
+
+ // Interface of nodes describing table keys; BuildKeys() produces the key node
+ // for the requested mode.
+ class ITableKeys: public INode {
+ public:
+ enum class EBuildKeysMode {
+ CREATE,
+ DROP,
+ INPUT,
+ WRITE
+ };
+
+ ITableKeys(TPosition pos);
+ virtual const TString* GetTableName() const;
+ virtual TNodePtr BuildKeys(TContext& ctx, EBuildKeysMode mode) = 0;
+
+ protected:
+ TNodePtr AddView(TNodePtr key, const TViewDescription& view);
+
+ private:
+ /// Table keys are not clonable: DoClone() returns an empty pointer.
+ TPtr DoClone() const final {
+ return {};
+ }
+
+ ITableKeys* GetTableKeys() override;
+ TAstNode* Translate(TContext& ctx) const override;
+ };
+
+ // Syntax variant of the sampling clause.
+ enum class ESampleClause {
+ TableSample, // from SQL standard, percentage rate (0..100)
+ Sample // simplified (implied Bernoulli mode), fraction (0..1)
+ };
+
+ // Sampling method.
+ enum class ESampleMode {
+ Bernoulli,
+ System
+ };
+
+ // Atom whose value is either an explicit string (constructed from pos + str)
+ // or produced by a node (constructed from node + ctx).
+ class TDeferredAtom {
+ public:
+ TDeferredAtom();
+ TDeferredAtom(TPosition pos, const TString& str);
+ TDeferredAtom(TNodePtr node, TContext& ctx);
+ const TString* GetLiteral() const;
+ bool GetLiteral(TString& value, TContext& ctx) const;
+ TNodePtr Build() const;
+ TString GetRepr() const;
+ bool Empty() const;
+ bool HasNode() const;
+
+ private:
+ TMaybe<TString> Explicit;
+ TNodePtr Node; // atom or evaluation node
+ TString Repr;
+ };
+
+ // Reference to a topic: reference name, cluster and the consumers/settings/keys nodes.
+ struct TTopicRef {
+ TString RefName;
+ TDeferredAtom Cluster;
+ TNodePtr Consumers;
+ TNodePtr Settings;
+ TNodePtr Keys;
+
+ TTopicRef() = default;
+ TTopicRef(const TString& refName, const TDeferredAtom& cluster, TNodePtr keys);
+ TTopicRef(const TTopicRef&) = default;
+ TTopicRef& operator=(const TTopicRef&) = default;
+ };
+
+ // Identifier name together with its position.
+ struct TIdentifier {
+ TPosition Pos;
+ TString Name;
+
+ TIdentifier(TPosition pos, const TString& name)
+ : Pos(pos)
+ , Name(name) {}
+ };
+
+ // Column constraints: optional default-value expression and nullability.
+ struct TColumnConstraints {
+ TNodePtr DefaultExpr;
+ bool Nullable = true;
+
+ TColumnConstraints(TNodePtr defaultExpr, bool nullable);
+ };
+
+ // Column description: name, type node, nullability, families, serial flag,
+ // default expression and the kind of change being applied to the column.
+ struct TColumnSchema {
+ enum class ETypeOfChange {
+ Nothing,
+ DropNotNullConstraint,
+ SetNotNullConstraint, // todo flown4qqqq
+ SetFamily
+ };
+
+ TPosition Pos;
+ TString Name;
+ TNodePtr Type;
+ bool Nullable;
+ TVector<TIdentifier> Families;
+ bool Serial;
+ TNodePtr DefaultExpr;
+ const ETypeOfChange TypeOfChange;
+
+ TColumnSchema(TPosition pos, const TString& name, const TNodePtr& type, bool nullable,
+ TVector<TIdentifier> families, bool serial, TNodePtr defaultExpr, ETypeOfChange typeOfChange = ETypeOfChange::Nothing);
+ };
+
+ // Ref-counted set of columns: separate sets of real and artificial names,
+ // an ordered list, and flags such as All / QualifiedAll.
+ struct TColumns: public TSimpleRefCount<TColumns> {
+ TSet<TString> Real;
+ TSet<TString> Artificial;
+ TVector<TString> List;
+ TVector<bool> NamedColumns;
+ bool All = false;
+ bool QualifiedAll = false;
+ bool HasUnreliable = false;
+ bool HasUnnamed = false;
+
+ bool Add(const TString* column, bool countHint, bool isArtificial = false, bool isReliable = true);
+ TString AddUnnamed();
+ void Merge(const TColumns& columns);
+ void SetPrefix(const TString& prefix);
+ void SetAll();
+ bool IsColumnPossible(TContext& ctx, const TString& column) const;
+ };
+
+ // One sort key: the ordering expression and its direction.
+ class TSortSpecification: public TSimpleRefCount<TSortSpecification> {
+ public:
+ TSortSpecification(const TNodePtr& orderExpr, bool ascending);
+ const TNodePtr OrderExpr;
+ const bool Ascending;
+ TIntrusivePtr<TSortSpecification> Clone() const;
+ ~TSortSpecification() {}
+ private:
+ // NOTE(review): presumably an untouched copy of the expression kept for Clone() — confirm in node.cpp.
+ const TNodePtr CleanOrderExpr;
+ };
+ typedef TIntrusivePtr<TSortSpecification> TSortSpecificationPtr;
+
+ // Unit in which a window frame is measured.
+ enum EFrameType {
+ FrameByRows,
+ FrameByRange,
+ FrameByGroups,
+ };
+ // Window frame exclusion mode.
+ enum EFrameExclusions {
+ FrameExclNone, // same as EXCLUDE NO OTHERS
+ FrameExclCurRow,
+ FrameExclGroup,
+ FrameExclTies,
+ };
+ // Kind of a frame bound; the enumerator order is significant.
+ enum EFrameSettings {
+ // keep order
+ FrameUndefined,
+ FramePreceding,
+ FrameCurrentRow,
+ FrameFollowing,
+ };
+
+ // One bound of a window frame: position, bound expression and bound kind.
+ struct TFrameBound: public TSimpleRefCount<TFrameBound> {
+ TPosition Pos;
+ TNodePtr Bound;
+ EFrameSettings Settings = FrameUndefined;
+
+ TIntrusivePtr<TFrameBound> Clone() const;
+ ~TFrameBound() {}
+ };
+ typedef TIntrusivePtr<TFrameBound> TFrameBoundPtr;
+
+
+ // Window frame: type, begin/end bounds and exclusion mode.
+ struct TFrameSpecification: public TSimpleRefCount<TFrameSpecification> {
+ EFrameType FrameType = FrameByRows;
+ TFrameBoundPtr FrameBegin;
+ TFrameBoundPtr FrameEnd;
+ EFrameExclusions FrameExclusion = FrameExclNone;
+
+ TIntrusivePtr<TFrameSpecification> Clone() const;
+ ~TFrameSpecification() {}
+ };
+ typedef TIntrusivePtr<TFrameSpecification> TFrameSpecificationPtr;
+
+ // Parameters of a legacy hopping window: time extractor plus hop, interval
+ // and delay expressions, and the DataWatermarks flag.
+ struct TLegacyHoppingWindowSpec: public TSimpleRefCount<TLegacyHoppingWindowSpec> {
+     TNodePtr TimeExtractor;
+     TNodePtr Hop;
+     TNodePtr Interval;
+     TNodePtr Delay;
+     // Explicit default: the struct has no constructor, so without an
+     // initializer a default-initialized spec would hold an indeterminate value.
+     bool DataWatermarks = false;
+
+     TIntrusivePtr<TLegacyHoppingWindowSpec> Clone() const;
+     ~TLegacyHoppingWindowSpec() {}
+ };
+ typedef TIntrusivePtr<TLegacyHoppingWindowSpec> TLegacyHoppingWindowSpecPtr;
+
+ // Window specification: optional name of an existing window, partition
+ // expressions, ordering, session node and frame.
+ struct TWindowSpecification: public TSimpleRefCount<TWindowSpecification> {
+ TMaybe<TString> ExistingWindowName;
+ TVector<TNodePtr> Partitions;
+ bool IsCompact = false;
+ TVector<TSortSpecificationPtr> OrderBy;
+ TNodePtr Session;
+ TFrameSpecificationPtr Frame;
+
+ TIntrusivePtr<TWindowSpecification> Clone() const;
+ ~TWindowSpecification() {}
+ };
+ typedef TIntrusivePtr<TWindowSpecification> TWindowSpecificationPtr;
+ // Window specifications keyed by window name.
+ typedef TMap<TString, TWindowSpecificationPtr> TWinSpecs;
+
+ // CloneContainer overload for window specifications (defined elsewhere).
+ TWinSpecs CloneContainer(const TWinSpecs& specs);
+
+ // Aggregation-related checks over SELECT / GROUP BY terms (defined elsewhere).
+ void WarnIfAliasFromSelectIsUsedInGroupBy(TContext& ctx, const TVector<TNodePtr>& selectTerms, const TVector<TNodePtr>& groupByTerms,
+ const TVector<TNodePtr>& groupByExprTerms);
+ bool ValidateAllNodesForAggregation(TContext& ctx, const TVector<TNodePtr>& nodes);
+
+ // Settings of a write: discard flag and a label atom.
+ struct TWriteSettings {
+ bool Discard = false;
+ TDeferredAtom Label;
+ };
+
+ // Column reference node: the column is given either by name or by an
+ // expression node, together with a source name.
+ class TColumnNode final: public INode {
+ public:
+ TColumnNode(TPosition pos, const TString& column, const TString& source, bool maybeType);
+ TColumnNode(TPosition pos, const TNodePtr& column, const TString& source);
+
+ virtual ~TColumnNode();
+ bool IsAsterisk() const override;
+ virtual bool IsArtificial() const;
+ const TString* GetColumnName() const override;
+ const TString* GetSourceName() const override;
+ TColumnNode* GetColumnNode() override;
+ const TColumnNode* GetColumnNode() const override;
+ TAstNode* Translate(TContext& ctx) const override;
+ void ResetColumn(const TString& column, const TString& source);
+ void ResetColumn(const TNodePtr& column, const TString& source);
+
+ void SetUseSourceAsColumn();
+ void SetUseSource();
+ void ResetAsReliable();
+ void SetAsNotReliable();
+ bool IsReliable() const;
+ bool IsUseSourceAsColumn() const;
+ bool IsUseSource() const;
+ bool CanBeType() const;
+
+ private:
+ bool DoInit(TContext& ctx, ISource* src) override;
+ TPtr DoClone() const final;
+
+ void DoUpdateState() const override;
+
+ private:
+ static const TString Empty;
+ TNodePtr Node;
+ TString ColumnName;
+ TNodePtr ColumnExpr;
+ TString Source;
+ bool GroupKey = false;
+ bool Artificial = false;
+ bool Reliable = true;
+ bool UseSource = false;
+ bool UseSourceAsColumn = false;
+ bool MaybeType = false;
+ };
+
+ class TArgPlaceholderNode final: public INode // Node that carries only a name; built from a position and that name.
+ {
+ public:
+ TArgPlaceholderNode(TPosition pos, const TString &name);
+
+ TAstNode* Translate(TContext& ctx) const override;
+
+ TString GetName() const; // Returns the name by value.
+ TNodePtr DoClone() const final;
+
+ protected:
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ private:
+ TString Name; // The placeholder's name.
+ };
+
+ enum class EAggregateMode { // Aggregation mode; accepted by the Build*Aggregation factories and BuildBuiltinFunc, and stored in IAggregation::AggMode.
+ Normal, // Default value of BuildBuiltinFunc's aggMode parameter.
+ Distinct,
+ OverWindow,
+ OverWindowDistinct,
+ };
+
+ class TTupleNode: public TAstListNode { // AST list node built from a vector of expression nodes.
+ public:
+ TTupleNode(TPosition pos, const TVector<TNodePtr>& exprs);
+
+ bool IsEmpty() const;
+ const TVector<TNodePtr>& Elements() const; // Read-only view of the element nodes.
+ TTupleNode* GetTupleNode() override;
+ const TTupleNode* GetTupleNode() const override;
+ bool DoInit(TContext& ctx, ISource* src) override;
+ size_t GetTupleSize() const override;
+ TPtr GetTupleElement(size_t index) const override;
+ TNodePtr DoClone() const final;
+ private:
+ void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override;
+ const TString* GetSourceName() const override;
+
+ const TVector<TNodePtr> Exprs; // Immutable after construction (const member).
+ };
+
+ // AST list node for a structure built from parallel vectors of expressions
+ // and labels; `ordered` is stored as-is in the Ordered member.
+ class TStructNode: public TAstListNode {
+ public:
+     TStructNode(TPosition pos, const TVector<TNodePtr>& exprs, const TVector<TNodePtr>& labels, bool ordered);
+
+     bool DoInit(TContext& ctx, ISource* src) override;
+     TNodePtr DoClone() const final;
+     // Read-only access to the member expressions. Const-qualified so that it
+     // is also callable through a const TStructNode, e.g. the pointer returned
+     // by the const GetStructNode() overload; non-const callers are unaffected.
+     const TVector<TNodePtr>& GetExprs() const {
+         return Exprs;
+     }
+     TStructNode* GetStructNode() override;
+     const TStructNode* GetStructNode() const override;
+
+ private:
+     void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override;
+     const TString* GetSourceName() const override;
+
+     // All three members are const: the node's content is fixed at construction.
+     const TVector<TNodePtr> Exprs;
+     const TVector<TNodePtr> Labels;
+     const bool Ordered;
+ };
+
+
+ // Node describing a UDF reference built from a list of argument nodes.
+ // Accessors expose the function/module names, external types, run config
+ // and type config held by the node.
+ class TUdfNode: public INode {
+ public:
+     TUdfNode(TPosition pos, const TVector<TNodePtr>& args);
+     bool DoInit(TContext& ctx, ISource* src) override final;
+     TNodePtr DoClone() const override final;
+     TAstNode* Translate(TContext& ctx) const override;
+     const TNodePtr GetExternalTypes() const;
+     const TString& GetFunction() const;
+     const TString& GetModule() const;
+     TNodePtr GetRunConfig() const;
+     const TDeferredAtom& GetTypeConfig() const;
+     TUdfNode* GetUdfNode() override;
+     const TUdfNode* GetUdfNode() const override;
+ private:
+     TVector<TNodePtr> Args;
+     // Raw, non-owning pointers. They start out as nullptr (like
+     // ExternalTypesTuple) so that a node whose names have not been assigned
+     // yet never holds an indeterminate pointer value.
+     const TString* FunctionName = nullptr;
+     const TString* ModuleName = nullptr;
+     TNodePtr ExternalTypesTuple = nullptr;
+     TNodePtr RunConfig;
+     TDeferredAtom TypeConfig;
+ };
+
+ class IAggregation: public INode { // Abstract base for aggregation nodes; concrete classes must provide InitAggr, AggregationTraitsFactory, GetApply and GetExtractor.
+ public:
+ bool IsDistinct() const;
+
+ void DoUpdateState() const override;
+
+ virtual const TString* GetGenericKey() const;
+
+ virtual bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) = 0; // Pure virtual.
+
+ virtual std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const; // Returns a node together with a bool flag; see the definition for the flag's meaning.
+
+ virtual TNodePtr AggregationTraitsFactory() const = 0; // Pure virtual.
+
+ virtual std::vector<ui32> GetFactoryColumnIndices() const;
+
+ virtual void AddFactoryArguments(TNodePtr& apply) const; // Takes `apply` by non-const reference, so it may modify or replace it.
+
+ virtual TNodePtr WindowTraits(const TNodePtr& type, TContext& ctx) const;
+
+ const TString& GetName() const;
+
+ EAggregateMode GetAggregationMode() const;
+ void MarkKeyColumnAsGenerated();
+
+ virtual void Join(IAggregation* aggr);
+
+ private:
+ virtual TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const = 0; // Private pure virtual hook implemented by subclasses.
+
+ protected:
+ IAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode mode); // Protected: only subclasses construct it.
+ TAstNode* Translate(TContext& ctx) const override;
+ TNodePtr WrapIfOverState(const TNodePtr& input, bool overState, bool many, TContext& ctx) const;
+ virtual TNodePtr GetExtractor(bool many, TContext& ctx) const = 0; // Pure virtual.
+
+ TString Name;
+ TString Func;
+ const EAggregateMode AggMode; // Fixed at construction.
+ TString DistinctKey;
+ bool IsGeneratedKeyColumn = false; // Presumably set by MarkKeyColumnAsGenerated() -- confirm in the .cpp.
+ };
+
+ enum class EExprSeat: int { // Enumerates expression "seats"; values are consecutive ints starting at 0.
+ Open = 0,
+ FlattenByExpr,
+ FlattenBy,
+ GroupBy,
+ DistinctAggr,
+ WindowPartitionBy,
+ Max // Last enumerator; presumably a count sentinel rather than a real seat -- confirm at use sites.
+ };
+
+ enum class EExprType: int { // Two-valued expression kind with int as the underlying type.
+ WithExpression,
+ ColumnOnly,
+ };
+
+ enum class EOrderKind: int { // Ordering kind with int as the underlying type; None is the zero value.
+ None,
+ Sort,
+ Assume,
+ Passthrough
+ };
+
+ class TListOfNamedNodes final: public INode { // Node that owns a vector of expression nodes handed over by rvalue reference.
+ public:
+ TListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs);
+
+ TVector<TNodePtr>* ContentListPtr() override; // Returns a mutable pointer to a node vector.
+ TAstNode* Translate(TContext& ctx) const override;
+ TPtr DoClone() const final;
+ void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final;
+ private:
+ TVector<TNodePtr> Exprs;
+ TString Meaning; // NOTE(review): no accessor or initializer for this member is visible in this class declaration.
+ };
+
+ class TLiteralNode: public TAstListNode { // AST node for a literal; carries a type name, a textual value and null/void flags.
+ public:
+ TLiteralNode(TPosition pos, bool isNull);
+ TLiteralNode(TPosition pos, const TString& type, const TString& value);
+ TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags);
+ TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags, const TString& type);
+ bool IsNull() const override;
+ const TString* GetLiteral(const TString& type) const override;
+ void DoUpdateState() const override;
+ TPtr DoClone() const override;
+ bool IsLiteral() const override;
+ TString GetLiteralType() const override;
+ TString GetLiteralValue() const override;
+ protected:
+ bool Null; // No in-class default; must be set by every constructor.
+ bool Void; // No in-class default; must be set by every constructor.
+ TString Type;
+ TString Value;
+ };
+
+ class TAsteriskNode: public INode { // Node type that overrides IsAsterisk(); holds no data members of its own.
+ public:
+ TAsteriskNode(TPosition pos);
+ bool IsAsterisk() const override;
+ TPtr DoClone() const override;
+ TAstNode* Translate(TContext& ctx) const override;
+ };
+
+ template<typename T>
+ class TLiteralNumberNode: public TLiteralNode { // Literal node templated on a numeric type T; adds integer-literal detection and unary-operator application.
+ public:
+ TLiteralNumberNode(TPosition pos, const TString& type, const TString& value, bool implicitType = false); // implicitType defaults to false.
+ TPtr DoClone() const override final;
+ bool DoInit(TContext& ctx, ISource* src) override;
+ bool IsIntegerLiteral() const override;
+ TPtr ApplyUnaryOp(TContext& ctx, TPosition pos, const TString& opName) const override;
+ private:
+ const bool ImplicitType; // Fixed at construction.
+ };
+
+ struct TTableArg { // One table argument: an expression and/or identifier plus a view description.
+ bool HasAt = false; // Off by default; presumably marks an '@'-prefixed name -- confirm in the parser.
+ TNodePtr Expr;
+ TDeferredAtom Id;
+ TViewDescription View;
+ };
+
+ class TTableRows final : public INode { // Node constructible either from an argument list or from just an argument count.
+ public:
+ TTableRows(TPosition pos, const TVector<TNodePtr>& args);
+ TTableRows(TPosition pos, ui32 argsCount);
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ void DoUpdateState() const override;
+
+ TNodePtr DoClone() const final;
+ TAstNode* Translate(TContext& ctx) const override;
+
+ private:
+ ui32 ArgsCount; // No in-class default; both constructors are expected to set it.
+ TNodePtr Node;
+ };
+
+ struct TStringContent { // Result record of StringContent()/StringContentOrIdContent().
+ TString Content;
+ NYql::NUdf::EDataSlot Type = NYql::NUdf::EDataSlot::String; // Defaults to the String data slot.
+ TMaybe<TString> PgType; // Empty unless a PG type name is attached.
+ ui32 Flags = NYql::TNodeFlags::Default;
+ };
+
+ TMaybe<TStringContent> StringContent(TContext& ctx, TPosition pos, const TString& input); // Declaration only; yields a TStringContent or an empty TMaybe.
+ TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, const TString& input); // Same signature; by its name also accepts identifier content -- see the definition.
+
+ struct TTtlSettings { // TTL configuration: a column, an expression node and an optional unit for the column.
+ enum class EUnit { // NOTE(review): the quoted names in the comments look like serialized spellings, possibly read by a code generator -- do not edit them casually.
+ Seconds /* "seconds" */,
+ Milliseconds /* "milliseconds" */,
+ Microseconds /* "microseconds" */,
+ Nanoseconds /* "nanoseconds" */,
+ };
+
+ TIdentifier ColumnName;
+ TNodePtr Expr;
+ TMaybe<EUnit> ColumnUnit; // Empty when no unit was given (the constructor default).
+
+ TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit = {}); // Defined out of line.
+ };
+
+ struct TTableSettings { // Bag of optional table-level settings; each member is empty until explicitly assigned.
+ TNodePtr CompactionPolicy;
+ TMaybe<TIdentifier> AutoPartitioningBySize;
+ TNodePtr PartitionSizeMb;
+ TMaybe<TIdentifier> AutoPartitioningByLoad;
+ TNodePtr MinPartitions;
+ TNodePtr MaxPartitions;
+ TNodePtr PartitionCount;
+ TNodePtr UniformPartitions;
+ TVector<TVector<TNodePtr>> PartitionAtKeys;
+ TMaybe<TIdentifier> KeyBloomFilter;
+ TNodePtr ReadReplicasSettings;
+ NYql::TResetableSetting<TTtlSettings, void> TtlSettings;
+ NYql::TResetableSetting<TNodePtr, void> Tiering;
+ TMaybe<TIdentifier> StoreType;
+ TNodePtr PartitionByHashFunction;
+ TMaybe<TIdentifier> StoreExternalBlobs;
+
+ TNodePtr DataSourcePath;
+ NYql::TResetableSetting<TNodePtr, void> Location;
+ TVector<NYql::TResetableSetting<std::pair<TIdentifier, TNodePtr>, TIdentifier>> ExternalSourceParameters;
+
+ bool IsSet() const { // True when at least one of the listed members converts to true. NOTE(review): PartitionCount is declared above but is not consulted here -- confirm that this is intentional.
+ return CompactionPolicy || AutoPartitioningBySize || PartitionSizeMb || AutoPartitioningByLoad
+ || MinPartitions || MaxPartitions || UniformPartitions || PartitionAtKeys || KeyBloomFilter
+ || ReadReplicasSettings || TtlSettings || Tiering || StoreType || PartitionByHashFunction
+ || StoreExternalBlobs || DataSourcePath || Location || ExternalSourceParameters;
+ }
+ };
+
+ struct TFamilyEntry { // One column-family entry; only the name is required at construction.
+ TFamilyEntry(const TIdentifier& name) // Non-explicit: a TIdentifier converts implicitly to a TFamilyEntry.
+ :Name(name)
+ {}
+
+ TIdentifier Name;
+ TNodePtr Data; // The remaining nodes stay empty until assigned.
+ TNodePtr Compression;
+ TNodePtr CompressionLevel;
+ };
+
+ struct TVectorIndexSettings { // Settings of a vector index; the enum-typed fields are optional, the numeric ones default to 0.
+ enum class EDistance { // NOTE(review): the quoted names in the comments look like serialized spellings -- keep them in sync with any generator that reads them.
+ Cosine /* "cosine" */
+ , Manhattan /* "manhattan" */
+ , Euclidean /* "euclidean" */
+ };
+
+ enum class ESimilarity {
+ Cosine /* "cosine" */
+ , InnerProduct /* "inner_product" */
+ };
+
+ enum class EVectorType {
+ Float /* "float" */
+ , Uint8 /* "uint8" */
+ , Int8 /* "int8" */
+ , Bit /* "bit" */
+ };
+
+ std::optional<EDistance> Distance; // Empty until set.
+ std::optional<ESimilarity> Similarity; // Empty until set.
+ std::optional<EVectorType> VectorType; // Empty until set.
+ ui32 VectorDimension = 0;
+ ui32 Clusters = 0;
+ ui32 Levels = 0;
+
+ bool Validate(TContext& ctx) const; // Defined out of line; returns a bool verdict and has access to ctx for reporting.
+ };
+
+ struct TIndexDescription { // Description of one index: name, type, indexed and data columns, and settings.
+ enum class EType {
+ GlobalSync, // Default type used by the constructor.
+ GlobalAsync,
+ GlobalSyncUnique,
+ GlobalVectorKmeansTree,
+ };
+
+ TIndexDescription(const TIdentifier& name, EType type = EType::GlobalSync)
+ : Name(name)
+ , Type(type)
+ {}
+
+ TIdentifier Name;
+ EType Type;
+ TVector<TIdentifier> IndexColumns;
+ TVector<TIdentifier> DataColumns;
+ TTableSettings TableSettings;
+
+ using TIndexSettings = std::variant<std::monostate, TVectorIndexSettings>; // monostate is the first alternative, so a default-constructed value holds no vector settings.
+ TIndexSettings IndexSettings;
+ };
+
+ struct TChangefeedSettings { // Optional changefeed settings; every node stays empty until assigned.
+ struct TLocalSinkSettings {
+ // no special settings
+ };
+
+ TNodePtr Mode;
+ TNodePtr Format;
+ TNodePtr InitialScan;
+ TNodePtr VirtualTimestamps;
+ TNodePtr ResolvedTimestamps;
+ TNodePtr RetentionPeriod;
+ TNodePtr TopicAutoPartitioning;
+ TNodePtr TopicPartitions;
+ TNodePtr TopicMaxActivePartitions;
+ TNodePtr AwsRegion;
+ std::optional<std::variant<TLocalSinkSettings>> SinkSettings; // Empty until a sink is chosen; TLocalSinkSettings is currently the only alternative.
+ };
+
+ // A named changefeed together with its settings. The Disable flag is false
+ // for every newly constructed description.
+ struct TChangefeedDescription {
+     TChangefeedDescription(const TIdentifier& name)
+         : Name(name)
+     {}
+
+     TIdentifier Name;
+     TChangefeedSettings Settings;
+     bool Disable = false;
+ };
+
+ struct TCreateTableParameters { // Everything collected for creating a table: columns, keys, indexes, families, changefeeds and settings.
+ TVector<TColumnSchema> Columns;
+ TVector<TIdentifier> PkColumns;
+ TVector<TIdentifier> PartitionByColumns;
+ TVector<std::pair<TIdentifier, bool>> OrderByColumns; // Column plus a bool; presumably the sort direction -- TODO confirm which value means ascending.
+ TVector<TIndexDescription> Indexes;
+ TVector<TFamilyEntry> ColumnFamilies;
+ TVector<TChangefeedDescription> Changefeeds;
+ TTableSettings TableSettings;
+ ETableType TableType = ETableType::Table; // Defaults to a regular table.
+ bool Temporary = false;
+ };
+
+ struct TTableRef; // Forward declaration; only a shared_ptr to it is stored in TAnalyzeParams.
+ struct TAnalyzeParams { // A table reference plus a list of column names.
+ std::shared_ptr<TTableRef> Table;
+ TVector<TString> Columns;
+ };
+
+ // Everything collected for altering a table. Each member describes one kind
+ // of change; IsEmpty() tells whether any change was requested at all.
+ struct TAlterTableParameters {
+     TVector<TColumnSchema> AddColumns;
+     TVector<TString> DropColumns;
+     TVector<TColumnSchema> AlterColumns;
+     TVector<TFamilyEntry> AddColumnFamilies;
+     TVector<TFamilyEntry> AlterColumnFamilies;
+     TTableSettings TableSettings;
+     TVector<TIndexDescription> AddIndexes;
+     TVector<TIndexDescription> AlterIndexes;
+     TVector<TIdentifier> DropIndexes;
+     TMaybe<std::pair<TIdentifier, TIdentifier>> RenameIndexTo;
+     TMaybe<TIdentifier> RenameTo;
+     TVector<TChangefeedDescription> AddChangefeeds;
+     TVector<TChangefeedDescription> AlterChangefeeds;
+     TVector<TIdentifier> DropChangefeeds;
+     ETableType TableType = ETableType::Table;
+
+     // Returns true when no column, family, settings, index, rename or
+     // changefeed change is present. TableType does not take part in the check.
+     bool IsEmpty() const {
+         // Column changes.
+         if (!AddColumns.empty() || !DropColumns.empty() || !AlterColumns.empty()) {
+             return false;
+         }
+         // Column family changes.
+         if (!AddColumnFamilies.empty() || !AlterColumnFamilies.empty()) {
+             return false;
+         }
+         if (TableSettings.IsSet()) {
+             return false;
+         }
+         // Index changes, including an index rename.
+         if (!AddIndexes.empty() || !AlterIndexes.empty() || !DropIndexes.empty() || RenameIndexTo.Defined()) {
+             return false;
+         }
+         if (RenameTo.Defined()) {
+             return false;
+         }
+         // Changefeed changes are the last thing that can make it non-empty.
+         return AddChangefeeds.empty() && AlterChangefeeds.empty() && DropChangefeeds.empty();
+     }
+ };
+
+ struct TRoleParameters { // Parameters passed to the user/group builders (BuildCreateUser, BuildAlterUser, ...).
+ TMaybe<TDeferredAtom> Password; // Empty when no password was specified.
+ bool IsPasswordEncrypted = false;
+ TVector<TDeferredAtom> Roles;
+ };
+
+ struct TTopicConsumerSettings { // Per-consumer settings of a topic.
+ struct TLocalSinkSettings { // NOTE(review): not referenced by any member of this struct.
+ // no special settings
+ };
+
+ TNodePtr Important;
+ NYql::TResetableSetting<TNodePtr, void> ReadFromTs;
+ NYql::TResetableSetting<TNodePtr, void> SupportedCodecs;
+ };
+
+ struct TTopicConsumerDescription { // A named topic consumer together with its settings.
+ TTopicConsumerDescription(const TIdentifier& name) // Non-explicit single-argument constructor; Settings is default-constructed.
+ : Name(name)
+ {}
+
+ TIdentifier Name;
+ TTopicConsumerSettings Settings;
+ };
+ // Topic-level settings. Every member is a resetable setting wrapping a node;
+ // IsSet() reports whether at least one of them converts to true.
+ struct TTopicSettings {
+     NYql::TResetableSetting<TNodePtr, void> MinPartitions;
+     NYql::TResetableSetting<TNodePtr, void> MaxPartitions;
+     NYql::TResetableSetting<TNodePtr, void> RetentionPeriod;
+     NYql::TResetableSetting<TNodePtr, void> RetentionStorage;
+     NYql::TResetableSetting<TNodePtr, void> SupportedCodecs;
+     NYql::TResetableSetting<TNodePtr, void> PartitionWriteSpeed;
+     NYql::TResetableSetting<TNodePtr, void> PartitionWriteBurstSpeed;
+     NYql::TResetableSetting<TNodePtr, void> MeteringMode;
+     NYql::TResetableSetting<TNodePtr, void> AutoPartitioningStabilizationWindow;
+     NYql::TResetableSetting<TNodePtr, void> AutoPartitioningUpUtilizationPercent;
+     NYql::TResetableSetting<TNodePtr, void> AutoPartitioningDownUtilizationPercent;
+     NYql::TResetableSetting<TNodePtr, void> AutoPartitioningStrategy;
+
+     // Members are examined in declaration order; the first one that is set
+     // decides the answer.
+     bool IsSet() const {
+         if (MinPartitions || MaxPartitions) {
+             return true;
+         }
+         if (RetentionPeriod || RetentionStorage) {
+             return true;
+         }
+         if (SupportedCodecs || PartitionWriteSpeed || PartitionWriteBurstSpeed || MeteringMode) {
+             return true;
+         }
+         return AutoPartitioningStabilizationWindow
+             || AutoPartitioningUpUtilizationPercent
+             || AutoPartitioningDownUtilizationPercent
+             || AutoPartitioningStrategy;
+     }
+ };
+
+
+ // Parameters accepted by BuildCreateTopic: the consumers to create, the
+ // topic settings and the "existing is ok" flag.
+ struct TCreateTopicParameters {
+     TVector<TTopicConsumerDescription> Consumers;
+     TTopicSettings TopicSettings;
+     // Defaults to false so a default-constructed instance never carries an
+     // indeterminate flag value.
+     bool ExistingOk = false;
+ };
+
+ // Parameters accepted by BuildAlterTopic: consumers to add, alter (keyed by
+ // a string) and drop, the new topic settings and the "missing is ok" flag.
+ struct TAlterTopicParameters {
+     TVector<TTopicConsumerDescription> AddConsumers;
+     THashMap<TString, TTopicConsumerDescription> AlterConsumers;
+     TVector<TIdentifier> DropConsumers;
+     TTopicSettings TopicSettings;
+     // Defaults to false so a default-constructed instance never carries an
+     // indeterminate flag value.
+     bool MissingOk = false;
+ };
+
+ // Parameters accepted by BuildDropTopic.
+ struct TDropTopicParameters {
+     // Defaults to false so a default-constructed instance never carries an
+     // indeterminate flag value.
+     bool MissingOk = false;
+ };
+
+ // Parameters accepted by BuildCreateBackupCollection: named settings, the
+ // collection contents (a database flag and/or a list of tables) and the
+ // "existing is ok" flag.
+ struct TCreateBackupCollectionParameters {
+     std::map<TString, TDeferredAtom> Settings;
+
+     // Both flags default to false so a default-constructed instance never
+     // carries indeterminate values.
+     bool Database = false;
+     TVector<TDeferredAtom> Tables;
+
+     bool ExistingOk = false;
+ };
+
+ // Parameters accepted by BuildAlterBackupCollection: settings to set and to
+ // reset, the requested database change, tables to add/drop and the
+ // "missing is ok" flag.
+ struct TAlterBackupCollectionParameters {
+     // Requested change of the database entry; Unchanged is the default.
+     enum class EDatabase {
+         Unchanged,
+         Add,
+         Drop,
+     };
+
+     std::map<TString, TDeferredAtom> Settings;
+     std::set<TString> SettingsToReset;
+
+     EDatabase Database = EDatabase::Unchanged;
+     TVector<TDeferredAtom> TablesToAdd;
+     TVector<TDeferredAtom> TablesToDrop;
+
+     // Defaults to false so a default-constructed instance never carries an
+     // indeterminate flag value.
+     bool MissingOk = false;
+ };
+
+ // Parameters accepted by BuildDropBackupCollection.
+ struct TDropBackupCollectionParameters {
+     // Defaults to false so a default-constructed instance never carries an
+     // indeterminate flag value.
+     bool MissingOk = false;
+ };
+
+ struct TBackupParameters { // Parameters accepted by BuildBackup.
+ bool Incremental = false; // Off by default.
+ };
+
+ struct TRestoreParameters { // Parameters accepted by BuildRestore.
+ TString At; // Empty by default; presumably the restore point -- TODO confirm the expected format.
+ };
+
+ TString IdContent(TContext& ctx, const TString& str); // Declarations only in this run; all definitions live outside this header.
+ TString IdContentFromString(TContext& ctx, const TString& str);
+ TTableHints GetContextHints(TContext& ctx);
+
+ TString TypeByAlias(const TString& alias, bool normalize = true); // normalize is on by default.
+
+ TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags = NYql::TNodeFlags::ArbitraryContent, // flags default to ArbitraryContent; isOptionalArg defaults to false.
+ bool isOptionalArg = false);
+ TNodePtr BuildQuotedAtom(TPosition pos, const TString& content, ui32 flags = NYql::TNodeFlags::ArbitraryContent);
+
+ TNodePtr BuildLiteralNull(TPosition pos);
+ TNodePtr BuildLiteralVoid(TPosition pos);
+ /// The string is checked for being quotable; escaping and multiline content are supported
+ TNodePtr BuildLiteralSmartString(TContext& ctx, const TString& value);
+
+ struct TExprOrIdent { // Result of BuildLiteralTypedSmartStringOrId: an expression node and/or an identifier string.
+ TNodePtr Expr;
+ TString Ident;
+ };
+ TMaybe<TExprOrIdent> BuildLiteralTypedSmartStringOrId(TContext& ctx, const TString& value); // Declaration only; an empty TMaybe presumably signals failure -- confirm in the definition.
+
+ TNodePtr BuildLiteralRawString(TPosition pos, const TString& value, bool isUtf8 = false); // Node factory declarations follow; each returns a TNodePtr unless noted.
+ TNodePtr BuildLiteralBool(TPosition pos, bool value);
+ TNodePtr BuildEmptyAction(TPosition pos);
+
+ TNodePtr BuildTuple(TPosition pos, const TVector<TNodePtr>& exprs);
+
+ TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprs);
+ TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels); // Overload taking labels separately from the expressions.
+ TNodePtr BuildOrderedStructure(TPosition pos, const TVector<TNodePtr>& exprsUnlabeled, const TVector<TNodePtr>& labels);
+
+ TNodePtr BuildListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs); // Takes the vector by rvalue reference.
+
+ TNodePtr BuildArgPlaceholder(TPosition pos, const TString& name);
+
+ TNodePtr BuildColumn(TPosition pos, const TString& column = TString(), const TString& source = TString()); // Three overloads: column by name, by node, or by deferred atom; source defaults to an empty string in all of them.
+ TNodePtr BuildColumn(TPosition pos, const TNodePtr& column, const TString& source = TString());
+ TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source = TString());
+ TNodePtr BuildColumnOrType(TPosition pos, const TString& column = TString());
+ TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup);
+ TNodePtr BuildMatchRecognizeVarAccess(TPosition pos, const TString& var, const TString& column, bool theSameVar);
+ TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias);
+ TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName = TString()); // Single-body form with an optional result name.
+ TNodePtr BuildLambda(TPosition pos, TNodePtr params, const TVector<TNodePtr>& bodies); // Multi-body form.
+ TNodePtr BuildDataType(TPosition pos, const TString& typeName);
+ TMaybe<TString> LookupSimpleType(const TStringBuf& alias, bool flexibleTypes, bool isPgType); // Returns TMaybe<TString> rather than a node.
+ TNodePtr BuildSimpleType(TContext& ctx, TPosition pos, const TString& typeName, bool dataOnly);
+ TNodePtr BuildIsNullOp(TPosition pos, TNodePtr a);
+ TNodePtr BuildBinaryOp(TContext& ctx, TPosition pos, const TString& opName, TNodePtr a, TNodePtr b); // Takes a context, unlike BuildBinaryOpRaw.
+ TNodePtr BuildBinaryOpRaw(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b);
+
+ TNodePtr BuildCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr call);
+ TNodePtr BuildYsonOptionsNode(TPosition pos, bool autoConvert, bool strict, bool fastYson);
+
+ TNodePtr BuildDoCall(TPosition pos, const TNodePtr& node);
+ TNodePtr BuildTupleResult(TNodePtr tuple, size_t ensureTupleSize);
+ TNodePtr BuildNamedExprReference(TNodePtr parent, const TString& name, TMaybe<size_t> tupleIndex); // tupleIndex is optional (TMaybe).
+ TNodePtr BuildNamedExpr(TNodePtr parent);
+
+ // Implemented in aggregation.cpp. Every factory returns a TAggregationPtr and takes the aggregation mode as its last required parameter.
+ TAggregationPtr BuildFactoryAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode, bool multi = false); // multi is off by default.
+ TAggregationPtr BuildKeyPayloadFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildPayloadPredicateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildTwoArgsFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildLinearHistogramFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ template <bool HasKey>
+ TAggregationPtr BuildTopFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode); // Template on HasKey; the definition (and any explicit instantiation) is not in this header.
+ TAggregationPtr BuildTopFreqFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildCountDistinctEstimateFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildListFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildCountAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode);
+ TAggregationPtr BuildUserDefinedFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+ TAggregationPtr BuildPGFactoryAggregation(TPosition pos, const TString& name, EAggregateMode aggMode); // The only factory here without a func/factory name parameter.
+ TAggregationPtr BuildNthFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode);
+
+
+ // Implemented in builtin.cpp
+ TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce = false); // forReduce is off by default.
+ TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args);
+ TNodePtr BuildBuiltinFunc(
+ TContext& ctx,
+ TPosition pos,
+ TString name, // Taken by value, unlike the other string parameters.
+ const TVector<TNodePtr>& args,
+ const TString& nameSpace = TString(),
+ EAggregateMode aggMode = EAggregateMode::Normal,
+ bool* mustUseNamed = nullptr, // Optional out-parameter; may be left null.
+ bool warnOnYqlNameSpace = true
+ );
+
+ // Implemented in query.cpp. All declarations in this run return a TNodePtr.
+ TNodePtr BuildCreateUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped); // params is optional for create...
+ TNodePtr BuildCreateGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped);
+ TNodePtr BuildAlterUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TRoleParameters& params, TScopedStatePtr scoped); // ...but required for alter.
+ TNodePtr BuildRenameUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped);
+ TNodePtr BuildAlterGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TVector<TDeferredAtom>& toChange, bool isDrop,
+ TScopedStatePtr scoped);
+ TNodePtr BuildRenameGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped);
+ TNodePtr BuildDropRoles(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& toDrop, bool isUser, bool missingOk, TScopedStatePtr scoped);
+ TNodePtr BuildGrantPermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleName, TScopedStatePtr scoped); // Grant and revoke share the same parameter list.
+ TNodePtr BuildRevokePermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleName, TScopedStatePtr scoped);
+ TNodePtr BuildUpsertObjectOperation(TPosition pos, const TString& objectId, const TString& typeId, // The object operations take their feature maps by rvalue reference.
+ std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context);
+ TNodePtr BuildCreateObjectOperation(TPosition pos, const TString& objectId, const TString& typeId,
+ bool existingOk, bool replaceIfExists, std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context);
+ TNodePtr BuildAlterObjectOperation(TPosition pos, const TString& secretId, const TString& typeId, // NOTE(review): the id parameter is named secretId here and in the drop variant, but objectId in the upsert/create variants.
+ std::map<TString, TDeferredAtom>&& features, std::set<TString>&& featuresToReset, const TObjectOperatorContext& context);
+ TNodePtr BuildDropObjectOperation(TPosition pos, const TString& secretId, const TString& typeId,
+ bool missingOk, std::map<TString, TDeferredAtom>&& options, const TObjectOperatorContext& context);
+ TNodePtr BuildCreateAsyncReplication(TPosition pos, const TString& id,
+ std::vector<std::pair<TString, TString>>&& targets,
+ std::map<TString, TNodePtr>&& settings,
+ const TObjectOperatorContext& context);
+ TNodePtr BuildAlterAsyncReplication(TPosition pos, const TString& id,
+ std::map<TString, TNodePtr>&& settings,
+ const TObjectOperatorContext& context);
+ TNodePtr BuildDropAsyncReplication(TPosition pos, const TString& id, bool cascade, const TObjectOperatorContext& context);
+ TNodePtr BuildWriteResult(TPosition pos, const TString& label, TNodePtr settings);
+ TNodePtr BuildCommitClusters(TPosition pos);
+ TNodePtr BuildRollbackClusters(TPosition pos);
+ TNodePtr BuildQuery(TPosition pos, const TVector<TNodePtr>& blocks, bool topLevel, TScopedStatePtr scoped);
+ TNodePtr BuildPragma(TPosition pos, const TString& prefix, const TString& name, const TVector<TDeferredAtom>& values, bool valueDefault);
+ TNodePtr BuildSqlLambda(TPosition pos, TVector<TString>&& args, TVector<TNodePtr>&& exprSeq);
+ TNodePtr BuildWorldIfNode(TPosition pos, TNodePtr predicate, TNodePtr thenNode, TNodePtr elseNode, bool isEvaluate);
+ TNodePtr BuildWorldForNode(TPosition pos, TNodePtr list, TNodePtr bodyNode, TNodePtr elseNode, bool isEvaluate, bool isParallel);
+
+ TNodePtr BuildCreateTopic(TPosition pos, const TTopicRef& tr, const TCreateTopicParameters& params, // Topic builders: a topic reference, the matching parameter struct and the scoped state.
+ TScopedStatePtr scoped);
+ TNodePtr BuildAlterTopic(TPosition pos, const TTopicRef& tr, const TAlterTopicParameters& params,
+ TScopedStatePtr scoped);
+ TNodePtr BuildDropTopic(TPosition pos, const TTopicRef& topic, const TDropTopicParameters& params,
+ TScopedStatePtr scoped);
+
+ TNodePtr BuildCreateBackupCollection(TPosition pos, const TString& id, // Backup collection builders: an id, the matching parameter struct and the operator context.
+ const TCreateBackupCollectionParameters& params,
+ const TObjectOperatorContext& context);
+ TNodePtr BuildAlterBackupCollection(TPosition pos, const TString& id,
+ const TAlterBackupCollectionParameters& params,
+ const TObjectOperatorContext& context);
+ TNodePtr BuildDropBackupCollection(TPosition pos, const TString& id,
+ const TDropBackupCollectionParameters& params,
+ const TObjectOperatorContext& context);
+
+ TNodePtr BuildBackup(TPosition pos, const TString& id,
+ const TBackupParameters& params,
+ const TObjectOperatorContext& context);
+ TNodePtr BuildRestore(TPosition pos, const TString& id,
+ const TRestoreParameters& params,
+ const TObjectOperatorContext& context);
+
+ // Scans `container` in iteration order and returns the first element whose
+ // Levenshtein distance to `name` is below NYql::DefaultMistypeDistance.
+ // Returns an empty TMaybe when no element is close enough.
+ template<class TContainer>
+ TMaybe<TString> FindMistypeIn(const TContainer& container, const TString& name) {
+     for (auto& candidate: container) {
+         const auto distance = NLevenshtein::Distance(name, candidate);
+         if (distance < NYql::DefaultMistypeDistance) {
+             return candidate;
+         }
+     }
+     return {};
+ }
+
+ bool Parseui32(TNodePtr from, ui32& to); // Declaration only; writes the result through `to` and reports success as a bool.
+ TNodePtr GroundWithExpr(const TNodePtr& ground, const TNodePtr& expr); // Declaration only.
+ const TString* DeriveCommonSourceName(const TVector<TNodePtr> &nodes); // Declaration only; returns a pointer, so callers should presumably expect nullptr -- confirm in the definition.
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/object_processing.cpp b/yql/essentials/sql/v1/object_processing.cpp
new file mode 100644
index 00000000000..80e3962bf8d
--- /dev/null
+++ b/yql/essentials/sql/v1/object_processing.cpp
@@ -0,0 +1,68 @@
+#include "object_processing.h"
+
+#include <yql/essentials/core/sql_types/yql_callable_names.h>
+
+namespace NSQLTranslationV1 {
+using namespace NYql;
+
+// Builds the "Key" list for this object: an "objectId" entry followed by a
+// "typeId" entry, each holding the value as a quoted atom wrapped in "String".
+INode::TPtr TObjectProcessorImpl::BuildKeys() const {
+    auto result = Y("Key");
+
+    auto objectIdValue = Y("String", BuildQuotedAtom(Pos, ObjectId));
+    result = L(result, Q(Y(Q("objectId"), objectIdValue)));
+
+    auto typeIdValue = Y("String", BuildQuotedAtom(Pos, TypeId));
+    result = L(result, Q(Y(Q("typeId"), typeIdValue)));
+
+    return result;
+}
+
+// Stores the object and type identifiers and copies the operator context
+// (service id, cluster and scoped state) from `context`.
+TObjectProcessorImpl::TObjectProcessorImpl(
+    TPosition pos,
+    const TString& objectId,
+    const TString& typeId,
+    const TObjectOperatorContext& context)
+    : TBase(pos)
+    , TObjectOperatorContext(context)
+    , ObjectId(objectId)
+    , TypeId(typeId)
+{}
+
+// Registers the cluster as used, then adds a "block" that binds a DataSink,
+// writes the object (keys + options) into it and returns either a commit of
+// that sink (when ctx.PragmaAutoCommit is set) or the plain "world".
+// Finishes by delegating to TAstListNode::DoInit.
+bool TObjectProcessorImpl::DoInit(TContext& ctx, ISource* src) {
+    Y_UNUSED(src);
+    Scoped->UseCluster(ServiceId, Cluster);
+    auto options = FillFeatures(BuildOptions());
+    auto keys = BuildKeys();
+
+    auto bindSink = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx)));
+    auto writeObject = Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(options)));
+    auto returnWorld = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+
+    Add("block", Q(Y(bindSink, writeObject, returnWorld)));
+    return TAstListNode::DoInit(ctx, src);
+}
+
+// Appends feature information to `options` and returns it:
+//  - a "features" entry listing every feature, as (name value) when the
+//    value has a node and as (name) otherwise;
+//  - a "resetFeatures" entry listing the names of features to reset.
+// Either entry is skipped when the corresponding container is empty.
+INode::TPtr TCreateObject::FillFeatures(INode::TPtr options) const {
+    if (!Features.empty()) {
+        auto featureList = Y();
+        for (const auto& [featureName, featureValue] : Features) {
+            if (!featureValue.HasNode()) {
+                featureList->Add(Q(Y(BuildQuotedAtom(Pos, featureName))));
+                continue;
+            }
+            featureList->Add(Q(Y(BuildQuotedAtom(Pos, featureName), featureValue.Build())));
+        }
+        options->Add(Q(Y(Q("features"), Q(featureList))));
+    }
+
+    if (!FeaturesToReset.empty()) {
+        auto resetList = Y();
+        for (const auto& nameToReset : FeaturesToReset) {
+            resetList->Add(BuildQuotedAtom(Pos, nameToReset));
+        }
+        options->Add(Q(Y(Q("resetFeatures"), Q(resetList))));
+    }
+
+    return options;
+}
+
+// Captures the scoped state and snapshots its current service and cluster.
+// ServiceId and Cluster are read through the Scoped member, which is
+// declared (and therefore initialized) before them.
+TObjectOperatorContext::TObjectOperatorContext(TScopedStatePtr scoped)
+    : Scoped(scoped)
+    , ServiceId(Scoped->CurrService)
+    , Cluster(Scoped->CurrCluster)
+{}
+
+}
diff --git a/yql/essentials/sql/v1/object_processing.h b/yql/essentials/sql/v1/object_processing.h
new file mode 100644
index 00000000000..4114235ee6d
--- /dev/null
+++ b/yql/essentials/sql/v1/object_processing.h
@@ -0,0 +1,105 @@
+#pragma once
+#include "node.h"
+#include "context.h"
+
+namespace NSQLTranslationV1 {
+
+class TObjectOperatorContext { // Bundles the scoped state with the service id and cluster taken from it.
+protected:
+ TScopedStatePtr Scoped; // Declared first: the converting constructor reads ServiceId and Cluster through it.
+public:
+ TString ServiceId;
+ TDeferredAtom Cluster;
+ TObjectOperatorContext(const TObjectOperatorContext& baseItem) = default;
+ TObjectOperatorContext(TScopedStatePtr scoped); // Non-explicit: a TScopedStatePtr converts implicitly to a context.
+};
+
+class TObjectProcessorImpl: public TAstListNode, public TObjectOperatorContext { // Base for object operation nodes; subclasses supply the options via BuildOptions() and FillFeatures().
+protected:
+ using TBase = TAstListNode;
+ TString ObjectId;
+ TString TypeId;
+
+ virtual INode::TPtr BuildOptions() const = 0; // Pure virtual: produces the initial options list.
+ virtual INode::TPtr FillFeatures(INode::TPtr options) const = 0; // Pure virtual: receives the options and returns them, possibly extended.
+ INode::TPtr BuildKeys() const;
+public:
+ TObjectProcessorImpl(TPosition pos, const TString& objectId, const TString& typeId, const TObjectOperatorContext& context);
+
+ bool DoInit(TContext& ctx, ISource* src) override;
+
+ TPtr DoClone() const final { // Always returns an empty pointer: these nodes do not produce a clone.
+ return {};
+ }
+};
+
+// Object operation node for creating an object. It also serves as the base
+// of TUpsertObject, TAlterObject and TDropObject, which reuse its feature
+// handling and override only the "mode" option.
+class TCreateObject: public TObjectProcessorImpl {
+private:
+    using TBase = TObjectProcessorImpl;
+    std::map<TString, TDeferredAtom> Features;
+    std::set<TString> FeaturesToReset;
+protected:
+    bool ExistingOk = false;
+    bool ReplaceIfExists = false;
+protected:
+    // Returns an options list with a single "mode" entry. ExistingOk takes
+    // precedence over ReplaceIfExists; with neither set the plain
+    // "createObject" mode is used.
+    INode::TPtr BuildOptions() const override {
+        TString mode = "createObject";
+        if (ExistingOk) {
+            mode = "createObjectIfNotExists";
+        } else if (ReplaceIfExists) {
+            mode = "createObjectOrReplace";
+        }
+        return Y(Q(Y(Q("mode"), Q(mode))));
+    }
+    INode::TPtr FillFeatures(INode::TPtr options) const override;
+public:
+    // Takes ownership of both feature containers by moving from them.
+    TCreateObject(
+        TPosition pos,
+        const TString& objectId,
+        const TString& typeId,
+        bool existingOk,
+        bool replaceIfExists,
+        std::map<TString, TDeferredAtom>&& features,
+        std::set<TString>&& featuresToReset,
+        const TObjectOperatorContext& context)
+        : TBase(pos, objectId, typeId, context)
+        , Features(std::move(features))
+        , FeaturesToReset(std::move(featuresToReset))
+        , ExistingOk(existingOk)
+        , ReplaceIfExists(replaceIfExists)
+    {}
+};
+
+// Same as TCreateObject (constructors are inherited) except that the "mode"
+// option is always "upsertObject".
+class TUpsertObject final: public TCreateObject {
+private:
+    using TBase = TCreateObject;
+protected:
+    INode::TPtr BuildOptions() const override {
+        return Y(Q(Y(Q("mode"), Q("upsertObject"))));
+    }
+public:
+    using TBase::TBase;
+};
+
+// Same as TCreateObject (constructors are inherited) except that the "mode"
+// option is always "alterObject".
+class TAlterObject final: public TCreateObject {
+private:
+    using TBase = TCreateObject;
+protected:
+    INode::TPtr BuildOptions() const override {
+        return Y(Q(Y(Q("mode"), Q("alterObject"))));
+    }
+public:
+    using TBase::TBase;
+};
+
+// Drop variant built on top of TCreateObject (constructors are inherited).
+// The inherited ExistingOk flag is reused with the meaning "missing is ok".
+class TDropObject final: public TCreateObject {
+private:
+    using TBase = TCreateObject;
+    // The flag lives in the TCreateObject base under its create-oriented name.
+    bool MissingOk() const {
+        return ExistingOk;
+    }
+protected:
+    // "dropObjectIfExists" when a missing object is tolerated, "dropObject" otherwise.
+    INode::TPtr BuildOptions() const override {
+        return Y(Q(Y(Q("mode"), Q(MissingOk() ? "dropObjectIfExists" : "dropObject"))));
+    }
+public:
+    using TBase::TBase;
+};
+
+}
diff --git a/yql/essentials/sql/v1/perf/parse.cpp b/yql/essentials/sql/v1/perf/parse.cpp
new file mode 100644
index 00000000000..33174bb95c0
--- /dev/null
+++ b/yql/essentials/sql/v1/perf/parse.cpp
@@ -0,0 +1,70 @@
+#include <yql/essentials/sql/v1/sql.h>
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <util/datetime/cputimer.h>
+#include <util/string/builder.h>
+
+using namespace NSQLTranslationV1;
+
+enum class EDebugOutput {
+ None,
+ ToCerr,
+};
+
+// Renders all issues of a parse result into a string; with EDebugOutput::ToCerr
+// the same text is additionally echoed to Cerr.
+TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) {
+    TStringStream rendered;
+    res.Issues.PrintTo(rendered);
+
+    const bool echo = debug == EDebugOutput::ToCerr;
+    if (echo) {
+        Cerr << rendered.Str() << Endl;
+    }
+    return rendered.Str();
+}
+
+// Translates `query` with a single cluster "plato" mapped to `provider`
+// (NYql::YtProviderName when `provider` is empty) and returns the parse result.
+// With EDebugOutput::ToCerr the collected issues are printed to Cerr.
+NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
+    // The arena is handed to the translator through the settings and lives until this function returns.
+    google::protobuf::Arena arena;
+
+    NSQLTranslation::TTranslationSettings settings;
+    const TString cluster = "plato";
+    settings.ClusterMapping[cluster] = provider ? provider : TString(NYql::YtProviderName);
+    settings.MaxErrors = maxErrors;
+    settings.Mode = mode;
+    settings.Arena = &arena;
+
+    auto parsed = SqlToYql(query, settings);
+    if (debug == EDebugOutput::ToCerr) {
+        Err2Str(parsed, debug);
+    }
+    return parsed;
+}
+
+// Convenience overload: translates `query` in ESqlMode::QUERY by forwarding to SqlToYqlWithMode.
+NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
+ return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug);
+}
+
+// Micro-benchmark: generates a script of 10 chained "$query = SELECT fld0,...,fld499 FROM ..."
+// statements, translates it 100 times and prints the elapsed time to Cerr.
+int main(int, char**) {
+    constexpr ui32 subqueryCount = 10;
+    constexpr ui32 columnCount = 500;
+    constexpr ui32 runCount = 100;
+
+    TStringBuilder script;
+    script << "USE plato;\n";
+    for (ui32 level = 0; level != subqueryCount; ++level) {
+        script << "$query = SELECT ";
+        for (ui32 column = 0; column != columnCount; ++column) {
+            // Columns are comma-separated; the first one gets no leading separator.
+            script << (column == 0 ? "" : ",") << "fld" << column;
+        }
+
+        // The first subquery reads the real table, every next one reads the previous subquery.
+        script << " FROM " << (level == 0 ? "Input" : "$query") << ";\n";
+    }
+    script << "SELECT * FROM $query;\n";
+
+    TString sql = script;
+    // To inspect the generated script: Cerr << sql;
+    TSimpleTimer timer;
+    for (ui32 run = 0; run != runCount; ++run) {
+        NYql::TAstParseResult res = SqlToYql(sql);
+        Y_ENSURE(res.Root);
+    }
+
+    Cerr << "Elapsed: " << timer.Get() << "\n";
+    return 0;
+}
diff --git a/yql/essentials/sql/v1/perf/ya.make b/yql/essentials/sql/v1/perf/ya.make
new file mode 100644
index 00000000000..99e9087682c
--- /dev/null
+++ b/yql/essentials/sql/v1/perf/ya.make
@@ -0,0 +1,14 @@
+PROGRAM()
+
+SRCS(
+ parse.cpp
+)
+
+PEERDIR(
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/sql
+ yql/essentials/sql/v1
+ yql/essentials/sql/pg_dummy
+)
+
+END()
diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.cpp b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
new file mode 100644
index 00000000000..f977f57411e
--- /dev/null
+++ b/yql/essentials/sql/v1/proto_parser/proto_parser.cpp
@@ -0,0 +1,152 @@
+#include "proto_parser.h"
+
+#include <yql/essentials/utils/yql_panic.h>
+
+#include <yql/essentials/parser/proto_ast/antlr3/proto_ast_antlr3.h>
+#include <yql/essentials/parser/proto_ast/antlr4/proto_ast_antlr4.h>
+#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Parser.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_ansi/SQLv1Parser.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Parser.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4Parser.h>
+
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+
+#include <library/cpp/protobuf/util/simple_reflection.h>
+#include <util/generic/algorithm.h>
+
+#if defined(_tsan_enabled_)
+#include <util/system/mutex.h>
+#endif
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+
+#if defined(_tsan_enabled_)
+ TMutex SanitizerSQLTranslationMutex;
+#endif
+
+using namespace NSQLv1Generated;
+
+// Checks that two token messages agree: same EOF-ness, same value and, for non-EOF tokens,
+// same line. Columns are compared only when the query is pure ASCII.
+static void ValidateTokens(const NSQLv1Generated::TToken& token1, const NSQLv1Generated::TToken& token2, bool hasNonAscii) {
+    const bool isEof1 = token1.GetId() == Max<ui32>();
+    const bool isEof2 = token2.GetId() == Max<ui32>();
+    YQL_ENSURE(isEof1 == isEof2);
+    YQL_ENSURE(token1.GetValue() == token2.GetValue());
+    if (isEof1) {
+        return;
+    }
+
+    YQL_ENSURE(token1.GetLine() == token2.GetLine());
+    if (!hasNonAscii) {
+        YQL_ENSURE(token1.GetColumn() == token2.GetColumn());
+    }
+}
+
+// Recursively verifies that two protobuf trees have the same shape: both null or both set,
+// same descriptor, equal token payloads, and the same number of sub-messages in every
+// message-typed field. Non-message fields of non-token messages are not compared.
+void ValidateMessagesImpl(const google::protobuf::Message* msg1, const google::protobuf::Message* msg2, bool hasNonAscii) {
+    YQL_ENSURE(!msg1 == !msg2);
+    if (!msg1) {
+        return;
+    }
+
+    YQL_ENSURE(msg1->GetDescriptor() == msg2->GetDescriptor());
+    const auto descr = msg1->GetDescriptor();
+    if (descr == NSQLv1Generated::TToken::GetDescriptor()) {
+        // Tokens are leaves: compare their payload and stop descending.
+        ValidateTokens(
+            dynamic_cast<const NSQLv1Generated::TToken&>(*msg1),
+            dynamic_cast<const NSQLv1Generated::TToken&>(*msg2),
+            hasNonAscii);
+        return;
+    }
+
+    const int fieldCount = descr->field_count();
+    for (int i = 0; i != fieldCount; ++i) {
+        const NProtoBuf::FieldDescriptor* fd = descr->field(i);
+        NProtoBuf::TConstField field1(*msg1, fd);
+        NProtoBuf::TConstField field2(*msg2, fd);
+        YQL_ENSURE(field1.IsMessage() == field2.IsMessage());
+        if (!field1.IsMessage()) {
+            continue;
+        }
+
+        YQL_ENSURE(field1.Size() == field2.Size());
+        for (size_t j = 0; j < field1.Size(); ++j) {
+            ValidateMessagesImpl(field1.Get<NProtoBuf::Message>(j), field2.Get<NProtoBuf::Message>(j), hasNonAscii);
+        }
+    }
+}
+
+// Entry point of the tree comparison: detects whether the query contains any non-ASCII
+// character (which relaxes the column check) and runs the recursive validation.
+void ValidateMessages(const TString& query, const google::protobuf::Message* msg1, const google::protobuf::Message* msg2) {
+    const auto isNonAscii = [](char c) { return !isascii(c); };
+    const bool hasNonAscii = AnyOf(query, isNonAscii);
+    ValidateMessagesImpl(msg1, msg2, hasNonAscii);
+}
+
+// Parses `query` into a protobuf AST, reporting problems into `err`.
+//
+// `err` and `maxErrors` are wrapped into a TErrorCollectorOverIssues and the call is forwarded
+// to the IErrorCollector overload declared in proto_parser.h. That overload owns the parser
+// selection (ansiLexer / antlr4Parser), the optional ANTLR3-vs-ANTLR4 cross-check (testAntlr4),
+// the arena check and the TSAN guard, so the dispatch logic exists in exactly one place.
+google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, TIssues& err,
+    size_t maxErrors, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena) {
+    NSQLTranslation::TErrorCollectorOverIssues collector(err, maxErrors, "");
+    return SqlAST(query, queryName, collector, ansiLexer, antlr4Parser, testAntlr4, arena);
+}
+
+// Parses `query` into a protobuf AST allocated on `arena`, reporting problems into `err`.
+//
+// antlr4Parser selects the ANTLR4 builder, otherwise the ANTLR3 one is used; ansiLexer selects
+// the ANSI grammar variant. testAntlr4 is honoured only on the ANTLR3 path: the query is parsed
+// a second time with the matching ANTLR4 builder and both trees are compared by ValidateMessages.
+// The ANTLR3 result is what gets returned in that case.
+google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err,
+    bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena) {
+    using TAntlr3Ansi = NProtoAST::TProtoASTBuilder3<NALPAnsi::SQLv1Parser, NALPAnsi::SQLv1Lexer>;
+    using TAntlr3Default = NProtoAST::TProtoASTBuilder3<NALPDefault::SQLv1Parser, NALPDefault::SQLv1Lexer>;
+    using TAntlr4Ansi = NProtoAST::TProtoASTBuilder4<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer>;
+    using TAntlr4Default = NProtoAST::TProtoASTBuilder4<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer>;
+
+    YQL_ENSURE(arena);
+#if defined(_tsan_enabled_)
+    TGuard<TMutex> grd(SanitizerSQLTranslationMutex);
+#endif
+
+    if (antlr4Parser) {
+        if (ansiLexer) {
+            TAntlr4Ansi builder(query, queryName, arena);
+            return builder.BuildAST(err);
+        }
+
+        TAntlr4Default builder(query, queryName, arena);
+        return builder.BuildAST(err);
+    }
+
+    if (ansiLexer) {
+        TAntlr3Ansi builder(query, queryName, arena);
+        auto res = builder.BuildAST(err);
+        if (testAntlr4) {
+            TAntlr4Ansi shadowBuilder(query, queryName, arena);
+            auto res2 = shadowBuilder.BuildAST(err);
+            ValidateMessages(query, res, res2);
+        }
+
+        return res;
+    }
+
+    TAntlr3Default builder(query, queryName, arena);
+    auto res = builder.BuildAST(err);
+    if (testAntlr4) {
+        TAntlr4Default shadowBuilder(query, queryName, arena);
+        auto res2 = shadowBuilder.BuildAST(err);
+        ValidateMessages(query, res, res2);
+    }
+
+    return res;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/proto_parser/proto_parser.h b/yql/essentials/sql/v1/proto_parser/proto_parser.h
new file mode 100644
index 00000000000..14440953196
--- /dev/null
+++ b/yql/essentials/sql/v1/proto_parser/proto_parser.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_ast.h>
+#include <yql/essentials/parser/proto_ast/common.h>
+#include <yql/essentials/public/issue/yql_warning.h>
+#include <yql/essentials/public/issue/yql_issue_manager.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+
+#include <google/protobuf/message.h>
+
+namespace NSQLTranslation {
+ struct TTranslationSettings;
+}
+
+namespace NSQLTranslationV1 {
+
+ google::protobuf::Message* SqlAST(const TString& query, const TString& queryName,
+ NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena);
+ google::protobuf::Message* SqlAST(const TString& query, const TString& queryName,
+ NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena);
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/proto_parser/ya.make b/yql/essentials/sql/v1/proto_parser/ya.make
new file mode 100644
index 00000000000..edb24868baf
--- /dev/null
+++ b/yql/essentials/sql/v1/proto_parser/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+PEERDIR(
+ yql/essentials/utils
+ yql/essentials/ast
+
+ yql/essentials/parser/proto_ast/antlr3
+ yql/essentials/parser/proto_ast/antlr4
+ yql/essentials/parser/proto_ast/collect_issues
+ yql/essentials/parser/proto_ast/gen/v1
+ yql/essentials/parser/proto_ast/gen/v1_ansi
+ yql/essentials/parser/proto_ast/gen/v1_proto_split
+ yql/essentials/parser/proto_ast/gen/v1_antlr4
+ yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4
+)
+
+SRCS(
+ proto_parser.cpp
+)
+
+END()
diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp
new file mode 100644
index 00000000000..8abc2a92bff
--- /dev/null
+++ b/yql/essentials/sql/v1/query.cpp
@@ -0,0 +1,3567 @@
+#include "node.h"
+#include "context.h"
+#include "object_processing.h"
+
+#include <yql/essentials/ast/yql_type_string.h>
+#include <yql/essentials/core/sql_types/yql_callable_names.h>
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+
+#include <library/cpp/charset/ci_string.h>
+
+#include <util/digest/fnv.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+// Rejects a primary view on any service other than Kikimr or YDB.
+// Returns true when the view is acceptable; otherwise reports an error at `pos` and returns false.
+bool ValidateView(TPosition pos, TContext& ctx, TStringBuf service, TViewDescription& view) {
+    if (!view.PrimaryFlag) {
+        return true;
+    }
+
+    const bool primaryViewSupported = service == KikimrProviderName || service == YdbProviderName;
+    if (primaryViewSupported) {
+        return true;
+    }
+
+    ctx.Error(pos) << "primary view is not supported for " << service << " tables";
+    return false;
+}
+
+// Table key that addresses exactly one table, optionally through a view.
+//
+// `Full` caches the printable table name: the representation of `Name`, followed by
+// ":<view name>" when a view name is set. GetTableName() exposes it for literal names only.
+class TUniqueTableKey: public ITableKeys {
+public:
+    TUniqueTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster,
+        const TDeferredAtom& name, const TViewDescription& view)
+        : ITableKeys(pos)
+        , Service(service)
+        , Cluster(cluster)
+        , Name(name)
+        , View(view)
+        , Full(name.GetRepr())
+    {
+        if (!View.ViewName.empty()) {
+            Full += ":" + View.ViewName;
+        }
+    }
+
+    // Switches the key to the primary view. Always succeeds; ctx and pos are unused.
+    bool SetPrimaryView(TContext& ctx, TPosition pos) override {
+        Y_UNUSED(ctx);
+        Y_UNUSED(pos);
+        View = {"", true};
+        return true;
+    }
+
+    // Replaces the view name and rebuilds the cached full name. Always succeeds; ctx and pos are unused.
+    bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
+        Y_UNUSED(ctx);
+        Y_UNUSED(pos);
+        Full = Name.GetRepr();
+        View = {view};
+        if (!View.empty()) {
+            // Append rather than assign: the result must be "<name>:<view>", the same
+            // format the constructor produces. Assigning here would drop the table name.
+            Full += ":" + View.ViewName;
+        }
+
+        return true;
+    }
+
+    // Returns the cached full name when the table name is a literal, nullptr otherwise.
+    const TString* GetTableName() const override {
+        return Name.GetLiteral() ? &Full : nullptr;
+    }
+
+    // Builds the key expression for this table; returns nullptr after reporting an error to ctx.
+    TNodePtr BuildKeys(TContext& ctx, ITableKeys::EBuildKeysMode mode) override {
+        // The "@" view marks a temporary table, which is addressed by name only.
+        if (View == TViewDescription{"@"}) {
+            auto key = Y("TempTable", Name.Build());
+            return key;
+        }
+
+        bool tableScheme = mode == ITableKeys::EBuildKeysMode::CREATE;
+        if (tableScheme && !View.empty()) {
+            ctx.Error(Pos) << "Table view can not be created with CREATE TABLE clause";
+            return nullptr;
+        }
+        auto path = ctx.GetPrefixedPath(Service, Cluster, Name);
+        if (!path) {
+            return nullptr;
+        }
+        auto key = Y("Key", Q(Y(Q(tableScheme ? "tablescheme" : "table"), Y("String", path))));
+        key = AddView(key, View);
+        if (!ValidateView(GetPos(), ctx, Service, View)) {
+            return nullptr;
+        }
+        // Inputs in query mode are wrapped into MrTableConcat for every service
+        // except Kikimr, RTMR and YDB.
+        if (mode == ITableKeys::EBuildKeysMode::INPUT &&
+            IsQueryMode(ctx.Settings.Mode) &&
+            Service != KikimrProviderName &&
+            Service != RtmrProviderName &&
+            Service != YdbProviderName) {
+
+            key = Y("MrTableConcat", key);
+        }
+        return key;
+    }
+
+private:
+    TString Service;
+    TDeferredAtom Cluster;
+    TDeferredAtom Name;
+    TViewDescription View;
+    TString Full;
+};
+
+// Factory: allocates a TUniqueTableKey for the given service/cluster/name/view and returns it as a TNodePtr.
+TNodePtr BuildTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster,
+ const TDeferredAtom& name, const TViewDescription& view) {
+ return new TUniqueTableKey(pos, service, cluster, name, view);
+}
+
+// Table-keys node that addresses a single topic by cluster and name.
+// `Full` caches the representation of the topic name for GetTableName().
+class TTopicKey: public ITableKeys {
+public:
+ TTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name)
+ : ITableKeys(pos)
+ , Cluster(cluster)
+ , Name(name)
+ , Full(name.GetRepr())
+ {
+ }
+
+ // Returns the cached name when the topic name is a literal, nullptr otherwise.
+ const TString* GetTableName() const override {
+ return Name.GetLiteral() ? &Full : nullptr;
+ }
+
+ // Builds (Key '('topic (String <path>))); returns nullptr when the prefixed path cannot be resolved.
+ // The build mode argument is ignored.
+ TNodePtr BuildKeys(TContext& ctx, ITableKeys::EBuildKeysMode) override {
+ // NOTE(review): Service is never assigned in this class, so it is a default-constructed string here.
+ const auto path = ctx.GetPrefixedPath(Service, Cluster, Name);
+ if (!path) {
+ return nullptr;
+ }
+ auto key = Y("Key", Q(Y(Q("topic"), Y("String", path))));
+ return key;
+ }
+
+private:
+ TString Service; // not initialized by the constructor
+ TDeferredAtom Cluster;
+ TDeferredAtom Name;
+ TString View; // NOTE(review): not referenced anywhere in this class
+ TString Full;
+};
+
+// Factory: allocates a TTopicKey for the given cluster/name and returns it as a TNodePtr.
+TNodePtr BuildTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name) {
+ return new TTopicKey(pos, cluster, name);
+}
+
+// Maps an index type to the quoted atom that names it in the generated expression.
+// Fails with YQL_ENSURE for a value that matches none of the handled enumerators,
+// instead of flowing off the end of a non-void function (which is undefined behaviour).
+static INode::TPtr CreateIndexType(TIndexDescription::EType type, const INode& node) {
+    switch (type) {
+        case TIndexDescription::EType::GlobalSync:
+            return node.Q("syncGlobal");
+        case TIndexDescription::EType::GlobalAsync:
+            return node.Q("asyncGlobal");
+        case TIndexDescription::EType::GlobalSyncUnique:
+            return node.Q("syncGlobalUnique");
+        case TIndexDescription::EType::GlobalVectorKmeansTree:
+            return node.Q("globalVectorKmeansTree");
+    }
+
+    // Deliberately placed after the switch rather than in a `default:` label, so the compiler
+    // can still warn when a new enumerator is added without a matching case.
+    YQL_ENSURE(false, "Unexpected index type");
+    return nullptr;
+}
+
+// Tells the settings builders whether they serve a CREATE or an ALTER statement.
+// In this file Create forbids resetting a setting, and some settings are emitted in Create mode only.
+enum class ETableSettingsParsingMode {
+ Create,
+ Alter
+};
+
+// Converts the parsed table settings into a list of quoted (name value) tuples.
+//
+// Only settings that are present are emitted, in the fixed order below. Settings that can be
+// either set or reset emit a value-less / "reset*" tuple for a reset; requesting a reset in
+// Create mode fails an ensure check. UniformPartitions, PartitionAtKeys, StoreType and
+// PartitionByHashFunction are emitted in Create mode only and silently skipped otherwise.
+// `node` is used solely as the factory for the Y/Q/L node builders.
+static INode::TPtr CreateTableSettings(const TTableSettings& tableSettings, ETableSettingsParsingMode parsingMode, const INode& node) {
+ // short aliases for member function calls
+ auto Y = [&node](auto&&... args) { return node.Y(std::forward<decltype(args)>(args)...); };
+ auto Q = [&node](auto&&... args) { return node.Q(std::forward<decltype(args)>(args)...); };
+ auto L = [&node](auto&&... args) { return node.L(std::forward<decltype(args)>(args)...); };
+
+ auto settings = Y();
+
+ if (tableSettings.DataSourcePath) {
+ settings = L(settings, Q(Y(Q("data_source_path"), tableSettings.DataSourcePath)));
+ }
+ if (tableSettings.Location) {
+ if (tableSettings.Location.IsSet()) {
+ settings = L(settings, Q(Y(Q("location"), tableSettings.Location.GetValueSet())));
+ } else {
+ // A reset is encoded as the bare name without a value.
+ Y_ENSURE(parsingMode != ETableSettingsParsingMode::Create, "Can't reset LOCATION in create mode");
+ settings = L(settings, Q(Y(Q("location"))));
+ }
+ }
+ // External source parameters follow the same set / reset encoding as LOCATION.
+ for (const auto& resetableParam : tableSettings.ExternalSourceParameters) {
+ Y_ENSURE(resetableParam, "Empty parameter");
+ if (resetableParam.IsSet()) {
+ const auto& [id, value] = resetableParam.GetValueSet();
+ settings = L(settings, Q(Y(Q(id.Name), value)));
+ } else {
+ Y_ENSURE(parsingMode != ETableSettingsParsingMode::Create,
+ "Can't reset " << resetableParam.GetValueReset().Name << " in create mode"
+ );
+ settings = L(settings, Q(Y(Q(resetableParam.GetValueReset().Name))));
+ }
+ }
+ if (tableSettings.CompactionPolicy) {
+ settings = L(settings, Q(Y(Q("compactionPolicy"), tableSettings.CompactionPolicy)));
+ }
+ if (tableSettings.AutoPartitioningBySize) {
+ const auto& ref = tableSettings.AutoPartitioningBySize.GetRef();
+ settings = L(settings, Q(Y(Q("autoPartitioningBySize"), BuildQuotedAtom(ref.Pos, ref.Name))));
+ }
+ if (tableSettings.UniformPartitions && parsingMode == ETableSettingsParsingMode::Create) {
+ settings = L(settings, Q(Y(Q("uniformPartitions"), tableSettings.UniformPartitions)));
+ }
+ if (tableSettings.PartitionAtKeys && parsingMode == ETableSettingsParsingMode::Create) {
+ // Emitted as a quoted list of keys, each key being a quoted list of its column values.
+ auto keysDesc = Y();
+ for (const auto& key : tableSettings.PartitionAtKeys) {
+ auto columnsDesc = Y();
+ for (auto column : key) {
+ columnsDesc = L(columnsDesc, column);
+ }
+ keysDesc = L(keysDesc, Q(columnsDesc));
+ }
+ settings = L(settings, Q(Y(Q("partitionAtKeys"), Q(keysDesc))));
+ }
+ if (tableSettings.PartitionSizeMb) {
+ settings = L(settings, Q(Y(Q("partitionSizeMb"), tableSettings.PartitionSizeMb)));
+ }
+ if (tableSettings.AutoPartitioningByLoad) {
+ const auto& ref = tableSettings.AutoPartitioningByLoad.GetRef();
+ settings = L(settings, Q(Y(Q("autoPartitioningByLoad"), BuildQuotedAtom(ref.Pos, ref.Name))));
+ }
+ if (tableSettings.MinPartitions) {
+ settings = L(settings, Q(Y(Q("minPartitions"), tableSettings.MinPartitions)));
+ }
+ if (tableSettings.MaxPartitions) {
+ settings = L(settings, Q(Y(Q("maxPartitions"), tableSettings.MaxPartitions)));
+ }
+ if (tableSettings.PartitionCount) {
+ // A partition count is expressed as identical upper and lower partition bounds.
+ settings = L(settings, Q(Y(Q("maxPartitions"), tableSettings.PartitionCount)));
+ settings = L(settings, Q(Y(Q("minPartitions"), tableSettings.PartitionCount)));
+ }
+ if (tableSettings.KeyBloomFilter) {
+ const auto& ref = tableSettings.KeyBloomFilter.GetRef();
+ settings = L(settings, Q(Y(Q("keyBloomFilter"), BuildQuotedAtom(ref.Pos, ref.Name))));
+ }
+ if (tableSettings.ReadReplicasSettings) {
+ settings = L(settings, Q(Y(Q("readReplicasSettings"), tableSettings.ReadReplicasSettings)));
+ }
+ if (const auto& ttl = tableSettings.TtlSettings) {
+ if (ttl.IsSet()) {
+ const auto& ttlSettings = ttl.GetValueSet();
+ auto opts = Y();
+
+ opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name))));
+ opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr)));
+
+ // The column unit is optional and emitted only when specified.
+ if (ttlSettings.ColumnUnit) {
+ opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit)))));
+ }
+
+ settings = L(settings, Q(Y(Q("setTtlSettings"), Q(opts))));
+ } else {
+ YQL_ENSURE(parsingMode != ETableSettingsParsingMode::Create, "Can't reset TTL settings in create mode");
+ settings = L(settings, Q(Y(Q("resetTtlSettings"), Q(Y()))));
+ }
+ }
+ if (const auto& tiering = tableSettings.Tiering) {
+ if (tiering.IsSet()) {
+ settings = L(settings, Q(Y(Q("setTiering"), tiering.GetValueSet())));
+ } else {
+ YQL_ENSURE(parsingMode != ETableSettingsParsingMode::Create, "Can't reset TIERING in create mode");
+ settings = L(settings, Q(Y(Q("resetTiering"), Q(Y()))));
+ }
+ }
+ if (tableSettings.StoreExternalBlobs) {
+ const auto& ref = tableSettings.StoreExternalBlobs.GetRef();
+ settings = L(settings, Q(Y(Q("storeExternalBlobs"), BuildQuotedAtom(ref.Pos, ref.Name))));
+ }
+ if (tableSettings.StoreType && parsingMode == ETableSettingsParsingMode::Create) {
+ const auto& ref = tableSettings.StoreType.GetRef();
+ settings = L(settings, Q(Y(Q("storeType"), BuildQuotedAtom(ref.Pos, ref.Name))));
+ }
+ if (tableSettings.PartitionByHashFunction && parsingMode == ETableSettingsParsingMode::Create) {
+ settings = L(settings, Q(Y(Q("partitionByHashFunction"), tableSettings.PartitionByHashFunction)));
+ }
+
+ return settings;
+}
+
+// Converts vector index settings into a list of quoted (name value) tuples, all values
+// rendered as strings. Exactly one of Distance / Similarity must be set; it is emitted first,
+// followed by vector_type, vector_dimension, clusters and levels.
+static INode::TPtr CreateVectorIndexSettings(const TVectorIndexSettings& vectorIndexSettings, const INode& node) {
+    auto settings = node.Y();
+
+    // Appends one '(name 'value) tuple to the list under construction.
+    const auto addSetting = [&node, &settings](const char* name, const TString& value) {
+        settings = node.L(settings, node.Q(node.Y(node.Q(name), node.Q(value))));
+    };
+
+    if (vectorIndexSettings.Distance && vectorIndexSettings.Similarity) {
+        Y_ENSURE(false, "distance and similarity shouldn't be set at the same time");
+    } else if (vectorIndexSettings.Distance) {
+        addSetting("distance", ToString(*vectorIndexSettings.Distance));
+    } else if (vectorIndexSettings.Similarity) {
+        addSetting("similarity", ToString(*vectorIndexSettings.Similarity));
+    } else {
+        Y_ENSURE(false, "distance or similarity should be set");
+    }
+
+    addSetting("vector_type", ToString(*vectorIndexSettings.VectorType));
+    addSetting("vector_dimension", ToString(vectorIndexSettings.VectorDimension));
+    addSetting("clusters", ToString(vectorIndexSettings.Clusters));
+    addSetting("levels", ToString(vectorIndexSettings.Levels));
+
+    return settings;
+}
+
+// Builds the description of one index as a list of quoted (name value) tuples:
+// indexName, indexType, indexColumns, dataColumns, then tableSettings when any table setting
+// is present and indexSettings when the index carries vector index settings.
+// `parsingMode` is forwarded to CreateTableSettings; `node` is the factory for Y/Q/L.
+static INode::TPtr CreateIndexDesc(const TIndexDescription& index, ETableSettingsParsingMode parsingMode, const INode& node) {
+    auto indexColumns = node.Y();
+    for (const auto& col : index.IndexColumns) {
+        indexColumns = node.L(indexColumns, BuildQuotedAtom(col.Pos, col.Name));
+    }
+    auto dataColumns = node.Y();
+    for (const auto& col : index.DataColumns) {
+        dataColumns = node.L(dataColumns, BuildQuotedAtom(col.Pos, col.Name));
+    }
+    const auto& indexType = node.Y(node.Q("indexType"), CreateIndexType(index.Type, node));
+    const auto& indexName = node.Y(node.Q("indexName"), BuildQuotedAtom(index.Name.Pos, index.Name.Name));
+    auto indexNode = node.Y(
+        node.Q(indexName),
+        node.Q(indexType),
+        node.Q(node.Y(node.Q("indexColumns"), node.Q(indexColumns))),
+        node.Q(node.Y(node.Q("dataColumns"), node.Q(dataColumns)))
+    );
+    if (index.TableSettings.IsSet()) {
+        // The tuple must be quoted like every other entry of the description (and like the same
+        // tuple in CreateAlterIndex); an unquoted list would not be a (name value) data tuple.
+        const auto& tableSettings = node.Q(node.Y(
+            node.Q("tableSettings"),
+            node.Q(CreateTableSettings(index.TableSettings, parsingMode, node))
+        ));
+        indexNode = node.L(indexNode, tableSettings);
+    }
+    if (const auto* indexSettingsPtr = std::get_if<TVectorIndexSettings>(&index.IndexSettings)) {
+        const auto& indexSettings = node.Q(node.Y(
+            node.Q("indexSettings"),
+            node.Q(CreateVectorIndexSettings(*indexSettingsPtr, node))));
+        indexNode = node.L(indexNode, indexSettings);
+    }
+    return indexNode;
+}
+
+// Builds the two-entry description used when altering an index: the quoted indexName tuple
+// and the quoted tableSettings tuple, the latter produced in Alter parsing mode.
+static INode::TPtr CreateAlterIndex(const TIndexDescription& index, const INode& node) {
+    auto nameTuple = node.Y(node.Q("indexName"), BuildQuotedAtom(index.Name.Pos, index.Name.Name));
+
+    auto settingsList = CreateTableSettings(index.TableSettings, ETableSettingsParsingMode::Alter, node);
+    auto settingsTuple = node.Y(node.Q("tableSettings"), node.Q(settingsList));
+
+    return node.Y(node.Q(nameTuple), node.Q(settingsTuple));
+}
+
+// Builds the description of a changefeed as three quoted tuples: name, settings and state.
+// Only settings that are present are emitted, in a fixed order. The state is the atom
+// 'disable when desc.Disable is set and an empty list otherwise.
+static INode::TPtr CreateChangefeedDesc(const TChangefeedDescription& desc, const INode& node) {
+    const auto& source = desc.Settings;
+    auto settings = node.Y();
+
+    // Appends '(name value) when the setting is present; absent settings are skipped.
+    const auto addIfPresent = [&node, &settings](const char* name, const auto& value) {
+        if (value) {
+            settings = node.L(settings, node.Q(node.Y(node.Q(name), value)));
+        }
+    };
+
+    addIfPresent("mode", source.Mode);
+    addIfPresent("format", source.Format);
+    addIfPresent("initial_scan", source.InitialScan);
+    addIfPresent("virtual_timestamps", source.VirtualTimestamps);
+    addIfPresent("resolved_timestamps", source.ResolvedTimestamps);
+    addIfPresent("retention_period", source.RetentionPeriod);
+    addIfPresent("topic_auto_partitioning", source.TopicAutoPartitioning);
+    addIfPresent("topic_max_active_partitions", source.TopicMaxActivePartitions);
+    // TopicPartitions is published under the minimum-active-partitions name.
+    addIfPresent("topic_min_active_partitions", source.TopicPartitions);
+    addIfPresent("aws_region", source.AwsRegion);
+
+    if (const auto& sink = source.SinkSettings) {
+        // Alternative 0 is the local sink, the only one supported here.
+        if (sink->index() == 0) {
+            settings = node.L(settings, node.Q(node.Y(node.Q("local"), node.Q(node.Y()))));
+        } else {
+            YQL_ENSURE(false, "Unexpected sink settings");
+        }
+    }
+
+    auto state = node.Y();
+    if (desc.Disable) {
+        state = node.Q("disable");
+    }
+
+    auto nameTuple = node.Q(node.Y(node.Q("name"), BuildQuotedAtom(desc.Name.Pos, desc.Name.Name)));
+    auto settingsTuple = node.Q(node.Y(node.Q("settings"), node.Q(settings)));
+    auto stateTuple = node.Q(node.Y(node.Q("state"), node.Q(state)));
+    return node.Y(nameTuple, settingsTuple, stateTuple);
+}
+
+class TPrepTableKeys: public ITableKeys {
+public:
+ TPrepTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster,
+ const TString& func, const TVector<TTableArg>& args)
+ : ITableKeys(pos)
+ , Service(service)
+ , Cluster(cluster)
+ , Func(func)
+ , Args(args)
+ {
+ }
+
+ void ExtractTableName(TContext&ctx, TTableArg& arg) {
+ MakeTableFromExpression(Pos, ctx, arg.Expr, arg.Id);
+ }
+
+ TNodePtr BuildKeys(TContext& ctx, ITableKeys::EBuildKeysMode mode) override {
+ if (mode == ITableKeys::EBuildKeysMode::CREATE) {
+ // TODO: allow creation of multiple tables
+ ctx.Error(Pos) << "Mutiple table creation is not implemented yet";
+ return nullptr;
+ }
+
+ TCiString func(Func);
+ if (func != "object" && func != "walkfolders") {
+ for (auto& arg: Args) {
+ if (arg.Expr->GetLabel()) {
+ ctx.Error(Pos) << "Named arguments are not supported for table function " << to_upper(Func);
+ return nullptr;
+ }
+ }
+ }
+ if (func == "concat_strict") {
+ auto tuple = Y();
+ for (auto& arg: Args) {
+ ExtractTableName(ctx, arg);
+ TNodePtr key;
+ if (arg.HasAt) {
+ key = Y("TempTable", arg.Id.Build());
+ } else {
+ auto path = ctx.GetPrefixedPath(Service, Cluster, arg.Id);
+ if (!path) {
+ return nullptr;
+ }
+
+ key = Y("Key", Q(Y(Q("table"), Y("String", path))));
+ key = AddView(key, arg.View);
+ if (!ValidateView(GetPos(), ctx, Service, arg.View)) {
+ return nullptr;
+ }
+ }
+
+ tuple = L(tuple, key);
+ }
+ return Q(tuple);
+ }
+ else if (func == "concat") {
+ auto concat = Y("MrTableConcat");
+ for (auto& arg : Args) {
+ ExtractTableName(ctx, arg);
+ TNodePtr key;
+ if (arg.HasAt) {
+ key = Y("TempTable", arg.Id.Build());
+ } else {
+ auto path = ctx.GetPrefixedPath(Service, Cluster, arg.Id);
+ if (!path) {
+ return nullptr;
+ }
+
+ key = Y("Key", Q(Y(Q("table"), Y("String", path))));
+ key = AddView(key, arg.View);
+ if (!ValidateView(GetPos(), ctx, Service, arg.View)) {
+ return nullptr;
+ }
+ }
+
+ concat = L(concat, key);
+ }
+
+ return concat;
+ }
+
+ else if (func == "range" || func == "range_strict" || func == "like" || func == "like_strict" ||
+ func == "regexp" || func == "regexp_strict" || func == "filter" || func == "filter_strict") {
+ bool isRange = func.StartsWith("range");
+ bool isFilter = func.StartsWith("filter");
+ size_t minArgs = isRange ? 1 : 2;
+ size_t maxArgs = isRange ? 5 : 4;
+ if (Args.size() < minArgs || Args.size() > maxArgs) {
+ ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but got: " << Args.size();
+ return nullptr;
+ }
+ if (ctx.DiscoveryMode) {
+ ctx.Error(Pos, TIssuesIds::YQL_NOT_ALLOWED_IN_DISCOVERY) << Func << " is not allowed in Discovery mode";
+ return nullptr;
+ }
+
+ for (ui32 index=0; index < Args.size(); ++index) {
+ auto& arg = Args[index];
+ if (arg.HasAt) {
+ ctx.Error(Pos) << "Temporary tables are not supported here";
+ return nullptr;
+ }
+
+ if (!arg.View.empty()) {
+ TStringBuilder sb;
+ sb << "Use the last argument of " << Func << " to specify a VIEW." << Endl;
+ if (isRange) {
+ sb << "Possible arguments are: prefix, from, to, suffix, view." << Endl;
+ } else if (isFilter) {
+ sb << "Possible arguments are: prefix, filtering callable, suffix, view." << Endl;
+ } else {
+ sb << "Possible arguments are: prefix, pattern, suffix, view." << Endl;
+ }
+ sb << "Pass empty string in arguments if you want to skip.";
+
+ ctx.Error(Pos) << sb;
+ return nullptr;
+ }
+
+ if (!func.StartsWith("filter") || index != 1) {
+ ExtractTableName(ctx, arg);
+ }
+ }
+
+ auto path = ctx.GetPrefixedPath(Service, Cluster, Args[0].Id);
+ if (!path) {
+ return nullptr;
+ }
+ auto range = Y(func.EndsWith("_strict") ? "MrTableRangeStrict" : "MrTableRange", path);
+ TNodePtr predicate;
+ TDeferredAtom suffix;
+ if (func.StartsWith("range")) {
+ TDeferredAtom min;
+ TDeferredAtom max;
+ if (Args.size() > 1) {
+ min = Args[1].Id;
+ }
+
+ if (Args.size() > 2) {
+ max = Args[2].Id;
+ }
+
+ if (Args.size() > 3) {
+ suffix = Args[3].Id;
+ }
+
+ if (min.Empty() && max.Empty()) {
+ predicate = BuildLambda(Pos, Y("item"), Y("Bool", Q("true")));
+ }
+ else {
+ auto minPred = !min.Empty() ? Y(">=", "item", Y("String", min.Build())) : nullptr;
+ auto maxPred = !max.Empty() ? Y("<=", "item", Y("String", max.Build())) : nullptr;
+ if (!minPred) {
+ predicate = BuildLambda(Pos, Y("item"), maxPred);
+ } else if (!maxPred) {
+ predicate = BuildLambda(Pos, Y("item"), minPred);
+ } else {
+ predicate = BuildLambda(Pos, Y("item"), Y("And", minPred, maxPred));
+ }
+ }
+ } else {
+ if (Args.size() > 2) {
+ suffix = Args[2].Id;
+ }
+
+ if (func.StartsWith("regexp")) {
+ if (!ctx.PragmaRegexUseRe2) {
+ ctx.Warning(Pos, TIssuesIds::CORE_LEGACY_REGEX_ENGINE) << "Legacy regex engine works incorrectly with unicode. Use PRAGMA RegexUseRe2='true';";
+ }
+
+ auto pattern = Args[1].Id;
+ auto udf = ctx.PragmaRegexUseRe2 ?
+ Y("Udf", Q("Re2.Grep"), Q(Y(Y("String", pattern.Build()), Y("Null")))):
+ Y("Udf", Q("Pcre.BacktrackingGrep"), Y("String", pattern.Build()));
+ predicate = BuildLambda(Pos, Y("item"), Y("Apply", udf, "item"));
+ } else if (func.StartsWith("like")) {
+ auto pattern = Args[1].Id;
+ auto convertedPattern = Y("Apply", Y("Udf", Q("Re2.PatternFromLike")),
+ Y("String", pattern.Build()));
+ auto udf = Y("Udf", Q("Re2.Match"), Q(Y(convertedPattern, Y("Null"))));
+ predicate = BuildLambda(Pos, Y("item"), Y("Apply", udf, "item"));
+ } else {
+ predicate = BuildLambda(Pos, Y("item"), Y("Apply", Args[1].Expr, "item"));
+ }
+ }
+
+ range = L(range, predicate);
+ range = L(range, suffix.Build() ? suffix.Build() : BuildQuotedAtom(Pos, ""));
+ auto key = Y("Key", Q(Y(Q("table"), range)));
+ if (Args.size() == maxArgs) {
+ const auto& lastArg = Args.back();
+ if (!lastArg.View.empty()) {
+ ctx.Error(Pos) << Func << " requires that view should be set as last argument";
+ return nullptr;
+ }
+
+ if (!lastArg.Id.Empty()) {
+ key = L(key, Q(Y(Q("view"), Y("String", lastArg.Id.Build()))));
+ }
+ }
+
+ return key;
+ } else if (func == "each" || func == "each_strict") {
+ auto each = Y(func == "each" ? "MrTableEach" : "MrTableEachStrict");
+ for (auto& arg : Args) {
+ if (arg.HasAt) {
+ ctx.Error(Pos) << "Temporary tables are not supported here";
+ return nullptr;
+ }
+
+ auto type = Y("ListType", Y("DataType", Q("String")));
+ auto key = Y("Key", Q(Y(Q("table"), Y("EvaluateExpr",
+ Y("EnsureType", Y("Coalesce", arg.Expr,
+ Y("List", type)), type)))));
+
+ key = AddView(key, arg.View);
+ if (!ValidateView(GetPos(), ctx, Service, arg.View)) {
+ return nullptr;
+ }
+ each = L(each, key);
+ }
+ if (ctx.PragmaUseTablePrefixForEach) {
+ TStringBuf prefixPath = ctx.GetPrefixPath(Service, Cluster);
+ if (prefixPath) {
+ each = L(each, BuildQuotedAtom(Pos, TString(prefixPath)));
+ }
+ }
+ return each;
+ }
+ else if (func == "folder") {
+ size_t minArgs = 1;
+ size_t maxArgs = 2;
+ if (Args.size() < minArgs || Args.size() > maxArgs) {
+ ctx.Error(Pos) << Func << " requires from " << minArgs << " to " << maxArgs << " arguments, but found: " << Args.size();
+ return nullptr;
+ }
+
+ if (ctx.DiscoveryMode) {
+ ctx.Error(Pos, TIssuesIds::YQL_NOT_ALLOWED_IN_DISCOVERY) << Func << " is not allowed in Discovery mode";
+ return nullptr;
+ }
+
+ for (ui32 index = 0; index < Args.size(); ++index) {
+ auto& arg = Args[index];
+ if (arg.HasAt) {
+ ctx.Error(Pos) << "Temporary tables are not supported here";
+ return nullptr;
+ }
+
+ if (!arg.View.empty()) {
+ ctx.Error(Pos) << Func << " doesn't supports views";
+ return nullptr;
+ }
+
+ ExtractTableName(ctx, arg);
+ }
+
+ auto folder = Y("MrFolder");
+ folder = L(folder, Args[0].Id.Build());
+ folder = L(folder, Args.size() > 1 ? Args[1].Id.Build() : BuildQuotedAtom(Pos, ""));
+ return folder;
+ }
+ else if (func == "walkfolders") {
+ const size_t minPositionalArgs = 1;
+ const size_t maxPositionalArgs = 2;
+
+ size_t positionalArgsCnt = 0;
+ for (const auto& arg : Args) {
+ if (!arg.Expr->GetLabel()) {
+ positionalArgsCnt++;
+ } else {
+ break;
+ }
+ }
+ if (positionalArgsCnt < minPositionalArgs || positionalArgsCnt > maxPositionalArgs) {
+ ctx.Error(Pos) << Func << " requires from " << minPositionalArgs
+ << " to " << maxPositionalArgs
+ << " positional arguments, but got: " << positionalArgsCnt;
+ return nullptr;
+ }
+
+ constexpr auto walkFoldersModuleName = "walk_folders_module";
+ ctx.RequiredModules.emplace(walkFoldersModuleName, "/lib/yql/walk_folders.yql");
+
+ auto& rootFolderArg = Args[0];
+ if (rootFolderArg.HasAt) {
+ ctx.Error(Pos) << "Temporary tables are not supported here";
+ return nullptr;
+ }
+ if (!rootFolderArg.View.empty()) {
+ ctx.Error(Pos) << Func << " doesn't supports views";
+ return nullptr;
+ }
+ ExtractTableName(ctx, rootFolderArg);
+
+ const auto initState =
+ positionalArgsCnt > 1
+ ? Args[1].Expr
+ : Y("List", Y("ListType", Y("DataType", Q("String"))));
+
+ TNodePtr rootAttributes;
+ TNodePtr preHandler;
+ TNodePtr resolveHandler;
+ TNodePtr diveHandler;
+ TNodePtr postHandler;
+ for (auto it = Args.begin() + positionalArgsCnt; it != Args.end(); ++it) {
+ auto& arg = *it;
+ const auto label = arg.Expr->GetLabel();
+ if (label == "RootAttributes") {
+ ExtractTableName(ctx, arg);
+ rootAttributes = arg.Id.Build();
+ }
+ else if (label == "PreHandler") {
+ preHandler = arg.Expr;
+ }
+ else if (label == "ResolveHandler") {
+ resolveHandler = arg.Expr;
+ }
+ else if (label == "DiveHandler") {
+ diveHandler = arg.Expr;
+ }
+ else if (label == "PostHandler") {
+ postHandler = arg.Expr;
+ }
+ else {
+ ctx.Warning(Pos, DEFAULT_ERROR) << "Unsupported named argument: "
+ << label << " in " << Func;
+ }
+ }
+ if (rootAttributes == nullptr) {
+ rootAttributes = BuildQuotedAtom(Pos, "");
+ }
+
+ if (preHandler != nullptr || postHandler != nullptr) {
+ const auto makePrePostHandlerType = BuildBind(Pos, walkFoldersModuleName, "MakePrePostHandlersType");
+ const auto prePostHandlerType = Y("EvaluateType", Y("TypeHandle", Y("Apply", makePrePostHandlerType, Y("TypeOf", initState))));
+
+ if (preHandler != nullptr) {
+ preHandler = Y("Callable", prePostHandlerType, preHandler);
+ }
+ if (postHandler != nullptr) {
+ postHandler = Y("Callable", prePostHandlerType, postHandler);
+ }
+ }
+ if (preHandler == nullptr) {
+ preHandler = Y("Void");
+ }
+ if (postHandler == nullptr) {
+ postHandler = Y("Void");
+ }
+
+ const auto makeResolveDiveHandlerType = BuildBind(Pos, walkFoldersModuleName, "MakeResolveDiveHandlersType");
+ const auto resolveDiveHandlerType = Y("EvaluateType", Y("TypeHandle", Y("Apply", makeResolveDiveHandlerType, Y("TypeOf", initState))));
+ if (resolveHandler == nullptr) {
+ resolveHandler = BuildBind(Pos, walkFoldersModuleName, "AnyNodeDiveHandler");
+ }
+ if (diveHandler == nullptr) {
+ diveHandler = BuildBind(Pos, walkFoldersModuleName, "AnyNodeDiveHandler");
+ }
+
+ resolveHandler = Y("Callable", resolveDiveHandlerType, resolveHandler);
+ diveHandler = Y("Callable", resolveDiveHandlerType, diveHandler);
+
+ const auto initStateType = Y("EvaluateType", Y("TypeHandle", Y("TypeOf", initState)));
+ const auto pickledInitState = Y("Pickle", initState);
+
+ const auto initPath = rootFolderArg.Id.Build();
+
+ return Y("MrWalkFolders", initPath, rootAttributes, pickledInitState, initStateType,
+ preHandler, resolveHandler, diveHandler, postHandler);
+ }
+ else if (func == "tables") {
+ if (!Args.empty()) {
+ ctx.Error(Pos) << Func << " doesn't accept arguments";
+ return nullptr;
+ }
+
+ return L(Y("DataTables"));
+ }
+ else if (func == "object") {
+ const size_t positionalArgs = 2;
+ auto result = Y("MrObject");
+ auto settings = Y();
+ //TVector<TNodePtr> settings;
+ size_t argc = 0;
+ for (ui32 index = 0; index < Args.size(); ++index) {
+ auto& arg = Args[index];
+ if (arg.HasAt) {
+ ctx.Error(arg.Expr->GetPos()) << "Temporary tables are not supported here";
+ return nullptr;
+ }
+
+ if (!arg.View.empty()) {
+ ctx.Error(Pos) << to_upper(Func) << " doesn't supports views";
+ return nullptr;
+ }
+
+ if (!arg.Expr->GetLabel()) {
+ ExtractTableName(ctx, arg);
+ result = L(result, arg.Id.Build());
+ ++argc;
+ } else {
+ settings = L(settings, Q(Y(BuildQuotedAtom(arg.Expr->GetPos(), arg.Expr->GetLabel()), arg.Expr)));
+ }
+ }
+
+ if (argc != positionalArgs) {
+ ctx.Error(Pos) << to_upper(Func) << " requires exacty " << positionalArgs << " positional args, but got " << argc;
+ return nullptr;
+ }
+
+ result = L(result, Q(settings));
+ return result;
+ }
+
+ ctx.Error(Pos) << "Unknown table name preprocessor: " << Func;
+ return nullptr;
+ }
+
+private:
+ TString Service;
+ TDeferredAtom Cluster;
+ TString Func;
+ TVector<TTableArg> Args;
+};
+
+// Factory for a TPrepTableKeys node describing a table name preprocessor call
+// (func with its args) on the given service and cluster.
+TNodePtr BuildTableKeys(
+    TPosition pos,
+    const TString& service,
+    const TDeferredAtom& cluster,
+    const TString& func,
+    const TVector<TTableArg>& args)
+{
+    TNodePtr prepKeys = new TPrepTableKeys(pos, service, cluster, func, args);
+    return prepKeys;
+}
+
+// List node that renders the table hints of an input into quoted option lists.
+class TInputOptions final: public TAstListNode {
+public:
+    TInputOptions(TPosition pos, const TTableHints& hints)
+        : TAstListNode(pos)
+        , Hints(hints)
+    {
+    }
+
+    // For every hint appends a quoted list (name value...) to Nodes, where name
+    // is the hint name after it went through NormalizeName and each value node
+    // has been initialized with the given source.
+    // Returns false when NormalizeName yields an issue (its message is reported
+    // through ctx) or when a value node fails to initialize.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        for (auto& [rawName, values] : Hints) {
+            TString name = rawName;
+            TMaybe<TIssue> issue = NormalizeName(Pos, name);
+            if (!issue.Empty()) {
+                ctx.Error() << issue->GetMessage();
+                ctx.IncrementMonCounter("sql_errors", "NormalizeHintError");
+                return false;
+            }
+
+            TNodePtr entry = Y(BuildQuotedAtom(Pos, name));
+            for (auto& value : values) {
+                if (!value->Init(ctx, src)) {
+                    return false;
+                }
+                entry = L(entry, value);
+            }
+            Nodes.push_back(Q(entry));
+        }
+        return true;
+    }
+
+    // Cloning is not implemented for this node: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return nullptr;
+    }
+
+private:
+    TTableHints Hints;
+};
+
+// Creates a TInputOptions node for the hints, or returns an empty pointer
+// when there are no hints at all.
+TNodePtr BuildInputOptions(TPosition pos, const TTableHints& hints) {
+    TNodePtr options;
+    if (!hints.empty()) {
+        options = new TInputOptions(pos, hints);
+    }
+    return options;
+}
+
+// List node with the options of a write into a table: an optional
+// "erase_columns" entry followed by one entry per table hint.
+class TIntoTableOptions: public TAstListNode {
+public:
+    TIntoTableOptions(TPosition pos, const TVector<TString>& columns, const TTableHints& hints)
+        : TAstListNode(pos)
+        , Columns(columns)
+        , Hints(hints)
+    {
+    }
+
+    // Adds '(erase_columns '(col...)) when Columns is not empty, then a quoted
+    // list (name value...) for every hint, where name is the hint name after
+    // NormalizeName and each value node has been initialized.
+    // Returns false when NormalizeName yields an issue (its message is reported
+    // through ctx) or when a hint value fails to initialize.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (!Columns.empty()) {
+            TNodePtr eraseColumns = Y();
+            for (const auto& column : Columns) {
+                eraseColumns->Add(Q(column));
+            }
+            Add(Q(Y(Q("erase_columns"), Q(eraseColumns))));
+        }
+
+        for (const auto& hint : Hints) {
+            TString hintName = hint.first;
+            TMaybe<TIssue> normalizeError = NormalizeName(Pos, hintName);
+            if (!normalizeError.Empty()) {
+                ctx.Error() << normalizeError->GetMessage();
+                ctx.IncrementMonCounter("sql_errors", "NormalizeHintError");
+                return false;
+            }
+
+            TNodePtr option = Y(BuildQuotedAtom(Pos, hintName));
+            for (auto& value : hint.second) {
+                if (!value->Init(ctx, src)) {
+                    return false;
+                }
+                option = L(option, value);
+            }
+            Add(Q(option));
+        }
+
+        return true;
+    }
+
+    // Produces a new node with the same position and columns and with the
+    // hints passed through CloneContainer.
+    TNodePtr DoClone() const final {
+        return new TIntoTableOptions(GetPos(), Columns, CloneContainer(Hints));
+    }
+
+private:
+    TVector<TString> Columns;
+    TTableHints Hints;
+};
+
+// Factory for a TIntoTableOptions node with the given erase columns and hints.
+TNodePtr BuildIntoTableOptions(
+    TPosition pos,
+    const TVector<TString>& eraseColumns,
+    const TTableHints& hints)
+{
+    TNodePtr options = new TIntoTableOptions(pos, eraseColumns, hints);
+    return options;
+}
+
+// List node that emits the read statements for a list of table references.
+// Each distinct RefName is processed once; repeated references are skipped.
+class TInputTablesNode final: public TAstListNode {
+public:
+    TInputTablesNode(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped)
+        : TAstListNode(pos)
+        , Tables(tables)
+        , InSubquery(inSubquery)
+        , Scoped(scoped)
+    {
+    }
+
+    // For every new table reference adds:
+    //   (let x (<ReadName> world <DataSource> <keys> (Void) '<options>))
+    //   (let world (<LeftName> x))     - skipped for the Yt provider unless
+    //                                    ctx.Settings.SaveWorldDependencies is set
+    //   (let <RefName> (<RightName> x))
+    // Returns false when the keys cannot be built or initialized, or when a
+    // Kikimr/Ydb table is used while InSubquery is set.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        THashSet<TString> seenRefs;
+        for (auto& table : Tables) {
+            const bool firstOccurrence = seenRefs.insert(table.RefName).second;
+            if (!firstOccurrence) {
+                continue;
+            }
+
+            Scoped->UseCluster(table.Service, table.Cluster);
+            auto readKeys = table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::INPUT);
+            if (!readKeys) {
+                return false;
+            }
+            if (!readKeys->Init(ctx, src)) {
+                return false;
+            }
+
+            auto noFields = Y("Void");
+            auto dataSource = Y("DataSource", BuildQuotedAtom(Pos, table.Service), Scoped->WrapCluster(table.Cluster, ctx));
+            auto readOptions = Q(table.Options ? table.Options : Y());
+            Add(Y("let", "x", readKeys->Y(TString(ReadName), "world", dataSource, readKeys, noFields, readOptions)));
+
+            if (InSubquery && IsIn({KikimrProviderName, YdbProviderName}, table.Service)) {
+                ctx.Error() << "Using of system '" << table.Service << "' is not allowed in SUBQUERY";
+                return false;
+            }
+
+            const bool keepWorld = table.Service != YtProviderName || ctx.Settings.SaveWorldDependencies;
+            if (keepWorld) {
+                Add(Y("let", "world", Y(TString(LeftName), "x")));
+            }
+
+            Add(Y("let", table.RefName, Y(TString(RightName), "x")));
+        }
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Cloning is not implemented for this node: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return nullptr;
+    }
+
+private:
+    TTableList Tables;
+    const bool InSubquery;
+    TScopedStatePtr Scoped;
+};
+
+// Factory for a TInputTablesNode reading the given tables within the scope.
+TNodePtr BuildInputTables(
+    TPosition pos,
+    const TTableList& tables,
+    bool inSubquery,
+    TScopedStatePtr scoped)
+{
+    TNodePtr inputTables = new TInputTablesNode(pos, tables, inSubquery, scoped);
+    return inputTables;
+}
+
+// AST node for table creation. Validates the column references of the
+// requested schema and emits a write into the table's data sink that carries
+// all creation options; the written values come from the optional Values
+// source, or are Void when no source is given.
+class TCreateTableNode final: public TAstListNode {
+public:
+    TCreateTableNode(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr values, TScopedStatePtr scoped)
+        : TAstListNode(pos)
+        , Table(tr)
+        , Params(params)
+        , ExistingOk(existingOk)
+        , ReplaceIfExists(replaceIfExists)
+        , Values(std::move(values))
+        , Scoped(scoped)
+    {
+        scoped->UseCluster(Table.Service, Table.Cluster);
+    }
+
+    // Builds the "block" that writes the table description to the sink.
+    // Returns false on any validation or initialization failure; except for
+    // failures inside nested Init/Build calls, an error is reported via ctx.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE);
+        if (!keys || !keys->Init(ctx, src)) {
+            return false;
+        }
+
+        if (!ValidateColumnReferences(ctx)) {
+            return false;
+        }
+
+        auto opts = Y();
+        if (Table.Options) {
+            if (!Table.Options->Init(ctx, src)) {
+                return false;
+            }
+            opts = Table.Options;
+        }
+
+        if (ExistingOk) {
+            opts = L(opts, Q(Y(Q("mode"), Q("create_if_not_exists"))));
+        } else if (ReplaceIfExists) {
+            opts = L(opts, Q(Y(Q("mode"), Q("create_or_replace"))));
+        } else {
+            opts = L(opts, Q(Y(Q("mode"), Q("create"))));
+        }
+
+        // Names of the declared column families; columns may only refer to these.
+        THashSet<TString> columnFamilyNames;
+
+        if (Params.ColumnFamilies) {
+            auto columnFamilies = Y();
+            for (const auto& family : Params.ColumnFamilies) {
+                if (!columnFamilyNames.insert(family.Name.Name).second) {
+                    ctx.Error(family.Name.Pos) << "Family " << family.Name.Name << " specified more than once";
+                    return false;
+                }
+                auto familyDesc = Y();
+                familyDesc = L(familyDesc, Q(Y(Q("name"), BuildQuotedAtom(family.Name.Pos, family.Name.Name))));
+                if (family.Data) {
+                    familyDesc = L(familyDesc, Q(Y(Q("data"), family.Data)));
+                }
+                if (family.Compression) {
+                    familyDesc = L(familyDesc, Q(Y(Q("compression"), family.Compression)));
+                }
+                if (family.CompressionLevel) {
+                    familyDesc = L(familyDesc, Q(Y(Q("compression_level"), family.CompressionLevel)));
+                }
+                columnFamilies = L(columnFamilies, Q(familyDesc));
+            }
+            opts = L(opts, Q(Y(Q("columnFamilies"), Q(columnFamilies))));
+        }
+
+        auto columns = Y();
+        for (auto& col : Params.Columns) {
+            auto columnDesc = Y();
+            columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name));
+            auto type = col.Type;
+
+            // A column without a type is described by its name only.
+            if (type) {
+                if (col.Nullable) {
+                    type = Y("AsOptionalType", type);
+                }
+
+                columnDesc = L(columnDesc, type);
+
+                auto columnConstraints = Y();
+
+                if (!col.Nullable) {
+                    columnConstraints = L(columnConstraints, Q(Y(Q("not_null"))));
+                }
+
+                if (col.Serial) {
+                    columnConstraints = L(columnConstraints, Q(Y(Q("serial"))));
+                }
+
+                if (col.DefaultExpr) {
+                    if (!col.DefaultExpr->Init(ctx, src)) {
+                        return false;
+                    }
+
+                    columnConstraints = L(columnConstraints, Q(Y(Q("default"), col.DefaultExpr)));
+                }
+
+                columnDesc = L(columnDesc, Q(Y(Q("columnConstrains"), Q(columnConstraints))));
+
+                auto familiesDesc = Y();
+                for (const auto& family : col.Families) {
+                    if (!columnFamilyNames.contains(family.Name)) {
+                        ctx.Error(family.Pos) << "Unknown family " << family.Name;
+                        return false;
+                    }
+                    familiesDesc = L(familiesDesc, BuildQuotedAtom(family.Pos, family.Name));
+                }
+
+                columnDesc = L(columnDesc, Q(familiesDesc));
+            }
+
+            columns = L(columns, Q(columnDesc));
+        }
+        opts = L(opts, Q(Y(Q("columns"), Q(columns))));
+
+        if (Table.Service == RtmrProviderName) {
+            if (!Params.PkColumns.empty() && !Params.PartitionByColumns.empty()) {
+                ctx.Error() << "Only one of PRIMARY KEY or PARTITION BY constraints may be specified";
+                return false;
+            }
+        } else {
+            if (!Params.OrderByColumns.empty()) {
+                ctx.Error() << "ORDER BY is supported only for " << RtmrProviderName << " provider";
+                return false;
+            }
+        }
+
+        if (!Params.PkColumns.empty()) {
+            auto primaryKey = Y();
+            for (auto& col : Params.PkColumns) {
+                primaryKey = L(primaryKey, BuildQuotedAtom(col.Pos, col.Name));
+            }
+            opts = L(opts, Q(Y(Q("primarykey"), Q(primaryKey))));
+            if (!Params.OrderByColumns.empty()) {
+                ctx.Error() << "PRIMARY KEY cannot be used with ORDER BY, use PARTITION BY instead";
+                return false;
+            }
+        }
+
+        if (!Params.PartitionByColumns.empty()) {
+            auto partitionBy = Y();
+            for (auto& col : Params.PartitionByColumns) {
+                partitionBy = L(partitionBy, BuildQuotedAtom(col.Pos, col.Name));
+            }
+            opts = L(opts, Q(Y(Q("partitionby"), Q(partitionBy))));
+        }
+
+        if (!Params.OrderByColumns.empty()) {
+            auto orderBy = Y();
+            for (auto& col : Params.OrderByColumns) {
+                orderBy = L(orderBy, Q(Y(BuildQuotedAtom(col.first.Pos, col.first.Name), col.second ? Q("1") : Q("0"))));
+            }
+            opts = L(opts, Q(Y(Q("orderby"), Q(orderBy))));
+        }
+
+        for (const auto& index : Params.Indexes) {
+            const auto& desc = CreateIndexDesc(index, ETableSettingsParsingMode::Create, *this);
+            opts = L(opts, Q(Y(Q("index"), Q(desc))));
+        }
+
+        for (const auto& cf : Params.Changefeeds) {
+            const auto& desc = CreateChangefeedDesc(cf, *this);
+            opts = L(opts, Q(Y(Q("changefeed"), Q(desc))));
+        }
+
+        if (Params.TableSettings.IsSet()) {
+            opts = L(opts, Q(Y(Q("tableSettings"), Q(
+                CreateTableSettings(Params.TableSettings, ETableSettingsParsingMode::Create, *this)
+            ))));
+        }
+
+        switch (Params.TableType) {
+            case ETableType::TableStore:
+                opts = L(opts, Q(Y(Q("tableType"), Q("tableStore"))));
+                break;
+            case ETableType::ExternalTable:
+                opts = L(opts, Q(Y(Q("tableType"), Q("externalTable"))));
+                break;
+            case ETableType::Table:
+                break;
+        }
+
+        if (Params.Temporary) {
+            opts = L(opts, Q(Y(Q("temporary"))));
+        }
+
+        TNodePtr node = nullptr;
+        if (Values) {
+            if (!Values->Init(ctx, nullptr)) {
+                return false;
+            }
+            TTableList tableList;
+            Values->GetInputTables(tableList);
+            auto valuesSource = Values.Get();
+            auto values = Values->Build(ctx);
+            // Check the freshly built node, not the Values member: the member
+            // is known to be non-null here, so testing it could never fail.
+            if (!values) {
+                return false;
+            }
+
+            TNodePtr inputTables(BuildInputTables(Pos, tableList, false, Scoped));
+            if (!inputTables->Init(ctx, valuesSource)) {
+                return false;
+            }
+
+            node = inputTables;
+            node = L(node, Y("let", "values", values));
+        } else {
+            node = Y(Y("let", "values", Y("Void")));
+        }
+
+        auto write = Y(
+            Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))),
+            Y("let", "world", Y(TString(WriteName), "world", "sink", keys, "values", Q(opts))),
+            Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+        );
+
+        node = L(node, Y("let", "world", Y("block", Q(write))));
+        node = L(node, Y("return", "world"));
+
+        Add("block", Q(node));
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Cloning is not implemented for this node: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return {};
+    }
+private:
+    // Checks that primary key, partition, order-by and index columns refer to
+    // declared columns, and that primary key columns, index names and
+    // changefeed names are not repeated. Undefined columns are tolerated when
+    // a Values source is present and no columns are declared at all.
+    // Reports the first violation via ctx and returns false.
+    bool ValidateColumnReferences(TContext& ctx) const {
+        const bool hasReferences = !Params.PkColumns.empty()
+            || !Params.PartitionByColumns.empty()
+            || !Params.OrderByColumns.empty()
+            || !Params.Indexes.empty()
+            || !Params.Changefeeds.empty();
+        if (!hasReferences) {
+            return true;
+        }
+
+        THashSet<TString> columnsSet;
+        for (const auto& col : Params.Columns) {
+            columnsSet.insert(col.Name);
+        }
+
+        const bool allowUndefinedColumns = (Values != nullptr) && columnsSet.empty();
+        const auto isDefined = [&](const auto& column) {
+            if (allowUndefinedColumns || columnsSet.contains(column.Name)) {
+                return true;
+            }
+            ctx.Error(column.Pos) << "Undefined column: " << column.Name;
+            return false;
+        };
+
+        THashSet<TString> pkColumns;
+        for (const auto& keyColumn : Params.PkColumns) {
+            if (!isDefined(keyColumn)) {
+                return false;
+            }
+            if (!pkColumns.insert(keyColumn.Name).second) {
+                ctx.Error(keyColumn.Pos) << "Duplicated column in PK: " << keyColumn.Name;
+                return false;
+            }
+        }
+        for (const auto& keyColumn : Params.PartitionByColumns) {
+            if (!isDefined(keyColumn)) {
+                return false;
+            }
+        }
+        for (const auto& keyColumn : Params.OrderByColumns) {
+            if (!isDefined(keyColumn.first)) {
+                return false;
+            }
+        }
+
+        THashSet<TString> indexNames;
+        for (const auto& index : Params.Indexes) {
+            if (!indexNames.insert(index.Name.Name).second) {
+                ctx.Error(index.Name.Pos) << "Index " << index.Name.Name << " must be defined once";
+                return false;
+            }
+
+            for (const auto& indexColumn : index.IndexColumns) {
+                if (!isDefined(indexColumn)) {
+                    return false;
+                }
+            }
+
+            for (const auto& dataColumn : index.DataColumns) {
+                if (!isDefined(dataColumn)) {
+                    return false;
+                }
+            }
+        }
+
+        THashSet<TString> cfNames;
+        for (const auto& cf : Params.Changefeeds) {
+            if (!cfNames.insert(cf.Name.Name).second) {
+                ctx.Error(cf.Name.Pos) << "Changefeed " << cf.Name.Name << " must be defined once";
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    const TTableRef Table;
+    const TCreateTableParameters Params;
+    const bool ExistingOk;
+    const bool ReplaceIfExists;
+    const TSourcePtr Values;
+    TScopedStatePtr Scoped;
+};
+
+// Factory for a TCreateTableNode; ownership of the values source is moved
+// into the created node.
+TNodePtr BuildCreateTable(
+    TPosition pos,
+    const TTableRef& tr,
+    bool existingOk,
+    bool replaceIfExists,
+    const TCreateTableParameters& params,
+    TSourcePtr values,
+    TScopedStatePtr scoped)
+{
+    TNodePtr createTable = new TCreateTableNode(pos, tr, existingOk, replaceIfExists, params, std::move(values), scoped);
+    return createTable;
+}
+
+// AST node for table alteration. Collects every requested change into an
+// "actions" list and emits a write with mode "alter" into the table's sink.
+class TAlterTableNode final: public TAstListNode {
+public:
+    TAlterTableNode(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped)
+        : TAstListNode(pos)
+        , Table(tr)
+        , Params(params)
+        , Scoped(scoped)
+    {
+        scoped->UseCluster(Table.Service, Table.Cluster);
+    }
+
+    // Builds the "block" that writes the alter actions to the sink.
+    // Returns false when the keys cannot be built or initialized, when a
+    // default expression fails to initialize, or when a column change of an
+    // unsupported kind is requested.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE);
+        if (!keys || !keys->Init(ctx, src)) {
+            return false;
+        }
+
+        // Describes a list of column families as quoted (name, data,
+        // compression, compression_level) settings; absent settings are skipped.
+        const auto describeFamilies = [this](const auto& families) {
+            auto columnFamilies = Y();
+            for (const auto& family : families) {
+                auto familyDesc = Y();
+                familyDesc = L(familyDesc, Q(Y(Q("name"), BuildQuotedAtom(family.Name.Pos, family.Name.Name))));
+                if (family.Data) {
+                    familyDesc = L(familyDesc, Q(Y(Q("data"), family.Data)));
+                }
+                if (family.Compression) {
+                    familyDesc = L(familyDesc, Q(Y(Q("compression"), family.Compression)));
+                }
+                if (family.CompressionLevel) {
+                    familyDesc = L(familyDesc, Q(Y(Q("compression_level"), family.CompressionLevel)));
+                }
+                columnFamilies = L(columnFamilies, Q(familyDesc));
+            }
+            return columnFamilies;
+        };
+
+        auto actions = Y();
+
+        if (Params.AddColumns) {
+            auto columns = Y();
+            for (auto& col : Params.AddColumns) {
+                auto columnDesc = Y();
+                columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name));
+                auto type = col.Type;
+                if (col.Nullable) {
+                    type = Y("AsOptionalType", type);
+                }
+
+                columnDesc = L(columnDesc, type);
+                auto columnConstraints = Y();
+                if (!col.Nullable) {
+                    columnConstraints = L(columnConstraints, Q(Y(Q("not_null"))));
+                }
+
+                if (col.Serial) {
+                    columnConstraints = L(columnConstraints, Q(Y(Q("serial"))));
+                }
+
+                if (col.DefaultExpr) {
+                    if (!col.DefaultExpr->Init(ctx, src)) {
+                        return false;
+                    }
+
+                    columnConstraints = L(columnConstraints, Q(Y(Q("default"), col.DefaultExpr)));
+                }
+
+                columnDesc = L(columnDesc, Q(Y(Q("columnConstrains"), Q(columnConstraints))));
+
+                auto familiesDesc = Y();
+                for (const auto& family : col.Families) {
+                    familiesDesc = L(familiesDesc, BuildQuotedAtom(family.Pos, family.Name));
+                }
+                columnDesc = L(columnDesc, Q(familiesDesc));
+
+                columns = L(columns, Q(columnDesc));
+            }
+            actions = L(actions, Q(Y(Q("addColumns"), Q(columns))));
+        }
+
+        if (Params.DropColumns) {
+            auto columns = Y();
+            for (auto& colName : Params.DropColumns) {
+                columns = L(columns, BuildQuotedAtom(Pos, colName));
+            }
+            actions = L(actions, Q(Y(Q("dropColumns"), Q(columns))));
+        }
+
+        if (Params.AlterColumns) {
+            auto columns = Y();
+            for (auto& col : Params.AlterColumns) {
+                if (col.TypeOfChange == TColumnSchema::ETypeOfChange::DropNotNullConstraint) {
+                    auto columnDesc = Y();
+                    columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name));
+
+                    auto columnConstraints = Y();
+                    columnConstraints = L(columnConstraints, Q(Y(Q("drop_not_null"))));
+                    columnDesc = L(columnDesc, Q(Y(Q("changeColumnConstraints"), Q(columnConstraints))));
+                    columns = L(columns, Q(columnDesc));
+                } else if (col.TypeOfChange == TColumnSchema::ETypeOfChange::SetNotNullConstraint) {
+                    // TODO(flown4qqqq): not implemented yet; the change is
+                    // currently skipped without producing any action.
+                } else if (col.TypeOfChange == TColumnSchema::ETypeOfChange::SetFamily) {
+                    auto columnDesc = Y();
+                    columnDesc = L(columnDesc, BuildQuotedAtom(Pos, col.Name));
+                    auto familiesDesc = Y();
+                    for (const auto& family : col.Families) {
+                        familiesDesc = L(familiesDesc, BuildQuotedAtom(family.Pos, family.Name));
+                    }
+
+                    columnDesc = L(columnDesc, Q(Y(Q("setFamily"), Q(familiesDesc))));
+                    columns = L(columns, Q(columnDesc));
+                } else if (col.TypeOfChange == TColumnSchema::ETypeOfChange::Nothing) {
+                    // do nothing
+                } else {
+                    // Fail the initialization like every other error path does
+                    // instead of reporting the error and carrying on.
+                    ctx.Error(Pos) << " action is not supported";
+                    return false;
+                }
+            }
+            actions = L(actions, Q(Y(Q("alterColumns"), Q(columns))));
+        }
+
+        if (Params.AddColumnFamilies) {
+            actions = L(actions, Q(Y(Q("addColumnFamilies"), Q(describeFamilies(Params.AddColumnFamilies)))));
+        }
+
+        if (Params.AlterColumnFamilies) {
+            actions = L(actions, Q(Y(Q("alterColumnFamilies"), Q(describeFamilies(Params.AlterColumnFamilies)))));
+        }
+
+        if (Params.TableSettings.IsSet()) {
+            actions = L(actions, Q(Y(Q("setTableSettings"), Q(
+                CreateTableSettings(Params.TableSettings, ETableSettingsParsingMode::Alter, *this)
+            ))));
+        }
+
+        for (const auto& index : Params.AddIndexes) {
+            const auto& desc = CreateIndexDesc(index, ETableSettingsParsingMode::Alter, *this);
+            actions = L(actions, Q(Y(Q("addIndex"), Q(desc))));
+        }
+
+        for (const auto& index : Params.AlterIndexes) {
+            const auto& desc = CreateAlterIndex(index, *this);
+            actions = L(actions, Q(Y(Q("alterIndex"), Q(desc))));
+        }
+
+        for (const auto& id : Params.DropIndexes) {
+            auto indexName = BuildQuotedAtom(id.Pos, id.Name);
+            actions = L(actions, Q(Y(Q("dropIndex"), indexName)));
+        }
+
+        if (Params.RenameIndexTo) {
+            // Named srcName/dstName so the ISource* parameter src is not shadowed.
+            auto srcName = BuildQuotedAtom(Params.RenameIndexTo->first.Pos, Params.RenameIndexTo->first.Name);
+            auto dstName = BuildQuotedAtom(Params.RenameIndexTo->second.Pos, Params.RenameIndexTo->second.Name);
+
+            auto desc = Y();
+
+            desc = L(desc, Q(Y(Q("src"), srcName)));
+            desc = L(desc, Q(Y(Q("dst"), dstName)));
+
+            actions = L(actions, Q(Y(Q("renameIndexTo"), Q(desc))));
+        }
+
+        if (Params.RenameTo) {
+            auto destination = ctx.GetPrefixedPath(Scoped->CurrService, Scoped->CurrCluster,
+                                                   TDeferredAtom(Params.RenameTo->Pos, Params.RenameTo->Name));
+            actions = L(actions, Q(Y(Q("renameTo"), destination)));
+        }
+
+        for (const auto& cf : Params.AddChangefeeds) {
+            const auto& desc = CreateChangefeedDesc(cf, *this);
+            actions = L(actions, Q(Y(Q("addChangefeed"), Q(desc))));
+        }
+
+        for (const auto& cf : Params.AlterChangefeeds) {
+            const auto& desc = CreateChangefeedDesc(cf, *this);
+            actions = L(actions, Q(Y(Q("alterChangefeed"), Q(desc))));
+        }
+
+        for (const auto& id : Params.DropChangefeeds) {
+            const auto name = BuildQuotedAtom(id.Pos, id.Name);
+            actions = L(actions, Q(Y(Q("dropChangefeed"), name)));
+        }
+
+        auto opts = Y();
+
+        opts = L(opts, Q(Y(Q("mode"), Q("alter"))));
+        opts = L(opts, Q(Y(Q("actions"), Q(actions))));
+
+        switch (Params.TableType) {
+            case ETableType::TableStore:
+                opts = L(opts, Q(Y(Q("tableType"), Q("tableStore"))));
+                break;
+            case ETableType::ExternalTable:
+                opts = L(opts, Q(Y(Q("tableType"), Q("externalTable"))));
+                break;
+            case ETableType::Table:
+                break;
+        }
+
+        Add("block", Q(Y(
+            Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))),
+            Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))),
+            Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+        )));
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Cloning is not implemented for this node: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return {};
+    }
+private:
+    TTableRef Table;
+    const TAlterTableParameters Params;
+    TScopedStatePtr Scoped;
+};
+
+// Factory for a TAlterTableNode applying params to the referenced table.
+TNodePtr BuildAlterTable(
+    TPosition pos,
+    const TTableRef& tr,
+    const TAlterTableParameters& params,
+    TScopedStatePtr scoped)
+{
+    TNodePtr alterTable = new TAlterTableNode(pos, tr, params, scoped);
+    return alterTable;
+}
+
+// AST node for dropping a table: emits a write with mode "drop" (or
+// "drop_if_exists" when MissingOk is set) into the table's data sink.
+class TDropTableNode final: public TAstListNode {
+public:
+    TDropTableNode(TPosition pos, const TTableRef& tr, bool missingOk, ETableType tableType, TScopedStatePtr scoped)
+        : TAstListNode(pos)
+        , Table(tr)
+        , TableType(tableType)
+        , Scoped(scoped)
+        , FakeSource(BuildFakeSource(pos))
+        , MissingOk(missingOk)
+    {
+        scoped->UseCluster(Table.Service, Table.Cluster);
+    }
+
+    // Builds the "block" performing the drop. The incoming source is ignored;
+    // the node's own FakeSource is used for all nested initialization.
+    // Returns false when the keys cannot be built or initialized.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        Y_UNUSED(src);
+        ISource* const fakeSource = FakeSource.Get();
+
+        auto dropKeys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::DROP);
+        if (!dropKeys) {
+            return false;
+        }
+        if (!dropKeys->Init(ctx, fakeSource)) {
+            return false;
+        }
+
+        const char* const mode = MissingOk ? "drop_if_exists" : "drop";
+        auto settings = L(Y(), Q(Y(Q("mode"), Q(mode))));
+
+        switch (TableType) {
+            case ETableType::TableStore:
+                settings = L(settings, Q(Y(Q("tableType"), Q("tableStore"))));
+                break;
+            case ETableType::ExternalTable:
+                settings = L(settings, Q(Y(Q("tableType"), Q("externalTable"))));
+                break;
+            case ETableType::Table:
+                // A plain table carries no tableType setting.
+                break;
+        }
+
+        Add("block", Q(Y(
+            Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx))),
+            Y("let", "world", Y(TString(WriteName), "world", "sink", dropKeys, Y("Void"), Q(settings))),
+            Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+        )));
+
+        return TAstListNode::DoInit(ctx, fakeSource);
+    }
+
+    // Cloning is not implemented for this node: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return nullptr;
+    }
+private:
+    TTableRef Table;
+    ETableType TableType;
+    TScopedStatePtr Scoped;
+    TSourcePtr FakeSource;
+    const bool MissingOk;
+};
+
+// Factory for a TDropTableNode dropping the referenced table.
+TNodePtr BuildDropTable(
+    TPosition pos,
+    const TTableRef& tr,
+    bool missingOk,
+    ETableType tableType,
+    TScopedStatePtr scoped)
+{
+    TNodePtr dropTable = new TDropTableNode(pos, tr, missingOk, tableType, scoped);
+    return dropTable;
+}
+
+
+// Builds the description of a topic consumer as a list of two quoted pairs:
+// ("name", consumer name) and ("settings", settings list).
+// The settings list holds "important" when present, and for ReadFromTs and
+// SupportedCodecs either a set<Name> entry with the value or, when the
+// setting is present without a value, a reset<Name> entry. A reset is only
+// accepted when alter is true; otherwise YQL_ENSURE fails.
+static INode::TPtr CreateConsumerDesc(const TTopicConsumerDescription& desc, const INode& node, bool alter) {
+    auto settings = node.Y();
+
+    // Appends the set/reset entry for one resettable setting, if it is present.
+    const auto addResettable = [&](const auto& setting, const char* setKey, const char* resetKey) {
+        if (!setting) {
+            return;
+        }
+        if (setting.IsSet()) {
+            settings = node.L(settings, node.Q(node.Y(node.Q(setKey), setting.GetValueSet())));
+        } else if (alter) {
+            settings = node.L(settings, node.Q(node.Y(node.Q(resetKey), node.Q(node.Y()))));
+        } else {
+            YQL_ENSURE(false, "Cannot reset on create");
+        }
+    };
+
+    const auto& consumerSettings = desc.Settings;
+    if (consumerSettings.Important) {
+        settings = node.L(settings, node.Q(node.Y(node.Q("important"), consumerSettings.Important)));
+    }
+    addResettable(consumerSettings.ReadFromTs, "setReadFromTs", "resetReadFromTs");
+    addResettable(consumerSettings.SupportedCodecs, "setSupportedCodecs", "resetSupportedCodecs");
+
+    auto namePair = node.Q(node.Y(node.Q("name"), BuildQuotedAtom(desc.Name.Pos, desc.Name.Name)));
+    auto settingsPair = node.Q(node.Y(node.Q("settings"), node.Q(settings)));
+    return node.Y(namePair, settingsPair);
+}
+
+// AST node for topic creation: emits a write into the Kikimr provider sink
+// carrying the create mode, the consumers and the topic settings.
+class TCreateTopicNode final: public TAstListNode {
+public:
+    TCreateTopicNode(TPosition pos, const TTopicRef& tr, const TCreateTopicParameters& params, TScopedStatePtr scoped)
+        : TAstListNode(pos)
+        , Topic(tr)
+        , Params(params)
+        , Scoped(scoped)
+    {
+        scoped->UseCluster(TString(KikimrProviderName), Topic.Cluster);
+    }
+
+    // Builds the "block" that writes the topic description to the sink.
+    // Returns false when the keys cannot be built or initialized, or when a
+    // consumer name occurs more than once.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto topicKeys = Topic.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE);
+        if (!topicKeys) {
+            return false;
+        }
+        if (!topicKeys->Init(ctx, src)) {
+            return false;
+        }
+
+        // Every consumer must be declared at most once.
+        THashSet<TString> seenConsumers;
+        for (const auto& consumer : Params.Consumers) {
+            const bool isNew = seenConsumers.insert(consumer.Name.Name).second;
+            if (!isNew) {
+                ctx.Error(consumer.Name.Pos) << "Consumer " << consumer.Name.Name << " defined more than once";
+                return false;
+            }
+        }
+
+        const TString mode = Params.ExistingOk ? "create_if_not_exists" : "create";
+        auto opts = L(Y(), Q(Y(Q("mode"), Q(mode))));
+
+        for (const auto& consumer : Params.Consumers) {
+            const auto& consumerDesc = CreateConsumerDesc(consumer, *this, false);
+            opts = L(opts, Q(Y(Q("consumer"), Q(consumerDesc))));
+        }
+
+        if (Params.TopicSettings.IsSet()) {
+            auto settings = Y();
+
+// Adds a set<NAME> entry for a topic setting that is present; a present
+// setting without a value (a reset) is rejected on create.
+#define ADD_TOPIC_SETTING(NAME) \
+            if (const auto& NAME##Setting = Params.TopicSettings.NAME) { \
+                if (!NAME##Setting.IsSet()) { \
+                    YQL_ENSURE(false, "Can't reset on create"); \
+                } else { \
+                    settings = L(settings, Q(Y(Q(Y_STRINGIZE(set##NAME)), NAME##Setting.GetValueSet()))); \
+                } \
+            }
+
+            ADD_TOPIC_SETTING(MaxPartitions)
+            ADD_TOPIC_SETTING(MinPartitions)
+            ADD_TOPIC_SETTING(RetentionPeriod)
+            ADD_TOPIC_SETTING(SupportedCodecs)
+            ADD_TOPIC_SETTING(PartitionWriteSpeed)
+            ADD_TOPIC_SETTING(PartitionWriteBurstSpeed)
+            ADD_TOPIC_SETTING(MeteringMode)
+            ADD_TOPIC_SETTING(AutoPartitioningStabilizationWindow)
+            ADD_TOPIC_SETTING(AutoPartitioningUpUtilizationPercent)
+            ADD_TOPIC_SETTING(AutoPartitioningDownUtilizationPercent)
+            ADD_TOPIC_SETTING(AutoPartitioningStrategy)
+
+#undef ADD_TOPIC_SETTING
+
+            opts = L(opts, Q(Y(Q("topicSettings"), Q(settings))));
+        }
+
+        Add("block", Q(Y(
+            Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)),
+                               Scoped->WrapCluster(Topic.Cluster, ctx))),
+            Y("let", "world", Y(TString(WriteName), "world", "sink", topicKeys, Y("Void"), Q(opts))),
+            Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+        )));
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Cloning is not implemented for this node: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return nullptr;
+    }
+private:
+    const TTopicRef Topic;
+    const TCreateTopicParameters Params;
+    TScopedStatePtr Scoped;
+};
+
+ // Factory: wraps the arguments into a freshly allocated TCreateTopicNode.
+ TNodePtr BuildCreateTopic(TPosition pos, const TTopicRef& tr, const TCreateTopicParameters& params, TScopedStatePtr scoped) {
+     TNodePtr node = new TCreateTopicNode(pos, tr, params, scoped);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" writing a topic request
+ // with mode "alter" / "alter_if_exists" to the Kikimr data sink.
+ class TAlterTopicNode final: public TAstListNode {
+ public:
+     TAlterTopicNode(TPosition pos, const TTopicRef& tr, const TAlterTopicParameters& params, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Topic(tr)
+         , Params(params)
+         , Scoped(scoped)
+     {
+         scoped->UseCluster(TString(KikimrProviderName), Topic.Cluster);
+     }
+
+     // Builds the topic keys, rejects consumers mentioned twice within the
+     // add / alter / drop lists (each list is checked separately, in that
+     // order), collects the options and appends the resulting block.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         auto keys = Topic.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::CREATE);
+         if (!keys) {
+             return false;
+         }
+         if (!keys->Init(ctx, src)) {
+             return false;
+         }
+
+         {
+             THashSet<TString> added;
+             for (const auto& consumer : Params.AddConsumers) {
+                 const bool isNew = added.insert(consumer.Name.Name).second;
+                 if (!isNew) {
+                     ctx.Error(consumer.Name.Pos) << "Consumer " << consumer.Name.Name << " defined more than once";
+                     return false;
+                 }
+             }
+         }
+         {
+             THashSet<TString> altered;
+             for (const auto& [_, consumer] : Params.AlterConsumers) {
+                 const bool isNew = altered.insert(consumer.Name.Name).second;
+                 if (!isNew) {
+                     ctx.Error(consumer.Name.Pos) << "Consumer " << consumer.Name.Name << " altered more than once";
+                     return false;
+                 }
+             }
+         }
+         {
+             THashSet<TString> dropped;
+             for (const auto& consumer : Params.DropConsumers) {
+                 const bool isNew = dropped.insert(consumer.Name).second;
+                 if (!isNew) {
+                     ctx.Error(consumer.Pos) << "Consumer " << consumer.Name << " dropped more than once";
+                     return false;
+                 }
+             }
+         }
+
+         const TString mode = Params.MissingOk ? "alter_if_exists" : "alter";
+         auto opts = L(Y(), Q(Y(Q("mode"), Q(mode))));
+
+         for (const auto& consumer : Params.AddConsumers) {
+             const auto& consumerDesc = CreateConsumerDesc(consumer, *this, false);
+             opts = L(opts, Q(Y(Q("addConsumer"), Q(consumerDesc))));
+         }
+
+         for (const auto& [_, consumer] : Params.AlterConsumers) {
+             const auto& consumerDesc = CreateConsumerDesc(consumer, *this, true);
+             opts = L(opts, Q(Y(Q("alterConsumer"), Q(consumerDesc))));
+         }
+
+         for (const auto& consumer : Params.DropConsumers) {
+             const auto consumerName = BuildQuotedAtom(consumer.Pos, consumer.Name);
+             opts = L(opts, Q(Y(Q("dropConsumer"), consumerName)));
+         }
+
+         if (Params.TopicSettings.IsSet()) {
+             auto settings = Y();
+
+ // Appends (set<NAME>, value) when the setting carries a value and
+ // (reset<NAME>, ()) when it is present without one.
+ #define APPEND_TOPIC_SETTING(NAME) \
+             if (const auto& NAME##Val = Params.TopicSettings.NAME) { \
+                 if (NAME##Val.IsSet()) { \
+                     settings = L(settings, Q(Y(Q(Y_STRINGIZE(set##NAME)), NAME##Val.GetValueSet()))); \
+                 } else { \
+                     settings = L(settings, Q(Y(Q(Y_STRINGIZE(reset##NAME)), Y()))); \
+                 } \
+             }
+
+             APPEND_TOPIC_SETTING(MaxPartitions)
+             APPEND_TOPIC_SETTING(MinPartitions)
+             APPEND_TOPIC_SETTING(RetentionPeriod)
+             APPEND_TOPIC_SETTING(SupportedCodecs)
+             APPEND_TOPIC_SETTING(PartitionWriteSpeed)
+             APPEND_TOPIC_SETTING(PartitionWriteBurstSpeed)
+             APPEND_TOPIC_SETTING(MeteringMode)
+             APPEND_TOPIC_SETTING(AutoPartitioningStabilizationWindow)
+             APPEND_TOPIC_SETTING(AutoPartitioningUpUtilizationPercent)
+             APPEND_TOPIC_SETTING(AutoPartitioningDownUtilizationPercent)
+             APPEND_TOPIC_SETTING(AutoPartitioningStrategy)
+
+ #undef APPEND_TOPIC_SETTING
+
+             opts = L(opts, Q(Y(Q("topicSettings"), Q(settings))));
+         }
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)),
+             Scoped->WrapCluster(Topic.Cluster, ctx)));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, src);
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     const TTopicRef Topic;
+     const TAlterTopicParameters Params;
+     TScopedStatePtr Scoped;
+ };
+
+ // Factory: wraps the arguments into a freshly allocated TAlterTopicNode.
+ TNodePtr BuildAlterTopic(TPosition pos, const TTopicRef& tr, const TAlterTopicParameters& params, TScopedStatePtr scoped) {
+     TNodePtr node = new TAlterTopicNode(pos, tr, params, scoped);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" writing a topic request
+ // with mode "drop" / "drop_if_exists" to the Kikimr data sink.
+ class TDropTopicNode final: public TAstListNode {
+ public:
+     TDropTopicNode(TPosition pos, const TTopicRef& tr, const TDropTopicParameters& params, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Topic(tr)
+         , Params(params)
+         , Scoped(scoped)
+     {
+         // FakeSource used to stay default-constructed, so DoInit always handed a
+         // null source to the children. Create it the same way the other nodes in
+         // this file that own a FakeSource do.
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(TString(KikimrProviderName), Topic.Cluster);
+     }
+
+     // Builds the topic keys in DROP mode, assembles the single "mode" option and
+     // appends the resulting block. The incoming source is ignored; FakeSource is
+     // used for all child initialization instead.
+     // Returns false when the keys cannot be built or initialized.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+         auto keys = Topic.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::DROP);
+         if (!keys || !keys->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         auto opts = Y();
+
+         TString mode = Params.MissingOk ? "drop_if_exists" : "drop";
+         opts = L(opts, Q(Y(Q("mode"), Q(mode))));
+
+         Add("block", Q(Y(
+             Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)),
+                 Scoped->WrapCluster(Topic.Cluster, ctx))),
+             Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))),
+             Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+         )));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     TTopicRef Topic;
+     TDropTopicParameters Params;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: wraps the arguments into a freshly allocated TDropTopicNode.
+ TNodePtr BuildDropTopic(TPosition pos, const TTopicRef& tr, const TDropTopicParameters& params, TScopedStatePtr scoped) {
+     TNodePtr node = new TDropTopicNode(pos, tr, params, scoped);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" writing a role key with
+ // mode "createUser" or "createGroup" (chosen by IsUser) to the given service sink.
+ class TCreateRole final: public TAstListNode {
+ public:
+     TCreateRole(TPosition pos, bool isUser, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , IsUser(isUser)
+         , Service(service)
+         , Cluster(cluster)
+         , Name(name)
+         , Params(params)
+         , Scoped(scoped)
+     {
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(service, cluster);
+     }
+
+     // Initializes name, cluster, optional password and optional roles against
+     // FakeSource (the incoming source is ignored), then appends the block.
+     // Returns false as soon as any of these sub-nodes fails to initialize.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+         auto roleName = Name.Build();
+         TNodePtr passwordNode;
+         if (Params && Params->Password) {
+             passwordNode = Params->Password->Build();
+         }
+         TNodePtr clusterNode = Scoped->WrapCluster(Cluster, ctx);
+
+         if (!roleName->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (!clusterNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (passwordNode && !passwordNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         TVector<TNodePtr> roleNodes;
+         if (Params) {
+             for (auto& role : Params->Roles) {
+                 roleNodes.push_back(role.Build());
+                 if (!roleNodes.back()->Init(ctx, FakeSource.Get())) {
+                     return false;
+                 }
+             }
+         }
+
+         const char* const modeName = IsUser ? "createUser" : "createGroup";
+         auto options = Y(Q(Y(Q("mode"), Q(modeName))));
+         if (Params) {
+             if (Params->IsPasswordEncrypted) {
+                 options = L(options, Q(Y(Q("passwordEncrypted"))));
+             }
+             // With parameters present, an absent password is recorded explicitly.
+             options = Params->Password
+                 ? L(options, Q(Y(Q("password"), passwordNode)))
+                 : L(options, Q(Y(Q("nullPassword"))));
+             if (!Params->Roles.empty()) {
+                 options = L(options, Q(Y(Q("roles"), Q(new TAstListNodeImpl(Pos, std::move(roleNodes))))));
+             }
+         }
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), clusterNode));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", roleName)))), Y("Void"), Q(options)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     const bool IsUser;
+     const TString Service;
+     TDeferredAtom Cluster;
+     TDeferredAtom Name;
+     const TMaybe<TRoleParameters> Params;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: a TCreateRole node with isUser == true.
+ TNodePtr BuildCreateUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped) {
+     return new TCreateRole(pos, /* isUser = */ true, service, cluster, name, params, scoped);
+ }
+
+ // Factory: a TCreateRole node with isUser == false.
+ TNodePtr BuildCreateGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TMaybe<TRoleParameters>& params, TScopedStatePtr scoped) {
+     return new TCreateRole(pos, /* isUser = */ false, service, cluster, name, params, scoped);
+ }
+
+ // List node that, once initialized, holds a "block" writing a role key with
+ // mode "alterUser" to the given service sink.
+ class TAlterUser final: public TAstListNode {
+ public:
+     TAlterUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TRoleParameters& params, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Service(service)
+         , Cluster(cluster)
+         , Name(name)
+         , Params(params)
+         , Scoped(scoped)
+     {
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(service, cluster);
+     }
+
+     // Initializes name, cluster and optional password against FakeSource (the
+     // incoming source is ignored), then appends the block.
+     // Returns false as soon as any of these sub-nodes fails to initialize.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+         auto userName = Name.Build();
+         TNodePtr passwordNode;
+         if (Params.Password) {
+             passwordNode = Params.Password->Build();
+         }
+         TNodePtr clusterNode = Scoped->WrapCluster(Cluster, ctx);
+
+         if (!userName->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (!clusterNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (passwordNode && !passwordNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         auto options = Y(Q(Y(Q("mode"), Q("alterUser"))));
+         if (Params.IsPasswordEncrypted) {
+             options = L(options, Q(Y(Q("passwordEncrypted"))));
+         }
+         // An absent password is recorded explicitly.
+         options = Params.Password
+             ? L(options, Q(Y(Q("password"), passwordNode)))
+             : L(options, Q(Y(Q("nullPassword"))));
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), clusterNode));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", userName)))), Y("Void"), Q(options)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     const TString Service;
+     TDeferredAtom Cluster;
+     TDeferredAtom Name;
+     const TRoleParameters Params;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: wraps the arguments into a freshly allocated TAlterUser node.
+ TNodePtr BuildAlterUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TRoleParameters& params, TScopedStatePtr scoped) {
+     TNodePtr node = new TAlterUser(pos, service, cluster, name, params, scoped);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" writing a role key with
+ // mode "renameUser" or "renameGroup" (chosen by IsUser) plus a "newName" option.
+ class TRenameRole final: public TAstListNode {
+ public:
+     TRenameRole(TPosition pos, bool isUser, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , IsUser(isUser)
+         , Service(service)
+         , Cluster(cluster)
+         , Name(name)
+         , NewName(newName)
+         , Scoped(scoped)
+     {
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(service, cluster);
+     }
+
+     // Initializes the old name, the new name and the cluster (in that order)
+     // against FakeSource, then appends the block. The incoming source is ignored.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+         auto oldNameNode = Name.Build();
+         auto newNameNode = NewName.Build();
+         TNodePtr clusterNode = Scoped->WrapCluster(Cluster, ctx);
+
+         if (!oldNameNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (!newNameNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (!clusterNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         const char* const modeName = IsUser ? "renameUser" : "renameGroup";
+         auto options = L(Y(Q(Y(Q("mode"), Q(modeName)))), Q(Y(Q("newName"), newNameNode)));
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), clusterNode));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", oldNameNode)))), Y("Void"), Q(options)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     const bool IsUser;
+     const TString Service;
+     TDeferredAtom Cluster;
+     TDeferredAtom Name;
+     TDeferredAtom NewName;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: a TRenameRole node with isUser == true.
+ TNodePtr BuildRenameUser(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped) {
+     return new TRenameRole(pos, /* isUser = */ true, service, cluster, name, newName, scoped);
+ }
+
+ // Factory: a TRenameRole node with isUser == false.
+ TNodePtr BuildRenameGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped) {
+     return new TRenameRole(pos, /* isUser = */ false, service, cluster, name, newName, scoped);
+ }
+
+ // List node that, once initialized, holds a "block" writing a role key with
+ // mode "dropUsersFromGroup" or "addUsersToGroup" (chosen by IsDrop) and a
+ // "roles" option listing the entries of ToChange.
+ class TAlterGroup final: public TAstListNode {
+ public:
+     TAlterGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TVector<TDeferredAtom>& toChange, bool isDrop, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Service(service)
+         , Cluster(cluster)
+         , Name(name)
+         , ToChange(toChange)
+         , IsDrop(isDrop)
+         , Scoped(scoped)
+     {
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(service, cluster);
+     }
+
+     // Initializes the group name, the cluster and every listed role against
+     // FakeSource, then appends the block. The incoming source is ignored.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+         auto groupName = Name.Build();
+         TNodePtr clusterNode = Scoped->WrapCluster(Cluster, ctx);
+
+         if (!groupName->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (!clusterNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         TVector<TNodePtr> memberNodes;
+         for (auto& member : ToChange) {
+             memberNodes.push_back(member.Build());
+             if (!memberNodes.back()->Init(ctx, FakeSource.Get())) {
+                 return false;
+             }
+         }
+
+         const char* const modeName = IsDrop ? "dropUsersFromGroup" : "addUsersToGroup";
+         auto options = L(
+             Y(Q(Y(Q("mode"), Q(modeName)))),
+             Q(Y(Q("roles"), Q(new TAstListNodeImpl(Pos, std::move(memberNodes))))));
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), clusterNode));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("role"), Y("String", groupName)))), Y("Void"), Q(options)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     const TString Service;
+     TDeferredAtom Cluster;
+     TDeferredAtom Name;
+     TVector<TDeferredAtom> ToChange;
+     const bool IsDrop;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: wraps the arguments into a freshly allocated TAlterGroup node.
+ TNodePtr BuildAlterGroup(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TVector<TDeferredAtom>& toChange, bool isDrop,
+     TScopedStatePtr scoped)
+ {
+     TNodePtr node = new TAlterGroup(pos, service, cluster, name, toChange, isDrop, scoped);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" with one role write per
+ // entry of ToDrop. The mode is one of "dropUser", "dropUserIfExists",
+ // "dropGroup", "dropGroupIfExists", chosen by IsUser and MissingOk.
+ class TDropRoles final: public TAstListNode {
+ public:
+     TDropRoles(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& toDrop, bool isUser, bool missingOk, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Service(service)
+         , Cluster(cluster)
+         , ToDrop(toDrop)
+         , IsUser(isUser)
+         , MissingOk(missingOk)
+         , Scoped(scoped)
+     {
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(service, cluster);
+     }
+
+     // Initializes the cluster and each role name against FakeSource and appends
+     // the block. The incoming source is ignored. Returns false on the first
+     // sub-node that fails to initialize.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+         TNodePtr clusterNode = Scoped->WrapCluster(Cluster, ctx);
+         if (!clusterNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         const char* modeName = nullptr;
+         if (IsUser) {
+             modeName = MissingOk ? "dropUserIfExists" : "dropUser";
+         } else {
+             modeName = MissingOk ? "dropGroupIfExists" : "dropGroup";
+         }
+
+         // The same options node is shared by every write in the block.
+         auto options = Y(Q(Y(Q("mode"), Q(modeName))));
+
+         auto block = Y(Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), clusterNode)));
+         for (auto& role : ToDrop) {
+             auto roleName = role.Build();
+             if (!roleName->Init(ctx, FakeSource.Get())) {
+                 return false;
+             }
+
+             auto roleKey = Y("Key", Q(Y(Q("role"), Y("String", roleName))));
+             block = L(block, Y("let", "world", Y(TString(WriteName), "world", "sink", roleKey, Y("Void"), Q(options))));
+         }
+         block = L(block, Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")));
+         Add("block", Q(block));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     const TString Service;
+     TDeferredAtom Cluster;
+     TVector<TDeferredAtom> ToDrop;
+     const bool IsUser;
+     const bool MissingOk;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: TUpsertObject with both boolean arguments false and an empty string set.
+ TNodePtr BuildUpsertObjectOperation(TPosition pos, const TString& objectId, const TString& typeId,
+     std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context)
+ {
+     TNodePtr node = new TUpsertObject(pos, objectId, typeId, false, false, std::move(features), std::set<TString>{}, context);
+     return node;
+ }
+
+ // Factory: TCreateObject forwarding existingOk / replaceIfExists, with an empty string set.
+ TNodePtr BuildCreateObjectOperation(TPosition pos, const TString& objectId, const TString& typeId,
+     bool existingOk, bool replaceIfExists, std::map<TString, TDeferredAtom>&& features, const TObjectOperatorContext& context)
+ {
+     TNodePtr node = new TCreateObject(pos, objectId, typeId, existingOk, replaceIfExists, std::move(features), std::set<TString>{}, context);
+     return node;
+ }
+
+ // Factory: TAlterObject with both boolean arguments false, forwarding featuresToReset.
+ TNodePtr BuildAlterObjectOperation(TPosition pos, const TString& secretId, const TString& typeId,
+     std::map<TString, TDeferredAtom>&& features, std::set<TString>&& featuresToReset, const TObjectOperatorContext& context)
+ {
+     TNodePtr node = new TAlterObject(pos, secretId, typeId, false, false, std::move(features), std::move(featuresToReset), context);
+     return node;
+ }
+
+ // Factory: TDropObject forwarding missingOk, second boolean false, with an empty string set.
+ TNodePtr BuildDropObjectOperation(TPosition pos, const TString& secretId, const TString& typeId,
+     bool missingOk, std::map<TString, TDeferredAtom>&& options, const TObjectOperatorContext& context)
+ {
+     TNodePtr node = new TDropObject(pos, secretId, typeId, missingOk, false, std::move(options), std::set<TString>{}, context);
+     return node;
+ }
+
+ // Factory: wraps the arguments into a freshly allocated TDropRoles node.
+ TNodePtr BuildDropRoles(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& toDrop, bool isUser, bool missingOk, TScopedStatePtr scoped) {
+     TNodePtr node = new TDropRoles(pos, service, cluster, toDrop, isUser, missingOk, scoped);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" writing a "permission" key
+ // (whose value is Parameters.PermissionAction) with "paths", "permissions" and
+ // "roles" options to the given service sink.
+ class TPermissionsAction final : public TAstListNode {
+ public:
+     struct TPermissionParameters {
+         TString PermissionAction;
+         TVector<TDeferredAtom> Permissions;
+         TVector<TDeferredAtom> SchemaPaths;
+         TVector<TDeferredAtom> RoleNames;
+     };
+
+     TPermissionsAction(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TPermissionParameters& parameters, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Service(service)
+         , Cluster(cluster)
+         , Parameters(parameters)
+         , Scoped(scoped)
+     {
+         FakeSource = BuildFakeSource(pos);
+         scoped->UseCluster(service, cluster);
+     }
+
+     // Initializes the action atom, the cluster and the three atom lists against
+     // FakeSource (paths, then permissions, then roles) and appends the block.
+     // The incoming source is ignored.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Y_UNUSED(src);
+
+         TNodePtr clusterNode = Scoped->WrapCluster(Cluster, ctx);
+         TNodePtr actionNode = TDeferredAtom(Pos, Parameters.PermissionAction).Build();
+
+         if (!actionNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+         if (!clusterNode->Init(ctx, FakeSource.Get())) {
+             return false;
+         }
+
+         TNodePtr pathList = BuildInitializedList(ctx, Parameters.SchemaPaths);
+         if (!pathList) {
+             return false;
+         }
+         auto options = Y(Q(Y(Q("paths"), Q(pathList))));
+
+         TNodePtr permissionList = BuildInitializedList(ctx, Parameters.Permissions);
+         if (!permissionList) {
+             return false;
+         }
+         options = L(options, Q(Y(Q("permissions"), Q(permissionList))));
+
+         TNodePtr roleList = BuildInitializedList(ctx, Parameters.RoleNames);
+         if (!roleList) {
+             return false;
+         }
+         options = L(options, Q(Y(Q("roles"), Q(roleList))));
+
+         auto actionKey = Y("Key", Q(Y(Q("permission"), Y("String", actionNode))));
+         auto block = Y(Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), clusterNode)));
+         block = L(block, Y("let", "world", Y(TString(WriteName), "world", "sink", actionKey, Y("Void"), Q(options))));
+         block = L(block, Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")));
+         Add("block", Q(block));
+
+         return TAstListNode::DoInit(ctx, FakeSource.Get());
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+
+ private:
+     // Builds every atom, initializes it against FakeSource and packs the results
+     // into one list node. Yields an empty pointer on the first failed Init.
+     TNodePtr BuildInitializedList(TContext& ctx, const TVector<TDeferredAtom>& atoms) {
+         TVector<TNodePtr> nodes;
+         nodes.reserve(atoms.size());
+         for (auto& atom : atoms) {
+             nodes.push_back(atom.Build());
+             if (!nodes.back()->Init(ctx, FakeSource.Get())) {
+                 return {};
+             }
+         }
+         return new TAstListNodeImpl(Pos, std::move(nodes));
+     }
+
+     const TString Service;
+     TDeferredAtom Cluster;
+     TPermissionParameters Parameters;
+     TScopedStatePtr Scoped;
+     TSourcePtr FakeSource;
+ };
+
+ // Factory: a TPermissionsAction node whose PermissionAction is "grant".
+ TNodePtr BuildGrantPermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleNames, TScopedStatePtr scoped) {
+     const TPermissionsAction::TPermissionParameters grantParameters{
+         .PermissionAction = "grant",
+         .Permissions = permissions,
+         .SchemaPaths = schemaPaths,
+         .RoleNames = roleNames};
+     return new TPermissionsAction(pos, service, cluster, grantParameters, scoped);
+ }
+
+ // Factory: a TPermissionsAction node whose PermissionAction is "revoke".
+ TNodePtr BuildRevokePermissions(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TVector<TDeferredAtom>& permissions, const TVector<TDeferredAtom>& schemaPaths, const TVector<TDeferredAtom>& roleNames, TScopedStatePtr scoped) {
+     const TPermissionsAction::TPermissionParameters revokeParameters{
+         .PermissionAction = "revoke",
+         .Permissions = permissions,
+         .SchemaPaths = schemaPaths,
+         .RoleNames = roleNames};
+     return new TPermissionsAction(pos, service, cluster, revokeParameters, scoped);
+ }
+
+ // Common base of the async-replication nodes. DoInit writes a "replication"
+ // key for Id to the ServiceId sink; the options start with the "mode" entry
+ // and are completed by the subclass through FillOptions.
+ class TAsyncReplication
+     : public TAstListNode
+     , protected TObjectOperatorContext
+ {
+ protected:
+     // Receives the options list holding only the "mode" entry and returns the
+     // list that is actually written.
+     virtual INode::TPtr FillOptions(INode::TPtr options) const = 0;
+
+ public:
+     explicit TAsyncReplication(TPosition pos, const TString& id, const TString& mode, const TObjectOperatorContext& context)
+         : TAstListNode(pos)
+         , TObjectOperatorContext(context)
+         , Id(id)
+         , Mode(mode)
+     {
+     }
+
+     // Registers the cluster, assembles key and options and appends the block.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         Scoped->UseCluster(ServiceId, Cluster);
+
+         auto replicationKey = Y("Key", Q(Y(Q("replication"), Y("String", BuildQuotedAtom(Pos, Id)))));
+         auto modeOnly = Y(Q(Y(Q("mode"), Q(Mode))));
+         auto options = FillOptions(modeOnly);
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx)));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", replicationKey, Y("Void"), Q(options)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, src);
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+
+ private:
+     const TString Id;
+     const TString Mode;
+
+ }; // TAsyncReplication
+
+ // Async-replication node with mode "create"; adds "targets" and "settings"
+ // options when the corresponding containers are non-empty.
+ class TCreateAsyncReplication final: public TAsyncReplication {
+ public:
+     explicit TCreateAsyncReplication(TPosition pos, const TString& id,
+             std::vector<std::pair<TString, TString>>&& targets,
+             std::map<TString, TNodePtr>&& settings,
+             const TObjectOperatorContext& context)
+         : TAsyncReplication(pos, id, "create", context)
+         , Targets(std::move(targets))
+         , Settings(std::move(settings))
+     {
+     }
+
+ protected:
+     // Appends ("targets", ((remote, local)...)) and ("settings", ...) to options.
+     INode::TPtr FillOptions(INode::TPtr options) const override {
+         if (!Targets.empty()) {
+             auto targetList = Y();
+             for (const auto& [remotePath, localPath] : Targets) {
+                 auto entry = L(
+                     L(Y(), Q(Y(Q("remote"), Q(remotePath)))),
+                     Q(Y(Q("local"), Q(localPath))));
+                 targetList = L(targetList, Q(entry));
+             }
+             options = L(options, Q(Y(Q("targets"), Q(targetList))));
+         }
+
+         if (!Settings.empty()) {
+             auto settingList = Y();
+             for (const auto& [key, value] : Settings) {
+                 // A setting without a value node is emitted as a one-element tuple.
+                 auto entry = value
+                     ? Y(BuildQuotedAtom(Pos, key), value)
+                     : Y(BuildQuotedAtom(Pos, key));
+                 settingList = L(settingList, Q(entry));
+             }
+             options = L(options, Q(Y(Q("settings"), Q(settingList))));
+         }
+
+         return options;
+     }
+
+ private:
+     std::vector<std::pair<TString, TString>> Targets; // (remote, local)
+     std::map<TString, TNodePtr> Settings;
+
+ }; // TCreateAsyncReplication
+
+ // Factory: moves targets and settings into a new TCreateAsyncReplication node.
+ TNodePtr BuildCreateAsyncReplication(TPosition pos, const TString& id,
+     std::vector<std::pair<TString, TString>>&& targets,
+     std::map<TString, TNodePtr>&& settings,
+     const TObjectOperatorContext& context)
+ {
+     TNodePtr node = new TCreateAsyncReplication(pos, id, std::move(targets), std::move(settings), context);
+     return node;
+ }
+
+ // Async-replication node with mode "dropCascade" when cascade is set and
+ // "drop" otherwise; contributes no further options.
+ class TDropAsyncReplication final: public TAsyncReplication {
+ public:
+     explicit TDropAsyncReplication(TPosition pos, const TString& id, bool cascade, const TObjectOperatorContext& context)
+         : TAsyncReplication(pos, id, cascade ? "dropCascade" : "drop", context)
+     {
+     }
+
+ protected:
+     // Hands the options back unchanged.
+     INode::TPtr FillOptions(INode::TPtr options) const override {
+         return options;
+     }
+
+ }; // TDropAsyncReplication
+
+ // Factory: wraps the arguments into a freshly allocated TDropAsyncReplication node.
+ TNodePtr BuildDropAsyncReplication(TPosition pos, const TString& id, bool cascade, const TObjectOperatorContext& context) {
+     TNodePtr node = new TDropAsyncReplication(pos, id, cascade, context);
+     return node;
+ }
+
+ // Async-replication node with mode "alter"; adds a "settings" option when
+ // Settings is non-empty.
+ class TAlterAsyncReplication final: public TAsyncReplication {
+ public:
+     explicit TAlterAsyncReplication(TPosition pos, const TString& id,
+             std::map<TString, TNodePtr>&& settings,
+             const TObjectOperatorContext& context)
+         : TAsyncReplication(pos, id, "alter", context)
+         , Settings(std::move(settings))
+     {
+     }
+
+ protected:
+     // Appends ("settings", ...) to options; with no settings the list is returned as is.
+     INode::TPtr FillOptions(INode::TPtr options) const override {
+         if (Settings.empty()) {
+             return options;
+         }
+
+         auto settingList = Y();
+         for (const auto& [key, value] : Settings) {
+             // A setting without a value node is emitted as a one-element tuple.
+             auto entry = value
+                 ? Y(BuildQuotedAtom(Pos, key), value)
+                 : Y(BuildQuotedAtom(Pos, key));
+             settingList = L(settingList, Q(entry));
+         }
+         return L(options, Q(Y(Q("settings"), Q(settingList))));
+     }
+
+ private:
+     std::map<TString, TNodePtr> Settings;
+
+ }; // TAlterAsyncReplication
+
+ // Factory: moves the settings into a new TAlterAsyncReplication node.
+ TNodePtr BuildAlterAsyncReplication(TPosition pos, const TString& id,
+     std::map<TString, TNodePtr>&& settings,
+     const TObjectOperatorContext& context)
+ {
+     TNodePtr node = new TAlterAsyncReplication(pos, id, std::move(settings), context);
+     return node;
+ }
+
+ // Write-mode names used when the table service is neither Kikimr/YDB nor Stat
+ // (see the table selection in TWriteTableNode::DoInit).
+ static const TMap<EWriteColumnMode, TString> columnModeToStrMapMR {
+     {EWriteColumnMode::Default, ""},
+     {EWriteColumnMode::Insert,  "append"},
+     {EWriteColumnMode::Renew,   "renew"}
+ };
+
+ // Write-mode names used for the Stat provider.
+ static const TMap<EWriteColumnMode, TString> columnModeToStrMapStat {
+     {EWriteColumnMode::Upsert, "upsert"}
+ };
+
+ // Write-mode names used for the Kikimr and YDB providers.
+ // Insert and InsertOrAbort share the same name.
+ static const TMap<EWriteColumnMode, TString> columnModeToStrMapKikimr {
+     {EWriteColumnMode::Default,        ""},
+     {EWriteColumnMode::Insert,         "insert_abort"},
+     {EWriteColumnMode::InsertOrAbort,  "insert_abort"},
+     {EWriteColumnMode::InsertOrIgnore, "insert_ignore"},
+     {EWriteColumnMode::InsertOrRevert, "insert_revert"},
+     {EWriteColumnMode::Upsert,         "upsert"},
+     {EWriteColumnMode::Replace,        "replace"},
+     {EWriteColumnMode::Update,         "update"},
+     {EWriteColumnMode::UpdateOn,       "update_on"},
+     {EWriteColumnMode::Delete,         "delete"},
+     {EWriteColumnMode::DeleteOn,       "delete_on"},
+ };
+
+ // List node that, once initialized, holds a "block" writing Label to the
+ // table's sink, with the caller-supplied options plus an optional "mode" entry.
+ class TWriteTableNode final: public TAstListNode {
+ public:
+     TWriteTableNode(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode,
+             TNodePtr options, TScopedStatePtr scoped)
+         : TAstListNode(pos)
+         , Label(label)
+         , Table(table)
+         , Mode(mode)
+         , Options(options)
+         , Scoped(scoped)
+     {
+         scoped->UseCluster(Table.Service, Table.Cluster);
+     }
+
+     // Builds the table keys in WRITE mode, initializes the supplied options,
+     // translates Mode into its provider-specific name and appends the block.
+     // Returns false when the keys or the options fail to initialize.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         auto keys = Table.Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::WRITE);
+         if (!keys) {
+             return false;
+         }
+         if (!keys->Init(ctx, src)) {
+             return false;
+         }
+
+         auto options = Y();
+         if (Options) {
+             if (!Options->Init(ctx, src)) {
+                 return false;
+             }
+
+             options = L(Options);
+         }
+
+         if (Mode != EWriteColumnMode::Default) {
+             // Pick the name table that matches the table's service.
+             const TString& serviceName = Table.Service;
+             const TMap<EWriteColumnMode, TString>* modeNames = &columnModeToStrMapMR;
+             if (serviceName == KikimrProviderName || serviceName == YdbProviderName) {
+                 modeNames = &columnModeToStrMapKikimr;
+             } else if (serviceName == StatProviderName) {
+                 modeNames = &columnModeToStrMapStat;
+             }
+
+             // A mode missing from the chosen table is written as "unsupported".
+             const auto knownName = modeNames->FindPtr(Mode);
+             options->Add(Q(Y(Q("mode"), Q(knownName ? *knownName : "unsupported"))));
+         }
+
+         auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Table.Service), Scoped->WrapCluster(Table.Cluster, ctx)));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Label, Q(options)));
+         auto resultDecl = Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"));
+         Add("block", Q(Y(sinkDecl, writeDecl, resultDecl)));
+
+         return TAstListNode::DoInit(ctx, src);
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     TString Label;
+     TTableRef Table;
+     EWriteColumnMode Mode;
+     TNodePtr Options;
+     TScopedStatePtr Scoped;
+ };
+
+ // Factory: wraps the arguments into a freshly allocated TWriteTableNode.
+ TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options,
+     TScopedStatePtr scoped)
+ {
+     TNodePtr node = new TWriteTableNode(pos, label, table, mode, std::move(options), scoped);
+     return node;
+ }
+
+ // Base for nodes whose block consists of a single subclass-provided operation
+ // followed by (return world).
+ class TClustersSinkOperationBase: public TAstListNode {
+ protected:
+     TClustersSinkOperationBase(TPosition pos)
+         : TAstListNode(pos)
+     {}
+
+     // Supplies the operation placed first in the block; an empty pointer makes DoInit fail.
+     virtual TPtr ProduceOperation() = 0;
+
+     // Appends the block built from the produced operation, then initializes the list.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         auto operation = ProduceOperation();
+         if (!operation) {
+             return false;
+         }
+
+         auto block = L(L(Y(), operation), Y("return", "world"));
+         Add("block", Q(block));
+
+         return TAstListNode::DoInit(ctx, src);
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ };
+
+ // Sink operation that rebinds world to (CommitAll! world).
+ class TCommitClustersNode: public TClustersSinkOperationBase {
+ public:
+     TCommitClustersNode(TPosition pos)
+         : TClustersSinkOperationBase(pos)
+     {
+     }
+
+     TPtr ProduceOperation() override {
+         auto commitAll = Y("CommitAll!", "world");
+         return Y("let", "world", commitAll);
+     }
+ };
+
+ // Factory: a freshly allocated TCommitClustersNode at the given position.
+ TNodePtr BuildCommitClusters(TPosition pos) {
+     TNodePtr node = new TCommitClustersNode(pos);
+     return node;
+ }
+
+ // Sink operation that rebinds world to CommitAll! invoked with the
+ // ("mode", "rollback") setting.
+ class TRollbackClustersNode: public TClustersSinkOperationBase {
+ public:
+     TRollbackClustersNode(TPosition pos)
+         : TClustersSinkOperationBase(pos)
+     {
+     }
+
+     TPtr ProduceOperation() override {
+         auto rollbackMode = Q(Y(Q("mode"), Q("rollback")));
+         auto commitSettings = Q(Y(rollbackMode));
+         return Y("let", "world", Y("CommitAll!", "world", commitSettings));
+     }
+ };
+
+ // Factory: a freshly allocated TRollbackClustersNode at the given position.
+ TNodePtr BuildRollbackClusters(TPosition pos) {
+     TNodePtr node = new TRollbackClustersNode(pos);
+     return node;
+ }
+
+ // List node that, once initialized, holds a "block" writing Label with
+ // Settings to the result-provider sink and returning a commit of that sink.
+ class TWriteResultNode final: public TAstListNode {
+ public:
+     TWriteResultNode(TPosition pos, const TString& label, TNodePtr settings)
+         : TAstListNode(pos)
+         , Label(label)
+         , Settings(settings)
+         , CommitClusters(BuildCommitClusters(Pos))
+     {}
+
+     // Assembles the block; with PragmaAutoCommit set, world is additionally
+     // rebound to the CommitClusters node before the final commit.
+     bool DoInit(TContext& ctx, ISource* src) override {
+         auto sinkDecl = Y("let", "result_sink", Y("DataSink", Q(TString(ResultProviderName))));
+         auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "result_sink", Y("Key"), Label, Q(Settings)));
+         auto block = Y(sinkDecl, writeDecl);
+
+         if (ctx.PragmaAutoCommit) {
+             block = L(block, Y("let", "world", CommitClusters));
+         }
+
+         auto commitResult = Y(TString(CommitName), "world", "result_sink");
+         block = L(block, Y("return", commitResult));
+         Add("block", Q(block));
+         return TAstListNode::DoInit(ctx, src);
+     }
+
+     // Cloning is not provided: an empty pointer is returned.
+     TPtr DoClone() const final {
+         return {};
+     }
+ private:
+     TString Label;
+     TNodePtr Settings;
+     TNodePtr CommitClusters;
+ };
+
+ // Factory: wraps the arguments into a freshly allocated TWriteResultNode.
+ TNodePtr BuildWriteResult(TPosition pos, const TString& label, TNodePtr settings) {
+     TNodePtr node = new TWriteResultNode(pos, label, settings);
+     return node;
+ }
+
+class TYqlProgramNode: public TAstListNode { // List node for a whole program: DoInit emits the preamble (top level only), the statement blocks and the trailing return.
+public:
+ TYqlProgramNode(TPosition pos, const TVector<TNodePtr>& blocks, bool topLevel, TScopedStatePtr scoped)
+ : TAstListNode(pos)
+ , Blocks(blocks)
+ , TopLevel(topLevel)
+ , Scoped(scoped)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override { // Fills the node list; returns false if any child Init call failed (processing continues past failures).
+ bool hasError = false;
+ if (TopLevel) { // The preamble below is emitted only when TopLevel is true.
+ for (auto& var: ctx.Variables) { // One "declare" entry per element of ctx.Variables.
+ if (!var.second.second->Init(ctx, src)) {
+ hasError = true;
+ continue;
+ }
+ Add(Y(
+ "declare",
+ new TAstAtomNodeImpl(var.second.first, var.first, TNodeFlags::ArbitraryContent),
+ var.second.second));
+ }
+
+ for (const auto& overrideLibrary: ctx.OverrideLibraries) {
+ auto node = Y(
+ "override_library",
+ new TAstAtomNodeImpl(
+ std::get<TPosition>(overrideLibrary.second),
+ overrideLibrary.first, TNodeFlags::ArbitraryContent
+ ));
+
+ Add(node);
+ }
+
+ for (const auto& package: ctx.Packages) {
+ const auto& [url, urlPosition] = std::get<1U>(package.second);
+
+ auto node = Y(
+ "package",
+ new TAstAtomNodeImpl(
+ std::get<TPosition>(package.second), package.first,
+ TNodeFlags::ArbitraryContent
+ ),
+ new TAstAtomNodeImpl(urlPosition, url, TNodeFlags::ArbitraryContent));
+
+ if (const auto& tokenWithPosition = std::get<2U>(package.second)) { // The token atom is appended only when the optional third tuple element is set.
+ const auto& [token, tokenPosition] = *tokenWithPosition;
+
+ node = L(node, new TAstAtomNodeImpl(tokenPosition, token, TNodeFlags::ArbitraryContent));
+ }
+
+ Add(node);
+ }
+
+ for (const auto& lib : ctx.Libraries) {
+ auto node = Y("library", new TAstAtomNodeImpl(std::get<TPosition>(lib.second), lib.first, TNodeFlags::ArbitraryContent));
+ if (const auto& first = std::get<1U>(lib.second)) { // The second optional atom is considered only when the first one is present.
+ node = L(node, new TAstAtomNodeImpl(first->second, first->first, TNodeFlags::ArbitraryContent));
+ if (const auto& second = std::get<2U>(lib.second)) {
+ node = L(node, new TAstAtomNodeImpl(second->second, second->first, TNodeFlags::ArbitraryContent));
+ }
+ }
+
+ Add(node);
+ }
+
+ for (const auto& p : ctx.PackageVersions) {
+ Add(Y("set_package_version", BuildQuotedAtom(Pos, p.first), BuildQuotedAtom(Pos, ToString(p.second))));
+ }
+
+ Add(Y("import", "aggregate_module", BuildQuotedAtom(Pos, "/lib/yql/aggregate.yql")));
+ Add(Y("import", "window_module", BuildQuotedAtom(Pos, "/lib/yql/window.yql")));
+ for (const auto& module : ctx.Settings.ModuleMapping) { // Import alias is the mapping key plus "_module", lowercased.
+ TString moduleName(module.first + "_module");
+ moduleName.to_lower();
+ Add(Y("import", moduleName, BuildQuotedAtom(Pos, module.second)));
+ }
+ for (const auto& moduleAlias : ctx.ImportModuleAliases) {
+ Add(Y("import", moduleAlias.second, BuildQuotedAtom(Pos, moduleAlias.first)));
+ }
+
+ for (const auto& x : ctx.SimpleUdfs) {
+ Add(Y("let", x.second, Y("Udf", BuildQuotedAtom(Pos, x.first))));
+ }
+
+ if (!ctx.CompactNamedExprs) {
+ for (auto& nodes: Scoped->NamedNodes) {
+ if (src || ctx.Exports.contains(nodes.first)) { // Named nodes are initialised only when a source is supplied or the name is exported.
+ auto& item = nodes.second.front();
+ if (!item->Node->Init(ctx, src)) {
+ hasError = true;
+ continue;
+ }
+
+ // Some constants may be used directly by YQL code and need to be translated without reference from SQL AST
+ if (item->Node->IsConstant() || ctx.Exports.contains(nodes.first)) {
+ Add(Y("let", BuildAtom(item->Node->GetPos(), nodes.first), item->Node));
+ }
+ }
+ }
+ }
+
+ if (ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIBRARY) { // All Configure calls below are skipped in LIBRARY mode.
+ auto configSource = Y("DataSource", BuildQuotedAtom(Pos, TString(ConfigProviderName)));
+ auto resultSink = Y("DataSink", BuildQuotedAtom(Pos, TString(ResultProviderName)));
+
+ for (const auto& warningPragma : ctx.WarningPolicy.GetRules()) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "Warning"), BuildQuotedAtom(Pos, warningPragma.GetPattern()),
+ BuildQuotedAtom(Pos, to_lower(ToString(warningPragma.GetAction()))))));
+ }
+
+ if (ctx.ResultSizeLimit > 0) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", resultSink,
+ BuildQuotedAtom(Pos, "SizeLimit"), BuildQuotedAtom(Pos, ToString(ctx.ResultSizeLimit)))));
+ }
+
+ if (!ctx.PragmaPullUpFlatMapOverJoin) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "DisablePullUpFlatMapOverJoin"))));
+ }
+
+ if (ctx.FilterPushdownOverJoinOptionalSide) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "FilterPushdownOverJoinOptionalSide"))));
+ }
+
+ if (!ctx.RotateJoinTree) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "RotateJoinTree"), BuildQuotedAtom(Pos, "false"))));
+ }
+
+ if (ctx.DiscoveryMode) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "DiscoveryMode"))));
+ }
+
+ if (ctx.DqEngineEnable) { // Mode is "disable" when PqReadByRtmrCluster is set to anything but "dq", else "force" if DqEngineForce, else "auto".
+ TString mode = "auto";
+ if (ctx.PqReadByRtmrCluster && ctx.PqReadByRtmrCluster != "dq") {
+ mode = "disable";
+ } else if (ctx.DqEngineForce) {
+ mode = "force";
+ }
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "DqEngine"), BuildQuotedAtom(Pos, mode))));
+ }
+
+ if (ctx.CostBasedOptimizer) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "CostBasedOptimizer"), BuildQuotedAtom(Pos, ctx.CostBasedOptimizer))));
+ }
+
+ if (ctx.JsonQueryReturnsJsonDocument.Defined()) { // Emitted only when the flag was set explicitly; its value selects the pragma name.
+ TString pragmaName = "DisableJsonQueryReturnsJsonDocument";
+ if (*ctx.JsonQueryReturnsJsonDocument) {
+ pragmaName = "JsonQueryReturnsJsonDocument";
+ }
+
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, pragmaName))));
+ }
+
+ if (ctx.OrderedColumns) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "OrderedColumns"))));
+ }
+
+ if (ctx.PqReadByRtmrCluster) { // The same attribute is configured on the "$all" cluster of both the PQ and the RTMR providers.
+ auto pqSourceAll = Y("DataSource", BuildQuotedAtom(Pos, TString(PqProviderName)), BuildQuotedAtom(Pos, "$all"));
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", pqSourceAll,
+ BuildQuotedAtom(Pos, "Attr"), BuildQuotedAtom(Pos, "PqReadByRtmrCluster_"), BuildQuotedAtom(Pos, ctx.PqReadByRtmrCluster))));
+
+ auto rtmrSourceAll = Y("DataSource", BuildQuotedAtom(Pos, TString(RtmrProviderName)), BuildQuotedAtom(Pos, "$all"));
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", rtmrSourceAll,
+ BuildQuotedAtom(Pos, "Attr"), BuildQuotedAtom(Pos, "PqReadByRtmrCluster_"), BuildQuotedAtom(Pos, ctx.PqReadByRtmrCluster))));
+
+ if (ctx.PqReadByRtmrCluster != "dq") {
+ // set any dynamic settings for particular RTMR cluster for CommitAll!
+ auto rtmrSource = Y("DataSource", BuildQuotedAtom(Pos, TString(RtmrProviderName)), BuildQuotedAtom(Pos, ctx.PqReadByRtmrCluster));
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", rtmrSource,
+ BuildQuotedAtom(Pos, "Attr"), BuildQuotedAtom(Pos, "Dummy_"), BuildQuotedAtom(Pos, "1"))));
+ }
+ }
+
+ if (ctx.YsonCastToString.Defined()) {
+ const TString pragmaName = *ctx.YsonCastToString ? "YsonCastToString" : "DisableYsonCastToString";
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, pragmaName))));
+ }
+
+ if (ctx.UseBlocks) {
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource, BuildQuotedAtom(Pos, "UseBlocks"))));
+ }
+
+ if (ctx.BlockEngineEnable) {
+ TString mode = ctx.BlockEngineForce ? "force" : "auto";
+ Add(Y("let", "world", Y(TString(ConfigureName), "world", configSource,
+ BuildQuotedAtom(Pos, "BlockEngine"), BuildQuotedAtom(Pos, mode))));
+ }
+ }
+ }
+
+ for (auto& block: Blocks) { // First pass: Init every block that has no subquery alias; nothing is added to the list yet.
+ if (block->SubqueryAlias()) {
+ continue;
+ }
+ if (!block->Init(ctx, nullptr)) {
+ hasError = true;
+ continue;
+ }
+ }
+
+ for (const auto& x : Scoped->Local.ExprClusters) { // Bind each expression cluster recorded in the local scope to its name.
+ auto& data = Scoped->Local.ExprClustersMap[x.Get()];
+ auto& node = data.second;
+
+ if (!node->Init(ctx, nullptr)) {
+ hasError = true;
+ continue;
+ }
+
+ Add(Y("let", data.first, node));
+ }
+
+ for (auto& block: Blocks) { // Second pass: append the blocks in their original order.
+ const auto subqueryAliasPtr = block->SubqueryAlias();
+ if (subqueryAliasPtr) {
+ if (block->UsedSubquery()) { // Subquery blocks are emitted only when UsedSubquery() is true; element 0 of the alias rebinds world, element 1 binds the label.
+ const auto& ref = block->GetLabel();
+ YQL_ENSURE(!ref.empty());
+ Add(block);
+ Add(Y("let", "world", Y("Nth", *subqueryAliasPtr, Q("0"))));
+ Add(Y("let", ref, Y("Nth", *subqueryAliasPtr, Q("1"))));
+ }
+ } else {
+ const auto& ref = block->GetLabel();
+ Add(Y("let", ref ? ref : "world", block)); // Blocks without a label are bound to "world".
+ }
+ }
+
+ if (TopLevel) {
+ if (ctx.UniversalAliases) { // Rebuild the list so that the universal alias bindings precede everything collected so far.
+ decltype(Nodes) preparedNodes;
+ preparedNodes.swap(Nodes);
+ for (const auto& [name, node] : ctx.UniversalAliases) {
+ Add(Y("let", name, node));
+ }
+ Nodes.insert(Nodes.end(), preparedNodes.begin(), preparedNodes.end());
+ }
+
+ decltype(Nodes) imports;
+ for (const auto& [alias, path]: ctx.RequiredModules) {
+ imports.push_back(Y("import", alias, BuildQuotedAtom(Pos, path)));
+ }
+ Nodes.insert(Nodes.begin(), std::make_move_iterator(imports.begin()), std::make_move_iterator(imports.end())); // Required-module imports go to the very front of the list.
+
+ for (const auto& symbol: ctx.Exports) {
+ if (ctx.CompactNamedExprs) { // With CompactNamedExprs the exported node was not bound in the preamble, so it is looked up and bound here.
+ auto node = Scoped->LookupNode(symbol);
+ YQL_ENSURE(node);
+ if (!node->Init(ctx, src)) {
+ hasError = true;
+ continue;
+ }
+ Add(Y("let", BuildAtom(node->GetPos(), symbol), node));
+ }
+ Add(Y("export", symbol));
+ }
+ }
+
+ if (!TopLevel || ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIBRARY) { // Only a top-level program in LIBRARY mode omits the trailing "return world".
+ Add(Y("return", "world"));
+ }
+
+ return !hasError;
+ }
+
+ TPtr DoClone() const final { // Cloning is not supported: returns an empty pointer.
+ return {};
+ }
+private:
+ TVector<TNodePtr> Blocks;
+ const bool TopLevel;
+ TScopedStatePtr Scoped;
+};
+
+TNodePtr BuildQuery(TPosition pos, const TVector<TNodePtr>& blocks, bool topLevel, TScopedStatePtr scoped) { // Factory: returns a new TYqlProgramNode over the given blocks.
+ return new TYqlProgramNode(pos, blocks, topLevel, scoped);
+}
+
+// Node for a single pragma. DoInit assembles a ConfigureName call on "world"
+// against the data source selected by Prefix, followed by arguments whose
+// layout depends on the pragma Name; Translate forwards to that inner node.
+class TPragmaNode final: public INode {
+public:
+    TPragmaNode(TPosition pos, const TString& prefix, const TString& name, const TVector<TDeferredAtom>& values, bool valueDefault)
+        : INode(pos)
+        , Prefix(prefix)
+        , Name(name)
+        , Values(values)
+        , ValueDefault(valueDefault)
+    {
+        FakeSource = BuildFakeSource(pos);
+    }
+
+    // Builds and initialises the inner Configure node. The incoming source is
+    // ignored; the node is initialised against the private fake source.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        Y_UNUSED(src);
+
+        TString cluster;
+        TString serviceName;
+        const bool prefixIsProvider =
+            std::find(Providers.cbegin(), Providers.cend(), Prefix) != Providers.cend();
+        if (prefixIsProvider) {
+            // A bare provider name addresses all of its clusters.
+            cluster = "$all";
+            serviceName = Prefix;
+        } else {
+            serviceName = *ctx.GetClusterProvider(Prefix, cluster);
+        }
+
+        auto datasource = Y("DataSource", BuildQuotedAtom(Pos, serviceName));
+        if (Prefix != ConfigProviderName) {
+            datasource = L(datasource, BuildQuotedAtom(Pos, cluster));
+        }
+
+        // Small helpers that grow Node one argument at a time.
+        const auto append = [this](const TNodePtr& arg) {
+            Node = L(Node, arg);
+        };
+        const auto appendAllValues = [&] {
+            for (auto& value : Values) {
+                append(value.Build());
+            }
+        };
+        const auto firstValueOrEmpty = [&] {
+            return Values.empty() ? BuildQuotedAtom(Pos, TString()) : Values.front().Build();
+        };
+
+        Node = Y();
+        append(AstNode(TString(ConfigureName)));
+        append(AstNode(TString(TStringBuf("world"))));
+        append(datasource);
+
+        const bool isNamedMultiValue =
+            Name == TStringBuf("AddFileByUrl") ||
+            Name == TStringBuf("SetFileOption") ||
+            Name == TStringBuf("AddFolderByUrl") ||
+            Name == TStringBuf("ImportUdfs") ||
+            Name == TStringBuf("SetPackageVersion");
+
+        if (Name == TStringBuf("flags")) {
+            appendAllValues();
+        } else if (isNamedMultiValue) {
+            append(BuildQuotedAtom(Pos, Name));
+            appendAllValues();
+        } else if (Name == TStringBuf("auth")) {
+            append(BuildQuotedAtom(Pos, "Auth"));
+            append(firstValueOrEmpty());
+        } else {
+            append(BuildQuotedAtom(Pos, "Attr"));
+            append(BuildQuotedAtom(Pos, Name));
+            if (!ValueDefault) {
+                append(firstValueOrEmpty());
+            }
+        }
+
+        return Node->Init(ctx, FakeSource.Get());
+    }
+
+    TAstNode* Translate(TContext& ctx) const final {
+        return Node->Translate(ctx);
+    }
+
+    // Cloning is not supported: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return {};
+    }
+
+private:
+    TString Prefix;
+    TString Name;
+    TVector<TDeferredAtom> Values;
+    bool ValueDefault;
+    TNodePtr Node;
+    TSourcePtr FakeSource;
+};
+
+TNodePtr BuildPragma(TPosition pos, const TString& prefix, const TString& name, const TVector<TDeferredAtom>& values, bool valueDefault) { // Factory: returns a new TPragmaNode.
+ return new TPragmaNode(pos, prefix, name, values, valueDefault);
+}
+
+// Lambda node. DoInit turns the expression sequence into
+// "lambda <quoted args> <block>": every expression but the last becomes a
+// "let" binding under its own label and the last one is the block's result.
+class TSqlLambda final: public TAstListNode {
+public:
+    // Takes ownership of both vectors. The parameters are rvalue references,
+    // so they are moved into the members instead of being copied.
+    TSqlLambda(TPosition pos, TVector<TString>&& args, TVector<TNodePtr>&& exprSeq)
+        : TAstListNode(pos)
+        , Args(std::move(args))
+        , ExprSeq(std::move(exprSeq))
+    {
+        FakeSource = BuildFakeSource(pos);
+    }
+
+    // Initialises every expression against the private fake source (the
+    // incoming source is ignored) and then assembles the lambda list.
+    // Returns false as soon as one expression fails to initialise.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        Y_UNUSED(src);
+        for (auto& exprPtr: ExprSeq) {
+            if (!exprPtr->Init(ctx, FakeSource.Get())) {
+                return false;
+            }
+        }
+        // The sequence must be non-empty: its last element is the result.
+        YQL_ENSURE(!ExprSeq.empty());
+        auto body = Y();
+        auto end = ExprSeq.end() - 1;
+        for (auto iter = ExprSeq.begin(); iter != end; ++iter) {
+            auto exprPtr = *iter;
+            const auto& label = exprPtr->GetLabel();
+            // Every non-final expression is bound by name, so it needs a label.
+            YQL_ENSURE(label);
+            body = L(body, Y("let", label, exprPtr));
+        }
+        body = Y("block", Q(L(body, Y("return", *end))));
+        auto args = Y();
+        for (const auto& arg: Args) {
+            args = L(args, BuildAtom(GetPos(), arg));
+        }
+        Add("lambda", Q(args), body);
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Clones the argument names and the expression sequence.
+    TPtr DoClone() const final {
+        return new TSqlLambda(Pos, TVector<TString>(Args), CloneContainer(ExprSeq));
+    }
+
+    // Marks the node state as Const.
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const);
+    }
+
+private:
+    TVector<TString> Args;
+    TVector<TNodePtr> ExprSeq;
+    TSourcePtr FakeSource;
+};
+
+TNodePtr BuildSqlLambda(TPosition pos, TVector<TString>&& args, TVector<TNodePtr>&& exprSeq) { // Factory: forwards both vectors as rvalues into a new TSqlLambda.
+ return new TSqlLambda(pos, std::move(args), std::move(exprSeq));
+}
+
+// Conditional world node. DoInit emits "If!" (or "EvaluateIf!" when
+// IsEvaluate is set), "world", the predicate coalesced with Bool false, the
+// then-branch and, when present, the else-branch.
+class TWorldIf final : public TAstListNode {
+public:
+    TWorldIf(TPosition pos, TNodePtr predicate, TNodePtr thenNode, TNodePtr elseNode, bool isEvaluate)
+        : TAstListNode(pos)
+        , Predicate(predicate)
+        , ThenNode(thenNode)
+        , ElseNode(elseNode)
+        , IsEvaluate(isEvaluate)
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+    // Predicate and branches are initialised against the private fake source;
+    // any failure aborts with false. Children are added in emission order.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        ISource* const fake = FakeSource.Get();
+
+        if (!Predicate->Init(ctx, fake)) {
+            return false;
+        }
+        Add(IsEvaluate ? "EvaluateIf!" : "If!");
+        Add("world");
+
+        auto condition = Y("Coalesce", Predicate, Y("Bool", Q("false")));
+        if (IsEvaluate) {
+            // The evaluated form additionally pins the predicate type to Bool.
+            Add(Y("EvaluateExpr", Y("EnsureType", condition, Y("DataType", Q("Bool")))));
+        } else {
+            Add(condition);
+        }
+
+        if (!ThenNode->Init(ctx, fake)) {
+            return false;
+        }
+        Add(ThenNode);
+
+        if (ElseNode) {
+            if (!ElseNode->Init(ctx, fake)) {
+                return false;
+            }
+            Add(ElseNode);
+        }
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TPtr DoClone() const final {
+        return new TWorldIf(GetPos(), SafeClone(Predicate), SafeClone(ThenNode), SafeClone(ElseNode), IsEvaluate);
+    }
+
+private:
+    TNodePtr Predicate;
+    TNodePtr ThenNode;
+    TNodePtr ElseNode;
+    bool IsEvaluate;
+    TSourcePtr FakeSource;
+};
+
+TNodePtr BuildWorldIfNode(TPosition pos, TNodePtr predicate, TNodePtr thenNode, TNodePtr elseNode, bool isEvaluate) { // Factory: returns a new TWorldIf.
+ return new TWorldIf(pos, predicate, thenNode, elseNode, isEvaluate);
+}
+
+// Loop world node. The callable name is "For!" prefixed with "Evaluate"
+// and/or "Parallel" according to the flags; it is followed by "world", the
+// list (wrapped in EvaluateExpr when IsEvaluate), the body and, when present,
+// the else-node.
+class TWorldFor final : public TAstListNode {
+public:
+    TWorldFor(TPosition pos, TNodePtr list, TNodePtr bodyNode, TNodePtr elseNode, bool isEvaluate, bool isParallel)
+        : TAstListNode(pos)
+        , List(list)
+        , BodyNode(bodyNode)
+        , ElseNode(elseNode)
+        , IsEvaluate(isEvaluate)
+        , IsParallel(isParallel)
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+    // List, body and else-node are initialised against the private fake
+    // source; any failure aborts with false.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        ISource* const fake = FakeSource.Get();
+
+        if (!List->Init(ctx, fake)) {
+            return false;
+        }
+        Add(TStringBuilder() << (IsEvaluate ? "Evaluate": "") << (IsParallel ? "Parallel" : "") << "For!");
+        Add("world");
+        if (IsEvaluate) {
+            Add(Y("EvaluateExpr", List));
+        } else {
+            Add(List);
+        }
+
+        if (!BodyNode->Init(ctx, fake)) {
+            return false;
+        }
+        Add(BodyNode);
+
+        if (ElseNode) {
+            if (!ElseNode->Init(ctx, fake)) {
+                return false;
+            }
+            Add(ElseNode);
+        }
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TPtr DoClone() const final {
+        return new TWorldFor(GetPos(), SafeClone(List), SafeClone(BodyNode), SafeClone(ElseNode), IsEvaluate, IsParallel);
+    }
+
+private:
+    TNodePtr List;
+    TNodePtr BodyNode;
+    TNodePtr ElseNode;
+    bool IsEvaluate;
+    bool IsParallel;
+    TSourcePtr FakeSource;
+};
+
+TNodePtr BuildWorldForNode(TPosition pos, TNodePtr list, TNodePtr bodyNode, TNodePtr elseNode, bool isEvaluate, bool isParallel) { // Factory: returns a new TWorldFor.
+ return new TWorldFor(pos, list, bodyNode, elseNode, isEvaluate, isParallel);
+}
+
+// List node for an analyze statement: writes to the service's data sink with
+// the table keys, a "columns" option listing Params.Columns and mode
+// "analyze". The constructor registers the cluster in the scoped state.
+class TAnalyzeNode final: public TAstListNode {
+public:
+    TAnalyzeNode(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped)
+        : TAstListNode(pos)
+        , Service(service)
+        , Cluster(cluster)
+        , Params(params)
+        , Scoped(scoped)
+    {
+        FakeSource = BuildFakeSource(pos);
+        scoped->UseCluster(Service, Cluster);
+    }
+
+    // Builds the quoted "block" body. The incoming source is ignored; keys and
+    // the base-class init both run against the private fake source.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        Y_UNUSED(src);
+
+        auto keys = Params.Table->Keys->GetTableKeys()->BuildKeys(ctx, ITableKeys::EBuildKeysMode::DROP);
+        if (!keys || !keys->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+
+        auto columns = Y();
+        for (const auto& column: Params.Columns) {
+            columns->Add(Q(column));
+        }
+
+        auto opts = Y();
+        opts->Add(Q(Y(Q("columns"), Q(columns))));
+        opts->Add(Q(Y(Q("mode"), Q("analyze"))));
+
+        auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, Service), Scoped->WrapCluster(Cluster, ctx)));
+        auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts)));
+        // With auto-commit the block returns the committed sink, otherwise the plain world.
+        auto result = ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world");
+
+        Add("block", Q(Y(sinkDecl, writeDecl, Y("return", result))));
+
+        return TAstListNode::DoInit(ctx, FakeSource.Get());
+    }
+
+    // Cloning is not supported: an empty pointer is returned.
+    TPtr DoClone() const final {
+        return {};
+    }
+private:
+    TString Service;
+    TDeferredAtom Cluster;
+    TAnalyzeParams Params;
+
+    TScopedStatePtr Scoped;
+    TSourcePtr FakeSource;
+};
+
+TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped) { // Factory: returns a new TAnalyzeNode.
+ return new TAnalyzeNode(pos, service, cluster, params, scoped);
+}
+
+// Common base of the backup-collection statement nodes. DoInit writes to the
+// data sink with key ("backupCollection", Id); the option list is supplied by
+// the subclass through FillOptions.
+class TBaseBackupCollectionNode
+    : public TAstListNode
+    , public TObjectOperatorContext
+{
+    using TBase = TAstListNode;
+public:
+    TBaseBackupCollectionNode(
+        TPosition pos,
+        const TString& objectId,
+        const TObjectOperatorContext& context)
+        : TBase(pos)
+        , TObjectOperatorContext(context)
+        , Id(objectId)
+    {}
+
+    // Builds the quoted "block" body and delegates to TAstListNode::DoInit.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        auto collectionKey = Q(Y(Q("backupCollection"), Y("String", BuildQuotedAtom(Pos, Id))));
+        auto keys = L(Y("Key"), collectionKey);
+        auto options = this->FillOptions(ctx, Y());
+
+        auto sinkDecl = Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx)));
+        auto writeDecl = Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(options)));
+        // With auto-commit the block returns the committed sink, otherwise the plain world.
+        auto result = ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world");
+
+        Add("block", Q(Y(sinkDecl, writeDecl, Y("return", result))));
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    // Appends the statement-specific options to the given list and returns it.
+    virtual INode::TPtr FillOptions(TContext& ctx, INode::TPtr options) const = 0;
+
+protected:
+    TString Id;
+};
+
+// Backup-collection node for the "create" mode: contributes the settings and
+// the list of entries (optional database entry plus one entry per table).
+class TCreateBackupCollectionNode
+    : public TBaseBackupCollectionNode
+{
+    using TBase = TBaseBackupCollectionNode;
+public:
+    TCreateBackupCollectionNode(
+        TPosition pos,
+        const TString& objectId,
+        const TCreateBackupCollectionParameters& params,
+        const TObjectOperatorContext& context)
+        : TBase(pos, objectId, context)
+        , Params(params)
+    {}
+
+    // Adds, in order: mode "create", "settings" and "entries".
+    virtual INode::TPtr FillOptions(TContext& ctx, INode::TPtr options) const final {
+        options->Add(Q(Y(Q("mode"), Q("create"))));
+
+        auto settings = Y();
+        for (auto& [key, value] : Params.Settings) {
+            auto setting = Y(BuildQuotedAtom(Pos, key), value.Build());
+            settings->Add(Q(setting));
+        }
+        options->Add(Q(Y(Q("settings"), Q(settings))));
+
+        auto entries = Y();
+        if (Params.Database) {
+            auto databaseType = Q(Y(Q("type"), Q("database")));
+            entries->Add(Q(Y(databaseType)));
+        }
+        for (auto& table : Params.Tables) {
+            // Table paths are resolved through the context's prefixed-path lookup.
+            auto path = ctx.GetPrefixedPath(ServiceId, Cluster, table);
+            auto tableType = Q(Y(Q("type"), Q("table")));
+            auto tablePath = Q(Y(Q("path"), path));
+            entries->Add(Q(Y(tableType, tablePath)));
+        }
+        options->Add(Q(Y(Q("entries"), Q(entries))));
+
+        return options;
+    }
+
+    TPtr DoClone() const final {
+        return new TCreateBackupCollectionNode(GetPos(), Id, Params, *this);
+    }
+
+private:
+    TCreateBackupCollectionParameters Params;
+};
+
+// Backup-collection node for the "alter" mode: contributes the settings to
+// set, the settings to reset and the list of entry changes.
+class TAlterBackupCollectionNode
+    : public TBaseBackupCollectionNode
+{
+    using TBase = TBaseBackupCollectionNode;
+public:
+    TAlterBackupCollectionNode(
+        TPosition pos,
+        const TString& objectId,
+        const TAlterBackupCollectionParameters& params,
+        const TObjectOperatorContext& context)
+        : TBase(pos, objectId, context)
+        , Params(params)
+    {}
+
+    // Adds, in order: mode "alter", "settings", "resetSettings" and
+    // "alterEntries".
+    virtual INode::TPtr FillOptions(TContext& ctx, INode::TPtr options) const final {
+        using EDatabase = TAlterBackupCollectionParameters::EDatabase;
+
+        options->Add(Q(Y(Q("mode"), Q("alter"))));
+
+        auto settings = Y();
+        for (auto& [key, value] : Params.Settings) {
+            auto setting = Y(BuildQuotedAtom(Pos, key), value.Build());
+            settings->Add(Q(setting));
+        }
+        options->Add(Q(Y(Q("settings"), Q(settings))));
+
+        auto resetSettings = Y();
+        for (auto& key : Params.SettingsToReset) {
+            resetSettings->Add(BuildQuotedAtom(Pos, key));
+        }
+        options->Add(Q(Y(Q("resetSettings"), Q(resetSettings))));
+
+        // Quoted ("type" <kind>) and ("action" <verb>) pairs shared by all entries.
+        const auto typeAttr = [this](const char* kind) {
+            return Q(Y(Q("type"), Q(kind)));
+        };
+        const auto actionAttr = [this](const char* verb) {
+            return Q(Y(Q("action"), Q(verb)));
+        };
+
+        auto entries = Y();
+        if (Params.Database != EDatabase::Unchanged) {
+            const char* verb = Params.Database == EDatabase::Add ? "add" : "drop";
+            entries->Add(Q(Y(typeAttr("database"), actionAttr(verb))));
+        }
+
+        // One entry per table; added tables come before dropped ones.
+        const auto appendTables = [&](const auto& tables, const char* verb) {
+            for (auto& table : tables) {
+                auto path = ctx.GetPrefixedPath(ServiceId, Cluster, table);
+                entries->Add(Q(Y(typeAttr("table"), Q(Y(Q("path"), path)), actionAttr(verb))));
+            }
+        };
+        appendTables(Params.TablesToAdd, "add");
+        appendTables(Params.TablesToDrop, "drop");
+
+        options->Add(Q(Y(Q("alterEntries"), Q(entries))));
+
+        return options;
+    }
+
+    TPtr DoClone() const final {
+        return new TAlterBackupCollectionNode(GetPos(), Id, Params, *this);
+    }
+
+private:
+    TAlterBackupCollectionParameters Params;
+};
+
+// Backup-collection node for the "drop" mode. The parameters object carries
+// nothing this node reads, so it is accepted and discarded.
+class TDropBackupCollectionNode
+    : public TBaseBackupCollectionNode
+{
+    using TBase = TBaseBackupCollectionNode;
+public:
+    TDropBackupCollectionNode(
+        TPosition pos,
+        const TString& objectId,
+        const TDropBackupCollectionParameters&,
+        const TObjectOperatorContext& context)
+        : TBase(pos, objectId, context)
+    {}
+
+    // Adds the single mode "drop" option.
+    virtual INode::TPtr FillOptions(TContext&, INode::TPtr options) const final {
+        auto dropMode = Q(Y(Q("mode"), Q("drop")));
+        options->Add(dropMode);
+        return options;
+    }
+
+    TPtr DoClone() const final {
+        return new TDropBackupCollectionNode(GetPos(), Id, TDropBackupCollectionParameters(), *this);
+    }
+};
+
+TNodePtr BuildCreateBackupCollection(TPosition pos, const TString& id, // Factory: returns a new TCreateBackupCollectionNode.
+ const TCreateBackupCollectionParameters& params,
+ const TObjectOperatorContext& context)
+{
+ return new TCreateBackupCollectionNode(pos, id, params, context);
+}
+
+TNodePtr BuildAlterBackupCollection(TPosition pos, const TString& id, // Factory: returns a new TAlterBackupCollectionNode.
+ const TAlterBackupCollectionParameters& params,
+ const TObjectOperatorContext& context)
+{
+ return new TAlterBackupCollectionNode(pos, id, params, context);
+}
+
+TNodePtr BuildDropBackupCollection(TPosition pos, const TString& id, // Factory: returns a new TDropBackupCollectionNode.
+ const TDropBackupCollectionParameters& params,
+ const TObjectOperatorContext& context)
+{
+ return new TDropBackupCollectionNode(pos, id, params, context);
+}
+
+// List node for a backup statement: writes to the data sink with key
+// ("backup", Id) and mode "backup", plus an "incremental" option when
+// Params.Incremental is set.
+class TBackupNode final
+    : public TAstListNode
+    , public TObjectOperatorContext
+{
+    using TBase = TAstListNode;
+public:
+    // Params is read in DoInit, so it is not marked as unused here.
+    TBackupNode(
+        TPosition pos,
+        const TString& id,
+        const TBackupParameters& params,
+        const TObjectOperatorContext& context)
+        : TBase(pos)
+        , TObjectOperatorContext(context)
+        , Id(id)
+        , Params(params)
+    {
+    }
+
+    // Builds the quoted "block" body and delegates to TAstListNode::DoInit.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto keys = Y("Key");
+        keys = L(keys, Q(Y(Q("backup"), Y("String", BuildQuotedAtom(Pos, Id)))));
+
+        auto opts = Y();
+        opts->Add(Q(Y(Q("mode"), Q("backup"))));
+
+        if (Params.Incremental) {
+            opts->Add(Q(Y(Q("incremental"))));
+        }
+
+        // With auto-commit the block returns the committed sink, otherwise the plain world.
+        Add("block", Q(Y(
+            Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))),
+            Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))),
+            Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+        )));
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TPtr DoClone() const final {
+        return new TBackupNode(GetPos(), Id, Params, *this);
+    }
+private:
+    TString Id;
+    TBackupParameters Params;
+};
+
+TNodePtr BuildBackup(TPosition pos, const TString& id, // Factory: returns a new TBackupNode.
+ const TBackupParameters& params,
+ const TObjectOperatorContext& context)
+{
+ return new TBackupNode(pos, id, params, context);
+}
+
+// List node for a restore statement: writes to the data sink with key
+// ("restore", Id) and mode "restore", plus an "at" option when Params.At is
+// non-empty.
+class TRestoreNode final
+    : public TAstListNode
+    , public TObjectOperatorContext
+{
+    using TBase = TAstListNode;
+public:
+    // Params is read in DoInit, so it is not marked as unused here.
+    TRestoreNode(
+        TPosition pos,
+        const TString& id,
+        const TRestoreParameters& params,
+        const TObjectOperatorContext& context)
+        : TBase(pos)
+        , TObjectOperatorContext(context)
+        , Id(id)
+        , Params(params)
+    {
+    }
+
+    // Builds the quoted "block" body and delegates to TAstListNode::DoInit.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        auto keys = Y("Key");
+        keys = L(keys, Q(Y(Q("restore"), Y("String", BuildQuotedAtom(Pos, Id)))));
+
+        auto opts = Y();
+        opts->Add(Q(Y(Q("mode"), Q("restore"))));
+
+        if (Params.At) {
+            opts->Add(Q(Y(Q("at"), BuildQuotedAtom(Pos, Params.At))));
+        }
+
+        // With auto-commit the block returns the committed sink, otherwise the plain world.
+        Add("block", Q(Y(
+            Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, ServiceId), Scoped->WrapCluster(Cluster, ctx))),
+            Y("let", "world", Y(TString(WriteName), "world", "sink", keys, Y("Void"), Q(opts))),
+            Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world"))
+        )));
+
+        return TAstListNode::DoInit(ctx, src);
+    }
+
+    TPtr DoClone() const final {
+        return new TRestoreNode(GetPos(), Id, Params, *this);
+    }
+private:
+    TString Id;
+    TRestoreParameters Params;
+};
+
+TNodePtr BuildRestore(TPosition pos, const TString& id, // Factory: returns a new TRestoreNode.
+ const TRestoreParameters& params,
+ const TObjectOperatorContext& context)
+{
+ return new TRestoreNode(pos, id, params, context);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/select.cpp b/yql/essentials/sql/v1/select.cpp
new file mode 100644
index 00000000000..4eee1efb266
--- /dev/null
+++ b/yql/essentials/sql/v1/select.cpp
@@ -0,0 +1,3195 @@
+#include "sql.h"
+#include "source.h"
+
+#include "context.h"
+#include "match_recognize.h"
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/utils/yql_panic.h>
+
+#include <library/cpp/charset/ci_string.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+// Node that binds a subquery to an alias. DoInit builds
+// "let <Alias> (block ...)" where the block declares the input tables and
+// returns a quoted ("world", <built source>) list.
+class TSubqueryNode: public INode {
+public:
+    TSubqueryNode(TSourcePtr&& source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped)
+        : INode(source->GetPos())
+        , Source(std::move(source))
+        , Alias(alias)
+        , InSubquery(inSubquery)
+        , EnsureTupleSize(ensureTupleSize)
+        , Scoped(scoped)
+    {
+        YQL_ENSURE(!Alias.empty());
+    }
+
+    ISource* GetSource() override {
+        return Source.Get();
+    }
+
+    // Must be called without an outer source. Marks the wrapped source as
+    // inner, initialises it, builds it and stores the resulting binding in
+    // Node. IsUsed becomes true only after a successful build.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        YQL_ENSURE(!src, "Source not expected for subquery node");
+
+        Source->UseAsInner();
+        if (!Source->Init(ctx, nullptr)) {
+            return false;
+        }
+
+        TTableList inputTables;
+        Source->GetInputTables(inputTables);
+
+        auto tablesNode = BuildInputTables(Pos, inputTables, InSubquery, Scoped);
+        if (!tablesNode->Init(ctx, Source.Get())) {
+            return false;
+        }
+
+        auto built = Source->Build(ctx);
+        if (!built) {
+            return false;
+        }
+        // -1 means no tuple-size check was requested.
+        if (EnsureTupleSize != -1) {
+            built = Y("EnsureTupleSize", built, Q(ToString(EnsureTupleSize)));
+        }
+
+        auto blockBody = L(tablesNode, Y("return", Q(Y("world", built))));
+        Node = Y("let", Alias, Y("block", Q(blockBody)));
+        IsUsed = true;
+        return true;
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, true);
+    }
+
+    bool UsedSubquery() const override {
+        return IsUsed;
+    }
+
+    // Requires a prior successful DoInit (checked in debug builds only).
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    const TString* SubqueryAlias() const override {
+        return &Alias;
+    }
+
+    TPtr DoClone() const final {
+        return new TSubqueryNode(Source->CloneSource(), Alias, InSubquery, EnsureTupleSize, Scoped);
+    }
+
+protected:
+    TSourcePtr Source;
+    TNodePtr Node;
+    const TString Alias;
+    const bool InSubquery;
+    const int EnsureTupleSize;
+    bool IsUsed = false;
+    TScopedStatePtr Scoped;
+};
+
+TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped) { // Factory: moves source into a new TSubqueryNode.
+ return new TSubqueryNode(std::move(source), alias, inSubquery, ensureTupleSize, scoped);
+}
+
+// Node that exposes a source as an expression. When initialised under an
+// outer source and treated as a subquery (see IsSubquery), the built source
+// is reduced to the single member of its first row.
+class TSourceNode: public INode {
+public:
+    TSourceNode(TPosition pos, TSourcePtr&& source, bool checkExist)
+        : INode(pos)
+        , Source(std::move(source))
+        , CheckExist(checkExist)
+    {}
+
+    ISource* GetSource() override {
+        return Source.Get();
+    }
+
+    // Initialises and builds the wrapped source. With an outer source present
+    // the wrapped source is also registered as its dependent.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (AsInner) {
+            Source->UseAsInner();
+        }
+        if (!Source->Init(ctx, src)) {
+            return false;
+        }
+
+        Node = Source->Build(ctx);
+        if (!Node) {
+            return false;
+        }
+
+        if (!src) {
+            return true;
+        }
+
+        if (IsSubquery()) {
+            // NOTE(review): the original carries a "should be not used?" remark on this branch.
+            auto columnsPtr = Source->GetColumns();
+            const bool singleColumn = columnsPtr &&
+                (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1);
+            if (!singleColumn) {
+                ctx.Error(Pos) << "Source used in expression should contain one concrete column";
+                return false;
+            }
+            auto firstRow = Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0")));
+            Node = Y("SingleMember", firstRow);
+        }
+
+        src->AddDependentSource(Source.Get());
+        return true;
+    }
+
+    // True for a SELECT source that is neither inner nor an existence check.
+    bool IsSubquery() const {
+        return !AsInner && Source->IsSelect() && !CheckExist;
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, IsSubquery());
+    }
+
+    // Requires a prior successful DoInit (checked in debug builds only).
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    TPtr DoClone() const final {
+        return new TSourceNode(Pos, Source->CloneSource(), CheckExist);
+    }
+protected:
+    TSourcePtr Source;
+    TNodePtr Node;
+    bool CheckExist;
+};
+
+TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist) { // Factory: moves source into a new TSourceNode.
+ return new TSourceNode(pos, std::move(source), checkExist);
+}
+
+class TFakeSource: public ISource {
+public:
+ TFakeSource(TPosition pos, bool missingFrom, bool inSubquery)
+ : ISource(pos)
+ , MissingFrom(missingFrom)
+ , InSubquery(inSubquery)
+ {}
+
+ bool IsFake() const override {
+ return true;
+ }
+
+ TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+ // TODO: fix column reference scope - with proper scopes error below should happen earlier
+ if (column.CanBeType()) {
+ return true;
+ }
+ ctx.Error(Pos) << (MissingFrom ? "Column references are not allowed without FROM" : "Source does not allow column references");
+ ctx.Error(column.GetPos()) << "Column reference "
+ << (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)");
+ return {};
+ }
+
+ bool AddFilter(TContext& ctx, TNodePtr filter) override {
+ Y_UNUSED(filter);
+ auto pos = filter ? filter->GetPos() : Pos;
+ ctx.Error(pos) << (MissingFrom ? "Filtering is not allowed without FROM" : "Source does not allow filtering");
+ return false;
+ }
+
+ TNodePtr Build(TContext& ctx) override {
+ Y_UNUSED(ctx);
+ auto ret = Y("AsList", Y("AsStruct"));
+ if (InSubquery) {
+ return Y("WithWorld", ret, "world");
+ } else {
+ return ret;
+ }
+ }
+
+ bool AddGroupKey(TContext& ctx, const TString& column) override {
+ Y_UNUSED(column);
+ ctx.Error(Pos) << "Grouping is not allowed " << (MissingFrom ? "without FROM" : "in this context");
+ return false;
+ }
+
+ bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
+ YQL_ENSURE(aggr);
+ ctx.Error(aggr->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
+ return false;
+ }
+
+ bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) override {
+ Y_UNUSED(windowName);
+ YQL_ENSURE(func);
+ ctx.Error(func->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
+ return false;
+ }
+
+ bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) override {
+ Y_UNUSED(windowName);
+ YQL_ENSURE(func);
+ ctx.Error(func->GetPos()) << "Window functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
+ return false;
+ }
+
+ TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
+ Y_UNUSED(windowName);
+ ctx.Error(Pos) << "Window and aggregation functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
+ return {};
+ }
+
+ /// No column is a GROUP BY column of this source.
+ bool IsGroupByColumn(const TString& /*column*/) const override {
+     return false;
+ }
+
+ /// This source never produces a filter node.
+ TNodePtr BuildFilter(TContext& /*ctx*/, const TString& /*label*/) override {
+     return nullptr;
+ }
+
+ /// Returns a null aggregation node paired with `true`.
+ std::pair<TNodePtr, bool> BuildAggregation(const TString& /*label*/, TContext& /*ctx*/) override {
+     return std::pair<TNodePtr, bool>(nullptr, true);
+ }
+
+ /// Creates a new fake source with the same position and flags.
+ TPtr DoClone() const final {
+     TPtr copy = new TFakeSource(Pos, MissingFrom, InSubquery);
+     return copy;
+ }
+private:
+ const bool MissingFrom;
+ const bool InSubquery;
+};
+
+/// Factory for TFakeSource.
+TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom, bool inSubquery) {
+    TSourcePtr fake = new TFakeSource(pos, missingFrom, inSubquery);
+    return fake;
+}
+
+/// Source backed by a single expression node. The node is initialized against
+/// a fake source and may be wrapped into ToList when built.
+class TNodeSource: public ISource {
+public:
+    TNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList)
+        : ISource(pos)
+        , Node(node)
+        , WrapToList(wrapToList)
+    {
+        YQL_ENSURE(Node);
+        FakeSource = BuildFakeSource(pos);
+    }
+
+    /// True for a non-empty source name that differs from this source's label.
+    bool ShouldUseSourceAsColumn(const TString& source) const final {
+        if (!source) {
+            return false;
+        }
+        return source != GetLabel();
+    }
+
+    /// Every column is accepted without checks.
+    TMaybe<bool> AddColumn(TContext& /*ctx*/, TColumnNode& /*column*/) final {
+        return true;
+    }
+
+    /// Initializes the wrapped node against the fake source, then runs the base init.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        return Node->Init(ctx, FakeSource.Get()) && ISource::DoInit(ctx, src);
+    }
+
+    /// Returns the node's AST, wrapped into ToList when requested at construction.
+    TNodePtr Build(TContext& /*ctx*/) final {
+        auto built = AstNode(Node);
+        if (!WrapToList) {
+            return built;
+        }
+        return Y("ToList", built);
+    }
+
+    TPtr DoClone() const final {
+        TPtr copy = new TNodeSource(Pos, SafeClone(Node), WrapToList);
+        return copy;
+    }
+
+private:
+    TNodePtr Node;
+    bool WrapToList;
+    TSourcePtr FakeSource;
+};
+
+/// Factory for TNodeSource.
+TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList) {
+    TSourcePtr result = new TNodeSource(pos, node, wrapToList);
+    return result;
+}
+
+/// Base for sources that forward most queries to another, non-owned source.
+/// The proxied source may be null until SetSource is called, so every
+/// forwarding method asserts (debug builds only) that it has been set.
+class IProxySource: public ISource {
+protected:
+    IProxySource(TPosition pos, ISource* src)
+        : ISource(pos)
+        , Source(src)
+    {}
+
+    void AllColumns() override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        return Source->AllColumns();
+    }
+
+    const TColumns* GetColumns() const override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        return Source->GetColumns();
+    }
+
+    /// Collects tables of the proxied source first, then those of the base class.
+    void GetInputTables(TTableList& tableList) const override {
+        // Same guard as the sibling forwarders: fail loudly in debug builds
+        // instead of dereferencing a null source.
+        Y_DEBUG_ABORT_UNLESS(Source);
+        Source->GetInputTables(tableList);
+        ISource::GetInputTables(tableList);
+    }
+
+    /// Forwards to the proxied source while it temporarily carries this
+    /// proxy's label; the proxied source's own label is restored afterwards.
+    TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        const TString label(Source->GetLabel());
+        Source->SetLabel(Label);
+        const auto ret = Source->AddColumn(ctx, column);
+        Source->SetLabel(label);
+        return ret;
+    }
+
+    bool ShouldUseSourceAsColumn(const TString& source) const override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        return Source->ShouldUseSourceAsColumn(source);
+    }
+
+    bool IsStream() const override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        return Source->IsStream();
+    }
+
+    EOrderKind GetOrderKind() const override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        return Source->GetOrderKind();
+    }
+
+    TWriteSettings GetWriteSettings() const override {
+        Y_DEBUG_ABORT_UNLESS(Source);
+        return Source->GetWriteSettings();
+    }
+
+protected:
+    /// Replaces the proxied source; the pointer is stored as-is (not owned here).
+    void SetSource(ISource* source) {
+        Source = source;
+    }
+
+    ISource* Source;
+};
+
+/// Base for sources that keep their own column set.
+class IRealSource: public ISource {
+protected:
+    IRealSource(TPosition pos)
+        : ISource(pos)
+    {
+    }
+
+    void AllColumns() override {
+        Columns.SetAll();
+    }
+
+    const TColumns* GetColumns() const override {
+        return &Columns;
+    }
+
+    /// Checks a column reference against this source.
+    /// Returns an empty TMaybe when the correlation name or the column name
+    /// does not match (an error is emitted only for reliable columns) and
+    /// `true` otherwise.
+    TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+        // NOTE(review): GetSourceName() is dereferenced unconditionally - presumably never null here; confirm.
+        const auto& label = *column.GetSourceName();
+        const auto& source = GetLabel();
+        // A correlation name matches either the whole label or its prefix up to ':'.
+        if (!label.empty() && label != source && !(source.StartsWith(label) && source[label.size()] == ':')) {
+            if (column.IsReliable()) {
+                ctx.Error(column.GetPos()) << "Unknown correlation name: " << label;
+            }
+            return {};
+        }
+        if (column.IsAsterisk()) {
+            return true;
+        }
+        const auto* name = column.GetColumnName();
+        if (name && !column.CanBeType() && !Columns.IsColumnPossible(ctx, *name) && !IsAlias(EExprSeat::GroupBy, *name) && !IsAlias(EExprSeat::DistinctAggr, *name)) {
+            if (column.IsReliable()) {
+                TStringBuilder sb;
+                sb << "Column " << *name << " is not in source column set";
+                if (const auto mistype = FindColumnMistype(*name)) {
+                    sb << ". Did you mean " << mistype.GetRef() << "?";
+                }
+                ctx.Error(column.GetPos()) << sb;
+            }
+            return {};
+        }
+        return true;
+    }
+
+    /// Looks for a likely misspelling of `name`: first among real columns,
+    /// then among artificial ones, and finally via the base class.
+    TMaybe<TString> FindColumnMistype(const TString& name) const override {
+        auto result = FindMistypeIn(Columns.Real, name);
+        if (!result) {
+            // Assign to the outer variable. Declaring a new `auto result` here
+            // would shadow it and silently drop the artificial-column match.
+            result = FindMistypeIn(Columns.Artificial, name);
+        }
+        return result ? result : ISource::FindColumnMistype(name);
+    }
+
+protected:
+    TColumns Columns;
+};
+
+/// Interface of sources that can be combined by TCompositeSelect, which calls
+/// BuildProjectWindowDistinct on its first subselect.
+class IComposableSource: private TNonCopyable {
+public:
+    virtual ~IComposableSource() = default;
+
+    /// Appends statements to `blocks`; the exact contract is defined by implementers.
+    virtual void BuildProjectWindowDistinct(TNodePtr& blocks, TContext& ctx, bool ordered) = 0;
+};
+
+using TComposableSourcePtr = TIntrusivePtr<IComposableSource>;
+
+/// Source that combines two or more inputs into a single Mux call.
+class TMuxSource: public ISource {
+public:
+    TMuxSource(TPosition pos, TVector<TSourcePtr>&& sources)
+        : ISource(pos)
+        , Sources(std::move(sources))
+    {
+        YQL_ENSURE(Sources.size() > 1);
+    }
+
+    void AllColumns() final {
+        for (auto& input: Sources) {
+            input->AllColumns();
+        }
+    }
+
+    const TColumns* GetColumns() const final {
+        // Columns are equal in all sources. Return from the first one
+        return Sources.front()->GetColumns();
+    }
+
+    /// Collects tables of every input in order, then those of the base class.
+    void GetInputTables(TTableList& tableList) const final {
+        for (auto& input: Sources) {
+            input->GetInputTables(tableList);
+        }
+        ISource::GetInputTables(tableList);
+    }
+
+    /// The mux is a stream as soon as any input is.
+    bool IsStream() const final {
+        for (const TSourcePtr& input: Sources) {
+            if (input->IsStream()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /// Initializes every input and its filters; stops at the first failure.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        for (auto& input: Sources) {
+            if (AsInner) {
+                input->UseAsInner();
+            }
+            if (src) {
+                src->AddDependentSource(input.Get());
+            }
+            if (!input->Init(ctx, src) || !input->InitFilters(ctx)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /// Offers the column to every input; an empty result from any input wins.
+    TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
+        for (auto& input: Sources) {
+            if (!input->AddColumn(ctx, column)) {
+                return {};
+            }
+        }
+        return true;
+    }
+
+    /// Binds every input (plus its filter, if any) to a generated name and
+    /// passes all those names to Mux.
+    TNodePtr Build(TContext& ctx) final {
+        TNodePtr block;
+        auto muxArgs = Y();
+        for (auto& source: Sources) {
+            auto input = source->Build(ctx);
+            auto ref = ctx.MakeName("src");
+            muxArgs->Add(ref);
+
+            auto bindInput = Y("let", ref, input);
+            if (!block) {
+                block = Y(bindInput);
+            } else {
+                block = L(block, bindInput);
+            }
+
+            if (auto filter = source->BuildFilter(ctx, ref)) {
+                block = L(block, Y("let", ref, filter));
+            }
+            if (ctx.EnableSystemColumns) {
+                block = L(block, Y("let", ref, Y("RemoveSystemMembers", ref)));
+            }
+        }
+        return GroundWithExpr(block, Y("Mux", Q(muxArgs)));
+    }
+
+    /// Always fails: filters are not supported on a mux.
+    bool AddFilter(TContext& ctx, TNodePtr /*filter*/) final {
+        ctx.Error() << "Filter is not allowed for multiple sources";
+        return false;
+    }
+
+    TPtr DoClone() const final {
+        TPtr copy = new TMuxSource(Pos, CloneContainer(Sources));
+        return copy;
+    }
+
+protected:
+    TVector<TSourcePtr> Sources;
+};
+
+/// Factory for TMuxSource; takes over the given vector of inputs.
+TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources) {
+    TSourcePtr mux = new TMuxSource(pos, std::move(sources));
+    return mux;
+}
+
+/// Reference to a named subquery by alias, optionally selecting one element
+/// of a tuple (TupleIndex != -1).
+class TSubqueryRefNode: public IRealSource {
+public:
+    TSubqueryRefNode(const TNodePtr& subquery, const TString& alias, int tupleIndex)
+        : IRealSource(subquery->GetPos())
+        , Subquery(subquery)
+        , Alias(alias)
+        , TupleIndex(tupleIndex)
+    {
+        YQL_ENSURE(subquery->GetSource());
+    }
+
+    ISource* GetSource() override {
+        return this;
+    }
+
+    /// Initializes the subquery on its own, copies its columns and builds the
+    /// node that refers to it by alias.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        // independent subquery should not connect source
+        Subquery->UseAsInner();
+        if (!Subquery->Init(ctx, nullptr)) {
+            return false;
+        }
+        Columns = *Subquery->GetSource()->GetColumns();
+
+        Node = BuildAtom(Pos, Alias, TNodeFlags::Default);
+        if (TupleIndex != -1) {
+            Node = Y("Nth", Node, Q(ToString(TupleIndex)));
+        }
+        if (!Node->Init(ctx, src)) {
+            return false;
+        }
+
+        // With an outer source given, a SELECT subquery is reduced to the
+        // single member of its first row, which requires a one-column shape.
+        if (src && Subquery->GetSource()->IsSelect()) {
+            const bool oneColumnShape = Columns.All || Columns.QualifiedAll || Columns.List.size() == 1;
+            if (!oneColumnShape) {
+                ctx.Error(Pos) << "Source used in expression should contain one concrete column";
+                return false;
+            }
+            Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
+        }
+
+        TNodePtr sample;
+        if (!BuildSamplingLambda(sample)) {
+            return false;
+        }
+        if (sample) {
+            Node = Y("block", Q(Y(Y("let", Node, Y("OrderedFlatMap", Node, sample)), Y("return", Node))));
+        }
+        return true;
+    }
+
+    /// Returns the node prepared by DoInit.
+    TNodePtr Build(TContext& /*ctx*/) override {
+        return Node;
+    }
+
+    /// Accepts only a sampling rate: System mode and a seed are rejected.
+    bool SetSamplingOptions(
+        TContext& ctx,
+        TPosition pos,
+        ESampleClause sampleClause,
+        ESampleMode mode,
+        TNodePtr samplingRate,
+        TNodePtr samplingSeed) override
+    {
+        if (ESampleMode::System == mode) {
+            ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
+            return false;
+        }
+        if (samplingSeed) {
+            ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
+            return false;
+        }
+        return SetSamplingRate(ctx, sampleClause, samplingRate);
+    }
+
+    bool IsStream() const override {
+        return Subquery->GetSource()->IsStream();
+    }
+
+    void DoUpdateState() const override {
+        State.Set(ENodeState::Const, true);
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        Y_DEBUG_ABORT_UNLESS(Node);
+        return Node->Translate(ctx);
+    }
+
+    /// The clone refers to the same subquery node (it is not cloned).
+    TPtr DoClone() const final {
+        TPtr copy = new TSubqueryRefNode(Subquery, Alias, TupleIndex);
+        return copy;
+    }
+
+protected:
+    TNodePtr Subquery;
+    const TString Alias;
+    const int TupleIndex;
+    TNodePtr Node;
+};
+
+/// Factory for TSubqueryRefNode.
+TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex) {
+    TNodePtr ref = new TSubqueryRefNode(std::move(subquery), alias, tupleIndex);
+    return ref;
+}
+
+class TInvalidSubqueryRefNode: public ISource {
+public:
+ TInvalidSubqueryRefNode(TPosition pos)
+ : ISource(pos)
+ , Pos(pos)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ Y_UNUSED(src);
+ ctx.Error(Pos) << "Named subquery can not be used as a top level statement in libraries";
+ return false;
+ }
+
+ TNodePtr Build(TContext& ctx) override {
+ Y_UNUSED(ctx);
+ return {};
+ }
+
+ TPtr DoClone() const final {
+ return new TInvalidSubqueryRefNode(Pos);
+ }
+
+protected:
+ const TPosition Pos;
+};
+
+/// Factory for TInvalidSubqueryRefNode.
+TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos) {
+    TNodePtr invalidRef = new TInvalidSubqueryRefNode(subqueryPos);
+    return invalidRef;
+}
+
+/// Source that reads a single table reference.
+class TTableSource: public IRealSource {
+public:
+    TTableSource(TPosition pos, const TTableRef& table, const TString& label)
+        : IRealSource(pos)
+        , Table(table)
+        , FakeSource(BuildFakeSource(pos))
+    {
+        // Without an explicit label the table's short name is used.
+        SetLabel(label.empty() ? Table.ShortName() : label);
+    }
+
+    void GetInputTables(TTableList& tableList) const override {
+        tableList.push_back(Table);
+        ISource::GetInputTables(tableList);
+    }
+
+    /// False for an empty name, for the label itself and for a prefix of the
+    /// label that is immediately followed by ':'; true otherwise.
+    bool ShouldUseSourceAsColumn(const TString& source) const override {
+        const auto& label = GetLabel();
+        if (!source || source == label) {
+            return false;
+        }
+        const bool prefixBeforeColon = label.StartsWith(source) && label[source.size()] == ':';
+        return !prefixBeforeColon;
+    }
+
+    /// Registers the column first, then validates it via the base class.
+    /// Returns `false` (not `true`) when validation succeeds.
+    TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+        Columns.Add(column.GetColumnName(), column.GetCountHint(), column.IsArtificial(), column.IsReliable());
+        if (IRealSource::AddColumn(ctx, column)) {
+            return false;
+        }
+        return {};
+    }
+
+    /// Appends a "sample" option (mode, rate, seed) to the table options.
+    bool SetSamplingOptions(
+        TContext& ctx,
+        TPosition pos,
+        ESampleClause sampleClause,
+        ESampleMode mode,
+        TNodePtr samplingRate,
+        TNodePtr samplingSeed) override
+    {
+        if (!samplingSeed) {
+            samplingSeed = Y("Int32", Q("0"));
+        }
+        if (ESampleClause::Sample == sampleClause) {
+            YQL_ENSURE(ESampleMode::Bernoulli == mode, "Internal logic error");
+        }
+
+        TString modeName;
+        switch (mode) {
+            case ESampleMode::Bernoulli:
+                modeName = "bernoulli";
+                break;
+            case ESampleMode::System:
+                modeName = "system";
+                break;
+        }
+
+        // The rate expression is initialized against the fake source.
+        if (!samplingRate->Init(ctx, FakeSource.Get())) {
+            return false;
+        }
+        samplingRate = PrepareSamplingRate(pos, sampleClause, samplingRate);
+
+        auto sampleSettings = Q(Y(Q(modeName), Y("EvaluateAtom", Y("ToString", samplingRate)), Y("EvaluateAtom", Y("ToString", samplingSeed))));
+        auto sampleOption = Q(Y(Q("sample"), sampleSettings));
+
+        if (!Table.Options) {
+            Table.Options = Y(sampleOption);
+            return true;
+        }
+        if (!Table.Options->Init(ctx, this)) {
+            return false;
+        }
+        Table.Options = L(Table.Options, sampleOption);
+        return true;
+    }
+
+    /// Replaces the table options with ones built from the context hints
+    /// merged with the explicit hints.
+    bool SetTableHints(TContext& /*ctx*/, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
+        TTableHints merged = contextHints;
+        MergeHints(merged, hints);
+        Table.Options = BuildInputOptions(pos, merged);
+        return true;
+    }
+
+    bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
+        return Table.Keys->SetViewName(ctx, pos, view);
+    }
+
+    /// Initializes the table keys and returns the AST of the table ref name,
+    /// or nullptr when the keys fail to initialize.
+    TNodePtr Build(TContext& ctx) override {
+        if (Table.Keys->Init(ctx, nullptr)) {
+            return AstNode(Table.RefName);
+        }
+        return nullptr;
+    }
+
+    bool IsStream() const override {
+        return IsStreamingService(Table.Service);
+    }
+
+    TPtr DoClone() const final {
+        TPtr copy = new TTableSource(Pos, Table, GetLabel());
+        return copy;
+    }
+
+    bool IsTableSource() const override {
+        return true;
+    }
+
+protected:
+    TTableRef Table;
+
+private:
+    const TSourcePtr FakeSource;
+};
+
+/// Factory for TTableSource.
+TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label) {
+    TSourcePtr tableSource = new TTableSource(pos, table, label);
+    return tableSource;
+}
+
+/// Proxy over the source of an inner node. When the node has no source of its
+/// own, one is created from the expression during DoInit. Sampling, view and
+/// hint settings are only recorded by the setters and applied in DoInit.
+class TInnerSource: public IProxySource {
+public:
+    TInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label)
+        : IProxySource(pos, nullptr)
+        , Node(node)
+        , Service(service)
+        , Cluster(cluster)
+    {
+        SetLabel(label);
+    }
+
+    /// Records the sampling options; they are applied in DoInit.
+    bool SetSamplingOptions(TContext& /*ctx*/, TPosition pos, ESampleClause sampleClause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed) override {
+        SamplingPos = pos;
+        SamplingClause = sampleClause;
+        SamplingMode = mode;
+        SamplingRate = samplingRate;
+        SamplingSeed = samplingSeed;
+        return true;
+    }
+
+    /// Records the table hints; they are applied in DoInit.
+    bool SetTableHints(TContext& /*ctx*/, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
+        HintsPos = pos;
+        Hints = hints;
+        ContextHints = contextHints;
+        return true;
+    }
+
+    /// Records the view name; it is applied in DoInit.
+    bool SetViewName(TContext& /*ctx*/, TPosition pos, const TString& view) override {
+        ViewPos = pos;
+        View = view;
+        return true;
+    }
+
+    bool ShouldUseSourceAsColumn(const TString& source) const override {
+        if (!source) {
+            return false;
+        }
+        return source != GetLabel();
+    }
+
+    /// Expression aliases are accepted directly; everything else goes to the proxy.
+    TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+        const TString* columnName = column.GetColumnName();
+        if (columnName && IsExprAlias(*columnName)) {
+            return true;
+        }
+        return IProxySource::AddColumn(ctx, column);
+    }
+
+    /// Resolves the underlying source, applies the recorded settings to it,
+    /// initializes it and attaches it to the proxy.
+    bool DoInit(TContext& ctx, ISource* /*initSrc*/) override {
+        auto source = Node->GetSource();
+        if (!source) {
+            NewSource = TryMakeSourceFromExpression(Pos, ctx, Service, Cluster, Node);
+            source = NewSource.Get();
+        }
+        if (!source) {
+            ctx.Error(Pos) << "Invalid inner source node";
+            return false;
+        }
+
+        if (SamplingPos && !source->SetSamplingOptions(ctx, *SamplingPos, SamplingClause, SamplingMode, SamplingRate, SamplingSeed)) {
+            return false;
+        }
+        if (ViewPos && !source->SetViewName(ctx, *ViewPos, View)) {
+            return false;
+        }
+        if (HintsPos && !source->SetTableHints(ctx, *HintsPos, Hints, ContextHints)) {
+            return false;
+        }
+
+        source->SetLabel(Label);
+        if (!NewSource) {
+            Node->UseAsInner();
+            if (!Node->Init(ctx, nullptr)) {
+                return false;
+            }
+        }
+
+        SetSource(source);
+        if (NewSource && !NewSource->Init(ctx, nullptr)) {
+            return false;
+        }
+
+        return ISource::DoInit(ctx, source);
+    }
+
+    /// Builds the source created from the expression if there is one,
+    /// otherwise returns the inner node itself.
+    TNodePtr Build(TContext& ctx) override {
+        if (NewSource) {
+            return NewSource->Build(ctx);
+        }
+        return Node;
+    }
+
+    bool IsStream() const override {
+        if (auto source = Node->GetSource()) {
+            return source->IsStream();
+        }
+        // NewSource will be built later in DoInit->TryMakeSourceFromExpression
+        // where Service will be used in all situations
+        // let's detect IsStream by Service value
+        return IsStreamingService(Service);
+    }
+
+    // NOTE(review): the clone does not carry over recorded sampling/view/hints settings - confirm this is intended.
+    TPtr DoClone() const final {
+        TPtr copy = new TInnerSource(Pos, SafeClone(Node), Service, Cluster, GetLabel());
+        return copy;
+    }
+
+protected:
+    TNodePtr Node;
+    TString Service;
+    TDeferredAtom Cluster;
+    TSourcePtr NewSource;
+
+private:
+    // Set together by SetSamplingOptions; read in DoInit only when SamplingPos is defined.
+    TMaybe<TPosition> SamplingPos;
+    ESampleClause SamplingClause;
+    ESampleMode SamplingMode;
+    TNodePtr SamplingRate;
+    TNodePtr SamplingSeed;
+
+    TMaybe<TPosition> ViewPos;
+    TString View;
+
+    TMaybe<TPosition> HintsPos;
+    TTableHints Hints;
+    TTableHints ContextHints;
+};
+
+/// Factory for TInnerSource.
+TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label) {
+    TSourcePtr inner = new TInnerSource(pos, node, service, cluster, label);
+    return inner;
+}
+
+/// Checks that `expr` may be used in the SQL construction named by
+/// `sqlConstruction`; reports an error at the expression and returns false
+/// otherwise. With `assume` set, only plain column names are accepted.
+static bool IsComparableExpression(TContext& ctx, const TNodePtr& expr, bool assume, const char* sqlConstruction) {
+    if (assume && !expr->GetColumnName()) {
+        ctx.Error(expr->GetPos()) << "Only column names can be used in " << sqlConstruction;
+        return false;
+    }
+
+    if (expr->IsConstant()) {
+        ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " constant expression";
+        return false;
+    }
+
+    const bool aggregatedNonKey = expr->IsAggregated() && !expr->HasState(ENodeState::AggregationKey);
+    if (aggregatedNonKey) {
+        ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " aggregated values";
+        return false;
+    }
+
+    // A column name, or any expression with an operation name, is acceptable.
+    if (expr->GetColumnName() || !expr->GetOpName().empty()) {
+        return true;
+    }
+
+    ctx.Error(expr->GetPos()) << "You should use in " << sqlConstruction << " column name, qualified field, callable function or expression";
+    return false;
+}
+
+/// \todo move to reduce.cpp? or mapreduce.cpp?
+/// Source for REDUCE: partitions the input by keys (optionally sorted inside
+/// partitions) and passes the partitions to a UDF via SqlReduce.
+class TReduceSource: public IRealSource {
+public:
+    TReduceSource(TPosition pos,
+        ReduceMode mode,
+        TSourcePtr source,
+        TVector<TSortSpecificationPtr>&& orderBy,
+        TVector<TNodePtr>&& keys,
+        TVector<TNodePtr>&& args,
+        TNodePtr udf,
+        TNodePtr having,
+        const TWriteSettings& settings,
+        const TVector<TSortSpecificationPtr>& assumeOrderBy,
+        bool listCall)
+        : IRealSource(pos)
+        , Mode(mode)
+        , Source(std::move(source))
+        , OrderBy(std::move(orderBy))
+        , Keys(std::move(keys))
+        , Args(std::move(args))
+        , Udf(udf)
+        , Having(having)
+        , Settings(settings)
+        , AssumeOrderBy(assumeOrderBy)
+        , ListCall(listCall)
+    {
+        YQL_ENSURE(!Keys.empty());
+        YQL_ENSURE(Source);
+    }
+
+    void GetInputTables(TTableList& tableList) const override {
+        Source->GetInputTables(tableList);
+        ISource::GetInputTables(tableList);
+    }
+
+    /// Initializes the input, keys, HAVING, the single UDF argument, ORDER BY,
+    /// the UDF and ASSUME ORDER BY expressions. Must be called without an
+    /// outer source.
+    bool DoInit(TContext& ctx, ISource* src) final {
+        if (AsInner) {
+            Source->UseAsInner();
+        }
+
+        YQL_ENSURE(!src);
+        if (!Source->Init(ctx, src)) {
+            return false;
+        }
+        if (!Source->InitFilters(ctx)) {
+            return false;
+        }
+        // From here on everything is resolved against the input source.
+        src = Source.Get();
+        for (auto& key: Keys) {
+            if (!key->Init(ctx, src)) {
+                return false;
+            }
+            auto keyNamePtr = key->GetColumnName();
+            YQL_ENSURE(keyNamePtr);
+            if (!src->AddGroupKey(ctx, *keyNamePtr)) {
+                return false;
+            }
+        }
+        if (Having && !Having->Init(ctx, nullptr)) {
+            return false;
+        }
+
+        /// SIN: verify reduce one argument
+        if (Args.size() != 1) {
+            ctx.Error(Pos) << "REDUCE requires exactly one UDF argument";
+            return false;
+        }
+        if (!Args[0]->Init(ctx, src)) {
+            return false;
+        }
+
+        // Iterate by reference: copying the pointer per element is unnecessary.
+        for (const auto& orderSpec: OrderBy) {
+            if (!orderSpec->OrderExpr->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        if (!Udf->Init(ctx, src)) {
+            return false;
+        }
+
+        if (Udf->GetLabel().empty()) {
+            Columns.SetAll();
+        } else {
+            Columns.Add(&Udf->GetLabel(), false);
+        }
+
+        // ASSUME ORDER BY expressions are initialized against this source while
+        // it temporarily carries the input's label. The own label is restored
+        // on every path, including failures.
+        const auto label = GetLabel();
+        bool assumeOrderValid = true;
+        for (const auto& sortSpec: AssumeOrderBy) {
+            auto& expr = sortSpec->OrderExpr;
+            SetLabel(Source->GetLabel());
+            if (!expr->Init(ctx, this) || !IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
+                assumeOrderValid = false;
+                break;
+            }
+        }
+        SetLabel(label);
+
+        return assumeOrderValid;
+    }
+
+    /// Builds the reduce block; returns nullptr when the input fails to build
+    /// or when TableRows() is used without USING ALL.
+    TNodePtr Build(TContext& ctx) final {
+        auto input = Source->Build(ctx);
+        if (!input) {
+            return nullptr;
+        }
+
+        // A single key is extracted as a plain member, several keys as a quoted tuple.
+        auto keysTuple = Y();
+        if (Keys.size() == 1) {
+            keysTuple = Y("Member", "row", BuildQuotedAtom(Pos, *Keys.back()->GetColumnName()));
+        }
+        else {
+            for (const auto& key: Keys) {
+                keysTuple = L(keysTuple, Y("Member", "row", BuildQuotedAtom(Pos, *key->GetColumnName())));
+            }
+            keysTuple = Q(keysTuple);
+        }
+        auto extractKey = Y("SqlExtractKey", "row", BuildLambda(Pos, Y("row"), keysTuple));
+        auto extractKeyLambda = BuildLambda(Pos, Y("row"), extractKey);
+
+        TNodePtr processPartitions;
+        if (ListCall) {
+            if (Mode != ReduceMode::ByAll) {
+                ctx.Error(Pos) << "TableRows() must be used only with USING ALL";
+                return nullptr;
+            }
+
+            TNodePtr expr = BuildAtom(Pos, "partitionStream");
+            processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAllList", TNodeFlags::Default), Udf, expr);
+        } else {
+            switch (Mode) {
+                case ReduceMode::ByAll: {
+                    auto columnPtr = Args[0]->GetColumnName();
+                    TNodePtr expr = BuildAtom(Pos, "partitionStream");
+                    // Unless the argument is `*`, map every row of each
+                    // (key, rows) pair through the argument expression.
+                    if (!columnPtr || *columnPtr != "*") {
+                        expr = Y("Map", "partitionStream", BuildLambda(Pos, Y("keyPair"), Q(L(Y(),
+                            Y("Nth", "keyPair", Q(ToString("0"))),
+                            Y("Map", Y("Nth", "keyPair", Q(ToString("1"))), BuildLambda(Pos, Y("row"), Args[0]))))));
+                    }
+                    processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAll", TNodeFlags::Default), Udf, expr);
+                    break;
+                }
+                case ReduceMode::ByPartition: {
+                    processPartitions = Y("SqlReduce", "partitionStream", extractKeyLambda, Udf,
+                        BuildLambda(Pos, Y("row"), Args[0]));
+                    break;
+                }
+                default:
+                    YQL_ENSURE(false, "Unexpected REDUCE mode");
+            }
+        }
+
+        TNodePtr sortDirection;
+        TNodePtr sortKeySelector;
+        FillSortParts(OrderBy, sortDirection, sortKeySelector);
+        if (!OrderBy.empty()) {
+            sortKeySelector = BuildLambda(Pos, Y("row"), Y("SqlExtractKey", "row", sortKeySelector));
+        }
+
+        auto partitionByKey = Y(!ListCall && Mode == ReduceMode::ByAll ? "PartitionByKey" : "PartitionsByKeys", "core", extractKeyLambda,
+            sortDirection, sortKeySelector, BuildLambda(Pos, Y("partitionStream"), processPartitions));
+
+        auto inputLabel = ListCall ? "inputRowsList" : "core";
+        auto block(Y(Y("let", inputLabel, input)));
+        auto filter = Source->BuildFilter(ctx, inputLabel);
+        if (filter) {
+            block = L(block, Y("let", inputLabel, filter));
+        }
+        if (ListCall) {
+            block = L(block, Y("let", "core", "inputRowsList"));
+        }
+
+        if (ctx.EnableSystemColumns) {
+            block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
+        }
+        block = L(block, Y("let", "core", Y("AutoDemux", partitionByKey)));
+        if (Having) {
+            block = L(block, Y("let", "core",
+                Y("Filter", "core", BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false")))))
+            ));
+        }
+        return Y("block", Q(L(block, Y("return", "core"))));
+    }
+
+    /// Returns a sort statement for ASSUME ORDER BY, or nullptr when there is none.
+    TNodePtr BuildSort(TContext& ctx, const TString& label) override {
+        Y_UNUSED(ctx);
+        if (AssumeOrderBy.empty()) {
+            return nullptr;
+        }
+
+        return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
+    }
+
+    EOrderKind GetOrderKind() const override {
+        return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
+    }
+
+    TWriteSettings GetWriteSettings() const final {
+        return Settings;
+    }
+
+    bool HasSelectResult() const final {
+        return !Settings.Discard;
+    }
+
+    TPtr DoClone() const final {
+        return new TReduceSource(Pos, Mode, Source->CloneSource(), CloneContainer(OrderBy),
+                CloneContainer(Keys), CloneContainer(Args), SafeClone(Udf), SafeClone(Having), Settings,
+                CloneContainer(AssumeOrderBy), ListCall);
+    }
+private:
+    ReduceMode Mode;
+    TSourcePtr Source;
+    TVector<TSortSpecificationPtr> OrderBy;
+    TVector<TNodePtr> Keys;
+    TVector<TNodePtr> Args;
+    TNodePtr Udf;
+    TNodePtr Having;
+    const TWriteSettings Settings;
+    TVector<TSortSpecificationPtr> AssumeOrderBy;
+    const bool ListCall;
+};
+
+/// Factory for TReduceSource; takes over the source and the orderBy/keys/args vectors.
+TSourcePtr BuildReduce(TPosition pos,
+    ReduceMode mode,
+    TSourcePtr source,
+    TVector<TSortSpecificationPtr>&& orderBy,
+    TVector<TNodePtr>&& keys,
+    TVector<TNodePtr>&& args,
+    TNodePtr udf,
+    TNodePtr having,
+    const TWriteSettings& settings,
+    const TVector<TSortSpecificationPtr>& assumeOrderBy,
+    bool listCall)
+{
+    TSourcePtr reduce = new TReduceSource(
+        pos, mode,
+        std::move(source), std::move(orderBy), std::move(keys), std::move(args),
+        udf, having, settings, assumeOrderBy, listCall);
+    return reduce;
+}
+
+namespace {
+
+/// Initializes `expr` against `src` and stores its grouping key name in
+/// `keyColumn`. The key stays empty when the expression is not an unlabeled
+/// column. For a join source, a non-artificial column is qualified with its
+/// correlation name; a missing correlation name is an error unless the
+/// column is a GROUP BY alias. `where` names the clause in that error.
+bool InitAndGetGroupKey(TContext& ctx, const TNodePtr& expr, ISource* src, TStringBuf where, TString& keyColumn) {
+    keyColumn.clear();
+
+    YQL_ENSURE(src);
+    const bool isJoin = src->GetJoin();
+
+    if (!expr->Init(ctx, src)) {
+        return false;
+    }
+
+    auto keyName = expr->GetColumnName();
+    if (!keyName || !expr->GetLabel().empty()) {
+        return true;
+    }
+
+    keyColumn = *keyName;
+    auto sourceName = expr->GetSourceName();
+    auto columnNode = expr->GetColumnNode();
+
+    const bool needsCorrelationName = isJoin && (!columnNode || !columnNode->IsArtificial());
+    if (!needsCorrelationName) {
+        return true;
+    }
+
+    if (sourceName && !sourceName->empty()) {
+        keyColumn = DotJoin(*sourceName, keyColumn);
+        return true;
+    }
+
+    if (src->IsAlias(EExprSeat::GroupBy, keyColumn)) {
+        return true;
+    }
+
+    ctx.Error(expr->GetPos()) << "Columns in " << where << " should have correlation name, error in key: " << keyColumn;
+    return false;
+}
+
+}
+
+/// Select combined from several subselects over one shared input: the
+/// subselect results are merged with UnionAll.
+class TCompositeSelect: public IRealSource {
+public:
+    TCompositeSelect(TPosition pos, TSourcePtr source, TSourcePtr originalSource, const TWriteSettings& settings)
+        : IRealSource(pos)
+        , Source(std::move(source))
+        , OriginalSource(std::move(originalSource))
+        , Settings(settings)
+    {
+        YQL_ENSURE(Source);
+    }
+
+    /// Installs the subselects together with the grouping and GROUP BY expressions.
+    void SetSubselects(TVector<TSourcePtr>&& subselects, TVector<TNodePtr>&& grouping, TVector<TNodePtr>&& groupByExpr) {
+        Subselects = std::move(subselects);
+        Grouping = std::move(grouping);
+        GroupByExpr = std::move(groupByExpr);
+        Y_DEBUG_ABORT_UNLESS(Subselects.size() > 1);
+    }
+
+    void GetInputTables(TTableList& tableList) const override {
+        for (const auto& select: Subselects) {
+            select->GetInputTables(tableList);
+        }
+        ISource::GetInputTables(tableList);
+    }
+
+    /// Initializes the shared input, computes grouping columns, prepares the
+    /// flatten steps and initializes all subselects against the shared input.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        if (AsInner) {
+            Source->UseAsInner();
+        }
+
+        if (src) {
+            src->AddDependentSource(Source.Get());
+        }
+        if (!Source->Init(ctx, src)) {
+            return false;
+        }
+        if (!Source->InitFilters(ctx)) {
+            return false;
+        }
+
+        if (!CalculateGroupingCols(ctx, src)) {
+            return false;
+        }
+
+        auto origSrc = OriginalSource.Get();
+        if (!origSrc->Init(ctx, src)) {
+            return false;
+        }
+
+        if (origSrc->IsFlattenByColumns() || origSrc->IsFlattenColumns()) {
+            Flatten = origSrc->IsFlattenByColumns() ?
+                origSrc->BuildFlattenByColumns("row") :
+                origSrc->BuildFlattenColumns("row");
+            if (!Flatten || !Flatten->Init(ctx, src)) {
+                return false;
+            }
+        }
+
+        if (origSrc->IsFlattenByExprs()) {
+            for (auto& expr : static_cast<ISource const*>(origSrc)->Expressions(EExprSeat::FlattenByExpr)) {
+                if (!expr->Init(ctx, origSrc)) {
+                    return false;
+                }
+            }
+            PreFlattenMap = origSrc->BuildPreFlattenMap(ctx);
+            if (!PreFlattenMap) {
+                return false;
+            }
+        }
+
+        for (const auto& select: Subselects) {
+            select->SetLabel(Label);
+            if (AsInner) {
+                select->UseAsInner();
+            }
+
+            if (!select->Init(ctx, Source.Get())) {
+                return false;
+            }
+        }
+
+        // All subselects must agree on the number of GROUPING() columns.
+        TMaybe<size_t> groupingColumnsCount;
+        size_t idx = 0;
+        for (const auto& select : Subselects) {
+            size_t count = select->GetGroupingColumnsCount();
+            if (!groupingColumnsCount.Defined()) {
+                groupingColumnsCount = count;
+            } else if (*groupingColumnsCount != count) {
+                ctx.Error(select->GetPos()) << TStringBuilder() << "Mismatch GROUPING() column count in composite select input #"
+                    << idx << ": expected " << *groupingColumnsCount << ", got: " << count << ". Please submit bug report";
+                return false;
+            }
+            ++idx;
+        }
+        return true;
+    }
+
+    /// Offers the column to every subselect; an empty result from any of them wins.
+    TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+        for (const auto& select: Subselects) {
+            if (!select->AddColumn(ctx, column)) {
+                return {};
+            }
+        }
+        return true;
+    }
+
+    /// Builds the composite block; returns nullptr when the shared input or
+    /// any subselect fails to build.
+    TNodePtr Build(TContext& ctx) override {
+        auto input = Source->Build(ctx);
+        if (!input) {
+            // Propagate the failure instead of embedding a null node into the
+            // block, as is already done for subselect results below.
+            return nullptr;
+        }
+        auto block(Y(Y("let", "composite", input)));
+
+        // NOTE(review): `ordered` takes the value of UseUnordered() as is - confirm the polarity is intended.
+        bool ordered = ctx.UseUnordered(*this);
+        if (PreFlattenMap) {
+            block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), PreFlattenMap))));
+        }
+        if (Flatten) {
+            block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), Flatten, "res"))));
+        }
+        auto filter = Source->BuildFilter(ctx, "composite");
+        if (filter) {
+            block = L(block, Y("let", "composite", filter));
+        }
+
+        TNodePtr compositeNode = Y("UnionAll");
+        for (const auto& select: Subselects) {
+            YQL_ENSURE(dynamic_cast<IComposableSource*>(select.Get()));
+            auto addNode = select->Build(ctx);
+            if (!addNode) {
+                return nullptr;
+            }
+            compositeNode->Add(addNode);
+        }
+
+        block = L(block, Y("let", "core", compositeNode));
+        YQL_ENSURE(!Subselects.empty());
+        // The cast cannot fail here: every subselect was checked in the loop above.
+        dynamic_cast<IComposableSource*>(Subselects.front().Get())->BuildProjectWindowDistinct(block, ctx, false);
+        return Y("block", Q(L(block, Y("return", "core"))));
+    }
+
+    bool IsGroupByColumn(const TString& column) const override {
+        YQL_ENSURE(!GroupingCols.empty());
+        return GroupingCols.contains(column);
+    }
+
+    const TSet<TString>& GetGroupingCols() const {
+        return GroupingCols;
+    }
+
+    // Sorting, order kind and columns are taken from the first subselect.
+    TNodePtr BuildSort(TContext& ctx, const TString& label) override {
+        return Subselects.front()->BuildSort(ctx, label);
+    }
+
+    EOrderKind GetOrderKind() const override {
+        return Subselects.front()->GetOrderKind();
+    }
+
+    const TColumns* GetColumns() const override{
+        return Subselects.front()->GetColumns();
+    }
+
+    ISource* RealSource() const {
+        return Source.Get();
+    }
+
+    TWriteSettings GetWriteSettings() const override {
+        return Settings;
+    }
+
+    bool HasSelectResult() const override {
+        return !Settings.Discard;
+    }
+
+    TNodePtr DoClone() const final {
+        auto newSource = MakeIntrusive<TCompositeSelect>(Pos, Source->CloneSource(), OriginalSource->CloneSource(), Settings);
+        newSource->SetSubselects(CloneContainer(Subselects), CloneContainer(Grouping), CloneContainer(GroupByExpr));
+        return newSource;
+    }
+private:
+    /// Fills GroupingCols using a clone of the original source. All errors
+    /// are collected before the failure is reported to the caller.
+    bool CalculateGroupingCols(TContext& ctx, ISource* initSrc) {
+        auto origSrc = OriginalSource->CloneSource();
+        if (!origSrc->Init(ctx, initSrc)) {
+            return false;
+        }
+
+        bool hasError = false;
+        for (auto& expr: GroupByExpr) {
+            if (!expr->Init(ctx, origSrc.Get()) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
+                hasError = true;
+            }
+        }
+        if (!origSrc->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
+            hasError = true;
+        }
+
+        YQL_ENSURE(!Grouping.empty());
+        for (auto& grouping : Grouping) {
+            TString keyColumn;
+            if (!InitAndGetGroupKey(ctx, grouping, origSrc.Get(), "grouping sets", keyColumn)) {
+                hasError = true;
+            } else if (!keyColumn.empty()) {
+                GroupingCols.insert(keyColumn);
+            }
+        }
+
+        return !hasError;
+    }
+
+    TSourcePtr Source;
+    TSourcePtr OriginalSource;
+    TNodePtr Flatten;
+    TNodePtr PreFlattenMap;
+    const TWriteSettings Settings;
+    TVector<TSourcePtr> Subselects;
+    TVector<TNodePtr> Grouping;
+    TVector<TNodePtr> GroupByExpr;
+    TSet<TString> GroupingCols;
+};
+
+namespace {
+ /// Returns the column name, prefixed with the source name (via DotJoin)
+ /// when the column is marked as using its source.
+ TString FullColumnName(const TColumnNode& column) {
+     YQL_ENSURE(column.GetColumnName());
+     TString fullName = *column.GetColumnName();
+     if (!column.IsUseSource()) {
+         return fullName;
+     }
+     fullName = DotJoin(*column.GetSourceName(), fullName);
+     return fullName;
+ }
+}
+
+/// \todo simplify class
+class TSelectCore: public IRealSource, public IComposableSource {
+public:
+ /// Stores all parts of a SELECT core. The source and both column-set
+ /// collections are moved in, everything else is copied.
+ TSelectCore(
+     TPosition pos,
+     TSourcePtr source,
+     const TVector<TNodePtr>& groupByExpr,
+     const TVector<TNodePtr>& groupBy,
+     bool compactGroupBy,
+     const TString& groupBySuffix,
+     bool assumeSorted,
+     const TVector<TSortSpecificationPtr>& orderBy,
+     TNodePtr having,
+     const TWinSpecs& winSpecs,
+     TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
+     const TVector<TNodePtr>& terms,
+     bool distinct,
+     const TVector<TNodePtr>& without,
+     bool selectStream,
+     const TWriteSettings& settings,
+     TColumnsSets&& uniqueSets,
+     TColumnsSets&& distinctSets)
+     : IRealSource(pos)
+     , Source(std::move(source))
+     , GroupByExpr(groupByExpr)
+     , GroupBy(groupBy)
+     , AssumeSorted(assumeSorted)
+     , CompactGroupBy(compactGroupBy)
+     , GroupBySuffix(groupBySuffix)
+     , OrderBy(orderBy)
+     , Having(having)
+     , WinSpecs(winSpecs)
+     , Terms(terms)
+     , Without(without)
+     , Distinct(distinct)
+     , LegacyHoppingWindowSpec(legacyHoppingWindowSpec)
+     , SelectStream(selectStream)
+     , Settings(settings)
+     , UniqueSets(std::move(uniqueSets))
+     , DistinctSets(std::move(distinctSets))
+ {}
+
+ /// Marks all columns as selected, except while ORDER BY expressions are being
+ /// initialized (OrderByInit), in which case the request is ignored.
+ void AllColumns() override {
+     if (OrderByInit) {
+         return;
+     }
+     Columns.SetAll();
+ }
+
+ void GetInputTables(TTableList& tableList) const override { // Appends the wrapped source's input tables, then whatever the ISource base implementation adds.
+ Source->GetInputTables(tableList);
+ ISource::GetInputTables(tableList);
+ }
+
+ size_t GetGroupingColumnsCount() const override { // Delegates to the wrapped source.
+ return Source->GetGroupingColumnsCount();
+ }
+
+ bool DoInit(TContext& ctx, ISource* initSrc) override { // Initializes the wrapped source and every clause of the SELECT, then prepares the aggregate/flatten/window nodes that Build() consumes. Returns false on any error.
+ if (AsInner) {
+ Source->UseAsInner(); // propagate the inner-usage flag before the source is initialized
+ }
+
+ if (!Source->Init(ctx, initSrc)) {
+ return false;
+ }
+ if (SelectStream && !Source->IsStream()) { // SELECT STREAM is only accepted over a streaming source
+ ctx.Error(Pos) << "SELECT STREAM is unsupported for non-streaming sources";
+ return false;
+ }
+
+ auto src = Source.Get();
+ bool hasError = false;
+
+ if (src->IsFlattenByExprs()) { // initialize FLATTEN BY expressions first
+ for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) {
+ if (!expr->Init(ctx, src)) {
+ hasError = true;
+ continue;
+ }
+ }
+ }
+
+ if (hasError) {
+ return false;
+ }
+
+ src->SetCompactGroupBy(CompactGroupBy);
+ src->SetGroupBySuffix(GroupBySuffix);
+
+ for (auto& term: Terms) { // gather pre-aggregate expressions of the projection terms into DistinctAggrExpr
+ term->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
+ }
+
+ if (Having) { // same for HAVING
+ Having->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
+ }
+
+ for (auto& expr: GroupByExpr) {
+ if (auto sessionWindow = dynamic_cast<TSessionWindow*>(expr.Get())) { // SessionWindow in GROUP BY: rejected for streams, otherwise marked valid
+ if (Source->IsStream()) {
+ ctx.Error(Pos) << "SessionWindow is unsupported for streaming sources";
+ return false;
+ }
+ sessionWindow->MarkValid();
+ }
+
+ if (auto hoppingWindow = dynamic_cast<THoppingWindow*>(expr.Get())) {
+ hoppingWindow->MarkValid();
+ }
+
+ // need to collect and Init() preaggregated exprs before calling Init() on GROUP BY expression
+ TVector<TNodePtr> distinctAggrsInGroupBy;
+ expr->CollectPreaggregateExprs(ctx, *src, distinctAggrsInGroupBy);
+ for (auto& distinct : distinctAggrsInGroupBy) {
+ if (!distinct->Init(ctx, src)) {
+ return false;
+ }
+ }
+ DistinctAggrExpr.insert(DistinctAggrExpr.end(), distinctAggrsInGroupBy.begin(), distinctAggrsInGroupBy.end());
+
+ if (!expr->Init(ctx, src) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
+ hasError = true;
+ }
+ }
+ if (hasError || !src->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) { // register the GROUP BY expressions on the source
+ return false;
+ }
+
+ for (auto& expr: DistinctAggrExpr) {
+ if (!expr->Init(ctx, src)) {
+ hasError = true;
+ }
+ }
+ if (hasError || !src->AddExpressions(ctx, DistinctAggrExpr, EExprSeat::DistinctAggr)) {
+ return false;
+ }
+
+ /// grouped expressions are available in filters
+ if (!Source->InitFilters(ctx)) {
+ return false;
+ }
+
+ for (auto& expr: GroupBy) { // plain GROUP BY keys: resolve each to a column name and register it as a group key
+ TString usedColumn;
+ if (!InitAndGetGroupKey(ctx, expr, src, "GROUP BY", usedColumn)) {
+ hasError = true;
+ } else if (usedColumn) {
+ if (!src->AddGroupKey(ctx, usedColumn)) {
+ hasError = true;
+ }
+ }
+ }
+
+ if (hasError) {
+ return false;
+ }
+
+ if (Having && !Having->Init(ctx, src)) {
+ return false;
+ }
+ src->AddWindowSpecs(WinSpecs);
+
+ const bool isJoin = Source->GetJoin();
+ if (!InitSelect(ctx, src, isJoin, hasError)) { // window partitions, projection terms, WITHOUT and ORDER BY
+ return false;
+ }
+
+ src->FinishColumns();
+ auto aggRes = src->BuildAggregation("core", ctx); // pair: .first is the aggregation node (may be null), .second is the success flag
+ if (!aggRes.second) {
+ return false;
+ }
+
+ Aggregate = aggRes.first;
+ if (src->IsFlattenByColumns() || src->IsFlattenColumns()) {
+ Flatten = src->IsFlattenByColumns() ?
+ src->BuildFlattenByColumns("row") :
+ src->BuildFlattenColumns("row");
+ if (!Flatten || !Flatten->Init(ctx, src)) {
+ return false;
+ }
+ }
+
+ if (src->IsFlattenByExprs()) {
+ PreFlattenMap = src->BuildPreFlattenMap(ctx);
+ if (!PreFlattenMap) {
+ return false;
+ }
+ }
+
+ if (GroupByExpr || DistinctAggrExpr) { // a pre-aggregation map is built when there are GROUP BY or distinct-aggregate expressions
+ PreaggregatedMap = src->BuildPreaggregatedMap(ctx);
+ if (!PreaggregatedMap) {
+ return false;
+ }
+ }
+ if (Aggregate) {
+ if (!Aggregate->Init(ctx, src)) {
+ return false;
+ }
+ if (Having) { // HAVING becomes a Filter over the aggregation; a NULL predicate is coalesced to false
+ Aggregate = Y(
+ "Filter",
+ Aggregate,
+ BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
+ );
+ }
+ } else if (Having) { // HAVING without aggregation: accepted (with a warning) only for SELECT DISTINCT
+ if (Distinct) {
+ Aggregate = Y(
+ "Filter",
+ "core",
+ BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
+ );
+ ctx.Warning(Having->GetPos(), TIssuesIds::YQL_HAVING_WITHOUT_AGGREGATION_IN_SELECT_DISTINCT)
+ << "The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. Please use WHERE instead.";
+ } else {
+ ctx.Error(Having->GetPos()) << "HAVING with meaning GROUP BY () should be with aggregation function.";
+ return false;
+ }
+ } else if (!Distinct && !GroupBy.empty()) { // GROUP BY keys with neither an aggregation nor DISTINCT is an error
+ ctx.Error(Pos) << "No aggregations were specified";
+ return false;
+ }
+ if (hasError) {
+ return false;
+ }
+
+ if (src->IsCalcOverWindow()) { // window functions: optional pre-window map for PARTITION BY expressions, then the window calculation itself
+ if (src->IsExprSeat(EExprSeat::WindowPartitionBy, EExprType::WithExpression)) {
+ PrewindowMap = src->BuildPrewindowMap(ctx);
+ if (!PrewindowMap) {
+ return false;
+ }
+ }
+ CalcOverWindow = src->BuildCalcOverWindow(ctx, "core");
+ if (!CalcOverWindow || !CalcOverWindow->Init(ctx, src)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ TNodePtr Build(TContext& ctx) override { // Assembles the "block" expression for this SELECT by repeatedly rebinding "core"; returns null if the source or MATCH_RECOGNIZE cannot be built.
+ auto input = Source->Build(ctx);
+ if (!input) {
+ return nullptr;
+ }
+
+ auto block(Y(Y("let", "core", input)));
+
+ if (Source->HasMatchRecognize()) {
+ if (auto matchRecognize = Source->BuildMatchRecognize(ctx, "core")) {
+ //use unique name match_recognize to find this block easily in unit tests
+ block = L(block, Y("let", "match_recognize", matchRecognize));
+ //then bind to the conventional name
+ block = L(block, Y("let", "core", "match_recognize"));
+ } else {
+ return nullptr;
+ }
+ }
+
+ bool ordered = ctx.UseUnordered(*this); // NOTE(review): the flag named "ordered" comes from UseUnordered(); confirm the polarity is intended
+ if (PreFlattenMap) {
+ block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), PreFlattenMap))));
+ }
+ if (Flatten) {
+ block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Flatten, "res"))));
+ }
+ if (PreaggregatedMap) {
+ block = L(block, Y("let", "core", PreaggregatedMap));
+ if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
+ block = L(block, Y("let", "preaggregated", "core")); // "preaggregated" is the name InitSelect's CompositeTerms take types from
+ }
+ } else if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
+ block = L(block, Y("let", "origcore", "core")); // likewise "origcore" when there is no pre-aggregation map
+ }
+ auto filter = Source->BuildFilter(ctx, "core");
+ if (filter) {
+ block = L(block, Y("let", "core", filter));
+ }
+ if (Aggregate) {
+ block = L(block, Y("let", "core", Aggregate));
+ ordered = false; // after aggregation the projection is built with the non-ordered variant
+ }
+
+ const bool haveCompositeTerms = Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty();
+ if (haveCompositeTerms) {
+ // column order does not matter here - it will be set in projection
+ YQL_ENSURE(Aggregate);
+ block = L(block, Y("let", "core", Y("Map", "core", BuildLambda(Pos, Y("row"), CompositeTerms, "row"))));
+ }
+
+ if (auto grouping = Source->BuildGroupingColumns("core")) {
+ block = L(block, Y("let", "core", grouping));
+ }
+
+ if (!Source->GetCompositeSource()) { // with a composite source the projection/window/distinct steps are not appended here
+ BuildProjectWindowDistinct(block, ctx, ordered);
+ }
+
+ return Y("block", Q(L(block, Y("return", "core"))));
+ }
+
+ void BuildProjectWindowDistinct(TNodePtr& block, TContext& ctx, bool ordered) override { // Appends to `block`, in order: pre-window map, window calculation, projection, and DISTINCT (each only when applicable; projection always).
+ if (PrewindowMap) {
+ block = L(block, Y("let", "core", PrewindowMap));
+ }
+ if (CalcOverWindow) {
+ block = L(block, Y("let", "core", CalcOverWindow));
+ }
+
+ block = L(block, Y("let", "core", Y("PersistableRepr", BuildSqlProject(ctx, ordered))));
+
+ if (Distinct) { // SELECT DISTINCT: SqlAggregateAll over the projected rows with system members removed
+ block = L(block, Y("let", "core", Y("PersistableRepr", Y("SqlAggregateAll", Y("RemoveSystemMembers", "core")))));
+ }
+ }
+
+ /// Builds the "let" statement that sorts `label` according to ORDER BY.
+ /// Returns null when there is no ORDER BY or sorting is disabled. Columns that were
+ /// added to the projection only for sorting (ExtraSortColumns) are removed afterwards.
+ TNodePtr BuildSort(TContext& ctx, const TString& label) override {
+     Y_UNUSED(ctx);
+     const bool nothingToSort = OrderBy.empty() || DisableSort_;
+     if (nothingToSort) {
+         return nullptr;
+     }
+
+     auto sortNode = BuildSortSpec(OrderBy, label, false, AssumeSorted);
+     if (!ExtraSortColumns.empty()) {
+         // Strip the helper columns from every sorted row.
+         auto dropExtras = Y();
+         for (const auto& entry : ExtraSortColumns) {
+             dropExtras = L(dropExtras, Y("let", "row", Y("RemoveMember", "row", Q(entry.first))));
+         }
+         dropExtras = L(dropExtras, Y("let", "res", "row"));
+         return Y("let", label, Y("OrderedMap", sortNode, BuildLambda(Pos, Y("row"), dropExtras, "res")));
+     }
+     return Y("let", label, sortNode);
+ }
+
+ /// Builds a "let" statement that removes system columns from `label`, or returns null
+ /// when no cleanup applies. Cleanup is only considered when system columns are enabled
+ /// and the SQL mode is not LIMITED_VIEW.
+ TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label) override {
+     const bool cleanupEnabled = ctx.EnableSystemColumns && ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW;
+     if (!cleanupEnabled) {
+         return nullptr;
+     }
+     if (Columns.All) {
+         return Y("let", label, Y("RemoveSystemMembers", label));
+     }
+     if (Columns.List.empty()) {
+         return nullptr;
+     }
+
+     const bool isJoin = Source->GetJoin();
+     if (isJoin || !Columns.QualifiedAll) {
+         return nullptr;
+     }
+     if (ctx.SimpleColumns) {
+         return Y("let", label, Y("RemoveSystemMembers", label));
+     }
+
+     // Qualified asterisks without SimpleColumns: drop members by "<source>._yql_" prefix.
+     TNodePtr prefixes;
+     for (auto& term : Terms) {
+         if (!term->IsAsterisk()) {
+             continue;
+         }
+         auto sourceName = term->GetSourceName();
+         YQL_ENSURE(*sourceName && !sourceName->empty());
+         auto prefix = *sourceName + "._yql_";
+         prefixes = prefixes ? L(prefixes, Q(prefix)) : Y(Q(prefix));
+     }
+     if (!prefixes) {
+         return nullptr;
+     }
+     return Y("let", label, Y("RemovePrefixMembers", label, Q(prefixes)));
+ }
+
+ bool IsSelect() const override { // This source always represents a SELECT.
+ return true;
+ }
+
+ bool HasSelectResult() const override { // A result is produced unless Settings.Discard is set.
+ return !Settings.Discard;
+ }
+
+ bool IsStream() const override { // Streaming-ness is inherited from the wrapped source.
+ return Source->IsStream();
+ }
+
+ /// Reports how the result is ordered: None without ORDER BY, otherwise Assume for
+ /// ASSUME ORDER BY (AssumeSorted) and Sort for a regular ORDER BY.
+ EOrderKind GetOrderKind() const override {
+     if (!OrderBy.empty()) {
+         return AssumeSorted ? EOrderKind::Assume : EOrderKind::Sort;
+     }
+     return EOrderKind::None;
+ }
+
+ TWriteSettings GetWriteSettings() const override { // Returns a copy of the write settings given at construction.
+ return Settings;
+ }
+
+ TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override { // Resolves a column reference; while ORDER BY is being initialized the column may also be looked up in the wrapped Source.
+ const bool aggregated = Source->HasAggregations() || Distinct;
+ if (OrderByInit && (Source->GetJoin() || !aggregated)) {
+ // ORDER BY will try to find column not only in projection items, but also in Source.
+ // ```SELECT a, b FROM T ORDER BY c``` should work if c is present in T
+ const bool reliable = column.IsReliable();
+ column.SetAsNotReliable(); // look the column up in this select's own columns as "not reliable" first
+ auto maybeExist = IRealSource::AddColumn(ctx, column);
+ if (reliable && !Source->GetJoin()) {
+ column.ResetAsReliable(); // restore the original reliability (non-join case only)
+ }
+ if (!maybeExist || !maybeExist.GetRef()) { // undefined or not found in the projection: fall back to the wrapped source
+ maybeExist = Source->AddColumn(ctx, column);
+ }
+ if (!maybeExist.Defined()) {
+ return maybeExist;
+ }
+ if (!DisableSort_ && !aggregated && column.GetColumnName() && IsMissingInProjection(ctx, column)) {
+ ExtraSortColumns[FullColumnName(column)] = &column; // remembered so BuildSqlProject projects it and BuildSort removes it after sorting
+ }
+ return maybeExist;
+ }
+
+ return IRealSource::AddColumn(ctx, column);
+ }
+
+ /// Tells whether `column` is absent from the projection: it is not among the real or
+ /// artificial columns and is either impossible for the column set or excluded by WITHOUT.
+ bool IsMissingInProjection(TContext& ctx, const TColumnNode& column) const {
+     const TString fullName = FullColumnName(column);
+     const bool alreadyProjected = Columns.Real.contains(fullName) || Columns.Artificial.contains(fullName);
+     if (alreadyProjected) {
+         return false;
+     }
+
+     if (!Columns.IsColumnPossible(ctx, fullName)) {
+         return true;
+     }
+
+     for (const auto& withoutNode : Without) {
+         TString excluded = *withoutNode->GetColumnName();
+         const bool qualify = Source && Source->GetJoin();
+         if (qualify) {
+             // In joins WITHOUT columns are matched by their source-qualified name.
+             excluded = DotJoin(*withoutNode->GetSourceName(), excluded);
+         }
+         if (excluded == fullName) {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /// Extends `base` with statements that remove from "row" every WITHOUT column
+ /// (source-qualified when the source is a join) and every temporary window column
+ /// reported by the source.
+ TNodePtr PrepareWithout(const TNodePtr& base) {
+     auto result = base;
+     for (const auto& withoutNode : Without) {
+         auto memberName = *withoutNode->GetColumnName();
+         const bool qualify = Source && Source->GetJoin();
+         if (qualify) {
+             memberName = DotJoin(*withoutNode->GetSourceName(), memberName);
+         }
+         result = L(result, Y("let", "row", Y("RemoveMember", "row", Q(memberName))));
+     }
+
+     if (!Source) {
+         return result;
+     }
+     for (auto tmpColumn : Source->GetTmpWindowColumns()) {
+         result = L(result, Y("let", "row", Y("RemoveMember", "row", Q(tmpColumn))));
+     }
+     return result;
+ }
+
+ TNodePtr DoClone() const final { // Builds a new TSelectCore from cloned clauses; state computed by DoInit() (Aggregate, Flatten, ...) is not passed to the constructor.
+ return new TSelectCore(Pos, Source->CloneSource(), CloneContainer(GroupByExpr),
+ CloneContainer(GroupBy), CompactGroupBy, GroupBySuffix, AssumeSorted, CloneContainer(OrderBy),
+ SafeClone(Having), CloneContainer(WinSpecs), SafeClone(LegacyHoppingWindowSpec),
+ CloneContainer(Terms), Distinct, Without, SelectStream, Settings, TColumnsSets(UniqueSets), TColumnsSets(DistinctSets)); // NOTE(review): Without is passed as-is (nodes shared with the clone) while the other node lists are cloned; confirm this is intended
+ }
+
+private:
+ /// Initializes window partitions, the legacy hopping window, projection terms, window
+ /// ORDER BY expressions, WITHOUT columns, HAVING and ORDER BY against `src`, and
+ /// registers the projected column names in Columns.
+ ///
+ /// \param ctx      translation context that receives errors and warnings
+ /// \param src      source the expressions are initialized against
+ /// \param isJoin   whether the source is a join (labels and WITHOUT become source-qualified)
+ /// \param hasError in/out error flag; set to true on any failure, never reset here
+ /// \return true when no error is recorded in hasError
+ bool InitSelect(TContext& ctx, ISource* src, bool isJoin, bool& hasError) {
+     for (auto& [name, winSpec] : WinSpecs) {
+         for (size_t i = 0; i < winSpec->Partitions.size(); ++i) {
+             auto partitionNode = winSpec->Partitions[i];
+             if (auto sessionWindow = dynamic_cast<TSessionWindow*>(partitionNode.Get())) {
+                 // At most one session window is allowed per window specification.
+                 if (winSpec->Session) {
+                     ctx.Error(partitionNode->GetPos()) << "Duplicate session window specification:";
+                     ctx.Error(winSpec->Session->GetPos()) << "Previous session window is declared here";
+                     hasError = true;
+                     continue;
+                 }
+                 sessionWindow->MarkValid();
+                 winSpec->Session = partitionNode;
+             }
+
+             if (!partitionNode->Init(ctx, src)) {
+                 hasError = true;
+                 continue;
+             }
+             // Unnamed partition expressions get a generated label that is tracked as a
+             // temporary window column (PrepareWithout removes those from the output row).
+             if (!partitionNode->GetLabel() && !partitionNode->GetColumnName()) {
+                 TString label = TStringBuilder() << "group_" << name << "_" << i;
+                 partitionNode->SetLabel(label);
+                 src->AddTmpWindowColumn(label);
+             }
+         }
+         if (!src->AddExpressions(ctx, winSpec->Partitions, EExprSeat::WindowPartitionBy)) {
+             hasError = true;
+         }
+     }
+
+     if (LegacyHoppingWindowSpec) {
+         if (!LegacyHoppingWindowSpec->TimeExtractor->Init(ctx, src)) {
+             hasError = true;
+         }
+         src->SetLegacyHoppingWindowSpec(LegacyHoppingWindowSpec);
+     }
+
+     for (auto& term: Terms) {
+         if (!term->Init(ctx, src)) {
+             hasError = true;
+             continue;
+         }
+         auto column = term->GetColumnName();
+         TString label(term->GetLabel());
+         bool hasName = true;
+         if (label.empty()) {
+             auto source = term->GetSourceName();
+             // The source name pointer is null-checked in the branch below, so it must be
+             // guarded here as well before being dereferenced.
+             if (term->IsAsterisk() && source && !source->empty()) {
+                 Columns.QualifiedAll = true;
+                 label = DotJoin(*source, "*");
+             } else if (column) {
+                 label = isJoin && source && *source ? DotJoin(*source, *column) : *column;
+             } else {
+                 label = Columns.AddUnnamed();
+                 hasName = false;
+                 if (ctx.WarnUnnamedColumns) {
+                     ctx.Warning(term->GetPos(), TIssuesIds::YQL_UNNAMED_COLUMN)
+                         << "Autogenerated column name " << label << " will be used for expression";
+                 }
+             }
+         }
+         if (hasName && !Columns.Add(&label, false, false, true)) {
+             ctx.Error(Pos) << "Duplicate column: " << label;
+             hasError = true;
+         }
+     }
+
+     // For composite sources with an explicit column list, add a typed "Nothing" member for
+     // every grouping column of the composite select that this source does not group by.
+     CompositeTerms = Y();
+     if (!hasError && Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty()) {
+         auto compositeSrcPtr = static_cast<TCompositeSelect*>(Source->GetCompositeSource());
+         if (compositeSrcPtr) {
+             const auto& groupings = compositeSrcPtr->GetGroupingCols();
+             for (const auto& column: groupings) {
+                 if (Source->IsGroupByColumn(column)) {
+                     continue;
+                 }
+                 const TString tableName = (GroupByExpr || DistinctAggrExpr) ? "preaggregated" : "origcore";
+                 CompositeTerms = L(CompositeTerms, Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(Pos, column), Y("Nothing", Y("MatchType",
+                     Y("StructMemberType", Y("ListItemType", Y("TypeOf", tableName)), Q(column)),
+                     Q("Optional"), Y("lambda", Q(Y("item")), "item"), Y("lambda", Q(Y("item")), Y("OptionalType", "item")))))));
+             }
+         }
+     }
+
+     // Only the ORDER BY expressions are needed here, so iterate by reference instead of
+     // copying every map entry and window specification.
+     for (const auto& entry : WinSpecs) {
+         for (const auto& orderSpec : entry.second->OrderBy) {
+             if (!orderSpec->OrderExpr->Init(ctx, src)) {
+                 hasError = true;
+             }
+         }
+     }
+
+     if (Columns.All || Columns.QualifiedAll) {
+         Source->AllColumns();
+     }
+     for (const auto& without: Without) {
+         auto namePtr = without->GetColumnName();
+         auto sourcePtr = without->GetSourceName();
+         YQL_ENSURE(namePtr && *namePtr);
+         if (isJoin && !(sourcePtr && *sourcePtr)) {
+             ctx.Error(without->GetPos()) << "Expected correlation name for WITHOUT in JOIN";
+             hasError = true;
+         }
+     }
+     if (Having && !Having->Init(ctx, src)) {
+         hasError = true;
+     }
+     if (!src->IsCompositeSource() && !Columns.All && src->HasAggregations()) {
+         WarnIfAliasFromSelectIsUsedInGroupBy(ctx, Terms, GroupBy, GroupByExpr);
+
+         /// verify select aggregation compatibility
+         TVector<TNodePtr> exprs(Terms);
+         if (Having) {
+             exprs.push_back(Having);
+         }
+         for (const auto& iter: WinSpecs) {
+             for (const auto& sortSpec: iter.second->OrderBy) {
+                 exprs.push_back(sortSpec->OrderExpr);
+             }
+         }
+         if (!ValidateAllNodesForAggregation(ctx, exprs)) {
+             hasError = true;
+         }
+     }
+
+     // ORDER BY expressions are initialized against this select itself, temporarily
+     // carrying the wrapped source's label; the own label is restored afterwards.
+     const auto label = GetLabel();
+     for (const auto& sortSpec: OrderBy) {
+         auto& expr = sortSpec->OrderExpr;
+         SetLabel(Source->GetLabel());
+         OrderByInit = true;
+         const bool initialized = expr->Init(ctx, this);
+         // Always leave ORDER BY mode, including on failure, so the flag cannot leak
+         // into later AllColumns()/AddColumn() calls.
+         OrderByInit = false;
+         if (!initialized) {
+             hasError = true;
+             continue;
+         }
+         if (!IsComparableExpression(ctx, expr, AssumeSorted, AssumeSorted ? "ASSUME ORDER BY" : "ORDER BY")) {
+             hasError = true;
+         }
+     }
+     SetLabel(label);
+
+     return !hasError;
+ }
+
+ /// For joins with SimpleColumns, extends `base` with statements that coalesce equal
+ /// join keys across the given labels (via "CoalesceMembers") and rebind "row".
+ /// Returns `base` unchanged when coalescing is not required or the join has no same-key map.
+ TNodePtr PrepareJoinCoalesce(TContext& ctx, const TNodePtr& base, bool multipleQualifiedAll, const TVector<TString>& coalesceLabels) {
+     const bool isJoin = Source->GetJoin();
+     if (!isJoin || !ctx.SimpleColumns) {
+         return base;
+     }
+     const bool coalesceRequested = Columns.All || multipleQualifiedAll || ctx.CoalesceJoinKeysOnQualifiedAll;
+     if (!coalesceRequested) {
+         return base;
+     }
+
+     auto terms = base;
+     const auto& sameKeyMap = Source->GetJoin()->GetSameKeysMap();
+     if (!sameKeyMap) {
+         return terms;
+     }
+
+     terms = L(terms, Y("let", "flatSameKeys", "row"));
+     for (const auto& keyAndSources : sameKeyMap) {
+         // Collect "<label>.<key>" for every requested label that carries this key.
+         auto coalesceKeys = Y();
+         for (const auto& label : coalesceLabels) {
+             if (keyAndSources.second.contains(label)) {
+                 coalesceKeys = L(coalesceKeys, Q(DotJoin(label, keyAndSources.first)));
+             }
+         }
+         terms = L(terms, Y("let", "flatSameKeys", Y("CoalesceMembers", "flatSameKeys", Q(coalesceKeys))));
+     }
+     terms = L(terms, Y("let", "row", "flatSameKeys"));
+     return terms;
+ }
+
+ TNodePtr BuildSqlProject(TContext& ctx, bool ordered) { // Builds the projection block: one SqlProjectItem/SqlProjectStarItem per term (or one star item for SELECT *), plus extra sort columns and unique/distinct hints.
+ auto sqlProjectArgs = Y();
+ const bool isJoin = Source->GetJoin();
+
+ if (Columns.All) { // SELECT *: a single star item
+ YQL_ENSURE(Columns.List.empty());
+ auto terms = PrepareWithout(Y());
+ auto options = Y();
+ if (isJoin && ctx.SimpleColumns) {
+ terms = PrepareJoinCoalesce(ctx, terms, false, Source->GetJoin()->GetJoinLabels());
+
+ auto members = Y();
+ for (auto& source : Source->GetJoin()->GetJoinLabels()) {
+ YQL_ENSURE(!source.empty());
+ members = L(members, BuildQuotedAtom(Pos, source + ".")); // "<label>." prefixes to dive into
+ }
+ if (GroupByExpr.empty() || ctx.BogousStarInGroupByOverJoin) {
+ terms = L(terms, Y("let", "res", Y("DivePrefixMembers", "row", Q(members))));
+ } else { // with GROUP BY expressions: merge the labeled group-by members with the dived columns
+ auto groupExprStruct = Y("AsStruct");
+ for (auto node : GroupByExpr) {
+ auto label = node->GetLabel();
+ YQL_ENSURE(label);
+ if (Source->IsGroupByColumn(label)) {
+ auto name = BuildQuotedAtom(Pos, label);
+ groupExprStruct = L(groupExprStruct, Q(Y(name, Y("Member", "row", name))));
+ }
+ }
+ auto groupColumnsStruct = Y("DivePrefixMembers", "row", Q(members));
+
+ terms = L(terms, Y("let", "res", Y("FlattenMembers", Q(Y(BuildQuotedAtom(Pos, ""), groupExprStruct)),
+ Q(Y(BuildQuotedAtom(Pos, ""), groupColumnsStruct)))));
+ }
+ options = L(options, Q(Y(Q("divePrefix"), Q(members))));
+ } else {
+ terms = L(terms, Y("let", "res", "row"));
+ }
+ sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, ""), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
+ } else { // explicit term list: Columns.List runs parallel to Terms
+ YQL_ENSURE(!Columns.List.empty());
+ YQL_ENSURE(Columns.List.size() == Terms.size());
+
+ TVector<TString> coalesceLabels;
+ bool multipleQualifiedAll = false;
+
+ if (isJoin && ctx.SimpleColumns) {
+ THashSet<TString> starTerms; // source names used in qualified asterisks (e.g. a.*)
+ for (auto& term: Terms) {
+ if (term->IsAsterisk()) {
+ auto sourceName = term->GetSourceName();
+ YQL_ENSURE(*sourceName && !sourceName->empty());
+ YQL_ENSURE(Columns.QualifiedAll);
+ starTerms.insert(*sourceName);
+ }
+ }
+
+ TVector<TString> matched;
+ TVector<TString> unmatched;
+ for (auto& label : Source->GetJoin()->GetJoinLabels()) {
+ if (starTerms.contains(label)) {
+ matched.push_back(label);
+ } else {
+ unmatched.push_back(label);
+ }
+ }
+
+ coalesceLabels.insert(coalesceLabels.end(), matched.begin(), matched.end()); // labels named in asterisks come first, the rest follow
+ coalesceLabels.insert(coalesceLabels.end(), unmatched.begin(), unmatched.end());
+
+ multipleQualifiedAll = starTerms.size() > 1;
+ }
+
+ auto column = Columns.List.begin();
+ auto isNamedColumn = Columns.NamedColumns.begin(); // advanced in lockstep with `column` and the term loop below
+ for (auto& term: Terms) {
+ auto sourceName = term->GetSourceName();
+ if (!term->IsAsterisk()) {
+ auto body = Y();
+ body = L(body, Y("let", "res", term));
+ TPosition lambdaPos = Pos;
+ TPosition aliasPos = Pos;
+ if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
+ // TODO: recanonize for positions below
+ lambdaPos = term->GetPos();
+ aliasPos = term->GetLabelPos() ? *term->GetLabelPos() : lambdaPos;
+ }
+ auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *isNamedColumn ? *column : ""), BuildLambda(lambdaPos, Y("row"), body, "res")); // unnamed columns get an empty name plus the "autoName" option below
+ if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
+ projectItem = L(projectItem, Q(Y(Q(Y(Q("warnShadow"))))));
+ }
+ if (!*isNamedColumn) {
+ projectItem = L(projectItem, Q(Y(Q(Y(Q("autoName"))))));
+ }
+ sqlProjectArgs = L(sqlProjectArgs, projectItem);
+ } else { // qualified asterisk term
+ auto terms = PrepareWithout(Y());
+ auto options = Y();
+ if (ctx.SimpleColumns && !isJoin) {
+ terms = L(terms, Y("let", "res", "row"));
+ } else {
+ terms = PrepareJoinCoalesce(ctx, terms, multipleQualifiedAll, coalesceLabels);
+
+ auto members = isJoin ? Y() : Y("FlattenMembers");
+ if (isJoin) {
+ members = L(members, BuildQuotedAtom(Pos, *sourceName + "."));
+ if (ctx.SimpleColumns) {
+ options = L(options, Q(Y(Q("divePrefix"), Q(members))));
+ }
+ members = Y(ctx.SimpleColumns ? "DivePrefixMembers" : "SelectMembers", "row", Q(members));
+ } else {
+ auto prefix = BuildQuotedAtom(Pos, ctx.SimpleColumns ? "" : *sourceName + "."); // this branch is only reached with SimpleColumns off, see the enclosing condition
+ members = L(members, Q(Y(prefix, "row")));
+ if (!ctx.SimpleColumns) {
+ options = L(options, Q(Y(Q("addPrefix"), prefix)));
+ }
+ }
+
+ terms = L(terms, Y("let", "res", members));
+ }
+ sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, *sourceName), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
+ }
+ ++column;
+ ++isNamedColumn;
+ }
+ }
+
+ for (const auto& [columnName, column]: ExtraSortColumns) { // columns needed only by ORDER BY (recorded in AddColumn); BuildSort removes them after sorting
+ auto body = Y();
+ body = L(body, Y("let", "res", column));
+ TPosition pos = column->GetPos();
+ auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res"));
+ sqlProjectArgs = L(sqlProjectArgs, projectItem);
+ }
+
+ auto block(Y(Y("let", "projectCoreType", Y("TypeOf", "core"))));
+ block = L(block, Y("let", "core", Y(ordered ? "OrderedSqlProject" : "SqlProject", "core", Q(sqlProjectArgs))));
+ if (!(UniqueSets.empty() && DistinctSets.empty())) { // unique/distinct hints are attached after system members are removed
+ block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
+ const auto MakeUniqueHint = [this](INode::TPtr& block, const TColumnsSets& sets, bool distinct) { // the `block` parameter shadows the outer variable; the outer one is passed in by reference
+ if (!sets.empty()) {
+ auto assume = Y(distinct ? "AssumeDistinctHint" : "AssumeUniqueHint", "core");
+ if (!sets.front().empty()) { // if the first set is empty the hint is emitted without column lists
+ for (const auto& columns : sets) {
+ auto set = Y();
+ for (const auto& column : columns) {
+ set = L(set, Q(column));
+ }
+
+ assume = L(assume, Q(set));
+ }
+ }
+ block = L(block, Y("let", "core", assume));
+ }
+ };
+
+ MakeUniqueHint(block, DistinctSets, true);
+ MakeUniqueHint(block, UniqueSets, false);
+ }
+
+ return Y("block", Q(L(block, Y("return", "core"))));
+ }
+
+private:
+ TSourcePtr Source;
+ TVector<TNodePtr> GroupByExpr;
+ TVector<TNodePtr> DistinctAggrExpr;
+ TVector<TNodePtr> GroupBy;
+ bool AssumeSorted = false;
+ bool CompactGroupBy = false;
+ TString GroupBySuffix;
+ TVector<TSortSpecificationPtr> OrderBy;
+ TNodePtr Having;
+ TWinSpecs WinSpecs;
+ TNodePtr Flatten;
+ TNodePtr PreFlattenMap;
+ TNodePtr PreaggregatedMap;
+ TNodePtr PrewindowMap;
+ TNodePtr Aggregate;
+ TNodePtr CalcOverWindow;
+ TNodePtr CompositeTerms;
+ TVector<TNodePtr> Terms;
+ TVector<TNodePtr> Without;
+ const bool Distinct;
+ bool OrderByInit = false;
+ TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
+ const bool SelectStream;
+ const TWriteSettings Settings;
+ const TColumnsSets UniqueSets, DistinctSets;
+ TMap<TString, TNodePtr> ExtraSortColumns;
+};
+
+class TProcessSource: public IRealSource {
+public:
+ TProcessSource( // Stores the parts of a PROCESS statement; the constructor body is empty, initialization is done in DoInit().
+ TPosition pos,
+ TSourcePtr source,
+ TNodePtr with,
+ bool withExtFunction,
+ TVector<TNodePtr>&& terms,
+ bool listCall,
+ bool processStream,
+ const TWriteSettings& settings,
+ const TVector<TSortSpecificationPtr>& assumeOrderBy
+ )
+ : IRealSource(pos)
+ , Source(std::move(source))
+ , With(with)
+ , WithExtFunction(withExtFunction)
+ , Terms(std::move(terms)) // terms are moved in, not copied
+ , ListCall(listCall)
+ , ProcessStream(processStream)
+ , Settings(settings)
+ , AssumeOrderBy(assumeOrderBy)
+ {
+ }
+
+ void GetInputTables(TTableList& tableList) const override { // Appends the wrapped source's input tables, then whatever the ISource base implementation adds.
+ Source->GetInputTables(tableList);
+ ISource::GetInputTables(tableList);
+ }
+
+ bool DoInit(TContext& ctx, ISource* initSrc) override { // Initializes the wrapped source, the USING expression (With) and its argument terms, collapses Terms into a single call node, then initializes ASSUME ORDER BY. Returns false on any error.
+ if (AsInner) {
+ Source->UseAsInner();
+ }
+
+ if (!Source->Init(ctx, initSrc)) {
+ return false;
+ }
+
+ if (ProcessStream && !Source->IsStream()) { // PROCESS STREAM is only accepted over a streaming source
+ ctx.Error(Pos) << "PROCESS STREAM is unsupported for non-streaming sources";
+ return false;
+ }
+
+ auto src = Source.Get();
+ if (!With) { // no USING expression: pass all columns through (see Build())
+ src->AllColumns();
+ Columns.SetAll();
+ src->FinishColumns();
+ return true;
+ }
+
+ /// grouped expressions are available in filters
+ if (!Source->InitFilters(ctx)) {
+ return false;
+ }
+
+ TSourcePtr fakeSource = nullptr;
+ if (ListCall && !WithExtFunction) { // list call without external function: expressions are initialized against a fake source and all real columns are requested
+ fakeSource = BuildFakeSource(src->GetPos());
+ src->AllColumns();
+ }
+
+ auto processSource = fakeSource != nullptr ? fakeSource.Get() : src;
+ Y_DEBUG_ABORT_UNLESS(processSource != nullptr);
+ if (!With->Init(ctx, processSource)) {
+ return false;
+ }
+ if (With->GetLabel().empty()) {
+ Columns.SetAll();
+ } else {
+ if (ListCall) {
+ ctx.Error(With->GetPos()) << "Label is not allowed to use with TableRows()";
+ return false;
+ }
+ Columns.Add(&With->GetLabel(), false); // a labeled USING expression yields a single column named by the label
+ }
+
+ bool hasError = false;
+
+ TNodePtr produce;
+ if (WithExtFunction) {
+ produce = Y(); // external function: only the terms are collected here; With is applied in Build()
+ } else {
+ TString processCall = (ListCall ? "SqlProcess" : "Apply");
+ produce = Y(processCall, With);
+ }
+ TMaybe<ui32> listPosIndex;
+ ui32 termIndex = 0;
+ for (auto& term: Terms) {
+ if (!term->GetLabel().empty()) {
+ ctx.Error(term->GetPos()) << "Labels are not allowed for PROCESS terms";
+ hasError = true;
+ continue;
+ }
+
+ if (!term->Init(ctx, processSource)) {
+ hasError = true;
+ continue;
+ }
+
+ if (ListCall) {
+ if (auto atom = dynamic_cast<TTableRows*>(term.Get())) { // NOTE(review): `atom` is never read; only the success of the cast matters
+ listPosIndex = termIndex; // index of the (last) TableRows() argument among the accepted terms
+ }
+ }
+ ++termIndex;
+
+ produce = L(produce, term);
+ }
+
+ if (hasError) {
+ return false;
+ }
+
+ if (ListCall && !WithExtFunction) {
+ YQL_ENSURE(listPosIndex.Defined());
+ produce = L(produce, Q(ToString(*listPosIndex))); // the TableRows() position is appended as the last call argument
+ }
+
+ if (!produce->Init(ctx, src)) {
+ hasError = true;
+ }
+
+ if (!(WithExtFunction && Terms.empty())) { // replace Terms with the single combined node, unless an external function was given no terms
+ TVector<TNodePtr>(1, produce).swap(Terms);
+ }
+
+ src->FinishColumns();
+
+ const auto label = GetLabel();
+ for (const auto& sortSpec: AssumeOrderBy) { // sort expressions are initialized against this source while it temporarily carries the wrapped source's label
+ auto& expr = sortSpec->OrderExpr;
+ SetLabel(Source->GetLabel());
+ if (!expr->Init(ctx, this)) {
+ hasError = true;
+ continue;
+ }
+ if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
+ hasError = true;
+ continue;
+ }
+ }
+ SetLabel(label); // restore the own label
+
+ return !hasError;
+ }
+
+ TNodePtr Build(TContext& ctx) override { // Builds the expression for PROCESS; returns null if the wrapped source cannot be built.
+ auto input = Source->Build(ctx);
+ if (!input) {
+ return nullptr;
+ }
+
+ if (!With) { // no USING expression: the input is returned as-is, minus system members when system columns are enabled
+ auto res = input;
+ if (ctx.EnableSystemColumns) {
+ res = Y("RemoveSystemMembers", res);
+ }
+
+ return res;
+ }
+
+ TString inputLabel = ListCall ? "inputRowsList" : "core";
+
+ auto block(Y(Y("let", inputLabel, input)));
+
+ auto filter = Source->BuildFilter(ctx, inputLabel);
+ if (filter) {
+ block = L(block, Y("let", inputLabel, filter));
+ }
+
+ if (WithExtFunction) { // external function: optionally map rows through Terms[0], then Apply the function to the whole input
+ auto preTransform = Y("RemoveSystemMembers", inputLabel);
+ if (Terms.size() > 0) {
+ preTransform = Y("Map", preTransform, BuildLambda(Pos, Y("row"), Q(Terms[0])));
+ }
+ block = L(block, Y("let", inputLabel, preTransform));
+ block = L(block, Y("let", "transform", With));
+ block = L(block, Y("let", "core", Y("Apply", "transform", inputLabel)));
+ } else if (ListCall) { // list call: Terms[0] is the combined call node prepared by DoInit()
+ block = L(block, Y("let", "core", Terms[0]));
+ } else { // per-row call
+ auto terms = BuildColumnsTerms(ctx);
+ block = L(block, Y("let", "core", Y(ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), terms, "res")))); // NOTE(review): "Ordered" variant is chosen when UseUnordered() is true; confirm the polarity is intended
+ }
+ block = L(block, Y("let", "core", Y("AutoDemux", Y("PersistableRepr", "core"))));
+ return Y("block", Q(L(block, Y("return", "core"))));
+ }
+
+ /// Builds the "let" statement applying ASSUME ORDER BY to `label`,
+ /// or returns null when no ASSUME ORDER BY was specified.
+ TNodePtr BuildSort(TContext& ctx, const TString& label) override {
+     Y_UNUSED(ctx);
+     if (!AssumeOrderBy.empty()) {
+         return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
+     }
+     return nullptr;
+ }
+
+ /// Reports the ordering of the result: Passthrough without a USING expression,
+ /// otherwise Assume when ASSUME ORDER BY is present and None when it is not.
+ EOrderKind GetOrderKind() const override {
+     if (With) {
+         return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
+     }
+     return EOrderKind::Passthrough;
+ }
+
+ bool IsSelect() const override { // PROCESS is never reported as a SELECT.
+ return false;
+ }
+
+ bool HasSelectResult() const override { // A result is produced unless Settings.Discard is set.
+ return !Settings.Discard;
+ }
+
+ bool IsStream() const override { // Streaming-ness is inherited from the wrapped source.
+ return Source->IsStream();
+ }
+
+ TWriteSettings GetWriteSettings() const override { // Returns a copy of the write settings given at construction.
+ return Settings;
+ }
+
+ TNodePtr DoClone() const final { // Builds a new TProcessSource from a cloned source, USING expression, terms and sort specifications.
+ return new TProcessSource(Pos, Source->CloneSource(), SafeClone(With), WithExtFunction,
+ CloneContainer(Terms), ListCall, ProcessStream, Settings, CloneContainer(AssumeOrderBy)); // NOTE(review): after DoInit() Terms may hold the single combined call node rather than the original terms; confirm clones are only taken before initialization
+ }
+
+private:
+ /// Builds the lambda body for the per-row PROCESS call. With all columns selected the
+ /// single term is wrapped in "ToSequence"; otherwise it becomes the only member of a
+ /// struct named after the first column, wrapped in "Just".
+ TNodePtr BuildColumnsTerms(TContext& ctx) {
+     Y_UNUSED(ctx);
+     Y_DEBUG_ABORT_UNLESS(Terms.size() == 1);
+     if (Columns.All) {
+         return Y(Y("let", "res", Y("ToSequence", Terms.front())));
+     }
+
+     Y_DEBUG_ABORT_UNLESS(Columns.List.size() == Terms.size());
+     auto structMember = Q(Y(BuildQuotedAtom(Pos, Columns.List.front()), Terms.front()));
+     auto structNode = L(Y("AsStructUnordered"), structMember);
+     TNodePtr body = L(Y(), Y("let", "res", structNode));
+     body = L(body, Y("let", "res", Y("Just", "res")));
+     return body;
+ }
+
+private:
+ TSourcePtr Source;
+ TNodePtr With;
+ const bool WithExtFunction;
+ TVector<TNodePtr> Terms;
+ const bool ListCall;
+ const bool ProcessStream;
+ const TWriteSettings Settings;
+ TVector<TSortSpecificationPtr> AssumeOrderBy;
+};
+
+TSourcePtr BuildProcess( // Factory for TProcessSource: forwards every argument to the constructor (source and terms are moved).
+ TPosition pos,
+ TSourcePtr source,
+ TNodePtr with,
+ bool withExtFunction,
+ TVector<TNodePtr>&& terms,
+ bool listCall,
+ bool processStream,
+ const TWriteSettings& settings,
+ const TVector<TSortSpecificationPtr>& assumeOrderBy
+) {
+ return new TProcessSource(pos, std::move(source), with, withExtFunction, std::move(terms), listCall, processStream, settings, assumeOrderBy);
+}
+
+class TNestedProxySource: public IProxySource {
+public:
+ TNestedProxySource(TPosition pos, const TVector<TNodePtr>& groupBy, TSourcePtr source) // Proxy over an owned source: the base class receives the raw pointer and Holder takes the smart pointer (presumably to keep the source alive); no composite select is attached.
+ : IProxySource(pos, source.Get())
+ , CompositeSelect(nullptr)
+ , Holder(std::move(source))
+ , GroupBy(groupBy)
+ {}
+
+ TNestedProxySource(TCompositeSelect* compositeSelect, const TVector<TNodePtr>& groupBy) // Proxy over the real source of a composite select; position is taken from the composite select and Holder is not set in this overload.
+ : IProxySource(compositeSelect->GetPos(), compositeSelect->RealSource())
+ , CompositeSelect(compositeSelect)
+ , GroupBy(groupBy)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override { // Initialization is delegated to the proxied source.
+ return Source->Init(ctx, src);
+ }
+
+ /// With a composite select attached, the input is referenced by the atom "composite";
+ /// otherwise the proxied source is built.
+ TNodePtr Build(TContext& ctx) override {
+     if (CompositeSelect) {
+         return BuildAtom(Pos, "composite", TNodeFlags::Default);
+     }
+     return Source->Build(ctx);
+ }
+
+ /// Filters are initialized through the proxied source, except when a composite select
+ /// is attached: then nothing is done and success is reported.
+ bool InitFilters(TContext& ctx) override {
+     if (CompositeSelect) {
+         return true;
+     }
+     return Source->InitFilters(ctx);
+ }
+
+ TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
+ return CompositeSelect ? nullptr : Source->BuildFilter(ctx, label);
+ }
+
+ IJoin* GetJoin() override {
+ return Source->GetJoin();
+ }
+
+ bool IsCompositeSource() const override {
+ return true;
+ }
+
+ ISource* GetCompositeSource() override {
+ return CompositeSelect;
+ }
+
+ bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& hintColumn) override {
+ Y_UNUSED(ctx);
+ hintColumn = TStringBuilder() << "GroupingHint" << Hints.size();
+ ui64 hint = 0;
+ if (GroupByColumns.empty()) {
+ const bool isJoin = GetJoin();
+ for (const auto& groupByNode: GroupBy) {
+ auto namePtr = groupByNode->GetColumnName();
+ YQL_ENSURE(namePtr);
+ TString column = *namePtr;
+ if (isJoin) {
+ auto sourceNamePtr = groupByNode->GetSourceName();
+ if (sourceNamePtr && !sourceNamePtr->empty()) {
+ column = DotJoin(*sourceNamePtr, column);
+ }
+ }
+ GroupByColumns.insert(column);
+ }
+ }
+ for (const auto& column: columns) {
+ hint <<= 1;
+ if (!GroupByColumns.contains(column)) {
+ hint += 1;
+ }
+ }
+ Hints.push_back(hint);
+ return true;
+ }
+
+ size_t GetGroupingColumnsCount() const override {
+ return Hints.size();
+ }
+
+ TNodePtr BuildGroupingColumns(const TString& label) override {
+ if (Hints.empty()) {
+ return nullptr;
+ }
+
+ auto body = Y();
+ for (size_t i = 0; i < Hints.size(); ++i) {
+ TString hintColumn = TStringBuilder() << "GroupingHint" << i;
+ TString hintValue = ToString(Hints[i]);
+ body = L(body, Y("let", "row", Y("AddMember", "row", Q(hintColumn), Y("Uint64", Q(hintValue)))));
+ }
+ return Y("Map", label, BuildLambda(Pos, Y("row"), body, "row"));
+ }
+
+
+ void FinishColumns() override {
+ Source->FinishColumns();
+ }
+
+ TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+ if (const TString* columnName = column.GetColumnName()) {
+ if (columnName && IsExprAlias(*columnName)) {
+ return true;
+ }
+ }
+ return Source->AddColumn(ctx, column);
+ }
+
+ TPtr DoClone() const final {
+ YQL_ENSURE(Hints.empty());
+ return Holder.Get() ? new TNestedProxySource(Pos, CloneContainer(GroupBy), Holder->CloneSource()) :
+ new TNestedProxySource(CompositeSelect, CloneContainer(GroupBy));
+ }
+
+private:
+ TCompositeSelect* CompositeSelect;
+ TSourcePtr Holder;
+ TVector<TNodePtr> GroupBy;
+ mutable TSet<TString> GroupByColumns;
+ mutable TVector<ui64> Hints;
+};
+
+
+namespace {
+// Builds the core SELECT source for the given GROUP BY specification.
+//  * No grouping, or the first group-by node has no content list: a single TSelectCore.
+//  * Exactly one group-by node with a content list: the source is wrapped into a
+//    TNestedProxySource and the function recurses with the unpacked content list.
+//  * Several group-by nodes: one TSelectCore per node is created over a shared
+//    TCompositeSelect; returns nullptr (after reporting an error) when the total number
+//    of grouping columns exceeds ctx.PragmaGroupByLimit.
+TSourcePtr DoBuildSelectCore(
+ TContext& ctx,
+ TPosition pos,
+ TSourcePtr originalSource,
+ TSourcePtr source,
+ const TVector<TNodePtr>& groupByExpr,
+ const TVector<TNodePtr>& groupBy,
+ bool compactGroupBy,
+ const TString& groupBySuffix,
+ bool assumeSorted,
+ const TVector<TSortSpecificationPtr>& orderBy,
+ TNodePtr having,
+ TWinSpecs&& winSpecs,
+ TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
+ TVector<TNodePtr>&& terms,
+ bool distinct,
+ TVector<TNodePtr>&& without,
+ bool selectStream,
+ const TWriteSettings& settings,
+ TColumnsSets&& uniqueSets,
+ TColumnsSets&& distinctSets
+) {
+ if (groupBy.empty() || !groupBy.front()->ContentListPtr()) {
+ return new TSelectCore(pos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted,
+ orderBy, having, winSpecs, legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
+ }
+ if (groupBy.size() == 1) {
+ /// There is actually little point in using a grouping function in this case (the result is always 0)
+ auto contentPtr = groupBy.front()->ContentListPtr();
+ source = new TNestedProxySource(pos, *contentPtr, source);
+ return DoBuildSelectCore(ctx, pos, originalSource, source, groupByExpr, *contentPtr, compactGroupBy, groupBySuffix,
+ assumeSorted, orderBy, having, std::move(winSpecs),
+ legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
+ }
+ /// \todo some smart merge logic, generalize common part of grouping (expr, flatten, etc)?
+ TIntrusivePtr<TCompositeSelect> compositeSelect = new TCompositeSelect(pos, std::move(source), originalSource->CloneSource(), settings);
+ size_t totalGroups = 0;
+ TVector<TSourcePtr> subselects;
+ TVector<TNodePtr> groupingCols;
+ for (auto& grouping: groupBy) {
+ auto contentPtr = grouping->ContentListPtr();
+ // A node without a content list is treated as a one-element list holding the node itself.
+ TVector<TNodePtr> cache(1, nullptr);
+ if (!contentPtr) {
+ cache[0] = grouping;
+ contentPtr = &cache;
+ }
+ groupingCols.insert(groupingCols.end(), contentPtr->cbegin(), contentPtr->cend());
+ TSourcePtr proxySource = new TNestedProxySource(compositeSelect.Get(), CloneContainer(*contentPtr));
+ if (!subselects.empty()) {
+ /// Every subselect after the first one gets its own clone of the terms
+ TVector<TNodePtr> termsCopy;
+ for (const auto& term: terms) {
+ termsCopy.emplace_back(term->Clone());
+ }
+ std::swap(terms, termsCopy);
+ }
+ totalGroups += contentPtr->size();
+ TSelectCore* selectCore = new TSelectCore(pos, std::move(proxySource), CloneContainer(groupByExpr),
+ CloneContainer(*contentPtr), compactGroupBy, groupBySuffix, assumeSorted, orderBy, SafeClone(having), CloneContainer(winSpecs),
+ legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, TColumnsSets(uniqueSets), TColumnsSets(distinctSets));
+ subselects.emplace_back(selectCore);
+ }
+ // The limit is checked only after all subselects have been created.
+ if (totalGroups > ctx.PragmaGroupByLimit) {
+ ctx.Error(pos) << "Unable to GROUP BY more than " << ctx.PragmaGroupByLimit << " groups, you try use " << totalGroups << " groups";
+ return nullptr;
+ }
+ compositeSelect->SetSubselects(std::move(subselects), std::move(groupingCols), CloneContainer(groupByExpr));
+ return compositeSelect;
+}
+
+}
+
+// Public entry point: forwards to DoBuildSelectCore, passing `source` both as the
+// original source and as the source to build upon.
+TSourcePtr BuildSelectCore(
+ TContext& ctx,
+ TPosition pos,
+ TSourcePtr source,
+ const TVector<TNodePtr>& groupByExpr,
+ const TVector<TNodePtr>& groupBy,
+ bool compactGroupBy,
+ const TString& groupBySuffix,
+ bool assumeSorted,
+ const TVector<TSortSpecificationPtr>& orderBy,
+ TNodePtr having,
+ TWinSpecs&& winSpecs,
+ TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
+ TVector<TNodePtr>&& terms,
+ bool distinct,
+ TVector<TNodePtr>&& without,
+ bool selectStream,
+ const TWriteSettings& settings,
+ TColumnsSets&& uniqueSets,
+ TColumnsSets&& distinctSets
+)
+{
+ return DoBuildSelectCore(ctx, pos, source, source, groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy,
+ having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
+}
+
+/// Source node that combines several sources with Union / UnionAll
+/// (or their *Positional variants when ctx.PositionalUnionAll is set).
+class TUnion: public IRealSource {
+public:
+    TUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings)
+        : IRealSource(pos)
+        , Sources(std::move(sources))
+        , QuantifierAll(quantifierAll)
+        , Settings(settings)
+    {
+    }
+
+    const TColumns* GetColumns() const override {
+        return IRealSource::GetColumns();
+    }
+
+    /// Collects input tables of every member source, then those of the base ISource.
+    void GetInputTables(TTableList& tableList) const override {
+        for (const auto& member : Sources) {
+            member->GetInputTables(tableList);
+        }
+
+        ISource::GetInputTables(tableList);
+    }
+
+    /// Initializes every member as an inner source and merges their columns.
+    /// With ctx.PositionalUnionAll only the columns of the first member are merged.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        bool columnsTaken = false;
+        for (auto& member : Sources) {
+            member->UseAsInner();
+            if (!member->Init(ctx, src)) {
+                return false;
+            }
+            if (ctx.PositionalUnionAll && columnsTaken) {
+                continue;
+            }
+            const auto memberColumns = member->GetColumns();
+            Y_DEBUG_ABORT_UNLESS(memberColumns);
+            Columns.Merge(*memberColumns);
+            columnsTaken = true;
+        }
+        return true;
+    }
+
+    /// Builds the union callable with every member's built node as an argument;
+    /// returns nullptr as soon as one member fails to build.
+    TNodePtr Build(TContext& ctx) override {
+        const char* callable = nullptr;
+        if (QuantifierAll) {
+            callable = ctx.PositionalUnionAll ? "UnionAllPositional" : "UnionAll";
+        } else {
+            callable = ctx.PositionalUnionAll ? "UnionPositional" : "Union";
+        }
+
+        TPtr res = Y(callable);
+        for (auto& member : Sources) {
+            auto input = member->Build(ctx);
+            if (!input) {
+                return nullptr;
+            }
+            res->Add(input);
+        }
+        return res;
+    }
+
+    /// The union is a stream only when every member is a stream.
+    bool IsStream() const override {
+        for (const auto& member : Sources) {
+            if (!member->IsStream()) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    TNodePtr DoClone() const final {
+        return MakeIntrusive<TUnion>(Pos, CloneContainer(Sources), QuantifierAll, Settings);
+    }
+
+    bool IsSelect() const override {
+        return true;
+    }
+
+    bool HasSelectResult() const override {
+        return !Settings.Discard;
+    }
+
+    TWriteSettings GetWriteSettings() const override {
+        return Settings;
+    }
+
+private:
+    TVector<TSourcePtr> Sources;
+    bool QuantifierAll;
+    const TWriteSettings Settings;
+};
+
+// Creates a TUnion over `sources` (moved into the node) and returns it as a TSourcePtr.
+TSourcePtr BuildUnion(
+ TPosition pos,
+ TVector<TSourcePtr>&& sources,
+ bool quantifierAll,
+ const TWriteSettings& settings
+) {
+ return new TUnion(pos, std::move(sources), quantifierAll, settings);
+}
+
+// Proxy over `origSource` that remembers a window name and routes window-related
+// registrations (aggregations and functions over window) to the original source
+// together with that name. All other overridden calls are forwarded unchanged.
+class TOverWindowSource: public IProxySource {
+public:
+ TOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource)
+ : IProxySource(pos, origSource)
+ , WindowName(windowName)
+ {
+ Source->SetLabel(origSource->GetLabel());
+ }
+
+ TString MakeLocalName(const TString& name) override {
+ return Source->MakeLocalName(name);
+ }
+
+ void AddTmpWindowColumn(const TString& column) override {
+ return Source->AddTmpWindowColumn(column);
+ }
+
+ // Aggregations marked as over-window go to AddAggregationOverWindow with WindowName;
+ // any other aggregation is registered on the source as a regular one.
+ bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
+ if (aggr->IsOverWindow() || aggr->IsOverWindowDistinct()) {
+ return Source->AddAggregationOverWindow(ctx, WindowName, aggr);
+ }
+ return Source->AddAggregation(ctx, aggr);
+ }
+
+ bool AddFuncOverWindow(TContext& ctx, TNodePtr expr) override {
+ return Source->AddFuncOverWindow(ctx, WindowName, expr);
+ }
+
+ bool IsOverWindowSource() const override {
+ return true;
+ }
+
+ TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
+ return Source->AddColumn(ctx, column);
+ }
+
+ // This proxy is never built itself; calling Build aborts.
+ TNodePtr Build(TContext& ctx) override {
+ Y_UNUSED(ctx);
+ Y_ABORT("Unexpected call");
+ }
+
+ const TString* GetWindowName() const override {
+ return &WindowName;
+ }
+
+ TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
+ return Source->FindWindowSpecification(ctx, windowName);
+ }
+
+ TNodePtr GetSessionWindowSpec() const override {
+ return Source->GetSessionWindowSpec();
+ }
+
+ // Cloning is not supported: an empty pointer is returned.
+ TNodePtr DoClone() const final {
+ return {};
+ }
+
+private:
+ const TString WindowName;
+};
+
+// Creates a TOverWindowSource proxy over `origSource` for the window `windowName`.
+TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource) {
+ return new TOverWindowSource(pos, windowName, origSource);
+}
+
+// AST list node of the form (let select (Take <select> <take>)), where <select> is
+// optionally wrapped in Skip. Null skip/take values are coalesced to 0 and to the
+// maximum ui64 value respectively.
+class TSkipTakeNode final: public TAstListNode {
+public:
+ TSkipTakeNode(TPosition pos, const TNodePtr& skip, const TNodePtr& take)
+ : TAstListNode(pos), IsSkipProvided_(!!skip)
+ {
+ TNodePtr select(AstNode("select"));
+ if (skip) {
+ select = Y("Skip", select, Y("Coalesce", skip, Y("Uint64", Q("0"))));
+ }
+ // Textual form of the largest ui64, computed once and used as the default Take limit.
+ static const TString uiMax = ::ToString(std::numeric_limits<ui64>::max());
+ Add("let", "select", Y("Take", select, Y("Coalesce", take, Y("Uint64", Q(uiMax)))));
+ }
+
+ // Cloning is not supported: an empty pointer is returned.
+ TPtr DoClone() const final {
+ return {};
+ }
+
+ // True when a skip expression was passed to the constructor.
+ bool HasSkip() const {
+ return IsSkipProvided_;
+ }
+private:
+ const bool IsSkipProvided_;
+};
+
+// Creates a TSkipTakeNode; `skip` may be null, in which case no Skip call is emitted.
+TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take) {
+ return new TSkipTakeNode(pos, skip, take);
+}
+
+// Proxy around a select source that adds sorting, optional skip/take, optional
+// sampling and column cleanup on top of the source's own Build() result.
+class TSelect: public IProxySource {
+public:
+ TSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake)
+ : IProxySource(pos, source.Get())
+ , Source(std::move(source))
+ , SkipTake(skipTake)
+ {}
+
+ // Propagates label and inner-ness to the wrapped source, initializes it, then
+ // initializes SkipTake against a fake source. Emits warnings for an ignored
+ // ORDER BY and for OFFSET used without sorting.
+ bool DoInit(TContext& ctx, ISource* src) override {
+ Source->SetLabel(Label);
+ if (AsInner) {
+ Source->UseAsInner();
+ }
+
+ if (IgnoreSort()) {
+ Source->DisableSort();
+ ctx.Warning(Source->GetPos(), TIssuesIds::YQL_ORDER_BY_WITHOUT_LIMIT_IN_SUBQUERY) << "ORDER BY without LIMIT in subquery will be ignored";
+ }
+
+ if (!Source->Init(ctx, src)) {
+ return false;
+ }
+ src = Source.Get();
+ if (SkipTake) {
+ FakeSource = BuildFakeSource(SkipTake->GetPos());
+ if (!SkipTake->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+ if (SkipTake->HasSkip() && EOrderKind::Sort != Source->GetOrderKind()) {
+ ctx.Warning(Source->GetPos(), TIssuesIds::YQL_OFFSET_WITHOUT_SORT) << "LIMIT with OFFSET without ORDER BY may provide different results from run to run";
+ }
+ }
+
+ return true;
+ }
+
+ // Produces a block that binds "select" to the source result and then applies, in
+ // order: sort (unless ignored), skip/take, sampling, column cleanup; returns "select".
+ TNodePtr Build(TContext& ctx) override {
+ auto input = Source->Build(ctx);
+ if (!input) {
+ return nullptr;
+ }
+ const auto label = "select";
+ auto block(Y(Y("let", label, input)));
+
+ auto sortNode = Source->BuildSort(ctx, label);
+ if (sortNode && !IgnoreSort()) {
+ block = L(block, sortNode);
+ }
+
+ if (SkipTake) {
+ block = L(block, SkipTake);
+ }
+
+ TNodePtr sample;
+ if (!BuildSamplingLambda(sample)) {
+ return nullptr;
+ } else if (sample) {
+ block = L(block, Y("let", "select", Y("OrderedFlatMap", "select", sample)));
+ }
+
+ if (auto removeNode = Source->BuildCleanupColumns(ctx, label)) {
+ block = L(block, removeNode);
+ }
+
+ block = L(block, Y("return", label));
+ return Y("block", Q(block));
+ }
+
+ // Rejects System mode and a sampling seed with an error; otherwise stores the rate
+ // via SetSamplingRate.
+ bool SetSamplingOptions(
+ TContext& ctx,
+ TPosition pos,
+ ESampleClause sampleClause,
+ ESampleMode mode,
+ TNodePtr samplingRate,
+ TNodePtr samplingSeed) override {
+ if (mode == ESampleMode::System) {
+ ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
+ return false;
+ }
+ if (samplingSeed) {
+ ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
+ return false;
+ }
+ return SetSamplingRate(ctx, sampleClause, samplingRate);
+ }
+
+ bool IsSelect() const override {
+ return Source->IsSelect();
+ }
+
+ bool HasSelectResult() const override {
+ return Source->HasSelectResult();
+ }
+
+ TPtr DoClone() const final {
+ return MakeIntrusive<TSelect>(Pos, Source->CloneSource(), SafeClone(SkipTake));
+ }
+protected:
+ // Sorting is dropped for an inner select that is sorted but has no skip/take.
+ bool IgnoreSort() const {
+ return AsInner && !SkipTake && EOrderKind::Sort == Source->GetOrderKind();
+ }
+
+ TSourcePtr Source;
+ TNodePtr SkipTake;
+ TSourcePtr FakeSource;
+};
+
+// Creates a TSelect wrapping `source` (moved in) with an optional skip/take node.
+TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake) {
+ return new TSelectResultNode(pos, std::move(source), writeResult, inSubquery, scoped);
+}
+
+// AST node that wraps a select source into a block which declares the input tables,
+// binds the built source to "output" and either writes it as a result (returning
+// "world") or returns "output" directly.
+class TSelectResultNode final: public TAstListNode {
+public:
+ TSelectResultNode(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
+ TScopedStatePtr scoped)
+ : TAstListNode(pos)
+ , Source(std::move(source))
+ , WriteResult(writeResult)
+ , InSubquery(inSubquery)
+ , Scoped(scoped)
+ {
+ YQL_ENSURE(Source, "Invalid source node");
+ FakeSource = BuildFakeSource(pos);
+ }
+
+ bool IsSelect() const override {
+ return true;
+ }
+
+ bool HasSelectResult() const override {
+ return Source->HasSelectResult();
+ }
+
+ // Initializes the source, assembles the result-write settings list and adds the
+ // final "block" to this node. Returns false when any nested Init or Build fails.
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!Source->Init(ctx, src)) {
+ return false;
+ }
+
+ src = Source.Get();
+ TTableList tableList;
+ Source->GetInputTables(tableList);
+
+ TNodePtr node(BuildInputTables(Pos, tableList, InSubquery, Scoped));
+ if (!node->Init(ctx, src)) {
+ return false;
+ }
+
+ auto writeSettings = src->GetWriteSettings();
+ // "ref" follows the pragma; "autoref" is the default; PragmaSampleSelect turns both off.
+ bool asRef = ctx.PragmaRefSelect;
+ bool asAutoRef = true;
+ if (ctx.PragmaSampleSelect) {
+ asRef = false;
+ asAutoRef = false;
+ }
+
+ auto settings = Y(Q(Y(Q("type"))));
+ if (writeSettings.Discard) {
+ settings = L(settings, Q(Y(Q("discard"))));
+ }
+
+ if (!writeSettings.Label.Empty()) {
+ auto labelNode = writeSettings.Label.Build();
+ // A non-literal label is wrapped in EvaluateAtom.
+ if (!writeSettings.Label.GetLiteral()) {
+ labelNode = Y("EvaluateAtom", labelNode);
+ }
+
+ if (!labelNode->Init(ctx, FakeSource.Get())) {
+ return false;
+ }
+
+ settings = L(settings, Q(Y(Q("label"), labelNode)));
+ }
+
+ if (asRef) {
+ settings = L(settings, Q(Y(Q("ref"))));
+ } else if (asAutoRef) {
+ settings = L(settings, Q(Y(Q("autoref"))));
+ }
+
+ // Explicit column list: names ending with '*' become prefix entries, named columns
+ // are emitted as atoms, the rest as "auto".
+ auto columns = Source->GetColumns();
+ if (columns && !columns->All && !(columns->QualifiedAll && ctx.SimpleColumns)) {
+ auto list = Y();
+ YQL_ENSURE(columns->List.size() == columns->NamedColumns.size());
+ for (size_t i = 0; i < columns->List.size(); ++i) {
+ auto& c = columns->List[i];
+ if (c.EndsWith('*')) {
+ list = L(list, Q(Y(Q("prefix"), BuildQuotedAtom(Pos, c.substr(0, c.size() - 1)))));
+ } else if (columns->NamedColumns[i]) {
+ list = L(list, BuildQuotedAtom(Pos, c));
+ } else {
+ list = L(list, Q(Y(Q("auto"))));
+ }
+ }
+ settings = L(settings, Q(Y(Q("columns"), Q(list))));
+ }
+
+ if (ctx.ResultRowsLimit > 0) {
+ settings = L(settings, Q(Y(Q("take"), Q(ToString(ctx.ResultRowsLimit)))));
+ }
+
+ auto output = Source->Build(ctx);
+ if (!output) {
+ return false;
+ }
+ node = L(node, Y("let", "output", output));
+ if (WriteResult || writeSettings.Discard) {
+ if (EOrderKind::None == Source->GetOrderKind() && ctx.UseUnordered(*Source)) {
+ node = L(node, Y("let", "output", Y("Unordered", "output")));
+ if (ctx.UnorderedResult) {
+ settings = L(settings, Q(Y(Q("unordered"))));
+ }
+ }
+ auto writeResult(BuildWriteResult(Pos, "output", settings));
+ if (!writeResult->Init(ctx, src)) {
+ return false;
+ }
+ node = L(node, Y("let", "world", writeResult));
+ node = L(node, Y("return", "world"));
+ } else {
+ node = L(node, Y("return", "output"));
+ }
+
+ Add("block", Q(node));
+ return true;
+ }
+
+ // Cloning is not supported: an empty pointer is returned.
+ TPtr DoClone() const final {
+ return {};
+ }
+protected:
+ TSourcePtr Source;
+
+ const bool WriteResult;
+ const bool InSubquery;
+ TScopedStatePtr Scoped;
+ TSourcePtr FakeSource;
+};
+
+// Creates a TSelectResultNode for `source` (moved in); the source must be non-null.
+TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
+ TScopedStatePtr scoped) {
+ return new TSelectResultNode(pos, std::move(source), writeResult, inSubquery, scoped);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/source.cpp b/yql/essentials/sql/v1/source.cpp
new file mode 100644
index 00000000000..4231a9d370a
--- /dev/null
+++ b/yql/essentials/sql/v1/source.cpp
@@ -0,0 +1,992 @@
+#include "source.h"
+#include "context.h"
+
+#include <yql/essentials/ast/yql_ast_escaping.h>
+#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/core/sql_types/simple_types.h>
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/parser/pg_catalog/catalog.h>
+#include <yql/essentials/utils/yql_panic.h>
+
+#include <library/cpp/containers/stack_vector/stack_vec.h>
+#include <library/cpp/charset/ci_string.h>
+#include <util/generic/hash_set.h>
+#include <util/stream/str.h>
+#include <util/string/cast.h>
+#include <util/string/escape.h>
+#include <util/string/subst.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+
+// Stores the reference name, cluster and keys as given; the service name is lower-cased.
+TTableRef::TTableRef(const TString& refName, const TString& service, const TDeferredAtom& cluster, TNodePtr keys)
+ : RefName(refName)
+ , Service(to_lower(service))
+ , Cluster(cluster)
+ , Keys(keys)
+{
+}
+
+/// Returns the table name reported by the table keys, or an empty string when
+/// the keys carry no table name. Keys must be set.
+TString TTableRef::ShortName() const {
+    Y_DEBUG_ABORT_UNLESS(Keys);
+    const auto tableName = Keys->GetTableKeys()->GetTableName();
+    return tableName ? *tableName : TString();
+}
+
+// Constructs the source at the given position; only the INode base is initialized here.
+ISource::ISource(TPosition pos)
+ : INode(pos)
+{
+}
+
+/// Out-of-line destructor with no custom cleanup.
+ISource::~ISource() = default;
+
+/// Clones this node as a source and copies over the state that Clone() does not
+/// carry: filters and named expressions are deep-cloned, flatten settings are copied.
+TSourcePtr ISource::CloneSource() const {
+    // Clone exactly once: the debug check inspects the very object that is returned
+    // (previously debug builds produced a second, throw-away clone for the check).
+    const TNodePtr cloned = Clone();
+    Y_DEBUG_ABORT_UNLESS(dynamic_cast<ISource*>(cloned.Get()), "Cloned node is no source");
+    TSourcePtr result = static_cast<ISource*>(cloned.Get());
+    for (const auto& curFilter: Filters) {
+        result->Filters.emplace_back(curFilter->Clone());
+    }
+    for (int i = 0; i < static_cast<int>(EExprSeat::Max); ++i) {
+        result->NamedExprs[i] = CloneContainer(NamedExprs[i]);
+    }
+    result->FlattenColumns = FlattenColumns;
+    result->FlattenMode = FlattenMode;
+    return result;
+}
+
+// Default implementation: a source is not fake.
+bool ISource::IsFake() const {
+ return false;
+}
+
+// Default implementation: does nothing.
+void ISource::AllColumns() {
+ return;
+}
+
+// Default implementation: no column information is available (nullptr).
+const TColumns* ISource::GetColumns() const {
+ return nullptr;
+}
+
+/// Appends the input tables of every used source to `tableList`.
+void ISource::GetInputTables(TTableList& tableList) const {
+    for (const auto& usedSource: UsedSources) {
+        usedSource->GetInputTables(tableList);
+    }
+}
+
+// Default implementation: column references are not supported. For a reliable column
+// two errors are reported (one at the source, one at the column); an empty TMaybe is
+// returned in every case.
+TMaybe<bool> ISource::AddColumn(TContext& ctx, TColumnNode& column) {
+ if (column.IsReliable()) {
+ ctx.Error(Pos) << "Source does not allow column references";
+ ctx.Error(column.GetPos()) << "Column reference " <<
+ (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)");
+ }
+ return {};
+}
+
+// Default implementation: nothing to finish.
+void ISource::FinishColumns() {
+}
+
+
+// Appends `filter` to the list of filters; always succeeds.
+bool ISource::AddFilter(TContext& ctx, TNodePtr filter) {
+ Y_UNUSED(ctx);
+ Filters.push_back(filter);
+ return true;
+}
+
+/// Registers `column` as a grouping key, preserving insertion order.
+/// Reports an error and returns false when the key is already registered.
+bool ISource::AddGroupKey(TContext& ctx, const TString& column) {
+    const bool isNewKey = GroupKeys.insert(column).second;
+    if (isNewKey) {
+        OrderedGroupKeys.push_back(column);
+        return true;
+    }
+    ctx.Error() << "Duplicate grouping column: " << column;
+    return false;
+}
+
+// Stores the compact GROUP BY flag.
+void ISource::SetCompactGroupBy(bool compactGroupBy) {
+ CompactGroupBy = compactGroupBy;
+}
+
+// Stores the GROUP BY suffix.
+void ISource::SetGroupBySuffix(const TString& suffix) {
+ GroupBySuffix = suffix;
+}
+
+/// Registers `expressions` for the given seat after validating their names.
+///
+/// For every expression the following is checked (an error is reported and false is
+/// returned on the first violation):
+///  * its alias is unique within the section (flatten sections share one alias set
+///    that persists between calls) and does not collide with a column name;
+///  * its (source-qualified) column name is unique and does not collide with an alias;
+///  * for GROUP BY, a column keeps the alias it was first registered with;
+///  * for GROUP BY, at most one session window and one hopping window is present.
+bool ISource::AddExpressions(TContext& ctx, const TVector<TNodePtr>& expressions, EExprSeat exprSeat) {
+    YQL_ENSURE(exprSeat < EExprSeat::Max);
+    THashSet<TString> names;
+    THashSet<TString> aliasSet;
+    // TODO: merge FlattenBy with FlattenByExpr
+    const bool isFlatten = (exprSeat == EExprSeat::FlattenBy || exprSeat == EExprSeat::FlattenByExpr);
+    THashSet<TString>& aliases = isFlatten ? FlattenByAliases : aliasSet;
+    for (const auto& expr: expressions) {
+        const auto& alias = expr->GetLabel();
+        const auto columnNamePtr = expr->GetColumnName();
+        if (alias) {
+            ExprAliases.insert(alias);
+            if (!aliases.emplace(alias).second) {
+                ctx.Error(expr->GetPos()) << "Duplicate alias found: " << alias << " in " << exprSeat << " section";
+                return false;
+            }
+            if (names.contains(alias)) {
+                ctx.Error(expr->GetPos()) << "Collision between alias and column name: " << alias << " in " << exprSeat << " section";
+                return false;
+            }
+        }
+        if (columnNamePtr) {
+            auto columnName = *columnNamePtr;
+            // The source name may be absent; qualify the column only when it is present and non-empty.
+            const auto sourceNamePtr = expr->GetSourceName();
+            if (sourceNamePtr && !sourceNamePtr->empty()) {
+                columnName = DotJoin(*sourceNamePtr, columnName);
+            }
+            if (!names.emplace(columnName).second) {
+                ctx.Error(expr->GetPos()) << "Duplicate column name found: " << columnName << " in " << exprSeat << " section";
+                return false;
+            }
+            if (!alias && aliases.contains(columnName)) {
+                ctx.Error(expr->GetPos()) << "Collision between alias and column name: " << columnName << " in " << exprSeat << " section";
+                return false;
+            }
+            if (alias && exprSeat == EExprSeat::GroupBy) {
+                const auto columnAlias = GroupByColumnAliases.emplace(columnName, alias);
+                const auto& oldAlias = columnAlias.first->second;
+                // A conflict is only possible when the column was already registered,
+                // i.e. when emplace did NOT insert a new entry.
+                if (!columnAlias.second && oldAlias != alias) {
+                    ctx.Error(expr->GetPos()) << "Alias for column not same, column: " << columnName <<
+                        ", exist alias: " << oldAlias << ", another alias: " << alias;
+                    return false;
+                }
+            }
+        }
+
+        if (exprSeat == EExprSeat::GroupBy) {
+            if (dynamic_cast<TSessionWindow*>(expr.Get())) {
+                if (SessionWindow) {
+                    ctx.Error(expr->GetPos()) << "Duplicate session window specification:";
+                    ctx.Error(SessionWindow->GetPos()) << "Previous session window is declared here";
+                    return false;
+                }
+                SessionWindow = expr;
+            }
+            if (dynamic_cast<THoppingWindow*>(expr.Get())) {
+                if (HoppingWindow) {
+                    ctx.Error(expr->GetPos()) << "Duplicate hopping window specification:";
+                    ctx.Error(HoppingWindow->GetPos()) << "Previous hopping window is declared here";
+                    return false;
+                }
+                HoppingWindow = expr;
+            }
+        }
+        Expressions(exprSeat).emplace_back(expr);
+    }
+    return true;
+}
+
+// Stores the flatten mode string later emitted by BuildFlattenByColumns.
+void ISource::SetFlattenByMode(const TString& mode) {
+ FlattenMode = mode;
+}
+
+// Turns the flatten-columns flag on.
+void ISource::MarkFlattenColumns() {
+ FlattenColumns = true;
+}
+
+// Returns the flatten-columns flag.
+bool ISource::IsFlattenColumns() const {
+ return FlattenColumns;
+}
+
+/// Returns `name` followed by a per-name counter (starting at 0) and advances
+/// that counter, so consecutive calls with the same name yield distinct results.
+TString ISource::MakeLocalName(const TString& name) {
+    // emplace leaves an existing counter untouched and creates a zero one otherwise.
+    auto& nextIndex = GenIndexes.emplace(name, 0).first->second;
+    TStringBuilder localName;
+    localName << name << nextIndex;
+    ++nextIndex;
+    return std::move(localName);
+}
+
+// Appends a (non-null) aggregation to the list of aggregations; always succeeds.
+bool ISource::AddAggregation(TContext& ctx, TAggregationPtr aggr) {
+ Y_UNUSED(ctx);
+ YQL_ENSURE(aggr);
+ Aggregations.push_back(aggr);
+ return true;
+}
+
+// True when at least one aggregation or one grouping key has been registered.
+bool ISource::HasAggregations() const {
+ return !Aggregations.empty() || !GroupKeys.empty();
+}
+
+/// Replaces the stored window specifications with `winSpecs`.
+void ISource::AddWindowSpecs(TWinSpecs winSpecs) {
+    // The parameter is already a private copy, so move it instead of copying again.
+    WinSpecs = std::move(winSpecs);
+}
+
+// Default implementation for sources without a window name: always fails.
+bool ISource::AddFuncOverWindow(TContext& ctx, TNodePtr expr) {
+ Y_UNUSED(ctx);
+ Y_UNUSED(expr);
+ return false;
+}
+
+// Remembers the name of a temporary window column.
+void ISource::AddTmpWindowColumn(const TString& column) {
+ TmpWindowColumns.push_back(column);
+}
+
+// Returns the temporary window columns in the order they were added.
+const TVector<TString>& ISource::GetTmpWindowColumns() const {
+ return TmpWindowColumns;
+}
+
+// Stores the legacy hopping window specification.
+void ISource::SetLegacyHoppingWindowSpec(TLegacyHoppingWindowSpecPtr spec) {
+ LegacyHoppingWindowSpec = spec;
+}
+
+// Returns the stored legacy hopping window specification.
+TLegacyHoppingWindowSpecPtr ISource::GetLegacyHoppingWindowSpec() const {
+ return LegacyHoppingWindowSpec;
+}
+
+// Returns the session window node recorded by AddExpressions (may be null).
+TNodePtr ISource::GetSessionWindowSpec() const {
+ return SessionWindow;
+}
+
+// Returns the hopping window node recorded by AddExpressions (may be null).
+TNodePtr ISource::GetHoppingWindowSpec() const {
+ return HoppingWindow;
+}
+
+/// Looks up the specification of window `windowName`.
+/// Reports an error and returns an empty pointer when the window is unknown.
+TWindowSpecificationPtr ISource::FindWindowSpecification(TContext& ctx, const TString& windowName) const {
+    const auto found = WinSpecs.find(windowName);
+    if (found != WinSpecs.end()) {
+        YQL_ENSURE(found->second);
+        return found->second;
+    }
+    ctx.Error(Pos) << "Unable to find window specification for window '" << windowName << "'";
+    return {};
+}
+
+// Mutable access to the expressions registered for `exprSeat`.
+inline TVector<TNodePtr>& ISource::Expressions(EExprSeat exprSeat) {
+ return NamedExprs[static_cast<size_t>(exprSeat)];
+}
+
+// Read-only access to the expressions registered for `exprSeat`.
+const TVector<TNodePtr>& ISource::Expressions(EExprSeat exprSeat) const {
+ return NamedExprs[static_cast<size_t>(exprSeat)];
+}
+
+/// Returns a quoted atom with the node's label or, when it has none, its column
+/// name (qualified with the source name if `withSource` is set and one exists).
+inline TNodePtr ISource::AliasOrColumn(const TNodePtr& node, bool withSource) {
+    TString name = node->GetLabel();
+    if (name.empty()) {
+        const auto columnNamePtr = node->GetColumnName();
+        YQL_ENSURE(columnNamePtr);
+        name = *columnNamePtr;
+        const auto sourceNamePtr = withSource ? node->GetSourceName() : nullptr;
+        if (sourceNamePtr) {
+            name = DotJoin(*sourceNamePtr, name);
+        }
+    }
+    return BuildQuotedAtom(node->GetPos(), name);
+}
+
+// Registers an aggregation for window `windowName`. Unless ctx.DistinctOverWindow is
+// set, a distinct aggregation is rejected with an error. Fails when the window
+// specification cannot be found.
+bool ISource::AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) {
+ if (ctx.DistinctOverWindow) {
+ YQL_ENSURE(func->IsOverWindow() || func->IsOverWindowDistinct());
+ } else {
+ YQL_ENSURE(func->IsOverWindow());
+ if (func->IsDistinct()) {
+ ctx.Error(func->GetPos()) << "Aggregation with distinct is not allowed over window: " << windowName;
+ return false;
+ }
+ }
+
+ if (!FindWindowSpecification(ctx, windowName)) {
+ return false;
+ }
+ AggregationOverWindow[windowName].emplace_back(std::move(func));
+ return true;
+}
+
+// Registers a window function for window `windowName`; fails when the window
+// specification cannot be found.
+bool ISource::AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) {
+ if (!FindWindowSpecification(ctx, windowName)) {
+ return false;
+ }
+ FuncOverWindow[windowName].emplace_back(std::move(func));
+ return true;
+}
+
+// Stores the MATCH_RECOGNIZE builder.
+void ISource::SetMatchRecognize(TMatchRecognizeBuilderPtr matchRecognize) {
+ MatchRecognizeBuilder = matchRecognize;
+}
+
+// Default implementation: not a composite source.
+bool ISource::IsCompositeSource() const {
+ return false;
+}
+
+// True when `column` has been registered as a grouping key.
+bool ISource::IsGroupByColumn(const TString& column) const {
+ return GroupKeys.contains(column);
+}
+
+// True when at least one FlattenBy expression is registered.
+bool ISource::IsFlattenByColumns() const {
+ return !Expressions(EExprSeat::FlattenBy).empty();
+}
+
+// True when at least one FlattenByExpr expression is registered.
+bool ISource::IsFlattenByExprs() const {
+ return !Expressions(EExprSeat::FlattenByExpr).empty();
+}
+
+/// True when some expression registered for `exprSeat` has a non-empty label
+/// equal to `column`.
+bool ISource::IsAlias(EExprSeat exprSeat, const TString& column) const {
+    return AnyOf(Expressions(exprSeat), [&column](const TNodePtr& exprNode) {
+        const auto& labelName = exprNode->GetLabel();
+        return !labelName.empty() && labelName == column;
+    });
+}
+
+/// True when `column` is an alias in any of the FlattenBy, FlattenByExpr, GroupBy,
+/// WindowPartitionBy or DistinctAggr sections.
+bool ISource::IsExprAlias(const TString& column) const {
+    static constexpr std::array<EExprSeat, 5> aliasSeats = {{EExprSeat::FlattenBy, EExprSeat::FlattenByExpr, EExprSeat::GroupBy,
+        EExprSeat::WindowPartitionBy, EExprSeat::DistinctAggr}};
+    return AnyOf(aliasSeats, [this, &column](EExprSeat seat) {
+        return IsAlias(seat, column);
+    });
+}
+
+/// Classifies the expressions registered for `exprSeat`:
+///  * no expressions: false for any `type`;
+///  * at least one labelled expression: true only for EExprType::WithExpression;
+///  * otherwise: true only for EExprType::ColumnOnly.
+bool ISource::IsExprSeat(EExprSeat exprSeat, EExprType type) const {
+    // Bind by reference: the list is only inspected, so copying it is pure overhead.
+    const auto& expressions = Expressions(exprSeat);
+    if (expressions.empty()) {
+        return false;
+    }
+    for (const auto& exprNode: expressions) {
+        if (exprNode->GetLabel()) {
+            return type == EExprType::WithExpression;
+        }
+    }
+    return type == EExprType::ColumnOnly;
+}
+
+/// Returns the alias registered for GROUP BY column `column`, or an empty string.
+TString ISource::GetGroupByColumnAlias(const TString& column) const {
+    const auto found = GroupByColumnAliases.find(column);
+    return found != GroupByColumnAliases.end() ? found->second : TString();
+}
+
+// Default implementation: the source has no window name (null pointer).
+const TString* ISource::GetWindowName() const {
+ return {};
+}
+
+// True when any aggregation or function over window is registered, or when some
+// window specification has its Session field set.
+bool ISource::IsCalcOverWindow() const {
+ return !AggregationOverWindow.empty() || !FuncOverWindow.empty() ||
+ AnyOf(WinSpecs, [](const auto& item) { return item.second->Session; });
+}
+
+// True when at least one window specification is stored.
+bool ISource::IsOverWindowSource() const {
+ return !WinSpecs.empty();
+}
+
+// Default implementation: not a stream.
+bool ISource::IsStream() const {
+ return false;
+}
+
+// Default implementation: no ordering.
+EOrderKind ISource::GetOrderKind() const {
+ return EOrderKind::None;
+}
+
+// Default implementation: value-initialized write settings.
+TWriteSettings ISource::GetWriteSettings() const {
+ return {};
+}
+
+// Wraps `samplingRate` into a block that validates it is within [0, 100] using two
+// Ensure calls. For ESampleClause::Sample the rate is first multiplied by 100.
+TNodePtr ISource::PrepareSamplingRate(TPosition pos, ESampleClause clause, TNodePtr samplingRate) {
+ if (ESampleClause::Sample == clause) {
+ samplingRate = Y("*", samplingRate, Y("Double", Q("100")));
+ }
+ auto ensureLow = Y("Ensure", "samplingRate", Y(">=", "samplingRate", Y("Double", Q("0"))), Y("String", BuildQuotedAtom(pos, "Expected sampling rate to be nonnegative")));
+ auto ensureHigh = Y("Ensure", "samplingRate", Y("<=", "samplingRate", Y("Double", Q("100"))), Y("String", BuildQuotedAtom(pos, "Sampling rate is over 100%")));
+
+ auto block(Y(Y("let", "samplingRate", samplingRate)));
+ block = L(block, Y("let", "samplingRate", ensureLow));
+ block = L(block, Y("let", "samplingRate", ensureHigh));
+ samplingRate = Y("block", Q(L(block, Y("return", "samplingRate"))));
+ return samplingRate;
+}
+
+
+// Default implementation: sampling is not supported; reports an error and fails.
+bool ISource::SetSamplingOptions(TContext& ctx,
+ TPosition pos,
+ ESampleClause sampleClause,
+ ESampleMode mode,
+ TNodePtr samplingRate,
+ TNodePtr samplingSeed) {
+ Y_UNUSED(pos);
+ Y_UNUSED(sampleClause);
+ Y_UNUSED(mode);
+ Y_UNUSED(samplingRate);
+ Y_UNUSED(samplingSeed);
+ ctx.Error() << "Sampling is only supported for table sources";
+ return false;
+}
+
+// Default implementation: non-empty explicit hints are rejected with an error;
+// context hints are ignored.
+bool ISource::SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) {
+ Y_UNUSED(pos);
+ Y_UNUSED(contextHints);
+ if (hints) {
+ ctx.Error() << "Explicit hints are only supported for table sources";
+ return false;
+ }
+ return true;
+}
+
+// Default implementation: grouping hints are not supported; reports an error and fails.
+bool ISource::AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& grouingColumn) {
+ Y_UNUSED(columns);
+ Y_UNUSED(grouingColumn);
+ ctx.Error() << "Source not support grouping hint";
+ return false;
+}
+
+// Default implementation: no grouping columns.
+size_t ISource::GetGroupingColumnsCount() const {
+ return 0;
+}
+
+// Returns nullptr when there are no filters; otherwise a filter call over `label`
+// whose callable name ("OrderedFilter" or "Filter") depends on ctx.UseUnordered(*this).
+TNodePtr ISource::BuildFilter(TContext& ctx, const TString& label) {
+ return Filters.empty() ? nullptr : Y(ctx.UseUnordered(*this) ? "OrderedFilter" : "Filter", label, BuildFilterLambda());
+}
+
+/// Builds a lambda over "row" that is the conjunction of all registered filters,
+/// coalesced to false. Without filters the lambda is the constant true.
+/// Every filter must already be initialized.
+TNodePtr ISource::BuildFilterLambda() {
+    TNodePtr conjunction;
+    for (const auto& condition: Filters) {
+        YQL_ENSURE(condition->HasState(ENodeState::Initialized));
+        conjunction = conjunction ? Y("And", conjunction, condition) : condition;
+    }
+    if (!conjunction) {
+        return BuildLambda(Pos, Y("row"), Y("Bool", Q("true")));
+    }
+    auto predicate = Y("Coalesce", conjunction, Y("Bool", Q("false")));
+    return BuildLambda(Pos, Y("row"), predicate);
+}
+
+/// Builds (let res (FlattenByColumns '<mode> <label> ...)) where every FlattenBy
+/// expression contributes either its column name or a (column, alias) pair.
+TNodePtr ISource::BuildFlattenByColumns(const TString& label) {
+    auto flattenCall = Y("FlattenByColumns", Q(FlattenMode), label);
+    for (const auto& column: Expressions(EExprSeat::FlattenBy)) {
+        const auto columnNamePtr = column->GetColumnName();
+        YQL_ENSURE(columnNamePtr);
+        const auto& alias = column->GetLabel();
+        auto entry = alias.empty()
+            ? Q(*columnNamePtr)
+            : Q(Y(Q(*columnNamePtr), Q(alias)));
+        flattenCall = L(flattenCall, entry);
+    }
+    return Y(Y("let", "res", flattenCall));
+}
+
+// Builds (let res (Just (FlattenStructs <label>))).
+TNodePtr ISource::BuildFlattenColumns(const TString& label) {
+ return Y(Y("let", "res", Y("Just", Y("FlattenStructs", label))));
+}
+
+namespace {
+
+/// Builds a lambda body over "row": for every expression its alias is force-removed
+/// from the row and then re-added with the expression as its value. Session and
+/// hopping window expressions are only removed. The result is wrapped in AsList.
+TNodePtr BuildLambdaBodyForExprAliases(TPosition pos, const TVector<TNodePtr>& exprs) {
+    auto row = BuildAtom(pos, "row", TNodeFlags::Default);
+    for (const auto& exprNode: exprs) {
+        const auto name = exprNode->GetLabel();
+        YQL_ENSURE(name);
+        row = row->Y("ForceRemoveMember", row, row->Q(name));
+        const bool isWindowSpec =
+            dynamic_cast<const TSessionWindow*>(exprNode.Get()) != nullptr ||
+            dynamic_cast<const THoppingWindow*>(exprNode.Get()) != nullptr;
+        if (!isWindowSpec) {
+            row = row->Y("AddMember", row, row->Q(name), exprNode);
+        }
+    }
+    return row->Y("AsList", row);
+}
+
+}
+
+// Builds the FlatMap chain that materializes GROUP BY expression aliases and
+// then DISTINCT aggregation aliases as struct members. The first stage reads
+// from "core"; a second stage, if any, reads from the first.
+// At least one of the two expression lists must be non-empty.
+TNodePtr ISource::BuildPreaggregatedMap(TContext& ctx) {
+    Y_UNUSED(ctx);
+    const auto& groupExprs = Expressions(EExprSeat::GroupBy);
+    const auto& distinctExprs = Expressions(EExprSeat::DistinctAggr);
+    YQL_ENSURE(groupExprs || distinctExprs);
+
+    TNodePtr chain;
+    const auto appendStage = [&](const TVector<TNodePtr>& aliases) {
+        const auto lambda = BuildLambda(Pos, Y("row"), BuildLambdaBodyForExprAliases(Pos, aliases));
+        chain = chain ? Y("FlatMap", chain, lambda) : Y("FlatMap", "core", lambda);
+    };
+
+    if (groupExprs) {
+        appendStage(groupExprs);
+    }
+    if (distinctExprs) {
+        appendStage(distinctExprs);
+    }
+    return chain;
+}
+
+// Builds the lambda body that materializes FLATTEN BY expression aliases.
+// Must only be called when IsFlattenByExprs() holds.
+TNodePtr ISource::BuildPreFlattenMap(TContext& ctx) {
+    Y_UNUSED(ctx);
+    YQL_ENSURE(IsFlattenByExprs());
+    const auto& flattenExprs = Expressions(EExprSeat::FlattenByExpr);
+    return BuildLambdaBodyForExprAliases(Pos, flattenExprs);
+}
+
+// Builds a flat map over "core" that adds every labelled window
+// PARTITION BY expression as a struct member. Unlabelled expressions and
+// session window nodes are skipped. The callable name is chosen by
+// ctx.UseUnordered(*this).
+TNodePtr ISource::BuildPrewindowMap(TContext& ctx) {
+    auto row = BuildAtom(Pos, "row", TNodeFlags::Default);
+    for (const auto& partitionExpr : Expressions(EExprSeat::WindowPartitionBy)) {
+        const auto alias = partitionExpr->GetLabel();
+        if (!alias) {
+            continue;
+        }
+        if (dynamic_cast<const TSessionWindow*>(partitionExpr.Get())) {
+            continue;
+        }
+        row = Y("AddMember", row, Q(alias), partitionExpr);
+    }
+    const char* const mapCallable = ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap";
+    return Y(mapCallable, "core", BuildLambda(Pos, Y("row"), Y("AsList", row)));
+}
+
+// Builds the per-row sampling lambda into `node`.
+// Without a sampling rate `node` is left untouched and true is returned.
+// Otherwise the rate is cast to Double (0 if the cast yields nothing),
+// divided by 100 and compared against Random; the row is kept via OptionalIf.
+// Returns whether the lambda was built.
+bool ISource::BuildSamplingLambda(TNodePtr& node) {
+    if (!SamplingRate) {
+        return true;
+    }
+    const auto rateAsDouble = Y("SafeCast", SamplingRate, Y("DataType", Q("Double")));
+    const auto percent = Y("Coalesce", rateAsDouble, Y("Double", Q("0")));
+    const auto fraction = Y("/", percent, Y("Double", Q("100")));
+    const auto keepRow = Y("<", Y("Random", Y("DependsOn", "row")), fraction);
+    const auto body = Y(Y("let", "res", Y("OptionalIf", keepRow, "row")));
+    node = BuildLambda(GetPos(), Y("row"), body, "res");
+    return !!node;
+}
+
+// Initializes `samplingRate` against this source and stores its prepared
+// form in SamplingRate. A null rate is accepted and changes nothing.
+// Returns false only when the rate expression fails to initialize.
+bool ISource::SetSamplingRate(TContext& ctx, ESampleClause clause, TNodePtr samplingRate) {
+    if (!samplingRate) {
+        return true;
+    }
+    if (!samplingRate->Init(ctx, this)) {
+        return false;
+    }
+    SamplingRate = PrepareSamplingRate(Pos, clause, samplingRate);
+    return true;
+}
+
+// Builds the Aggregate<suffix> call over `label`.
+// Returns {node, ok}:
+//   {nullptr, true}  - there is nothing to aggregate;
+//   {nullptr, false} - building the traits of some aggregation failed;
+//   {node, true}     - the aggregate, wrapped in AssumeColumnOrderPartial.
+std::pair<TNodePtr, bool> ISource::BuildAggregation(const TString& label, TContext& ctx) {
+    const bool nothingToAggregate =
+        GroupKeys.empty() && Aggregations.empty() && !IsCompositeSource() && !LegacyHoppingWindowSpec;
+    if (nothingToAggregate) {
+        return { nullptr, true };
+    }
+
+    // Keys are emitted in declaration order; the set and the ordered list must agree.
+    YQL_ENSURE(GroupKeys.size() == OrderedGroupKeys.size());
+    auto keys = Y();
+    for (const auto& keyName : OrderedGroupKeys) {
+        YQL_ENSURE(GroupKeys.contains(keyName));
+        keys = L(keys, BuildQuotedAtom(Pos, keyName));
+    }
+
+    // Aggregations sharing a (distinct flag, generic key) pair are joined onto the first of them.
+    std::map<std::pair<bool, TString>, std::vector<IAggregation*>> joinable;
+    for (const auto& aggregation : Aggregations) {
+        if (const auto genericKey = aggregation->GetGenericKey()) {
+            joinable[{aggregation->IsDistinct(), *genericKey}].emplace_back(aggregation.Get());
+        }
+    }
+    for (const auto& group : joinable) {
+        const auto& members = group.second;
+        for (size_t i = 1U; i < members.size(); ++i) {
+            members.front()->Join(members[i]);
+        }
+    }
+
+    const auto listType = Y("TypeOf", label);
+    const bool mergeManyFinalize = GroupBySuffix == "MergeManyFinalize";
+    const bool overState = GroupBySuffix == "CombineState" || GroupBySuffix == "MergeState" ||
+        GroupBySuffix == "MergeFinalize" || mergeManyFinalize;
+    const bool allowAggApply = !LegacyHoppingWindowSpec && !SessionWindow && !HoppingWindow;
+
+    auto traitsList = Y();
+    for (const auto& aggregation : Aggregations) {
+        const auto traits = aggregation->AggregationTraits(listType, overState, mergeManyFinalize, allowAggApply, ctx);
+        if (!traits.second) {
+            return { nullptr, false };
+        }
+        if (traits.first) {
+            traitsList = L(traitsList, traits.first);
+        }
+    }
+
+    auto options = Y();
+    if (CompactGroupBy || GroupBySuffix == "Finalize") {
+        options = L(options, Q(Y(Q("compact"))));
+    }
+
+    if (LegacyHoppingWindowSpec) {
+        const auto timeExtractor = BuildLambda(Pos, Y("row"), LegacyHoppingWindowSpec->TimeExtractor);
+        const auto dataWatermarks = LegacyHoppingWindowSpec->DataWatermarks ? Q("true") : Q("false");
+        const auto legacyTraits = Y(
+            "HoppingTraits",
+            Y("ListItemType", listType),
+            timeExtractor,
+            LegacyHoppingWindowSpec->Hop,
+            LegacyHoppingWindowSpec->Interval,
+            LegacyHoppingWindowSpec->Delay,
+            dataWatermarks,
+            Q("v1"));
+        options = L(options, Q(Y(Q("hopping"), legacyTraits)));
+    }
+
+    if (SessionWindow) {
+        YQL_ENSURE(SessionWindow->GetLabel());
+        const auto* session = dynamic_cast<TSessionWindow*>(SessionWindow.Get());
+        YQL_ENSURE(session);
+        const auto labelAtom = BuildQuotedAtom(Pos, SessionWindow->GetLabel());
+        options = L(options, Q(Y(Q("session"), Q(Y(labelAtom, session->BuildTraits(label))))));
+    }
+
+    if (HoppingWindow) {
+        YQL_ENSURE(HoppingWindow->GetLabel());
+        const auto* hopping = dynamic_cast<THoppingWindow*>(HoppingWindow.Get());
+        YQL_ENSURE(hopping);
+        const auto labelAtom = BuildQuotedAtom(Pos, HoppingWindow->GetLabel());
+        options = L(options, Q(Y(Q("hopping"), Q(Y(labelAtom, hopping->BuildTraits(label))))));
+    }
+
+    const auto aggregate = Y("Aggregate" + GroupBySuffix, label, Q(keys), Q(traitsList), Q(options));
+    return { Y("AssumeColumnOrderPartial", aggregate, Q(keys)), true };
+}
+
+// Looks for a likely misspelling of `name`: first among the group keys,
+// then among the expression aliases. Returns the first match found.
+TMaybe<TString> ISource::FindColumnMistype(const TString& name) const {
+    if (auto amongKeys = FindMistypeIn(GroupKeys, name)) {
+        return amongKeys;
+    }
+    return FindMistypeIn(ExprAliases, name);
+}
+
+// Records `usedSource` in UsedSources. The pointer is stored as is (non-owning).
+void ISource::AddDependentSource(ISource* usedSource) {
+    UsedSources.emplace_back(usedSource);
+}
+
+// Call node wrapping a window frame bound into "EvaluateExpr".
+// The bound is initialized against a private fake source instead of the
+// source passed to DoInit.
+class TYqlFrameBound final: public TCallNode {
+public:
+    TYqlFrameBound(TPosition pos, TNodePtr bound)
+        : TCallNode(pos, "EvaluateExpr", 1, 1, { bound })
+        , FakeSource(BuildFakeSource(pos))
+    {
+    }
+
+    // Validates the argument count, initializes the bound against the fake
+    // source, then runs the regular call-node initialization. Stops at the
+    // first failing step.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        const bool boundReady = ValidateArguments(ctx) && Args[0]->Init(ctx, FakeSource.Get());
+        return boundReady && TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TYqlFrameBound(Pos, Args[0]->Clone());
+    }
+
+private:
+    TSourcePtr FakeSource;
+};
+
+// Converts one frame bound into its node form.
+//  - ROWS frames: Void for an unbounded bound, an Int32 literal (negated for
+//    PRECEDING) for a literal bound, otherwise an evaluated expression
+//    (wrapped in Minus for PRECEDING).
+//  - Other frame types: a tuple ('setting [bound]) where a missing bound
+//    becomes 'unbounded and a non-literal bound is wrapped for evaluation.
+TNodePtr BuildFrameNode(const TFrameBound& frame, EFrameType frameType) {
+    TString direction;
+    switch (frame.Settings) {
+        case FramePreceding:
+            direction = "preceding";
+            break;
+        case FrameCurrentRow:
+            direction = "currentRow";
+            break;
+        case FrameFollowing:
+            direction = "following";
+            break;
+        default:
+            YQL_ENSURE(false, "Unexpected frame setting");
+    }
+
+    const TPosition pos = frame.Pos;
+    TNodePtr bound = frame.Bound;
+
+    if (frameType == EFrameType::FrameByRows) {
+        // TODO: switch FrameByRows to the common tuple format used for the other frame types
+        YQL_ENSURE(frame.Settings != FrameCurrentRow, "Should be already replaced by 0 preceding/following");
+        if (!bound) {
+            return BuildLiteralVoid(pos);
+        }
+        const bool preceding = frame.Settings == FramePreceding;
+        if (bound->IsLiteral()) {
+            YQL_ENSURE(bound->GetLiteralType() == "Int32");
+            i32 offset = FromString<i32>(bound->GetLiteralValue());
+            YQL_ENSURE(offset >= 0);
+            if (preceding) {
+                offset = -offset;
+            }
+            return new TCallNodeImpl(pos, "Int32", { BuildQuotedAtom(pos, ToString(offset), TNodeFlags::Default) });
+        }
+        if (preceding) {
+            bound = new TCallNodeImpl(pos, "Minus", { bound->Clone() });
+        }
+        return new TYqlFrameBound(pos, bound);
+    }
+
+    TVector<TNodePtr> items;
+    items.push_back(BuildQuotedAtom(pos, direction, TNodeFlags::Default));
+    if (frame.Settings != FrameCurrentRow) {
+        if (!bound) {
+            bound = BuildQuotedAtom(pos, "unbounded", TNodeFlags::Default);
+        } else if (!bound->IsLiteral()) {
+            bound = new TYqlFrameBound(pos, bound);
+        }
+        items.push_back(std::move(bound));
+    }
+    return BuildTuple(pos, std::move(items));
+}
+
+// Builds the quoted frame settings list: ('begin ...) ('end ...) and, when
+// `isCompact` is set, a trailing ('compact). Frame exclusion is not
+// supported, and both bounds must be present.
+TNodePtr ISource::BuildWindowFrame(const TFrameSpecification& spec, bool isCompact) {
+    YQL_ENSURE(spec.FrameExclusion == FrameExclNone);
+    YQL_ENSURE(spec.FrameBegin);
+    YQL_ENSURE(spec.FrameEnd);
+
+    const auto beginBound = BuildFrameNode(*spec.FrameBegin, spec.FrameType);
+    const auto endBound = BuildFrameNode(*spec.FrameEnd, spec.FrameType);
+
+    const auto beginItem = Q(Y(Q("begin"), beginBound));
+    const auto endItem = Q(Y(Q("end"), endBound));
+
+    if (!isCompact) {
+        return Q(Y(beginItem, endItem));
+    }
+    return Q(Y(beginItem, endItem, Q(Y(Q("compact")))));
+}
+
+// Call node for "SessionWindowTraits"; takes exactly four arguments.
+// The last argument is initialized against a private fake source instead of
+// the source passed to DoInit.
+class TSessionWindowTraits final: public TCallNode {
+public:
+    TSessionWindowTraits(TPosition pos, const TVector<TNodePtr>& args)
+        : TCallNode(pos, "SessionWindowTraits", args)
+        , FakeSource(BuildFakeSource(pos))
+    {
+        YQL_ENSURE(args.size() == 4);
+    }
+
+    // Validates the arguments, initializes the last one against the fake
+    // source, then runs the regular call-node initialization. Stops at the
+    // first failing step.
+    bool DoInit(TContext& ctx, ISource* src) override {
+        const bool lastArgReady = ValidateArguments(ctx) && Args.back()->Init(ctx, FakeSource.Get());
+        return lastArgReady && TCallNode::DoInit(ctx, src);
+    }
+
+    TNodePtr DoClone() const final {
+        return new TSessionWindowTraits(Pos, CloneContainer(Args));
+    }
+
+private:
+    TSourcePtr FakeSource;
+};
+
+// Builds the window calculation over `label`.
+// A window is "used" when it has aggregations or functions attached, or when
+// its specification has a session. With one used window the result is a
+// single CalcOverWindow / CalcOverSessionWindow call over `label`. With
+// several, the calls are chained through a "partitioning" binding inside a
+// block.
+TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label) {
+    YQL_ENSURE(IsCalcOverWindow());
+
+    // Shared empty lists for windows with no aggregations or functions.
+    // Binding references to these avoids copying the real lists on every
+    // iteration, which a `cond ? TVector<...>() : it->second` ternary would do.
+    static const TVector<TAggregationPtr> noAggs;
+    static const TVector<TNodePtr> noFuncs;
+
+    TSet<TString> usedWindows;
+    for (const auto& it : AggregationOverWindow) {
+        usedWindows.insert(it.first);
+    }
+    for (const auto& it : FuncOverWindow) {
+        usedWindows.insert(it.first);
+    }
+    for (const auto& it : WinSpecs) {
+        if (it.second->Session) {
+            usedWindows.insert(it.first);
+        }
+    }
+
+    YQL_ENSURE(!usedWindows.empty());
+
+    const bool onePartition = usedWindows.size() == 1;
+    const auto useLabel = onePartition ? label : "partitioning";
+    const auto listType = Y("TypeOf", useLabel);
+    auto framesProcess = Y();
+    auto resultNode = onePartition ? Y() : Y(Y("let", "partitioning", label));
+
+    for (const auto& name : usedWindows) {
+        auto spec = FindWindowSpecification(ctx, name);
+        YQL_ENSURE(spec);
+
+        const auto aggsIter = AggregationOverWindow.find(name);
+        const auto funcsIter = FuncOverWindow.find(name);
+
+        const auto& aggs = (aggsIter == AggregationOverWindow.end()) ? noAggs : aggsIter->second;
+        const auto& funcs = (funcsIter == FuncOverWindow.end()) ? noFuncs : funcsIter->second;
+
+        auto frames = Y();
+        TString frameType;
+        switch (spec->Frame->FrameType) {
+            case EFrameType::FrameByRows: frameType = "WinOnRows"; break;
+            case EFrameType::FrameByRange: frameType = "WinOnRange"; break;
+            case EFrameType::FrameByGroups: frameType = "WinOnGroups"; break;
+        }
+        YQL_ENSURE(frameType);
+        auto callOnFrame = Y(frameType, BuildWindowFrame(*spec->Frame, spec->IsCompact));
+        for (const auto& agg : aggs) {
+            auto winTraits = agg->WindowTraits(listType, ctx);
+            callOnFrame = L(callOnFrame, winTraits);
+        }
+        for (const auto& func : funcs) {
+            auto winSpec = func->WindowSpecFunc(listType);
+            callOnFrame = L(callOnFrame, winSpec);
+        }
+        frames = L(frames, callOnFrame);
+
+        // The session window node is handled separately below, not as a key.
+        auto keysTuple = Y();
+        for (const auto& key: spec->Partitions) {
+            if (!dynamic_cast<TSessionWindow*>(key.Get())) {
+                keysTuple = L(keysTuple, AliasOrColumn(key, GetJoin()));
+            }
+        }
+
+        auto sortSpec = spec->OrderBy.empty() ? Y("Void") : BuildSortSpec(spec->OrderBy, useLabel, true, false);
+        if (spec->Session) {
+            // Named differently from the `label` parameter to avoid shadowing it.
+            const TString sessionLabel = spec->Session->GetLabel();
+            YQL_ENSURE(sessionLabel);
+            auto sessionWindow = dynamic_cast<TSessionWindow*>(spec->Session.Get());
+            YQL_ENSURE(sessionWindow);
+            auto labelNode = BuildQuotedAtom(sessionWindow->GetPos(), sessionLabel);
+
+            auto sessionTraits = sessionWindow->BuildTraits(useLabel);
+            framesProcess = Y("CalcOverSessionWindow", useLabel, Q(keysTuple), sortSpec, Q(frames), sessionTraits, Q(Y(labelNode)));
+        } else {
+            YQL_ENSURE(aggs || funcs);
+            framesProcess = Y("CalcOverWindow", useLabel, Q(keysTuple), sortSpec, Q(frames));
+        }
+
+        if (!onePartition) {
+            resultNode = L(resultNode, Y("let", "partitioning", framesProcess));
+        }
+    }
+    if (onePartition) {
+        return framesProcess;
+    } else {
+        return Y("block", Q(L(resultNode, Y("return", "partitioning"))));
+    }
+}
+
+// Base implementation: no sort node; always returns a null node.
+TNodePtr ISource::BuildSort(TContext& ctx, const TString& label) {
+    Y_UNUSED(label);
+    Y_UNUSED(ctx);
+    return TNodePtr();
+}
+
+// Base implementation: no cleanup node; always returns a null node.
+TNodePtr ISource::BuildCleanupColumns(TContext& ctx, const TString& label) {
+    Y_UNUSED(label);
+    Y_UNUSED(ctx);
+    return TNodePtr();
+}
+
+// Base implementation: no grouping-columns node; always returns a null node.
+TNodePtr ISource::BuildGroupingColumns(const TString& label) {
+    Y_UNUSED(label);
+    return TNodePtr();
+}
+
+// Base implementation: a generic source is not a join.
+IJoin* ISource::GetJoin() {
+    IJoin* const notAJoin = nullptr;
+    return notAJoin;
+}
+
+// Base implementation: a generic source has no composite source.
+ISource* ISource::GetCompositeSource() {
+    ISource* const noComposite = nullptr;
+    return noComposite;
+}
+
+// Base implementation: sources report themselves as selects by default.
+bool ISource::IsSelect() const {
+    constexpr bool selectByDefault = true;
+    return selectByDefault;
+}
+
+// Base implementation: a generic source is not a table source.
+bool ISource::IsTableSource() const {
+    constexpr bool tableSourceByDefault = false;
+    return tableSourceByDefault;
+}
+
+// Base implementation: never treats a source name as a column; `source` is ignored.
+bool ISource::ShouldUseSourceAsColumn(const TString& source) const {
+    constexpr bool useAsColumn = false;
+    Y_UNUSED(source);
+    return useAsColumn;
+}
+
+// Base implementation: always reports false.
+bool ISource::IsJoinKeysInitializing() const {
+    constexpr bool initializingJoinKeys = false;
+    return initializingJoinKeys;
+}
+
+// Initializes every FLATTEN BY column against this source, stopping at the
+// first failure. When columns are flattened and a parent source `src` is
+// given, all of the parent's columns are requested.
+bool ISource::DoInit(TContext& ctx, ISource* src) {
+    auto& flattenBy = Expressions(EExprSeat::FlattenBy);
+    for (auto& expr : flattenBy) {
+        if (!expr->Init(ctx, this)) {
+            return false;
+        }
+    }
+
+    const bool needAllColumns = IsFlattenColumns() && src;
+    if (needAllColumns) {
+        src->AllColumns();
+    }
+    return true;
+}
+
+// Initializes every filter against this source.
+// A filter over aggregated values is rejected with an error unless it is
+// constant or is an aggregation key. Returns false on the first failure.
+bool ISource::InitFilters(TContext& ctx) {
+    for (auto& condition : Filters) {
+        if (!condition->Init(ctx, this)) {
+            return false;
+        }
+        const bool allowed = !condition->IsAggregated()
+            || condition->IsConstant()
+            || condition->HasState(ENodeState::AggregationKey);
+        if (!allowed) {
+            ctx.Error(condition->GetPos()) << "Can not use aggregated values in filtering";
+            return false;
+        }
+    }
+    return true;
+}
+
+// Not expected to be called on a source: trips a debug-only abort check and
+// returns nullptr.
+TAstNode* ISource::Translate(TContext& ctx) const {
+    Y_UNUSED(ctx);
+    Y_DEBUG_ABORT_UNLESS(false);
+    return nullptr;
+}
+
+// Fills the two parts of a sort specification from `orderBy`:
+//  - empty list: both outputs become Void (sortKeySelector must be null on entry);
+//  - one item:   a Bool direction and a lambda returning the key;
+//  - many items: a quoted list of Bool directions and a lambda returning a
+//                quoted list of keys.
+// Every key is wrapped in PersistableRepr.
+void ISource::FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortDirection, TNodePtr& sortKeySelector) {
+    if (orderBy.empty()) {
+        YQL_ENSURE(!sortKeySelector);
+        sortDirection = sortKeySelector = Y("Void");
+        return;
+    }
+
+    const auto directionOf = [this](const TSortSpecificationPtr& spec) {
+        return Y("Bool", Q(spec->Ascending ? "true" : "false"));
+    };
+    const auto keyOf = [this](const TSortSpecificationPtr& spec) {
+        return Y("PersistableRepr", spec->OrderExpr);
+    };
+
+    TNodePtr key;
+    if (orderBy.size() == 1) {
+        const auto& only = orderBy.front();
+        key = keyOf(only);
+        sortDirection = directionOf(only);
+    } else {
+        auto keys = Y();
+        auto directions = Y();
+        for (const auto& spec : orderBy) {
+            directions = L(directions, directionOf(spec));
+            keys = L(keys, keyOf(spec));
+        }
+        sortDirection = Q(directions);
+        key = Q(keys);
+    }
+    sortKeySelector = BuildLambda(Pos, Y("row"), key);
+}
+
+// Builds a sort node for a non-empty `orderBy`:
+//  - traits: SortTraits over the type of `label`;
+//  - assume: AssumeSorted over `label`;
+//  - otherwise: Sort over `label`.
+// `traits` takes precedence over `assume`.
+TNodePtr ISource::BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, bool traits, bool assume) {
+    YQL_ENSURE(!orderBy.empty());
+    TNodePtr directions;
+    TNodePtr keySelector;
+    FillSortParts(orderBy, directions, keySelector);
+
+    if (traits) {
+        return Y("SortTraits", Y("TypeOf", label), directions, keySelector);
+    }
+    const char* const callable = assume ? "AssumeSorted" : "Sort";
+    return Y(callable, label, directions, keySelector);
+}
+
+// True when a MATCH_RECOGNIZE builder has been attached to this source.
+bool ISource::HasMatchRecognize() const {
+    return !!MatchRecognizeBuilder;
+}
+
+// Delegates to the attached MATCH_RECOGNIZE builder, handing over
+// `inputTable` and this source. Must only be called when
+// HasMatchRecognize() holds.
+TNodePtr ISource::BuildMatchRecognize(TContext& ctx, TString&& inputTable) {
+    YQL_ENSURE(HasMatchRecognize());
+    return MatchRecognizeBuilder->Build(ctx, std::move(inputTable), this);
+}
+
+// Forwards the position to the ISource base; IJoin adds no state of its own.
+IJoin::IJoin(TPosition pos)
+    : ISource(pos)
+{}
+
+// Out-of-line destructor; nothing to release beyond the base class.
+IJoin::~IJoin() = default;
+
+// A join source is its own join.
+IJoin* IJoin::GetJoin() {
+    IJoin* const self = this;
+    return self;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/source.h b/yql/essentials/sql/v1/source.h
new file mode 100644
index 00000000000..35129fffbb8
--- /dev/null
+++ b/yql/essentials/sql/v1/source.h
@@ -0,0 +1,320 @@
+#pragma once
+#include "node.h"
+#include "match_recognize.h"
+#include <library/cpp/containers/sorted_vector/sorted_vector.h>
+
+namespace NSQLTranslationV1 {
+ // A set of column-name sets.
+ using TColumnsSets = NSorted::TSimpleSet<NSorted::TSimpleSet<TString>>;
+
+ // Intrusive pointer to a query source.
+ class ISource;
+ using TSourcePtr = TIntrusivePtr<ISource>;
+
+ // Reference to a table-like input: a name, the service and cluster it
+ // belongs to, plus optional key/option nodes and an optional source.
+ struct TTableRef {
+     TString RefName;
+     TString Service;
+     TDeferredAtom Cluster;
+     TNodePtr Keys;
+     TNodePtr Options;
+     TSourcePtr Source;
+
+     TTableRef() = default;
+     TTableRef(const TString& refName, const TString& service, const TDeferredAtom& cluster, TNodePtr keys);
+     TTableRef(const TTableRef&) = default;
+     TTableRef& operator=(const TTableRef&) = default;
+
+     TString ShortName() const;
+ };
+
+ using TTableList = TVector<TTableRef>;
+
+
+ class IJoin;
+
+ // Base class of every query source. It accumulates filters, group keys,
+ // aggregations, window specifications and named expressions, and builds the
+ // corresponding nodes from them. Most virtual methods have a default
+ // implementation in source.cpp; Build() must be provided by subclasses.
+ class ISource: public INode {
+ public:
+     virtual ~ISource();
+
+     virtual bool IsFake() const;
+     virtual void AllColumns();
+     virtual const TColumns* GetColumns() const;
+     virtual void GetInputTables(TTableList& tableList) const;
+     /// Empty result means an error; otherwise the flag tells whether the
+     /// column name should be ensured (wording of the original note; confirm
+     /// against the implementations).
+     virtual TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column);
+     virtual void FinishColumns();
+     virtual bool AddExpressions(TContext& ctx, const TVector<TNodePtr>& columns, EExprSeat exprSeat);
+     virtual void SetFlattenByMode(const TString& mode);
+     virtual void MarkFlattenColumns();
+     virtual bool IsFlattenColumns() const;
+     virtual bool AddFilter(TContext& ctx, TNodePtr filter);
+     virtual bool AddGroupKey(TContext& ctx, const TString& column);
+     virtual void SetCompactGroupBy(bool compactGroupBy);
+     virtual void SetGroupBySuffix(const TString& suffix);
+     virtual TString MakeLocalName(const TString& name);
+     virtual bool AddAggregation(TContext& ctx, TAggregationPtr aggr);
+     virtual bool AddFuncOverWindow(TContext& ctx, TNodePtr expr);
+     virtual void AddTmpWindowColumn(const TString& column);
+     virtual void SetMatchRecognize(TMatchRecognizeBuilderPtr matchRecognize);
+     virtual const TVector<TString>& GetTmpWindowColumns() const;
+     virtual bool HasAggregations() const;
+     virtual void AddWindowSpecs(TWinSpecs winSpecs);
+     virtual bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func);
+     virtual bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func);
+     virtual void SetLegacyHoppingWindowSpec(TLegacyHoppingWindowSpecPtr spec);
+     virtual TLegacyHoppingWindowSpecPtr GetLegacyHoppingWindowSpec() const;
+     virtual TNodePtr GetSessionWindowSpec() const;
+     virtual TNodePtr GetHoppingWindowSpec() const;
+     virtual bool IsCompositeSource() const;
+     virtual bool IsGroupByColumn(const TString& column) const;
+     virtual bool IsFlattenByColumns() const;
+     virtual bool IsFlattenByExprs() const;
+     virtual bool IsCalcOverWindow() const;
+     virtual bool IsOverWindowSource() const;
+     virtual bool IsStream() const;
+     virtual EOrderKind GetOrderKind() const;
+     virtual TWriteSettings GetWriteSettings() const;
+     TNodePtr PrepareSamplingRate(TPosition pos, ESampleClause clause, TNodePtr samplingRate);
+     virtual bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause clause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed);
+     virtual bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints);
+     virtual bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& groupingColumn);
+     virtual size_t GetGroupingColumnsCount() const;
+     virtual TNodePtr BuildFilter(TContext& ctx, const TString& label);
+     virtual TNodePtr BuildFilterLambda();
+     virtual TNodePtr BuildFlattenByColumns(const TString& label);
+     virtual TNodePtr BuildFlattenColumns(const TString& label);
+     virtual TNodePtr BuildPreaggregatedMap(TContext& ctx);
+     virtual TNodePtr BuildPreFlattenMap(TContext& ctx);
+     virtual TNodePtr BuildPrewindowMap(TContext& ctx);
+     /// Returns {node, ok}; a null node with ok == true means there is
+     /// nothing to aggregate.
+     virtual std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx);
+     virtual TNodePtr BuildCalcOverWindow(TContext& ctx, const TString& label);
+     virtual TNodePtr BuildSort(TContext& ctx, const TString& label);
+     virtual TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label);
+     virtual TNodePtr BuildGroupingColumns(const TString& label);
+     virtual bool BuildSamplingLambda(TNodePtr& node);
+     virtual bool SetSamplingRate(TContext& ctx, ESampleClause clause, TNodePtr samplingRate);
+     virtual IJoin* GetJoin();
+     virtual ISource* GetCompositeSource();
+     virtual bool IsSelect() const;
+     virtual bool IsTableSource() const;
+     virtual bool ShouldUseSourceAsColumn(const TString& source) const;
+     virtual bool IsJoinKeysInitializing() const;
+     virtual const TString* GetWindowName() const;
+     virtual bool HasMatchRecognize() const;
+     virtual TNodePtr BuildMatchRecognize(TContext& ctx, TString&& inputTable);
+     virtual bool DoInit(TContext& ctx, ISource* src);
+     virtual TNodePtr Build(TContext& ctx) = 0;
+
+     virtual TMaybe<TString> FindColumnMistype(const TString& name) const;
+
+     virtual bool InitFilters(TContext& ctx);
+     void AddDependentSource(ISource* usedSource);
+     bool IsAlias(EExprSeat exprSeat, const TString& label) const;
+     bool IsExprAlias(const TString& label) const;
+     bool IsExprSeat(EExprSeat exprSeat, EExprType type = EExprType::WithExpression) const;
+     TString GetGroupByColumnAlias(const TString& column) const;
+     const TVector<TNodePtr>& Expressions(EExprSeat exprSeat) const;
+
+     virtual TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const;
+
+     TIntrusivePtr<ISource> CloneSource() const;
+     /// `traits` selects SortTraits; otherwise `assume` selects AssumeSorted
+     /// over Sort. `orderBy` must not be empty.
+     TNodePtr BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, bool traits, bool assume);
+
+ protected:
+     ISource(TPosition pos);
+     virtual TAstNode* Translate(TContext& ctx) const;
+
+     // Output order is direction first, key selector second. This matches
+     // the definition in source.cpp and the call in BuildSortSpec.
+     void FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortDirection, TNodePtr& sortKeySelector);
+
+     TVector<TNodePtr>& Expressions(EExprSeat exprSeat);
+     TNodePtr AliasOrColumn(const TNodePtr& node, bool withSource);
+
+     TNodePtr BuildWindowFrame(const TFrameSpecification& spec, bool isCompact);
+
+     THashSet<TString> ExprAliases;
+     THashSet<TString> FlattenByAliases;
+     THashMap<TString, TString> GroupByColumnAliases;
+     TVector<TNodePtr> Filters;
+     bool CompactGroupBy = false;
+     TString GroupBySuffix;
+     // GroupKeys and OrderedGroupKeys hold the same keys; the vector keeps
+     // their order (BuildAggregation checks that the two agree).
+     TSet<TString> GroupKeys;
+     TVector<TString> OrderedGroupKeys;
+     std::array<TVector<TNodePtr>, static_cast<unsigned>(EExprSeat::Max)> NamedExprs;
+     TVector<TAggregationPtr> Aggregations;
+     // Both maps are keyed by window name.
+     TMap<TString, TVector<TAggregationPtr>> AggregationOverWindow;
+     TMap<TString, TVector<TNodePtr>> FuncOverWindow;
+     TWinSpecs WinSpecs;
+     TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
+     TNodePtr SessionWindow;
+     TNodePtr HoppingWindow;
+     // Raw, non-owning pointers recorded by AddDependentSource().
+     TVector<ISource*> UsedSources;
+     TString FlattenMode;
+     bool FlattenColumns = false;
+     THashMap<TString, ui32> GenIndexes;
+     TVector<TString> TmpWindowColumns;
+     TNodePtr SamplingRate;
+     TMatchRecognizeBuilderPtr MatchRecognizeBuilder;
+ };
+
+ // CloneContainer for source pointers: each source is cloned through
+ // CloneSource(), and null entries stay null.
+ template<>
+ inline TVector<TSourcePtr> CloneContainer<TSourcePtr>(const TVector<TSourcePtr>& args) {
+     TVector<TSourcePtr> clones;
+     clones.reserve(args.size());
+     for (const auto& source : args) {
+         if (source) {
+             clones.emplace_back(source->CloneSource());
+         } else {
+             clones.emplace_back(nullptr);
+         }
+     }
+     return clones;
+ }
+
+ struct TJoinLinkSettings { // Options for one join link: a strategy and a compact flag; passed to IJoin::SetupJoin().
+ enum class EStrategy { // Strategy choices; their exact meaning is defined by the code that consumes them.
+ Default,
+ SortedMerge,
+ StreamLookup,
+ ForceMap,
+ ForceGrace
+ };
+ EStrategy Strategy = EStrategy::Default; // Default unless explicitly set.
+ bool Compact = false; // Off unless explicitly set.
+ };
+
+ // Interface of a source that is a join. GetJoin() returns the object
+ // itself; the pure virtual methods are implemented by concrete joins.
+ class IJoin: public ISource {
+ public:
+     virtual ~IJoin();
+
+     // Overrides ISource::GetJoin(); marked so the compiler checks the signature.
+     IJoin* GetJoin() override;
+     virtual TNodePtr BuildJoinKeys(TContext& ctx, const TVector<TDeferredAtom>& names) = 0;
+     virtual void SetupJoin(const TString& joinOp, TNodePtr joinExpr, const TJoinLinkSettings& linkSettings) = 0;
+     virtual const THashMap<TString, THashSet<TString>>& GetSameKeysMap() const = 0;
+     virtual TVector<TString> GetJoinLabels() const = 0;
+
+ protected:
+     IJoin(TPosition pos);
+ };
+
+ class TSessionWindow final : public INode { // Node for a session window; ISource code finds it via dynamic_cast and calls BuildTraits().
+ public:
+ TSessionWindow(TPosition pos, const TVector<TNodePtr>& args);
+ void MarkValid(); // presumably sets Valid; confirm in the implementation
+ TNodePtr BuildTraits(const TString& label) const; // Result is used in the "session" aggregation option and in CalcOverSessionWindow.
+ private:
+ bool DoInit(TContext& ctx, ISource* src) override;
+ TAstNode* Translate(TContext&) const override;
+ void DoUpdateState() const override;
+ TNodePtr DoClone() const override;
+ TString GetOpName() const override;
+
+ TVector<TNodePtr> Args;
+ TSourcePtr FakeSource;
+ TNodePtr Node;
+ bool Valid; // No in-class initializer: the constructor (defined elsewhere) must set it.
+ };
+
+ class THoppingWindow final : public INode { // Node for a hopping window; ISource code finds it via dynamic_cast and calls BuildTraits().
+ public:
+ THoppingWindow(TPosition pos, const TVector<TNodePtr>& args);
+ void MarkValid(); // presumably sets Valid; confirm in the implementation
+ TNodePtr BuildTraits(const TString& label) const; // Result is used in the "hopping" aggregation option.
+ public:
+ TNodePtr Hop; // Public data member; where it is filled is not visible in this header.
+ TNodePtr Interval; // Public data member; where it is filled is not visible in this header.
+ private:
+ bool DoInit(TContext& ctx, ISource* src) override;
+ TAstNode* Translate(TContext&) const override;
+ void DoUpdateState() const override;
+ TNodePtr DoClone() const override;
+ TString GetOpName() const override;
+ TNodePtr ProcessIntervalParam(const TNodePtr& val) const;
+
+ TVector<TNodePtr> Args;
+ TSourcePtr FakeSource;
+ TNodePtr Node;
+ bool Valid; // No in-class initializer: the constructor (defined elsewhere) must set it.
+ };
+
+
+ // Join helpers; the definitions live in join.cpp.
+ TString NormalizeJoinOp(const TString& joinOp);
+ TSourcePtr BuildEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes); // Both vectors are taken by rvalue reference.
+
+ // Source and select builders; the definitions live in select.cpp.
+ TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped);
+ TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex = -1);
+ TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos);
+ TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist = false);
+ TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources);
+ TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom = false, bool inSubquery = false); // Used by nodes that initialize arguments against a stand-in source (e.g. TSessionWindow::FakeSource).
+ TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList = false);
+ TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label = TString());
+ TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label = TString());
+ TSourcePtr BuildRefColumnSource(TPosition pos, const TString& partExpression);
+ TSourcePtr BuildUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings);
+ TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource); // origSource is a raw pointer; ownership is not expressed here.
+
+ TNodePtr BuildOrderBy(TPosition pos, const TVector<TNodePtr>& keys, const TVector<bool>& order);
+ TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take);
+
+
+ TSourcePtr BuildSelectCore( // Takes every clause of a select as a separate argument; rvalue-reference parameters are consumed.
+ TContext& ctx,
+ TPosition pos,
+ TSourcePtr source,
+ const TVector<TNodePtr>& groupByExpr,
+ const TVector<TNodePtr>& groupBy,
+ bool compactGroupBy,
+ const TString& groupBySuffix,
+ bool assumeSorted,
+ const TVector<TSortSpecificationPtr>& orderBy,
+ TNodePtr having,
+ TWinSpecs&& windowSpec,
+ TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
+ TVector<TNodePtr>&& terms,
+ bool distinct,
+ TVector<TNodePtr>&& without,
+ bool selectStream,
+ const TWriteSettings& settings,
+ TColumnsSets&& uniqueSets,
+ TColumnsSets&& distinctSets
+ );
+ TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake);
+
+
+ // Selects how BuildReduce() treats its input. Only the two enumerator names
+ // are visible here; their semantics are defined by the implementation.
+ enum class ReduceMode {
+     ByPartition,
+     ByAll,
+ };
+ // Builders for REDUCE and PROCESS sources; rvalue-reference parameters are consumed.
+ TSourcePtr BuildReduce(TPosition pos, ReduceMode mode, TSourcePtr source, TVector<TSortSpecificationPtr>&& orderBy,
+     TVector<TNodePtr>&& keys, TVector<TNodePtr>&& args, TNodePtr udf, TNodePtr having, const TWriteSettings& settings,
+     const TVector<TSortSpecificationPtr>& assumeOrderBy, bool listCall);
+ TSourcePtr BuildProcess(TPosition pos, TSourcePtr source, TNodePtr with, bool withExtFunction, TVector<TNodePtr>&& terms, bool listCall,
+     bool processStream, const TWriteSettings& settings, const TVector<TSortSpecificationPtr>& assumeOrderBy);
+
+ TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery, TScopedStatePtr scoped);
+
+ // Write/update/delete builders; the definitions live in insert.cpp.
+ // The two BuildWriteValues overloads differ in where the values come from:
+ // explicit rows of nodes, or another source.
+ TSourcePtr BuildWriteValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values);
+ TSourcePtr BuildWriteValues(TPosition pos, const TString& operationHumanName, const TVector<TString>& columnsHint, TSourcePtr source);
+ TSourcePtr BuildUpdateValues(TPosition pos, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values);
+
+ EWriteColumnMode ToWriteColumnsMode(ESQLWriteColumnMode sqlWriteColumnMode);
+ TNodePtr BuildEraseColumns(TPosition pos, const TVector<TString>& columns);
+ TNodePtr BuildIntoTableOptions(TPosition pos, const TVector<TString>& eraseColumns, const TTableHints& hints);
+ TNodePtr BuildWriteColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, EWriteColumnMode mode, TSourcePtr values, TNodePtr options = nullptr);
+ TNodePtr BuildUpdateColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr values, TSourcePtr source, TNodePtr options = nullptr);
+ TNodePtr BuildDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options = nullptr);
+
+ // Table, key and DDL builders plus expression helpers; the definitions live in query.cpp.
+ TNodePtr BuildTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TViewDescription& view);
+ TNodePtr BuildTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& func, const TVector<TTableArg>& args);
+ TNodePtr BuildTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name);
+ TNodePtr BuildInputOptions(TPosition pos, const TTableHints& hints);
+ TNodePtr BuildInputTables(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped);
+ TNodePtr BuildCreateTable(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr source, TScopedStatePtr scoped);
+ TNodePtr BuildAlterTable(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped);
+ TNodePtr BuildDropTable(TPosition pos, const TTableRef& table, bool missingOk, ETableType tableType, TScopedStatePtr scoped);
+ TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options,
+ TScopedStatePtr scoped);
+ TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped);
+ TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster,
+ TNodePtr node, const TString& view = {});
+ void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix = {}); // Result is written to the `table` out-parameter.
+ TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node, const TString& prefix = {});
+ TString NormalizeTypeString(const TString& str);
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp
new file mode 100644
index 00000000000..506b3950d54
--- /dev/null
+++ b/yql/essentials/sql/v1/sql.cpp
@@ -0,0 +1,247 @@
+#include "sql.h"
+#include "sql_query.h"
+#include <yql/essentials/parser/proto_ast/collect_issues/collect_issues.h>
+#include <yql/essentials/sql/v1/lexer/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+ // Translates a parsed SQL v1 protobuf AST into a YQL AST, reporting problems through ctx.
+ // Returns nullptr when the message is not a TSQLv1ParserAST, when the node tree cannot be
+ // built or initialised, or when NProtoAST::TTooManyErrors is thrown during Init/Translate.
+ TAstNode* SqlASTToYql(const google::protobuf::Message& protoAst, TContext& ctx) {
+     const google::protobuf::Descriptor* const descriptor = protoAst.GetDescriptor();
+     if (descriptor != nullptr && descriptor->name() != "TSQLv1ParserAST") {
+         ctx.Error() << "Invalid AST structure: " << descriptor->name() << ", expected TSQLv1ParserAST";
+         return nullptr;
+     }
+
+     TSqlQuery translator(ctx, ctx.Settings.Mode, true);
+     const auto& typedAst = static_cast<const TSQLv1ParserAST&>(protoAst);
+     TNodePtr root(translator.Build(typedAst));
+     if (!root) {
+         return nullptr;
+     }
+
+     try {
+         if (root->Init(ctx, nullptr)) {
+             return root->Translate(ctx);
+         }
+     } catch (const NProtoAST::TTooManyErrors&) {
+         // No further issue is added: there is no room left for it.
+     }
+     return nullptr;
+ }
+
+ // Translates a list of statement cores into a single YQL AST via TSqlQuery.
+ // Returns nullptr if the node tree cannot be built or initialised, or if
+ // NProtoAST::TTooManyErrors is thrown during Init/Translate.
+ TAstNode* SqlASTsToYqls(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& ast, TContext& ctx) {
+     TSqlQuery translator(ctx, ctx.Settings.Mode, true);
+     TNodePtr root(translator.Build(ast));
+     if (!root) {
+         return nullptr;
+     }
+
+     try {
+         if (root->Init(ctx, nullptr)) {
+             return root->Translate(ctx);
+         }
+     } catch (const NProtoAST::TTooManyErrors&) {
+         // No further issue is added: there is no room left for it.
+     }
+     return nullptr;
+ }
+
+ // Runs SqlASTToYql and stores the outcome in res (root node plus the context's pool).
+ // ctx must carry no issues on entry. On success calls ctx.WarnUnusedHints(); on failure
+ // bumps an "sql_errors" counter and, if no issue was collected, adds a generic one.
+ void SqlASTToYqlImpl(NYql::TAstParseResult& res, const google::protobuf::Message& protoAst,
+     TContext& ctx) {
+     YQL_ENSURE(!ctx.Issues.Size());
+     res.Root = SqlASTToYql(protoAst, ctx);
+     res.Pool = std::move(ctx.Pool);
+
+     if (res.Root) {
+         ctx.WarnUnusedHints();
+         return;
+     }
+
+     if (ctx.Issues.Size()) {
+         ctx.IncrementMonCounter("sql_errors", "AstToYqlError");
+         return;
+     }
+
+     // Translation failed without leaving any issue behind: report it explicitly.
+     ctx.IncrementMonCounter("sql_errors", "AstToYqlSilentError");
+     ctx.Error() << "Error occurred on parse SQL query, but no error is collected" <<
+         ", please send this request over bug report into YQL interface or write on yql@ maillist";
+ }
+
+ // Runs SqlASTsToYqls and stores the outcome in res (root node plus the context's pool).
+ // On success calls ctx.WarnUnusedHints(); on failure bumps an "sql_errors" counter and,
+ // if no issue was collected, adds a generic one.
+ void SqlASTsToYqlsImpl(NYql::TAstParseResult& res, const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& ast, TContext& ctx) {
+     res.Root = SqlASTsToYqls(ast, ctx);
+     res.Pool = std::move(ctx.Pool);
+
+     if (res.Root) {
+         ctx.WarnUnusedHints();
+         return;
+     }
+
+     if (ctx.Issues.Size()) {
+         ctx.IncrementMonCounter("sql_errors", "AstToYqlError");
+         return;
+     }
+
+     // Translation failed without leaving any issue behind: report it explicitly.
+     ctx.IncrementMonCounter("sql_errors", "AstToYqlSilentError");
+     ctx.Error() << "Error occurred on parse SQL query, but no error is collected" <<
+         ", please send this request over bug report into YQL interface or write on yql@ maillist";
+ }
+
+ // Public entry point: translates an already parsed protobuf AST using the given hints and settings.
+ // settings.Mode must be a query mode. The result is always tagged as YQLv1 syntax.
+ NYql::TAstParseResult SqlASTToYql(const google::protobuf::Message& protoAst,
+     const NSQLTranslation::TSQLHints& hints,
+     const NSQLTranslation::TTranslationSettings& settings)
+ {
+     YQL_ENSURE(IsQueryMode(settings.Mode));
+
+     TAstParseResult parseResult;
+     // The context appends its issues straight into parseResult.Issues.
+     TContext context(settings, hints, parseResult.Issues);
+     SqlASTToYqlImpl(parseResult, protoAst, context);
+
+     parseResult.ActualSyntaxType = NYql::ESyntaxType::YQLv1;
+     return parseResult;
+ }
+
+ // Parses and translates a SQL v1 query text into a YQL AST.
+ // Steps: collect SQL hints with the lexer, parse the text into a protobuf AST, translate it.
+ // If hint collection fails the result is returned as is (ActualSyntaxType is not set on that path).
+ // When warningRules is non-null it receives the context's warning rules, which are then cleared.
+ NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules)
+ {
+     TAstParseResult parseResult;
+     const TString queryName = "query";
+
+     NSQLTranslation::TSQLHints hints;
+     auto lexer = MakeLexer(settings.AnsiLexer, settings.Antlr4Parser);
+     YQL_ENSURE(lexer);
+     const bool hintsCollected = CollectSqlHints(*lexer, query, queryName, settings.File, hints, parseResult.Issues, settings.MaxErrors, settings.Antlr4Parser);
+     if (!hintsCollected) {
+         return parseResult;
+     }
+
+     TContext context(settings, hints, parseResult.Issues);
+     NSQLTranslation::TErrorCollectorOverIssues collector(parseResult.Issues, settings.MaxErrors, settings.File);
+
+     google::protobuf::Message* parsedAst(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena));
+     if (parsedAst == nullptr) {
+         context.IncrementMonCounter("sql_errors", "AstError");
+     } else {
+         SqlASTToYqlImpl(parseResult, *parsedAst, context);
+     }
+
+     if (warningRules != nullptr) {
+         *warningRules = context.WarningPolicy.GetRules();
+         context.WarningPolicy.Clear();
+     }
+
+     parseResult.ActualSyntaxType = NYql::ESyntaxType::YQLv1;
+     return parseResult;
+ }
+
+ // Tells whether a statement of the given kind has to be repeated in front of every later
+ // statement when a query is split into separate statements (SqlToAstStatements accumulates
+ // such statements in its `commonStates` list).
+ // Returns true for pragma, named nodes, use, declare, import, export and define action/subquery;
+ // false for every other alternative, including ALT_NOT_SET.
+ // The switch deliberately has no `default:` so that the compiler can flag alternatives that are
+ // added to the grammar but not classified here.
+ bool NeedUseForAllStatements(const TRule_sql_stmt_core::AltCase& subquery) {
+     switch (subquery) {
+         case TRule_sql_stmt_core::kAltSqlStmtCore1: // pragma
+         case TRule_sql_stmt_core::kAltSqlStmtCore3: // named nodes
+         case TRule_sql_stmt_core::kAltSqlStmtCore6: // use
+         case TRule_sql_stmt_core::kAltSqlStmtCore12: // declare
+         case TRule_sql_stmt_core::kAltSqlStmtCore13: // import
+         case TRule_sql_stmt_core::kAltSqlStmtCore14: // export
+         case TRule_sql_stmt_core::kAltSqlStmtCore18: // define action or subquery
+             return true;
+         case TRule_sql_stmt_core::ALT_NOT_SET:
+         case TRule_sql_stmt_core::kAltSqlStmtCore2: // select
+         case TRule_sql_stmt_core::kAltSqlStmtCore4: // create table
+         case TRule_sql_stmt_core::kAltSqlStmtCore5: // drop table
+         case TRule_sql_stmt_core::kAltSqlStmtCore7: // into table
+         case TRule_sql_stmt_core::kAltSqlStmtCore8: // commit
+         case TRule_sql_stmt_core::kAltSqlStmtCore9: // update
+         case TRule_sql_stmt_core::kAltSqlStmtCore10: // delete
+         case TRule_sql_stmt_core::kAltSqlStmtCore11: // rollback
+         case TRule_sql_stmt_core::kAltSqlStmtCore15: // alter table
+         case TRule_sql_stmt_core::kAltSqlStmtCore16: // alter external table
+         case TRule_sql_stmt_core::kAltSqlStmtCore17: // do
+         case TRule_sql_stmt_core::kAltSqlStmtCore19: // if
+         case TRule_sql_stmt_core::kAltSqlStmtCore20: // for
+         case TRule_sql_stmt_core::kAltSqlStmtCore21: // values
+         case TRule_sql_stmt_core::kAltSqlStmtCore22: // create user
+         case TRule_sql_stmt_core::kAltSqlStmtCore23: // alter user
+         case TRule_sql_stmt_core::kAltSqlStmtCore24: // create group
+         case TRule_sql_stmt_core::kAltSqlStmtCore25: // alter group
+         case TRule_sql_stmt_core::kAltSqlStmtCore26: // drop role
+         case TRule_sql_stmt_core::kAltSqlStmtCore27: // create object
+         case TRule_sql_stmt_core::kAltSqlStmtCore28: // alter object
+         case TRule_sql_stmt_core::kAltSqlStmtCore29: // drop object
+         case TRule_sql_stmt_core::kAltSqlStmtCore30: // create external data source
+         case TRule_sql_stmt_core::kAltSqlStmtCore31: // alter external data source
+         case TRule_sql_stmt_core::kAltSqlStmtCore32: // drop external data source
+         case TRule_sql_stmt_core::kAltSqlStmtCore33: // create replication
+         case TRule_sql_stmt_core::kAltSqlStmtCore34: // drop replication
+         case TRule_sql_stmt_core::kAltSqlStmtCore35: // create topic
+         case TRule_sql_stmt_core::kAltSqlStmtCore36: // alter topic
+         case TRule_sql_stmt_core::kAltSqlStmtCore37: // drop topic
+         case TRule_sql_stmt_core::kAltSqlStmtCore38: // grant permissions
+         case TRule_sql_stmt_core::kAltSqlStmtCore39: // revoke permissions
+         case TRule_sql_stmt_core::kAltSqlStmtCore40: // alter table store
+         case TRule_sql_stmt_core::kAltSqlStmtCore41: // upsert object
+         case TRule_sql_stmt_core::kAltSqlStmtCore42: // create view
+         case TRule_sql_stmt_core::kAltSqlStmtCore43: // drop view
+         case TRule_sql_stmt_core::kAltSqlStmtCore44: // alter replication
+         case TRule_sql_stmt_core::kAltSqlStmtCore45: // create resource pool
+         case TRule_sql_stmt_core::kAltSqlStmtCore46: // alter resource pool
+         case TRule_sql_stmt_core::kAltSqlStmtCore47: // drop resource pool
+         case TRule_sql_stmt_core::kAltSqlStmtCore48: // create backup collection
+         case TRule_sql_stmt_core::kAltSqlStmtCore49: // alter backup collection
+         case TRule_sql_stmt_core::kAltSqlStmtCore50: // drop backup collection
+         case TRule_sql_stmt_core::kAltSqlStmtCore51: // analyze
+         case TRule_sql_stmt_core::kAltSqlStmtCore52: // create resource pool classifier
+         case TRule_sql_stmt_core::kAltSqlStmtCore53: // alter resource pool classifier
+         case TRule_sql_stmt_core::kAltSqlStmtCore54: // drop resource pool classifier
+         case TRule_sql_stmt_core::kAltSqlStmtCore55: // backup
+         case TRule_sql_stmt_core::kAltSqlStmtCore56: // restore
+             return false;
+     }
+     // Reached only for a value outside the enumerators above. Returning here keeps the
+     // function from flowing off its end (undefined behaviour for a non-void function)
+     // and treats such a value like any other non-shared statement.
+     return false;
+ }
+
+ // Parses a query and translates it statement by statement.
+ //
+ // Statements for which NeedUseForAllStatements() is true are not translated on their own;
+ // they are accumulated and prepended to every later statement. Each remaining statement
+ // produces one entry in the returned vector (and one default-constructed entry in
+ // *stmtParseInfo, when provided), translated with a fresh TContext.
+ //
+ // Returns an empty vector if hint collection or parsing fails, or if the query is not the
+ // kAltSqlQuery1 alternative.
+ // NOTE(review): on those early-exit paths the locally collected `issues` are not handed
+ // back to the caller - confirm this is intended.
+ // NOTE(review): warningRules is filled from the outer context, which never translates a
+ // statement itself - confirm this is intended.
+ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules,
+     TVector<NYql::TStmtParseInfo>* stmtParseInfo)
+ {
+     TVector<TAstParseResult> result;
+     const TString queryName = "query";
+     TIssues issues;
+
+     NSQLTranslation::TSQLHints hints;
+     auto lexer = MakeLexer(settings.AnsiLexer, settings.Antlr4Parser);
+     YQL_ENSURE(lexer);
+     if (!CollectSqlHints(*lexer, query, queryName, settings.File, hints, issues, settings.MaxErrors, settings.Antlr4Parser)) {
+         return result;
+     }
+
+     TContext ctx(settings, hints, issues);
+     NSQLTranslation::TErrorCollectorOverIssues collector(issues, settings.MaxErrors, settings.File);
+
+     google::protobuf::Message* astProto(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena));
+     if (astProto) {
+         // Bind by reference: a plain `auto` would deep-copy the whole parsed AST.
+         const auto& ast = static_cast<const TSQLv1ParserAST&>(*astProto);
+         const auto& sqlQuery = ast.GetRule_sql_query();
+         if (sqlQuery.Alt_case() == NSQLv1Generated::TRule_sql_query::kAltSqlQuery1) {
+             // Statements that must precede every later statement.
+             std::vector<::NSQLv1Generated::TRule_sql_stmt_core> commonStates;
+             const auto& statements = sqlQuery.GetAlt_sql_query1().GetRule_sql_stmt_list1();
+
+             // Either remembers `core` as a shared statement or translates it together with
+             // all shared statements seen so far.
+             const auto processStatement = [&](const ::NSQLv1Generated::TRule_sql_stmt_core& core) {
+                 if (NeedUseForAllStatements(core.Alt_case())) {
+                     commonStates.push_back(core);
+                     return;
+                 }
+                 TContext stmtCtx(settings, hints, issues);
+                 result.emplace_back();
+                 if (stmtParseInfo) {
+                     stmtParseInfo->push_back({});
+                 }
+                 std::vector<::NSQLv1Generated::TRule_sql_stmt_core> statementResult = commonStates;
+                 statementResult.push_back(core);
+                 SqlASTsToYqlsImpl(result.back(), statementResult, stmtCtx);
+                 // Hand the issues collected so far to this statement and start afresh.
+                 result.back().Issues = std::move(issues);
+                 issues.Clear();
+             };
+
+             processStatement(statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2());
+             for (const auto& block : statements.GetBlock3()) {
+                 processStatement(block.GetRule_sql_stmt2().GetRule_sql_stmt_core2());
+             }
+         }
+     } else {
+         ctx.IncrementMonCounter("sql_errors", "AstError");
+     }
+     if (warningRules) {
+         *warningRules = ctx.WarningPolicy.GetRules();
+         ctx.WarningPolicy.Clear();
+     }
+     return result;
+ }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql.h b/yql/essentials/sql/v1/sql.h
new file mode 100644
index 00000000000..0a12c45d308
--- /dev/null
+++ b/yql/essentials/sql/v1/sql.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_ast.h>
+#include <yql/essentials/parser/lexer_common/hints.h>
+#include <yql/essentials/parser/proto_ast/common.h>
+#include <yql/essentials/public/issue/yql_warning.h>
+#include <yql/essentials/public/issue/yql_issue_manager.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+
+#include <google/protobuf/message.h>
+
+namespace NSQLTranslation {
+ struct TTranslationSettings;
+}
+
+ namespace NSQLTranslationV1 { // Public entry points of the SQL v1 -> YQL AST translator.
+
+ NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules = nullptr); // Translates query text; warningRules (optional) receives the resulting warning rules.
+ NYql::TAstParseResult SqlASTToYql(const google::protobuf::Message& protoAst, const NSQLTranslation::TSQLHints& hints, const NSQLTranslation::TTranslationSettings& settings); // Translates an already parsed protobuf AST.
+ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NSQLTranslation::TTranslationSettings& settings, NYql::TWarningRules* warningRules, TVector<NYql::TStmtParseInfo>* stmtParseInfo = nullptr); // Translates query text statement by statement, one result per translated statement.
+
+ } // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_call_expr.cpp b/yql/essentials/sql/v1/sql_call_expr.cpp
new file mode 100644
index 00000000000..1871c1bbc0b
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_call_expr.cpp
@@ -0,0 +1,444 @@
+#include "sql_call_expr.h"
+#include "sql_expression.h"
+
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+
+#include <yql/essentials/minikql/mkql_program_builder.h>
+
+namespace NSQLTranslationV1 {
+
+TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
+ TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig);
+
+using namespace NSQLv1Generated;
+
+ // Returns true when every character of input is alphanumeric or '_'
+ // (which makes an empty string valid as well).
+ static bool ValidateForCounters(const TString& input) {
+     for (const auto ch : input) {
+         if (ch != '_' && !IsAlnum(ch)) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Builds the callable node for a UDF call: the stored Node if there is one, otherwise the
+ // result of BuildCallable(Pos, Module, Func, Args, forReduce).
+ // Special case: for TensorFlow.RunBatch (module compared case-insensitively) the third
+ // argument is removed from Args after the callable has been built; with fewer than three
+ // arguments an error is reported and nullptr is returned.
+ TNodePtr TSqlCallExpr::BuildUdf(bool forReduce) {
+     auto result = Node ? Node : BuildCallable(Pos, Module, Func, Args, forReduce);
+     if (to_lower(Module) == "tensorflow" && Func == "RunBatch") {
+         if (Args.size() > 2) {
+             Args.erase(Args.begin() + 2);
+         } else {
+             Ctx.Error(Pos) << "Expected >= 3 arguments, but got: " << Args.size();
+             return nullptr;
+         }
+     }
+     return result;
+ }
+
+ TNodePtr TSqlCallExpr::BuildCall() { // Builds the node for this call; returns nullptr after reporting an error.
+ TVector<TNodePtr> args; // arguments actually passed to the builder (may differ from Args)
+ bool warnOnYqlNameSpace = true;
+
+ TUdfNode* udf_node = Node ? Node->GetUdfNode() : nullptr;
+ if (udf_node) { // the call target is a UDF node: delegate to BuildSqlCall
+ if (!udf_node->DoInit(Ctx, nullptr)) {
+ return nullptr;
+ }
+ TNodePtr positional_args = BuildTuple(Pos, PositionalArgs);
+ TNodePtr positional = positional_args->Y("TypeOf", positional_args); // type of the positional tuple
+ TNodePtr named_args = BuildStructure(Pos, NamedArgs);
+ TNodePtr named = named_args->Y("TypeOf", named_args); // type of the named-args structure
+
+ TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()});
+
+ return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(),
+ args, positional_args, named_args, custom_user_type,
+ udf_node->GetTypeConfig(), udf_node->GetRunConfig()); // note: args is still empty here
+ }
+
+ if (Node && !Node->FuncName()) { // target is an expression without a function name: call it through YQL Apply/NamedApply
+ Module = "YQL";
+ Func = NamedArgs.empty() ? "Apply" : "NamedApply";
+ warnOnYqlNameSpace = false;
+ args.push_back(Node); // the callee itself becomes the first argument
+ }
+
+ if (Node && Node->FuncName()) { // target node names a function: take module/function from it
+ Module = Node->ModuleName() ? *Node->ModuleName() : "YQL"; // module defaults to "YQL"
+ Func = *Node->FuncName();
+ }
+ bool mustUseNamed = !NamedArgs.empty();
+ if (mustUseNamed) { // named arguments present: pass a positional tuple plus a named structure
+ if (Node && !Node->FuncName()) {
+ mustUseNamed = false; // the Apply/NamedApply path does not need the check below
+ }
+ args.emplace_back(BuildTuple(Pos, PositionalArgs));
+ args.emplace_back(BuildStructure(Pos, NamedArgs));
+ } else if (IsExternalCall) {
+ Func = "SqlExternalFunction";
+ if (Args.size() < 2 || Args.size() > 3) {
+ Ctx.Error(Pos) << "EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: " << Args.size();
+ return nullptr;
+ }
+
+ if (Args.size() == 3) { // move the first two arguments to args; the third one stays in Args
+ args.insert(args.end(), Args.begin(), Args.end() - 1);
+ Args.erase(Args.begin(), Args.end() - 1);
+ } else { // exactly two arguments: move both, leaving Args empty
+ args.insert(args.end(), Args.begin(), Args.end());
+ Args.erase(Args.begin(), Args.end());
+ }
+ auto configNode = new TExternalFunctionConfig(Pos, CallConfig);
+ auto configList = new TAstListNodeImpl(Pos, { new TAstAtomNodeImpl(Pos, "quote", 0), configNode }); // quoted config list
+ args.push_back(configList);
+ } else {
+ args.insert(args.end(), Args.begin(), Args.end()); // plain call: pass Args through unchanged
+ }
+
+ auto result = BuildBuiltinFunc(Ctx, Pos, Func, args, Module, AggMode, &mustUseNamed, warnOnYqlNameSpace); // presumably clears mustUseNamed when the function accepts named args - TODO confirm
+ if (mustUseNamed) {
+ Error() << "Named args are used for call, but unsupported by function: " << Func;
+ return nullptr;
+ }
+
+ if (WindowName) { // call has an OVER clause: wrap it into a window calculation
+ result = BuildCalcOverWindow(Pos, WindowName, result);
+ }
+
+ return result;
+ }
+
+ bool TSqlCallExpr::Init(const TRule_value_constructor& node) { // Initialises the call from a value constructor (Variant / Enum / Callable); false if any argument fails to translate.
+ switch (node.Alt_case()) {
+ case TRule_value_constructor::kAltValueConstructor1: { // Variant: three expression arguments
+ auto& ctor = node.GetAlt_value_constructor1();
+ Func = "Variant";
+ TSqlExpression expr(Ctx, Mode);
+ if (!Expr(expr, Args, ctor.GetRule_expr3())) {
+ return false;
+ }
+ if (!Expr(expr, Args, ctor.GetRule_expr5())) {
+ return false;
+ }
+ if (!Expr(expr, Args, ctor.GetRule_expr7())) {
+ return false;
+ }
+ break;
+ }
+ case TRule_value_constructor::kAltValueConstructor2: { // Enum: two expression arguments
+ auto& ctor = node.GetAlt_value_constructor2();
+ Func = "Enum";
+ TSqlExpression expr(Ctx, Mode);
+ if (!Expr(expr, Args, ctor.GetRule_expr3())) {
+ return false;
+ }
+ if (!Expr(expr, Args, ctor.GetRule_expr5())) {
+ return false;
+ }
+ break;
+ }
+ case TRule_value_constructor::kAltValueConstructor3: { // Callable: two expression arguments
+ auto& ctor = node.GetAlt_value_constructor3();
+ Func = "Callable";
+ TSqlExpression expr(Ctx, Mode);
+ if (!Expr(expr, Args, ctor.GetRule_expr3())) {
+ return false;
+ }
+ if (!Expr(expr, Args, ctor.GetRule_expr5())) {
+ return false;
+ }
+ break;
+ }
+ case TRule_value_constructor::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+ PositionalArgs = Args; // value constructors take positional arguments only
+ return true;
+ }
+
+ bool TSqlCallExpr::ExtractCallParam(const TRule_external_call_param& node) { // Parses one external-call parameter into CallConfig; false on duplicate name or bad value.
+ TString paramName = Id(node.GetRule_an_id1(), *this);
+ paramName = to_lower(paramName); // parameter names are stored lower-cased
+
+ if (CallConfig.contains(paramName)) { // each parameter may be given only once
+ Ctx.Error() << "WITH " << to_upper(paramName).Quote()
+ << " clause should be specified only once";
+ return false;
+ }
+
+ const bool optimizeForParam = paramName == "optimize_for";
+ const auto columnRefState = optimizeForParam ? EColumnRefState::AsStringLiteral : EColumnRefState::Deny; // only optimize_for treats column references as string literals; other parameters deny them
+
+ TColumnRefScope scope(Ctx, columnRefState);
+ if (optimizeForParam) {
+ scope.SetNoColumnErrContext("in external call params");
+ }
+
+ TSqlExpression expression(Ctx, Mode);
+ auto value = expression.Build(node.GetRule_expr3());
+ if (value && optimizeForParam) { // optimize_for: convert the expression into an atom and wrap it into a String call
+ TDeferredAtom atom;
+ MakeTableFromExpression(Ctx.Pos(), Ctx, value, atom);
+ value = new TCallNodeImpl(Ctx.Pos(), "String", { atom.Build() });
+ }
+
+ if (!value) {
+ return false;
+ }
+
+ CallConfig[paramName] = value;
+ return true;
+ }
+
+ // Extracts every parameter of an external call settings list.
+ // All parameters are processed even after a failure; returns true only if each one succeeded.
+ bool TSqlCallExpr::ConfigureExternalCall(const TRule_external_call_settings& node) {
+     bool allExtracted = ExtractCallParam(node.GetRule_external_call_param1());
+     for (const auto& extra : node.GetBlock2()) {
+         if (!ExtractCallParam(extra.GetRule_external_call_param2())) {
+             allExtracted = false;
+         }
+     }
+     return allExtracted;
+ }
+
+ bool TSqlCallExpr::Init(const TRule_using_call_expr& node) { // Initialises the call from a using_call_expr: resolves the target, then parses the invoke part.
+ // using_call_expr: ((an_id_or_type NAMESPACE an_id_or_type) | an_id_expr | bind_parameter | (EXTERNAL FUNCTION)) invoke_expr;
+ const auto& block = node.GetBlock1();
+ switch (block.Alt_case()) {
+ case TRule_using_call_expr::TBlock1::kAlt1: { // Module::Func
+ auto& subblock = block.GetAlt1();
+ Module = Id(subblock.GetRule_an_id_or_type1(), *this);
+ Func = Id(subblock.GetRule_an_id_or_type3(), *this);
+ break;
+ }
+ case TRule_using_call_expr::TBlock1::kAlt2: { // bare function name
+ Func = Id(block.GetAlt2().GetRule_an_id_expr1(), *this);
+ break;
+ }
+ case TRule_using_call_expr::TBlock1::kAlt3: { // bind parameter: the target is a named node
+ TString bindName;
+ if (!NamedNodeImpl(block.GetAlt3().GetRule_bind_parameter1(), bindName, *this)) {
+ return false;
+ }
+ Node = GetNamedNode(bindName);
+ if (!Node) {
+ return false;
+ }
+ break;
+ }
+ case TRule_using_call_expr::TBlock1::kAlt4: { // EXTERNAL FUNCTION
+ IsExternalCall = true;
+ break;
+ }
+ case TRule_using_call_expr::TBlock1::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+ YQL_ENSURE(!DistinctAllowed); // AllowDistinct() must not have been called for a using_call_expr
+ UsingCallExpr = true; // selects the PROCESS/REDUCE wording of the DISTINCT error in Init(invoke_expr)
+ TColumnRefScope scope(Ctx, EColumnRefState::Allow); // column references are allowed in the arguments
+ return Init(node.GetRule_invoke_expr2());
+ }
+
+ void TSqlCallExpr::InitName(const TString& name) { // Targets the function `name` with an empty module.
+ Module = "";
+ Func = name;
+ }
+
+ void TSqlCallExpr::InitExpr(const TNodePtr& expr) { // Makes an already built expression node the call target.
+ Node = expr;
+ }
+
+ // Translates one call argument and appends it to Args.
+ // For an unnamed argument the column-reference mode is looked up with
+ // GetFunctionArgColumnStatus for position idx, and idx is advanced on success.
+ // Returns false if the argument expression could not be built.
+ bool TSqlCallExpr::FillArg(const TString& module, const TString& func, size_t& idx, const TRule_named_expr& node) {
+     const bool positional = !node.HasBlock2();
+
+     // TODO: support named args
+     TMaybe<EColumnRefState> refState;
+     if (positional) {
+         refState = GetFunctionArgColumnStatus(Ctx, module, func, idx);
+     }
+
+     TNodePtr argNode;
+     if (!refState) {
+         argNode = NamedExpr(node);
+     } else {
+         // The scope applies only while this argument is being translated.
+         TColumnRefScope scope(Ctx, *refState, /* isTopLevel = */ false);
+         argNode = NamedExpr(node);
+     }
+     if (!argNode) {
+         return false;
+     }
+
+     Args.emplace_back(std::move(argNode));
+     if (positional) {
+         ++idx;
+     }
+     return true;
+ }
+
+ // Translates all arguments of a named_expr_list into Args, stopping at the first failure.
+ // The module/function used for per-argument lookups come from Node when it names a
+ // function (module defaults to "YQL"), otherwise from Module/Func.
+ bool TSqlCallExpr::FillArgs(const TRule_named_expr_list& node) {
+     TString targetModule = Module;
+     TString targetFunc = Func;
+     if (Node && Node->FuncName()) {
+         targetModule = Node->ModuleName() ? *Node->ModuleName() : "YQL";
+         targetFunc = *Node->FuncName();
+     }
+
+     size_t position = 0;
+     if (!FillArg(targetModule, targetFunc, position, node.GetRule_named_expr1())) {
+         return false;
+     }
+
+     const auto& rest = node.GetBlock2();
+     for (auto it = rest.begin(); it != rest.end(); ++it) {
+         if (!FillArg(targetModule, targetFunc, position, it->GetRule_named_expr2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Initialises the call from an invoke_expr: parses the argument list (or `*`), the optional
+ // DISTINCT quantifier, the optional null treatment / FILTER clause and the optional OVER clause.
+ // Fills Args, PositionalArgs, NamedArgs, AggMode and WindowName.
+ // Returns false after reporting an error through Ctx.
+ bool TSqlCallExpr::Init(const TRule_invoke_expr& node) {
+     // invoke_expr: LPAREN (opt_set_quantifier named_expr_list COMMA? | ASTERISK)? RPAREN invoke_expr_tail;
+     // invoke_expr_tail:
+     //     (null_treatment | filter_clause)? (OVER window_name_or_specification)?
+     // ;
+     Pos = Ctx.Pos();
+     if (node.HasBlock2()) {
+         switch (node.GetBlock2().Alt_case()) {
+             case TRule_invoke_expr::TBlock2::kAlt1: {
+                 const auto& alt = node.GetBlock2().GetAlt1();
+                 TPosition distinctPos;
+                 if (IsDistinctOptSet(alt.GetRule_opt_set_quantifier1(), distinctPos)) {
+                     if (!DistinctAllowed) {
+                         if (UsingCallExpr) {
+                             Ctx.Error(distinctPos) << "DISTINCT can not be used in PROCESS/REDUCE";
+                         } else {
+                             Ctx.Error(distinctPos) << "DISTINCT can only be used in aggregation functions";
+                         }
+                         return false;
+                     }
+                     YQL_ENSURE(AggMode == EAggregateMode::Normal);
+                     AggMode = EAggregateMode::Distinct;
+                     Ctx.IncrementMonCounter("sql_features", "DistinctInCallExpr");
+                 }
+                 if (!FillArgs(alt.GetRule_named_expr_list2())) {
+                     return false;
+                 }
+                 // Split the arguments into positional and named ones; a positional argument
+                 // is not allowed once a named one has been seen.
+                 for (const auto& arg : Args) {
+                     if (arg->GetLabel()) {
+                         NamedArgs.push_back(arg);
+                     } else {
+                         PositionalArgs.push_back(arg);
+                         if (!NamedArgs.empty()) {
+                             Ctx.Error(arg->GetPos()) << "Unnamed arguments can not follow after named one";
+                             return false;
+                         }
+                     }
+                 }
+                 break;
+             }
+             case TRule_invoke_expr::TBlock2::kAlt2:
+                 // `*` as the only argument.
+                 if (IsExternalCall) {
+                     // NOTE(review): the error is recorded but initialisation continues;
+                     // confirm whether this path should return false like the other errors.
+                     Ctx.Error() << "You should set EXTERNAL FUNCTION type. Example: EXTERNAL FUNCTION('YANDEX-CLOUD', ...)";
+                 } else {
+                     Args.push_back(new TAsteriskNode(Pos));
+                 }
+                 break;
+             case TRule_invoke_expr::TBlock2::ALT_NOT_SET:
+                 Y_ABORT("You should change implementation according to grammar changes");
+         }
+     }
+
+     const auto& tail = node.GetRule_invoke_expr_tail4();
+
+     if (tail.HasBlock1()) {
+         if (IsExternalCall) {
+             Ctx.Error() << "Additional clause after EXTERNAL FUNCTION(...) is not supported";
+             return false;
+         }
+
+         switch (tail.GetBlock1().Alt_case()) {
+             case TRule_invoke_expr_tail::TBlock1::kAlt1: {
+                 // Null treatment is accepted only together with an OVER clause.
+                 if (!tail.HasBlock2()) {
+                     Ctx.Error() << "RESPECT/IGNORE NULLS can only be used with window functions";
+                     return false;
+                 }
+                 const auto& alt = tail.GetBlock1().GetAlt1();
+                 if (alt.GetRule_null_treatment1().Alt_case() == TRule_null_treatment::kAltNullTreatment2) {
+                     SetIgnoreNulls();
+                 }
+                 break;
+             }
+             case TRule_invoke_expr_tail::TBlock1::kAlt2: {
+                 Ctx.Error() << "FILTER clause is not supported yet";
+                 return false;
+             }
+             case TRule_invoke_expr_tail::TBlock1::ALT_NOT_SET:
+                 Y_ABORT("You should change implementation according to grammar changes");
+         }
+     }
+
+     if (tail.HasBlock2()) {
+         // OVER clause: switch the aggregation mode to its over-window variant.
+         if (Ctx.DistinctOverWindow) {
+             if (AggMode == EAggregateMode::Distinct) {
+                 SetOverWindowDistinct();
+             } else {
+                 SetOverWindow();
+             }
+         } else {
+             if (AggMode == EAggregateMode::Distinct) {
+                 Ctx.Error() << "DISTINCT is not yet supported in window functions";
+                 return false;
+             }
+             SetOverWindow();
+         }
+         // Bind by reference: a plain `auto` would copy the protobuf sub-message.
+         const auto& winRule = tail.GetBlock2().GetRule_window_name_or_specification2();
+         switch (winRule.Alt_case()) {
+             case TRule_window_name_or_specification::kAltWindowNameOrSpecification1: {
+                 // OVER window_name
+                 WindowName = Id(winRule.GetAlt_window_name_or_specification1().GetRule_window_name1().GetRule_an_id_window1(), *this);
+                 break;
+             }
+             case TRule_window_name_or_specification::kAltWindowNameOrSpecification2: {
+                 // OVER (inline specification): register it under a generated name.
+                 if (!Ctx.WinSpecsScopes) {
+                     auto pos = Ctx.TokenPosition(tail.GetBlock2().GetToken1());
+                     Ctx.Error(pos) << "Window and aggregation functions are not allowed in this context";
+                     return false;
+                 }
+
+                 TWindowSpecificationPtr spec = WindowSpecification(
+                     winRule.GetAlt_window_name_or_specification2().GetRule_window_specification1().GetRule_window_specification_details2());
+                 if (!spec) {
+                     return false;
+                 }
+
+                 WindowName = Ctx.MakeName("_yql_anonymous_window");
+                 TWinSpecs& specs = Ctx.WinSpecsScopes.back();
+                 YQL_ENSURE(!specs.contains(WindowName));
+                 specs[WindowName] = spec;
+                 break;
+             }
+             case TRule_window_name_or_specification::ALT_NOT_SET:
+                 Y_ABORT("You should change implementation according to grammar changes");
+         }
+         Ctx.IncrementMonCounter("sql_features", "WindowFunctionOver");
+     }
+
+     return true;
+ }
+
+ // Bumps the monitoring counters that describe this call:
+ //  - a call through a stored node counts as "NamedNodeUseApply";
+ //  - a call with a module counts the module and "CallUdf", plus the "Module.Func" pair when
+ //    the module is not recognised as a script type;
+ //  - a call without a module counts the builtin name and "CallBuiltin".
+ // Module and function names are counted only if ValidateForCounters accepts them.
+ void TSqlCallExpr::IncCounters() {
+     if (Node) {
+         Ctx.IncrementMonCounter("sql_features", "NamedNodeUseApply");
+         return;
+     }
+
+     if (Module.empty()) {
+         if (ValidateForCounters(Func)) {
+             Ctx.IncrementMonCounter("sql_builtins", Func);
+             Ctx.IncrementMonCounter("sql_features", "CallBuiltin");
+         }
+         return;
+     }
+
+     if (!ValidateForCounters(Module)) {
+         return;
+     }
+     Ctx.IncrementMonCounter("udf_modules", Module);
+     Ctx.IncrementMonCounter("sql_features", "CallUdf");
+
+     if (!ValidateForCounters(Func)) {
+         return;
+     }
+     const auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(Module);
+     if (scriptType == NKikimr::NMiniKQL::EScriptType::Unknown) {
+         Ctx.IncrementMonCounter("udf_functions", Module + "." + Func);
+     }
+ }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_call_expr.h b/yql/essentials/sql/v1/sql_call_expr.h
new file mode 100644
index 00000000000..9b9d39b81a8
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_call_expr.h
@@ -0,0 +1,98 @@
+#pragma once
+
+#include "sql_translation.h"
+
+namespace NSQLTranslationV1 {
+
+TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
+ TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig);
+
+using namespace NSQLv1Generated;
+
+ // Translator for call expressions (invoke_expr, using_call_expr and value constructors).
+ // Typical use: construct, call one of the Init*/InitName/InitExpr methods to describe the
+ // call, then BuildCall() or BuildUdf() to obtain the resulting node.
+ class TSqlCallExpr: public TSqlTranslation {
+ public:
+     TSqlCallExpr(TContext& ctx, NSQLTranslation::ESqlMode mode)
+         : TSqlTranslation(ctx, mode)
+     {
+     }
+
+     // Creates a call with the same target and flags as `call`, but with `args` as its Args.
+     // PositionalArgs, NamedArgs and WindowName are not carried over.
+     TSqlCallExpr(const TSqlCallExpr& call, const TVector<TNodePtr>& args)
+         : TSqlTranslation(call.Ctx, call.Mode)
+         , Pos(call.Pos)
+         , Func(call.Func)
+         , Module(call.Module)
+         , Node(call.Node)
+         , Args(args)
+         , AggMode(call.AggMode)
+         , DistinctAllowed(call.DistinctAllowed)
+         , UsingCallExpr(call.UsingCallExpr)
+         , IsExternalCall(call.IsExternalCall)
+         , CallConfig(call.CallConfig)
+     {
+     }
+
+     // Permits the DISTINCT quantifier in the argument list.
+     void AllowDistinct() {
+         DistinctAllowed = true;
+     }
+
+     void InitName(const TString& name);
+     void InitExpr(const TNodePtr& expr);
+
+     bool Init(const TRule_using_call_expr& node);
+     bool Init(const TRule_value_constructor& node);
+     bool Init(const TRule_invoke_expr& node);
+     bool ConfigureExternalCall(const TRule_external_call_settings& node);
+     void IncCounters();
+
+     TNodePtr BuildUdf(bool forReduce);
+
+     TNodePtr BuildCall();
+
+     TPosition GetPos() const {
+         return Pos;
+     }
+
+     const TVector<TNodePtr>& GetArgs() const {
+         return Args;
+     }
+
+     // Normal -> OverWindow; any other current mode is a programming error.
+     void SetOverWindow() {
+         YQL_ENSURE(AggMode == EAggregateMode::Normal);
+         AggMode = EAggregateMode::OverWindow;
+     }
+
+     // Distinct -> OverWindowDistinct; any other current mode is a programming error.
+     void SetOverWindowDistinct() {
+         YQL_ENSURE(AggMode == EAggregateMode::Distinct);
+         AggMode = EAggregateMode::OverWindowDistinct;
+     }
+
+     // Selects the "_IgnoreNulls" variant of the function by appending the suffix to its name.
+     void SetIgnoreNulls() {
+         Func += "_IgnoreNulls";
+     }
+
+     // True for EXTERNAL FUNCTION calls. Const so it can be queried through const references.
+     bool IsExternal() const {
+         return IsExternalCall;
+     }
+
+ private:
+     bool ExtractCallParam(const TRule_external_call_param& node);
+     bool FillArg(const TString& module, const TString& func, size_t& idx, const TRule_named_expr& node);
+     bool FillArgs(const TRule_named_expr_list& node);
+
+ private:
+     TPosition Pos;
+     TString Func;
+     TString Module;
+     TNodePtr Node;                      // call target when it is an expression/named node
+     TVector<TNodePtr> Args;             // all arguments in source order
+     TVector<TNodePtr> PositionalArgs;   // arguments without a label
+     TVector<TNodePtr> NamedArgs;        // arguments with a label
+     EAggregateMode AggMode = EAggregateMode::Normal;
+     TString WindowName;                 // set when the call has an OVER clause
+     bool DistinctAllowed = false;
+     bool UsingCallExpr = false;
+     bool IsExternalCall = false;
+     TFunctionConfig CallConfig;         // external call parameters keyed by lower-cased name
+ };
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_call_param.h b/yql/essentials/sql/v1/sql_call_param.h
new file mode 100644
index 00000000000..57495afd88f
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_call_param.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <util/system/types.h>
+
+ namespace NSQLTranslationV1 { // presumably lists the parameters accepted by external calls - TODO confirm against users of ESqlCallParam
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////
+
+ enum class ESqlCallParam: ui32 {
+ InputType /* "INPUT_TYPE" */, // as is
+ OutputType /* "OUTPUT_TYPE" */, // as is
+ Concurrency /* "CONCURRENCY" */, // as is
+ BatchSize /* "BATCH_SIZE" */, // as is
+ OptimizeFor /* "OPTIMIZE_FOR" */, // evaluate atom
+ Connection /* "CONNECTION" */, // evaluate atom
+ Init /* "INIT" */, // as is
+ }; // NOTE(review): the quoted names in the /* */ comments look like serialization names; confirm before editing them
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////
+ } // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_expression.cpp b/yql/essentials/sql/v1/sql_expression.cpp
new file mode 100644
index 00000000000..1f506991a2c
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_expression.cpp
@@ -0,0 +1,2307 @@
+#include "sql_expression.h"
+#include "sql_call_expr.h"
+#include "sql_select.h"
+#include "sql_values.h"
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/utils/utf8.h>
+#include <util/charset/wide.h>
+#include <util/string/ascii.h>
+#include <util/string/hex.h>
+
+namespace NSQLTranslationV1 {
+
+using NALPDefault::SQLv1LexerTokens;
+using NALPDefaultAntlr4::SQLv1Antlr4Lexer;
+
+using namespace NSQLv1Generated;
+
+// Entry point for the `expr` grammar rule:
+//   expr: or_subexpr (OR or_subexpr)* | type_name_composite
+// The first alternative is handed to BinOper with the "Or" operation, the second to TypeNode.
+TNodePtr TSqlExpression::Build(const TRule_expr& node) {
+    switch (node.Alt_case()) {
+        case TRule_expr::kAltExpr1: {
+            const auto& orChain = node.GetAlt_expr1();
+            const auto& rest = orChain.GetBlock2();
+            auto operandOf = [](const TRule_expr_TAlt1_TBlock2& item) -> const TRule_or_subexpr& {
+                return item.GetRule_or_subexpr2();
+            };
+            return BinOper("Or", orChain.GetRule_or_subexpr1(), operandOf, rest.begin(), rest.end(), {});
+        }
+        case TRule_expr::kAltExpr2:
+            return TypeNode(node.GetAlt_expr2().GetRule_type_name_composite1());
+        case TRule_expr::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// mul_subexpr: con_subexpr (DOUBLE_PIPE con_subexpr)*;
+// Delegates the whole chain to BinOper with the "Concat" operation.
+TNodePtr TSqlExpression::SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail) {
+    const auto& rest = node.GetBlock2();
+    auto operandOf = [](const TRule_mul_subexpr::TBlock2& item) -> const TRule_con_subexpr& {
+        return item.GetRule_con_subexpr2();
+    };
+    return BinOper("Concat", node.GetRule_con_subexpr1(), operandOf, rest.begin(), rest.end(), tail);
+}
+
+// add_subexpr: mul_subexpr ((ASTERISK | SLASH | PERCENT) mul_subexpr)*;
+// The operator of each step is resolved by BinOpList.
+TNodePtr TSqlExpression::SubExpr(const TRule_add_subexpr& node, const TTrailingQuestions& tail) {
+    const auto& rest = node.GetBlock2();
+    auto operandOf = [](const TRule_add_subexpr::TBlock2& item) -> const TRule_mul_subexpr& {
+        return item.GetRule_mul_subexpr2();
+    };
+    return BinOpList(node.GetRule_mul_subexpr1(), operandOf, rest.begin(), rest.end(), tail);
+}
+
+// bit_subexpr: add_subexpr ((PLUS | MINUS) add_subexpr)*;
+// The operator of each step is resolved by BinOpList.
+TNodePtr TSqlExpression::SubExpr(const TRule_bit_subexpr& node, const TTrailingQuestions& tail) {
+    const auto& rest = node.GetBlock2();
+    auto operandOf = [](const TRule_bit_subexpr::TBlock2& item) -> const TRule_add_subexpr& {
+        return item.GetRule_add_subexpr2();
+    };
+    return BinOpList(node.GetRule_add_subexpr1(), operandOf, rest.begin(), rest.end(), tail);
+}
+
+// neq_subexpr: bit_subexpr ((SHIFT_LEFT | shift_right | ROT_LEFT | rot_right | AMPERSAND | PIPE | CARET) bit_subexpr)*
+//   // trailing QUESTIONS are used in optional simple types (String?) and optional lambda args: ($x, $y?) -> ($x)
+//   ((double_question neq_subexpr) => double_question neq_subexpr | QUESTION+)?;
+//
+// Builds the bit-operation chain; a `??` tail wraps the result and the right-hand side into a
+// "Coalesce" builtin, a `?+` tail is forwarded to the operands as trailing questions.
+// `tailExternal` must be empty: this level is where trailing questions originate.
+// Returns an empty pointer if any operand fails to build.
+TNodePtr TSqlExpression::SubExpr(const TRule_neq_subexpr& node, const TTrailingQuestions& tailExternal) {
+    YQL_ENSURE(tailExternal.Count == 0);
+    MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock3();
+    TTrailingQuestions tail;
+    if (node.HasBlock3() && node.GetBlock3().Alt_case() == TRule_neq_subexpr::TBlock3::kAlt2) {
+        const auto& questions = node.GetBlock3().GetAlt2().GetBlock1();
+        tail.Count = questions.size();
+        // Validate before touching the first element.
+        YQL_ENSURE(tail.Count > 0);
+        tail.Pos = Ctx.TokenPosition(questions.begin()->GetToken1());
+    }
+
+    auto getNode = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); };
+    auto result = BinOpList(node.GetRule_bit_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
+    if (!result) {
+        return {};
+    }
+    if (node.HasBlock3()) {
+        auto& block = node.GetBlock3();
+        if (block.Alt_case() == TRule_neq_subexpr::TBlock3::kAlt1) {
+            auto altResult = SubExpr(block.GetAlt1().GetRule_neq_subexpr2(), {});
+            if (!altResult) {
+                return {};
+            }
+            const TVector<TNodePtr> args({result, altResult});
+            // Move the current position to the `??` token so the Coalesce node points at it.
+            Token(block.GetAlt1().GetRule_double_question1().GetToken1());
+            result = BuildBuiltinFunc(Ctx, Ctx.Pos(), "Coalesce", args);
+        }
+    }
+    return result;
+}
+
+ // eq_subexpr: neq_subexpr ((LESS | LESS_OR_EQ | GREATER | GREATER_OR_EQ) neq_subexpr)*;
+ // The operator of each step is resolved by BinOpList.
+ TNodePtr TSqlExpression::SubExpr(const TRule_eq_subexpr& node, const TTrailingQuestions& tail) {
+     const auto& rest = node.GetBlock2();
+     auto operandOf = [](const TRule_eq_subexpr::TBlock2& item) -> const TRule_neq_subexpr& {
+         return item.GetRule_neq_subexpr2();
+     };
+     return BinOpList(node.GetRule_neq_subexpr1(), operandOf, rest.begin(), rest.end(), tail);
+ }
+
+ // or_subexpr: and_subexpr (AND and_subexpr)*;
+ // Delegates the whole chain to BinOper with the "And" operation.
+ TNodePtr TSqlExpression::SubExpr(const TRule_or_subexpr& node, const TTrailingQuestions& tail) {
+     const auto& rest = node.GetBlock2();
+     auto operandOf = [](const TRule_or_subexpr::TBlock2& item) -> const TRule_and_subexpr& {
+         return item.GetRule_and_subexpr2();
+     };
+     return BinOper("And", node.GetRule_and_subexpr1(), operandOf, rest.begin(), rest.end(), tail);
+ }
+
+// and_subexpr: xor_subexpr (XOR xor_subexpr)*;
+// Delegates the whole chain to BinOper with the "Xor" operation.
+TNodePtr TSqlExpression::SubExpr(const TRule_and_subexpr& node, const TTrailingQuestions& tail) {
+    const auto& rest = node.GetBlock2();
+    auto operandOf = [](const TRule_and_subexpr::TBlock2& item) -> const TRule_xor_subexpr& {
+        return item.GetRule_xor_subexpr2();
+    };
+    return BinOper("Xor", node.GetRule_xor_subexpr1(), operandOf, rest.begin(), rest.end(), tail);
+}
+
+
+// Parses one `name = value` changefeed setting and stores the value node in `settings`.
+//   node     - grammar node of the entry (setting identifier and value expression)
+//   ctx      - expression translator used to build the value and report errors
+//   settings - receives the parsed value
+//   alter    - true when called for ALTER; altering settings is rejected
+// Returns false (after reporting an error) for an unknown setting, a value that fails to build,
+// or a value of an unexpected kind. Setting names are matched case-insensitively.
+bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) {
+    const auto id = IdEx(node.GetRule_an_id1(), ctx);
+    if (alter) {
+        // currently we don't support alter settings
+        ctx.Error() << to_upper(id.Name) << " alter is not supported";
+        return false;
+    }
+
+    const auto& setting = node.GetRule_changefeed_setting_value3();
+    auto exprNode = ctx.Build(setting.GetRule_expr1());
+
+    if (!exprNode) {
+        ctx.Context().Error(id.Pos) << "Invalid changefeed setting: " << id.Name;
+        return false;
+    }
+
+    // True when the value is a literal of the given literal type name.
+    const auto isLiteralOf = [&exprNode](const char* type) {
+        return exprNode->IsLiteral() && exprNode->GetLiteralType() == type;
+    };
+    // Reports the common "wrong kind of literal" error; always yields false.
+    const auto typeMismatch = [&ctx, &id](const char* expected) {
+        ctx.Context().Error() << "Literal of " << expected << " type is expected for " << id.Name;
+        return false;
+    };
+
+    const auto name = to_lower(id.Name);
+    if (name == "sink_type") {
+        if (!isLiteralOf("String")) {
+            return typeMismatch("String");
+        }
+
+        const auto value = exprNode->GetLiteralValue();
+        if (to_lower(value) == "local") {
+            settings.SinkSettings = TChangefeedSettings::TLocalSinkSettings();
+        } else {
+            ctx.Context().Error() << "Unknown changefeed sink type: " << value;
+            return false;
+        }
+    } else if (name == "mode") {
+        if (!isLiteralOf("String")) {
+            return typeMismatch("String");
+        }
+        settings.Mode = exprNode;
+    } else if (name == "format") {
+        if (!isLiteralOf("String")) {
+            return typeMismatch("String");
+        }
+        settings.Format = exprNode;
+    } else if (name == "initial_scan") {
+        if (!isLiteralOf("Bool")) {
+            return typeMismatch("Bool");
+        }
+        settings.InitialScan = exprNode;
+    } else if (name == "virtual_timestamps") {
+        if (!isLiteralOf("Bool")) {
+            return typeMismatch("Bool");
+        }
+        settings.VirtualTimestamps = exprNode;
+    } else if (name == "resolved_timestamps") {
+        if (exprNode->GetOpName() != "Interval") {
+            return typeMismatch("Interval");
+        }
+        settings.ResolvedTimestamps = exprNode;
+    } else if (name == "retention_period") {
+        if (exprNode->GetOpName() != "Interval") {
+            return typeMismatch("Interval");
+        }
+        settings.RetentionPeriod = exprNode;
+    } else if (name == "topic_auto_partitioning") {
+        const auto state = to_lower(exprNode->GetLiteralValue());
+        if (state != "enabled" && state != "disabled") {
+            // Reject the value instead of storing it, and name the values actually accepted.
+            ctx.Context().Error() << "Literal 'ENABLED' or 'DISABLED' is expected for " << id.Name;
+            return false;
+        }
+        settings.TopicAutoPartitioning = exprNode;
+    } else if (name == "topic_max_active_partitions") {
+        if (!exprNode->IsIntegerLiteral()) {
+            return typeMismatch("integer");
+        }
+        settings.TopicMaxActivePartitions = exprNode;
+    } else if (name == "topic_min_active_partitions") {
+        if (!exprNode->IsIntegerLiteral()) {
+            return typeMismatch("integer");
+        }
+        // The minimum number of active partitions is stored in TopicPartitions.
+        settings.TopicPartitions = exprNode;
+    } else if (name == "aws_region") {
+        if (!isLiteralOf("String")) {
+            return typeMismatch("String");
+        }
+        settings.AwsRegion = exprNode;
+    } else {
+        ctx.Context().Error(id.Pos) << "Unknown changefeed setting: " << id.Name;
+        return false;
+    }
+
+    return true;
+}
+
+// Parses a comma-separated list of changefeed settings into `settings`.
+// Stops at, and returns false on, the first entry that fails to parse.
+bool ChangefeedSettings(const TRule_changefeed_settings& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) {
+    const bool firstOk = ChangefeedSettingsEntry(node.GetRule_changefeed_settings_entry1(), ctx, settings, alter);
+    if (!firstOk) {
+        return false;
+    }
+
+    for (auto& item : node.GetBlock2()) {
+        const bool itemOk = ChangefeedSettingsEntry(item.GetRule_changefeed_settings_entry2(), ctx, settings, alter);
+        if (!itemOk) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Appends a changefeed description named after the rule's identifier to `changefeeds`
+// and fills its settings. The description stays in the vector even when parsing fails.
+bool CreateChangefeed(const TRule_changefeed& node, TSqlExpression& ctx, TVector<TChangefeedDescription>& changefeeds) {
+    changefeeds.emplace_back(IdEx(node.GetRule_an_id2(), ctx));
+    auto& target = changefeeds.back().Settings;
+    return ChangefeedSettings(node.GetRule_changefeed_settings5(), ctx, target, false);
+}
+
+namespace {
+    // Returns true when `literal` contains no character that std::isalpha classifies as a
+    // letter and no byte with the high bit set (i.e. no non-ASCII byte).
+    // Each char is converted to unsigned char first: passing a negative value other than EOF
+    // to std::isalpha is undefined behaviour, and the high-bit test is done before the call.
+    bool WithoutAlpha(const std::string_view &literal) {
+        return std::none_of(literal.cbegin(), literal.cend(), [](char c) {
+            const auto byte = static_cast<unsigned char>(c);
+            return byte >= 0x80 || std::isalpha(byte);
+        });
+    }
+}
+
+
+// Builds `node` with `sqlExpr` and appends the result to `exprNodes`.
+// Returns false, leaving `exprNodes` untouched, when the expression fails to build.
+bool Expr(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr& node) {
+    if (TNodePtr built = sqlExpr.Build(node)) {
+        exprNodes.push_back(built);
+        return true;
+    }
+    return false;
+}
+
+// Builds every expression of an `expr_list` (expr (COMMA expr)*) and appends the nodes to
+// `exprNodes` in source order. Stops at, and returns false on, the first failure; nodes
+// built before the failure stay in `exprNodes`.
+bool ExprList(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr_list& node) {
+    if (!Expr(sqlExpr, exprNodes, node.GetRule_expr1())) {
+        return false;
+    }
+    // Iterate by reference: copying each block would deep-copy its whole expression subtree.
+    for (const auto& b : node.GetBlock2()) {
+        sqlExpr.Token(b.GetToken1());
+        if (!Expr(sqlExpr, exprNodes, b.GetRule_expr2())) {
+            return false;
+        }
+    }
+    return true;
+}
+// Splits an integer literal into its numeric value and its type suffix.
+//   strOrig - literal text; matching is case-insensitive
+//   value   - receives the parsed magnitude
+//   suffix  - receives the lower-cased suffix: an optional 'u'/'p' followed by an optional
+//             one of 'l','s','t','i','b','n'. Left untouched for one-character literals.
+// A "0x" / "0o" / "0b" prefix selects base 16 / 8 / 2, otherwise base 10 is used.
+// Returns false after reporting an error for a digit outside the base or a 64-bit overflow.
+bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix) {
+    const auto str = to_lower(strOrig);
+    const auto strLen = str.size();
+    ui64 base = 10;
+    if (strLen > 2 && str[0] == '0') {
+        const auto formatChar = str[1];
+        if (formatChar == 'x') {
+            base = 16;
+        } else if (formatChar == 'o') {
+            base = 8;
+        } else if (formatChar == 'b') {
+            base = 2;
+        }
+    }
+    if (strLen > 1) {
+        // Work with an index instead of an iterator so that a two-character, all-suffix input
+        // can never step before the beginning of the string.
+        size_t suffixStart = strLen;
+        const char widthChar = str[suffixStart - 1];
+        if (widthChar == 'l' || widthChar == 's' || widthChar == 't' || widthChar == 'i' || widthChar == 'b' || widthChar == 'n') {
+            --suffixStart;
+        }
+        const char kindChar = str[suffixStart - 1];
+        if (kindChar == 'u' || kindChar == 'p') {
+            --suffixStart;
+        }
+        suffix = str.substr(suffixStart);
+    }
+    value = 0;
+    const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size());
+    for (const char cur : digString) {
+        // Index through unsigned char so a byte >= 0x80 cannot produce a negative index.
+        const ui64 curDigit = Char2DigitTable[static_cast<unsigned char>(cur)];
+        if (curDigit >= base) {
+            ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur <<
+                "' is out of base: " << base;
+            return false;
+        }
+
+        // Detect overflow of both the multiplication and the addition.
+        ui64 curValue = value;
+        value *= base;
+        bool overflow = ((value / base) != curValue);
+        if (!overflow) {
+            curValue = value;
+            value += curDigit;
+            overflow = value < curValue;
+        }
+
+        if (overflow) {
+            ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", number limit overflow";
+            return false;
+        }
+    }
+    return true;
+}
+
+// Builds the literal node for an `integer` token.
+// A "pn" suffix yields a PgNumeric literal. Otherwise the text is split by ParseNumbers and
+// the suffix selects the literal type; with no suffix (or a bare "p") the narrowest of the
+// candidate types that holds the value is chosen and the node is marked as implicitly typed.
+// Returns an empty pointer after reporting an error for unparsable text or an unknown suffix.
+TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node) {
+    const TString text = ctx.Token(node.GetToken1());
+    if (to_lower(text).EndsWith("pn")) {
+        // TODO: add validation
+        return new TLiteralNode(ctx.Pos(), "PgNumeric", text.substr(0, text.size() - 2));
+    }
+
+    ui64 value;
+    TString suffix;
+    if (!ParseNumbers(ctx, text, value, suffix)) {
+        return {};
+    }
+
+    const bool exceedsInt32 = value >> 31;
+    const bool exceedsInt64 = value >> 63;
+    const TString digits = ToString(value);
+    const bool implicitType = true;
+
+    if (suffix.empty()) {
+        if (exceedsInt64) {
+            return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", digits, implicitType);
+        }
+        if (exceedsInt32) {
+            return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", digits, implicitType);
+        }
+        return new TLiteralNumberNode<i32>(ctx.Pos(), "Int32", digits, implicitType);
+    }
+    if (suffix == "p") {
+        if (exceedsInt64) {
+            ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << text << ", 64 bit signed integer overflow";
+            return {};
+        }
+        if (exceedsInt32) {
+            return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", digits, implicitType);
+        }
+        return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", digits, implicitType);
+    }
+    if (suffix == "u") {
+        return new TLiteralNumberNode<ui32>(ctx.Pos(), "Uint32", digits);
+    }
+    if (suffix == "ul") {
+        return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", digits);
+    }
+    if (suffix == "ut") {
+        return new TLiteralNumberNode<ui8>(ctx.Pos(), "Uint8", digits);
+    }
+    if (suffix == "t") {
+        return new TLiteralNumberNode<i8>(ctx.Pos(), "Int8", digits);
+    }
+    if (suffix == "l") {
+        return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", digits);
+    }
+    if (suffix == "us") {
+        return new TLiteralNumberNode<ui16>(ctx.Pos(), "Uint16", digits);
+    }
+    if (suffix == "s") {
+        return new TLiteralNumberNode<i16>(ctx.Pos(), "Int16", digits);
+    }
+    if (suffix == "ps") {
+        return new TLiteralNumberNode<i16>(ctx.Pos(), "PgInt2", digits);
+    }
+    if (suffix == "pi") {
+        return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", digits);
+    }
+    if (suffix == "pb") {
+        return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", digits);
+    }
+
+    ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << text << ", invalid suffix: " << suffix;
+    return {};
+}
+
+// Builds the literal node for a `real` token. The (case-insensitive) suffix selects the type:
+//   f -> Float, p / pf8 -> PgFloat8, pf4 -> PgFloat4, pn -> PgNumeric, none -> Double.
+// The suffix is stripped from the stored text.
+// PgFloat8 is an 8-byte float, so its node is instantiated with `double`, in the same way the
+// integer Pg suffixes use a C++ type of matching width.
+// NOTE(review): assumes TLiteralNumberNode<T> uses T to validate the literal text - confirm.
+TNodePtr LiteralReal(TContext& ctx, const TRule_real& node) {
+    const TString value(ctx.Token(node.GetToken1()));
+    YQL_ENSURE(!value.empty());
+    auto lower = to_lower(value);
+    if (lower.EndsWith("f")) {
+        return new TLiteralNumberNode<float>(ctx.Pos(), "Float", value.substr(0, value.size()-1));
+    } else if (lower.EndsWith("p")) {
+        return new TLiteralNumberNode<double>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-1));
+    } else if (lower.EndsWith("pf4")) {
+        return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat4", value.substr(0, value.size()-3));
+    } else if (lower.EndsWith("pf8")) {
+        return new TLiteralNumberNode<double>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-3));
+    } else if (lower.EndsWith("pn")) {
+        return new TLiteralNode(ctx.Pos(), "PgNumeric", value.substr(0, value.size()-2));
+    } else {
+        return new TLiteralNumberNode<double>(ctx.Pos(), "Double", value);
+    }
+}
+
+// Translates a `literal_value` rule into an expression or an identifier.
+// Integer, real, NULL, boolean and empty-action alternatives produce an expression node;
+// the string alternative is delegated to BuildLiteralTypedSmartStringOrId.
+// Alternatives 4, 6, 7 and 8 are reported through AltNotImplemented.
+// Returns an empty TMaybe when no expression was produced.
+TMaybe<TExprOrIdent> TSqlExpression::LiteralExpr(const TRule_literal_value& node) {
+    TExprOrIdent literal;
+    switch (node.Alt_case()) {
+        case TRule_literal_value::kAltLiteralValue1: {
+            const auto& integerRule = node.GetAlt_literal_value1().GetRule_integer1();
+            literal.Expr = LiteralNumber(Ctx, integerRule);
+            break;
+        }
+        case TRule_literal_value::kAltLiteralValue2: {
+            const auto& realRule = node.GetAlt_literal_value2().GetRule_real1();
+            literal.Expr = LiteralReal(Ctx, realRule);
+            break;
+        }
+        case TRule_literal_value::kAltLiteralValue3: {
+            const TString text(Token(node.GetAlt_literal_value3().GetToken1()));
+            return BuildLiteralTypedSmartStringOrId(Ctx, text);
+        }
+        case TRule_literal_value::kAltLiteralValue5: {
+            Token(node.GetAlt_literal_value5().GetToken1());
+            literal.Expr = BuildLiteralNull(Ctx.Pos());
+            break;
+        }
+        case TRule_literal_value::kAltLiteralValue9: {
+            const auto& boolToken = node.GetAlt_literal_value9().GetRule_bool_value1().GetToken1();
+            const TString text(to_lower(Token(boolToken)));
+            literal.Expr = BuildLiteralBool(Ctx.Pos(), FromString<bool>(text));
+            break;
+        }
+        case TRule_literal_value::kAltLiteralValue10: {
+            literal.Expr = BuildEmptyAction(Ctx.Pos());
+            break;
+        }
+        case TRule_literal_value::kAltLiteralValue4:
+        case TRule_literal_value::kAltLiteralValue6:
+        case TRule_literal_value::kAltLiteralValue7:
+        case TRule_literal_value::kAltLiteralValue8:
+        case TRule_literal_value::ALT_NOT_SET:
+            AltNotImplemented("literal_value", node);
+    }
+    if (literal.Expr) {
+        return literal;
+    }
+    return {};
+}
+
+// Translates a unary sub-expression. The template is used with TRule_unary_subexpr (first
+// branch) and with another rule type handled by the second branch, which reads the
+// in_unary_subexpr alternatives.
+// Alternative 1 goes to UnaryCasualExpr together with the trailing questions; alternative 2
+// is a JSON API call, which does not accept trailing questions: a non-empty `tail` is
+// reported through UnexpectedQuestionToken and an empty pointer is returned.
+template<typename TUnarySubExprType>
+TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node, const TTrailingQuestions& tail) {
+ if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) {
+ if (node.Alt_case() == TRule_unary_subexpr::kAltUnarySubexpr1) {
+ return UnaryCasualExpr(node.GetAlt_unary_subexpr1().GetRule_unary_casual_subexpr1(), tail);
+ } else if (tail.Count) {
+ UnexpectedQuestionToken(tail);
+ return {};
+ } else {
+ // The flag is cleared only on the JSON path of this branch.
+ MaybeUnnamedSmartParenOnTop = false;
+ return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1());
+ }
+ } else {
+ // In this branch the flag is cleared unconditionally, before any alternative is examined.
+ MaybeUnnamedSmartParenOnTop = false;
+ if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) {
+ return UnaryCasualExpr(node.GetAlt_in_unary_subexpr1().GetRule_in_unary_casual_subexpr1(), tail);
+ } else if (tail.Count) {
+ UnexpectedQuestionToken(tail);
+ return {};
+ } else {
+ return JsonApiExpr(node.GetAlt_in_unary_subexpr2().GetRule_json_api_expr1());
+ }
+ }
+}
+
+// jsonpath_spec: STRING_VALUE;
+// Decodes the string token and wraps its content into a "Utf8" call node.
+// Returns nullptr when the string content cannot be parsed.
+TNodePtr TSqlExpression::JsonPathSpecification(const TRule_jsonpath_spec& node) {
+    const TString rawPath = Token(node.GetToken1());
+    const TPosition pathPos = Ctx.Pos();
+
+    const auto content = StringContent(Ctx, pathPos, rawPath);
+    if (!content) {
+        return nullptr;
+    }
+    auto atom = BuildQuotedAtom(pathPos, content->Content, content->Flags);
+    return new TCallNodeImpl(pathPos, "Utf8", {atom});
+}
+
+// (RETURNING type_name_simple)?
+// Builds the type node of a RETURNING clause; TypeSimple is called with onlyDataAllowed set.
+TNodePtr TSqlExpression::JsonReturningTypeRule(const TRule_type_name_simple& node) {
+    const bool onlyDataAllowed = true;
+    return TypeSimple(node, onlyDataAllowed);
+}
+
+// json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
+// Builds the JSON input expression. When the expression fails to build or is a NULL node,
+// it is replaced by `Nothing(OptionalType(Json))`.
+TNodePtr TSqlExpression::JsonInputArg(const TRule_json_common_args& node) {
+    TNodePtr input = Build(node.GetRule_expr1());
+    if (input && !input->IsNull()) {
+        return input;
+    }
+
+    return new TCallNodeImpl(Ctx.Pos(), "Nothing", {
+        new TCallNodeImpl(Ctx.Pos(), "OptionalType", {BuildDataType(Ctx.Pos(), "Json")})
+    });
+}
+
+// json_variable: expr AS json_variable_name;
+// Appends a (name, value) tuple for one PASSING variable to `children`.
+// The name is either an identifier or a string token; if the string token cannot be decoded,
+// nothing is appended.
+void TSqlExpression::AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children) {
+    TString variableName;
+    // Captured before the value expression is built.
+    TPosition variablePos = Ctx.Pos();
+    ui32 flags = 0;
+
+    TNodePtr valueExpr = Build(node.GetRule_expr1());
+    const auto& nameRule = node.GetRule_json_variable_name3();
+    switch (nameRule.GetAltCase()) {
+        case TRule_json_variable_name::kAltJsonVariableName1: {
+            variableName = Id(nameRule.GetAlt_json_variable_name1().GetRule_id_expr1(), *this);
+            flags = TNodeFlags::ArbitraryContent;
+            break;
+        }
+        case TRule_json_variable_name::kAltJsonVariableName2: {
+            const auto& nameToken = nameRule.GetAlt_json_variable_name2().GetToken1();
+            variablePos = GetPos(nameToken);
+            auto decoded = StringContentOrIdContent(Ctx, variablePos, nameToken.GetValue());
+            if (!decoded) {
+                return;
+            }
+            variableName = decoded->Content;
+            flags = decoded->Flags;
+            break;
+        }
+        case TRule_json_variable_name::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    TNodePtr nameAtom = BuildQuotedAtom(variablePos, variableName, flags);
+    children.push_back(BuildTuple(variablePos, {nameAtom, valueExpr}));
+}
+
+// json_variables: json_variable (COMMA json_variable)*;
+// Appends every PASSING variable, in source order, to `children`.
+void TSqlExpression::AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children) {
+    AddJsonVariable(node.GetRule_json_variable1(), children);
+    for (const auto& item : node.GetBlock2()) {
+        AddJsonVariable(item.GetRule_json_variable2(), children);
+    }
+}
+
+// json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
+// Builds the "JsonVariables" call node. Without a PASSING clause the node has no children
+// and is positioned at the current context position; otherwise at the clause's first token.
+TNodePtr TSqlExpression::JsonVariables(const TRule_json_common_args& node) {
+    TVector<TNodePtr> passed;
+    TPosition where = Ctx.Pos();
+    if (node.HasBlock4()) {
+        const auto& passing = node.GetBlock4();
+        where = GetPos(passing.GetToken1());
+        AddJsonVariables(passing.GetRule_json_variables2(), passed);
+    }
+    return new TCallNodeImpl(where, "JsonVariables", passed);
+}
+
+// json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
+// Appends the three arguments shared by the JSON API calls to `children`:
+// the input expression, the JsonPath node and the variables node, in that order.
+void TSqlExpression::AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children) {
+    children.push_back(JsonInputArg(node));
+    children.push_back(JsonPathSpecification(node.GetRule_jsonpath_spec3()));
+    children.push_back(JsonVariables(node));
+}
+
+// json_case_handler: ERROR | NULL | (DEFAULT expr);
+// Sets `mode` and returns the handler value node:
+//   ERROR        -> mode Error,        "Null" call node
+//   NULL         -> mode DefaultValue, "Null" call node
+//   DEFAULT expr -> mode DefaultValue, the built expression
+TNodePtr TSqlExpression::JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode) {
+    switch (node.GetAltCase()) {
+        case TRule_json_case_handler::kAltJsonCaseHandler1: {
+            const auto errorPos = GetPos(node.GetAlt_json_case_handler1().GetToken1());
+            mode = EJsonValueHandlerMode::Error;
+            return new TCallNodeImpl(errorPos, "Null", {});
+        }
+        case TRule_json_case_handler::kAltJsonCaseHandler2: {
+            const auto nullPos = GetPos(node.GetAlt_json_case_handler2().GetToken1());
+            mode = EJsonValueHandlerMode::DefaultValue;
+            return new TCallNodeImpl(nullPos, "Null", {});
+        }
+        case TRule_json_case_handler::kAltJsonCaseHandler3: {
+            mode = EJsonValueHandlerMode::DefaultValue;
+            const auto& defaultExpr = node.GetAlt_json_case_handler3().GetRule_expr2();
+            return Build(defaultExpr);
+        }
+        case TRule_json_case_handler::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// json_case_handler*
+// Appends four nodes to `children`: ON EMPTY mode, ON EMPTY value, ON ERROR mode, ON ERROR value.
+// A missing clause defaults to mode DefaultValue with a "Null" value.
+// On a malformed clause list (more than two clauses, a repeated clause, or ON EMPTY after
+// ON ERROR) an error is reported and nothing is appended.
+void TSqlExpression::AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children) {
+    if (node.Block5Size() > 2) {
+        Ctx.Error() << "Only 1 ON EMPTY and/or 1 ON ERROR clause is expected";
+        Ctx.IncrementMonCounter("sql_errors", "JsonValueTooManyHandleClauses");
+        return;
+    }
+
+    TNodePtr onEmpty;
+    EJsonValueHandlerMode onEmptyMode = EJsonValueHandlerMode::DefaultValue;
+    TNodePtr onError;
+    EJsonValueHandlerMode onErrorMode = EJsonValueHandlerMode::DefaultValue;
+    for (size_t i = 0; i < node.Block5Size(); i++) {
+        // Bind by reference: a by-value copy would duplicate the whole clause subtree.
+        const auto& block = node.GetBlock5(i);
+        const bool isEmptyClause = to_lower(block.GetToken3().GetValue()) == "empty";
+
+        if (isEmptyClause && onEmpty != nullptr) {
+            Ctx.Error() << "Only 1 ON EMPTY clause is expected";
+            Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnEmptyClauses");
+            return;
+        }
+
+        if (!isEmptyClause && onError != nullptr) {
+            Ctx.Error() << "Only 1 ON ERROR clause is expected";
+            Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnErrorClauses");
+            return;
+        }
+
+        if (isEmptyClause && onError != nullptr) {
+            Ctx.Error() << "ON EMPTY clause must be before ON ERROR clause";
+            Ctx.IncrementMonCounter("sql_errors", "JsonValueOnEmptyAfterOnError");
+            return;
+        }
+
+        EJsonValueHandlerMode currentMode;
+        TNodePtr currentHandler = JsonValueCaseHandler(block.GetRule_json_case_handler1(), currentMode);
+
+        if (isEmptyClause) {
+            onEmpty = currentHandler;
+            onEmptyMode = currentMode;
+        } else {
+            onError = currentHandler;
+            onErrorMode = currentMode;
+        }
+    }
+
+    if (onEmpty == nullptr) {
+        onEmpty = new TCallNodeImpl(Ctx.Pos(), "Null", {});
+    }
+
+    if (onError == nullptr) {
+        onError = new TCallNodeImpl(Ctx.Pos(), "Null", {});
+    }
+
+    children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onEmptyMode), TNodeFlags::Default));
+    children.push_back(onEmpty);
+    children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onErrorMode), TNodeFlags::Default));
+    children.push_back(onError);
+}
+
+// json_value: JSON_VALUE LPAREN
+//     json_common_args
+//     (RETURNING type_name_simple)?
+//     (json_case_handler ON (EMPTY | ERROR))*
+// RPAREN;
+// Builds the "JsonValue" call: common args, then the case handlers, then the optional
+// RETURNING type. Returns an empty pointer when the RETURNING type fails to build.
+TNodePtr TSqlExpression::JsonValueExpr(const TRule_json_value& node) {
+    TVector<TNodePtr> callArgs;
+    AddJsonCommonArgs(node.GetRule_json_common_args3(), callArgs);
+    AddJsonValueCaseHandlers(node, callArgs);
+
+    if (node.HasBlock4()) {
+        const auto& typeRule = node.GetBlock4().GetRule_type_name_simple2();
+        auto resultType = JsonReturningTypeRule(typeRule);
+        if (!resultType) {
+            return {};
+        }
+        callArgs.push_back(resultType);
+    }
+
+    const auto callPos = GetPos(node.GetToken1());
+    return new TCallNodeImpl(callPos, "JsonValue", callArgs);
+}
+
+// json_exists: JSON_EXISTS LPAREN
+//     json_common_args
+//     json_exists_handler?
+// RPAREN;
+// Appends the ON ERROR result of JSON_EXISTS to `children`:
+//   no handler     -> Just(false)
+//   UNKNOWN        -> Nothing(OptionalType(Bool))
+//   ERROR          -> nothing is appended
+//   any other word -> Just(<word parsed as bool>)
+void TSqlExpression::AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children) {
+    auto makeJustBool = [&](const TPosition& at, bool flag) {
+        return new TCallNodeImpl(at, "Just", {BuildLiteralBool(at, flag)});
+    };
+
+    if (!node.HasBlock4()) {
+        children.push_back(makeJustBool(Ctx.Pos(), false));
+        return;
+    }
+
+    const auto& handlerToken = node.GetBlock4().GetRule_json_exists_handler1().GetToken1();
+    const auto handlerPos = GetPos(handlerToken);
+    const auto keyword = to_lower(handlerToken.GetValue());
+    if (keyword == "error") {
+        return;
+    }
+
+    if (keyword == "unknown") {
+        const auto optionalBool = new TCallNodeImpl(handlerPos, "OptionalType", {BuildDataType(handlerPos, "Bool")});
+        children.push_back(new TCallNodeImpl(handlerPos, "Nothing", {optionalBool}));
+    } else {
+        children.push_back(makeJustBool(handlerPos, FromString<bool>(keyword)));
+    }
+}
+
+// json_exists: JSON_EXISTS LPAREN
+//     json_common_args
+//     json_exists_handler?
+// RPAREN;
+// Builds the "JsonExists" call from the common args followed by the ON ERROR handler.
+TNodePtr TSqlExpression::JsonExistsExpr(const TRule_json_exists& node) {
+    TVector<TNodePtr> callArgs;
+    AddJsonCommonArgs(node.GetRule_json_common_args3(), callArgs);
+    AddJsonExistsHandler(node, callArgs);
+
+    const auto callPos = GetPos(node.GetToken1());
+    return new TCallNodeImpl(callPos, "JsonExists", callArgs);
+}
+
+// json_query: JSON_QUERY LPAREN
+//     json_common_args
+//     (json_query_wrapper WRAPPER)?
+//     (json_query_handler ON EMPTY)?
+//     (json_query_handler ON ERROR)?
+// RPAREN;
+// Determines the wrapping mode of JSON_QUERY:
+//   no WRAPPER clause, or WITHOUT ARRAY?           -> NoWrap
+//   WITH ARRAY? / WITH UNCONDITIONAL ARRAY?        -> Wrap (UNCONDITIONAL is the default)
+//   WITH CONDITIONAL ARRAY?                        -> ConditionalWrap
+EJsonQueryWrap TSqlExpression::JsonQueryWrapper(const TRule_json_query& node) {
+    if (!node.HasBlock4()) {
+        return EJsonQueryWrap::NoWrap;
+    }
+
+    const auto& wrapper = node.GetBlock4().GetRule_json_query_wrapper1();
+    if (wrapper.GetAltCase() == TRule_json_query_wrapper::kAltJsonQueryWrapper1) {
+        return EJsonQueryWrap::NoWrap;
+    }
+
+    const auto& withWrapper = wrapper.GetAlt_json_query_wrapper2();
+    if (!withWrapper.HasBlock2()) {
+        return EJsonQueryWrap::Wrap;
+    }
+
+    const auto keyword = to_lower(withWrapper.GetBlock2().GetToken1().GetValue());
+    return keyword == "conditional" ? EJsonQueryWrap::ConditionalWrap : EJsonQueryWrap::Wrap;
+}
+
+// json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT);
+// Maps the grammar alternative onto the corresponding EJsonQueryHandler value.
+EJsonQueryHandler TSqlExpression::JsonQueryHandler(const TRule_json_query_handler& node) {
+    const auto alternative = node.GetAltCase();
+    switch (alternative) {
+        case TRule_json_query_handler::kAltJsonQueryHandler1: {
+            return EJsonQueryHandler::Error;
+        }
+        case TRule_json_query_handler::kAltJsonQueryHandler2: {
+            return EJsonQueryHandler::Null;
+        }
+        case TRule_json_query_handler::kAltJsonQueryHandler3: {
+            return EJsonQueryHandler::EmptyArray;
+        }
+        case TRule_json_query_handler::kAltJsonQueryHandler4: {
+            return EJsonQueryHandler::EmptyObject;
+        }
+        case TRule_json_query_handler::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// json_query: JSON_QUERY LPAREN
+//     json_common_args
+//     (json_query_wrapper WRAPPER)?
+//     (json_query_handler ON EMPTY)?
+//     (json_query_handler ON ERROR)?
+// RPAREN;
+// Builds the "JsonQuery" call: common args, then atoms for the wrap mode, the ON EMPTY
+// handler and the ON ERROR handler (both handlers default to Null).
+// ON EMPTY combined with a wrapping mode is an error; nullptr is returned in that case.
+TNodePtr TSqlExpression::JsonQueryExpr(const TRule_json_query& node) {
+    TVector<TNodePtr> callArgs;
+    AddJsonCommonArgs(node.GetRule_json_common_args3(), callArgs);
+
+    auto appendAtom = [&](TPosition at, const TString& text) {
+        callArgs.push_back(BuildQuotedAtom(at, text, TNodeFlags::Default));
+    };
+
+    const auto wrapping = JsonQueryWrapper(node);
+    appendAtom(Ctx.Pos(), ToString(wrapping));
+
+    auto emptyHandler = EJsonQueryHandler::Null;
+    if (node.HasBlock5()) {
+        const bool wrapperGiven = wrapping != EJsonQueryWrap::NoWrap;
+        if (wrapperGiven) {
+            Ctx.Error() << "ON EMPTY is prohibited because WRAPPER clause is specified";
+            Ctx.IncrementMonCounter("sql_errors", "JsonQueryOnEmptyWithWrapper");
+            return nullptr;
+        }
+        emptyHandler = JsonQueryHandler(node.GetBlock5().GetRule_json_query_handler1());
+    }
+    appendAtom(Ctx.Pos(), ToString(emptyHandler));
+
+    auto errorHandler = EJsonQueryHandler::Null;
+    if (node.HasBlock6()) {
+        errorHandler = JsonQueryHandler(node.GetBlock6().GetRule_json_query_handler1());
+    }
+    appendAtom(Ctx.Pos(), ToString(errorHandler));
+
+    const auto callPos = GetPos(node.GetToken1());
+    return new TCallNodeImpl(callPos, "JsonQuery", callArgs);
+}
+
+// json_api_expr: json_value | json_exists | json_query;
+// Dispatches to the builder of the matching JSON API call and returns its result.
+TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) {
+    // The position of the call's first token is recorded per alternative; it is not used
+    // for the returned node, which gets its position from the builder.
+    TPosition callPos = Ctx.Pos();
+    TNodePtr built = nullptr;
+    switch (node.GetAltCase()) {
+        case TRule_json_api_expr::kAltJsonApiExpr1: {
+            const auto& valueRule = node.GetAlt_json_api_expr1().GetRule_json_value1();
+            callPos = GetPos(valueRule.GetToken1());
+            built = JsonValueExpr(valueRule);
+            break;
+        }
+        case TRule_json_api_expr::kAltJsonApiExpr2: {
+            const auto& existsRule = node.GetAlt_json_api_expr2().GetRule_json_exists1();
+            callPos = GetPos(existsRule.GetToken1());
+            built = JsonExistsExpr(existsRule);
+            break;
+        }
+        case TRule_json_api_expr::kAltJsonApiExpr3: {
+            const auto& queryRule = node.GetAlt_json_api_expr3().GetRule_json_query1();
+            callPos = GetPos(queryRule.GetToken1());
+            built = JsonQueryExpr(queryRule);
+            break;
+        }
+        case TRule_json_api_expr::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    return built;
+}
+
+// Builds a MATCH_RECOGNIZE variable-access node for `var.<suffix>`.
+// `theSameVar` is forwarded unchanged to BuildMatchRecognizeVarAccess.
+// The node is created with a default-constructed position.
+TNodePtr MatchRecognizeVarAccess(TTranslation& ctx, const TString& var, const TRule_an_id_or_type& suffix, bool theSameVar) {
+ // NOTE(review): every case only breaks, so this switch has no effect on the result;
+ // presumably a placeholder for per-alternative handling - confirm.
+ switch (suffix.GetAltCase()) {
+ case TRule_an_id_or_type::kAltAnIdOrType1:
+ break;
+ case TRule_an_id_or_type::kAltAnIdOrType2:
+ break;
+ case TRule_an_id_or_type::ALT_NOT_SET:
+ break;
+ }
+ // NOTE(review): the first alternative is read regardless of which one is actually set -
+ // verify this is intended for suffixes that are not a plain id.
+ const auto& column = Id(
+ suffix.GetAlt_an_id_or_type1()
+ .GetRule_id_or_type1().GetAlt_id_or_type1().GetRule_id1(),
+ ctx
+ );
+ return BuildMatchRecognizeVarAccess(TPosition{}, var, column, theSameVar);
+}
+
+// Builds a MATCH_RECOGNIZE variable-access node for `alias.<column>` from the DOT-suffix block.
+// Only the path "alternative 3 -> an_id_or_type alt 1 -> id_or_type alt 1" (a plain id)
+// produces a node; every other combination returns an empty pointer.
+// The last argument tells whether `alias` equals the variable returned by
+// Ctx.GetMatchRecognizeDefineVar().
+// NOTE(review): `block` is taken by value, so the message is copied on each call; changing
+// that requires touching the declaration as well.
+TNodePtr TSqlExpression::RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block) {
+ switch (block.GetAltCase()) {
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1:
+ break;
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2:
+ break;
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3:
+ switch (block.GetAlt3().GetRule_an_id_or_type1().GetAltCase()) {
+ case TRule_an_id_or_type::kAltAnIdOrType1: {
+ const auto &idOrType = block.GetAlt3().GetRule_an_id_or_type1().GetAlt_an_id_or_type1().GetRule_id_or_type1();
+ switch(idOrType.GetAltCase()) {
+ case TRule_id_or_type::kAltIdOrType1:
+ return BuildMatchRecognizeVarAccess(
+ Ctx.Pos(),
+ alias,
+ Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this),
+ Ctx.GetMatchRecognizeDefineVar() == alias
+ );
+ case TRule_id_or_type::kAltIdOrType2:
+ break;
+ case TRule_id_or_type::ALT_NOT_SET:
+ break;
+ }
+ break;
+ }
+ case TRule_an_id_or_type::kAltAnIdOrType2:
+ break;
+ case TRule_an_id_or_type::ALT_NOT_SET:
+ break;
+ }
+ break;
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+ // Reached for every combination that is not a plain id access.
+ return TNodePtr{};
+}
+
+template<typename TUnaryCasualExprRule>
+TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail) { // builds the node for "(id | atom) suffix": a column/type reference, a call, or a chain of key/dot accesses; returns null on error
+ // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix;
+ // OR
+ // in_unary_casual_subexpr: (id_expr_in | in_atom_expr) unary_subexpr_suffix;
+ // where
+ // unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | id)))* (COLLATE id)?;
+
+ const auto& suffix = node.GetRule_unary_subexpr_suffix2();
+ const bool suffixIsEmpty = suffix.GetBlock1().empty() && !suffix.HasBlock2(); // no key/invoke/dot elements and no COLLATE part
+ MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && suffixIsEmpty; // the flag survives only when nothing follows the head
+ TString name; // head identifier, when the head is (or resolves to) an identifier
+ TNodePtr expr; // head expression, when the head is an atom that produced a node
+ bool typePossible = false; // set only for an unquoted id_expr head
+ auto& block = node.GetBlock1();
+ switch (block.Alt_case()) {
+ case TUnaryCasualExprRule::TBlock1::kAlt1: { // head is an identifier
+ MaybeUnnamedSmartParenOnTop = false;
+ auto& alt = block.GetAlt1();
+ if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) {
+ name = Id(alt.GetRule_id_expr1(), *this);
+ typePossible = !IsQuotedId(alt.GetRule_id_expr1(), *this);
+ } else {
+ // type was never possible here
+ name = Id(alt.GetRule_id_expr_in1(), *this);
+ }
+ break;
+ }
+ case TUnaryCasualExprRule::TBlock1::kAlt2: { // head is an atom expression
+ auto& alt = block.GetAlt2();
+ TMaybe<TExprOrIdent> exprOrId;
+ if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) {
+ exprOrId = AtomExpr(alt.GetRule_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{}); // trailing '?' are handed to the atom only when no suffix follows
+ } else {
+ MaybeUnnamedSmartParenOnTop = false;
+ exprOrId = InAtomExpr(alt.GetRule_in_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{});
+ }
+
+ if (!exprOrId) {
+ Ctx.IncrementMonCounter("sql_errors", "BadAtomExpr");
+ return nullptr;
+ }
+ if (!exprOrId->Expr) { // the atom carries an identifier rather than a built expression
+ name = exprOrId->Ident;
+ } else {
+ expr = exprOrId->Expr;
+ }
+ break;
+ }
+ case TUnaryCasualExprRule::TBlock1::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ // bool onlyDots = true;
+ bool isColumnRef = !expr; // a head without a built expression is a column reference candidate
+ bool isFirstElem = true;
+
+ for (auto& _b : suffix.GetBlock1()) { // first pass: only classifies the head, no access nodes are built here
+ auto& b = _b.GetBlock1();
+ switch (b.Alt_case()) {
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: {
+ // key_expr
+ // onlyDots = false;
+ break;
+ }
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: {
+ // invoke_expr - cannot be a column, function name
+ if (isFirstElem) { // only a call placed directly after the head changes the classification
+ isColumnRef = false;
+ }
+
+ // onlyDots = false;
+ break;
+ }
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
+ // In case of MATCH_RECOGNIZE lambdas
+ // X.Y is treated as Var.Column access
+ if (isColumnRef && EColumnRefState::MatchRecognize == Ctx.GetColumnReferenceState()) {
+ if (auto rowPatternVarAccess = RowPatternVarAccess(
+ name,
+ b.GetAlt3().GetBlock2())
+ ) {
+ return rowPatternVarAccess; // returns immediately; later suffix elements are not processed
+ }
+ }
+ break;
+ }
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET:
+ AltNotImplemented("unary_subexpr_suffix", b);
+ return nullptr;
+ }
+
+ isFirstElem = false;
+ }
+
+ isFirstElem = true; // reset for the second (building) pass
+ TVector<INode::TIdPart> ids; // pending parts of an access chain that has not been materialized yet
+ INode::TPtr lastExpr; // expression built so far, empty while only ids have been collected
+ if (!isColumnRef) {
+ lastExpr = expr;
+ } else {
+ const bool flexibleTypes = Ctx.FlexibleTypes;
+ bool columnOrType = false;
+ auto columnRefsState = Ctx.GetColumnReferenceState();
+ bool explicitPgType = columnRefsState == EColumnRefState::AsPgType;
+ if (explicitPgType && typePossible && suffixIsEmpty) { // bare unquoted name in AsPgType state: build it as a simple type
+ auto pgType = BuildSimpleType(Ctx, Ctx.Pos(), name, false);
+ if (pgType && tail.Count) {
+ Ctx.Error() << "Optional types are not supported in this context";
+ return {};
+ }
+ return pgType;
+ }
+ if (auto simpleType = LookupSimpleType(name, flexibleTypes, false); simpleType && typePossible && suffixIsEmpty) { // bare unquoted name that is also a known simple type
+ if (tail.Count > 0 || columnRefsState == EColumnRefState::Deny || !flexibleTypes) {
+ // a type
+ return AddOptionals(BuildSimpleType(Ctx, Ctx.Pos(), name, false), tail.Count);
+ }
+ // type or column: ambiguity will be resolved on type annotation stage
+ columnOrType = columnRefsState == EColumnRefState::Allow;
+ }
+ if (tail.Count) { // past this point the name is not a type, so trailing '?' are rejected
+ UnexpectedQuestionToken(tail);
+ return {};
+ }
+ if (!Ctx.CheckColumnReference(Ctx.Pos(), name)) {
+ return nullptr;
+ }
+
+ ids.push_back(columnOrType ? BuildColumnOrType(Ctx.Pos()) : BuildColumn(Ctx.Pos())); // the chain starts with a column (or column-or-type) node followed by the name
+ ids.push_back(name);
+ }
+
+ TPosition pos(Ctx.Pos());
+ for (auto& _b : suffix.GetBlock1()) { // second pass: builds accesses and calls in suffix order
+ auto& b = _b.GetBlock1();
+ switch (b.Alt_case()) {
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: {
+ // key_expr
+ auto keyExpr = KeyExpr(b.GetAlt1().GetRule_key_expr1());
+ if (!keyExpr) {
+ Ctx.IncrementMonCounter("sql_errors", "BadKeyExpr");
+ return nullptr;
+ }
+
+ if (!lastExpr) { // materialize the pending id chain before applying the key to it
+ lastExpr = BuildAccess(pos, ids, false);
+ ids.clear();
+ }
+
+ ids.push_back(lastExpr);
+ ids.push_back(keyExpr);
+ lastExpr = BuildAccess(pos, ids, true);
+ ids.clear();
+ break;
+ }
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: {
+ // invoke_expr - cannot be a column, function name
+ TSqlCallExpr call(Ctx, Mode);
+ if (isFirstElem && !name.empty()) { // call placed directly after a named head: the name is the callee name
+ call.AllowDistinct();
+ call.InitName(name);
+ } else {
+ call.InitExpr(lastExpr); // otherwise the expression built so far is the callee
+ }
+
+ bool initRet = call.Init(b.GetAlt2().GetRule_invoke_expr1());
+ if (initRet) {
+ call.IncCounters();
+ }
+
+ if (!initRet) {
+ return nullptr;
+ }
+
+ lastExpr = call.BuildCall();
+ if (!lastExpr) {
+ return nullptr;
+ }
+
+ break;
+ }
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
+ // dot
+ if (lastExpr) { // an already built expression becomes the base of this access
+ ids.push_back(lastExpr);
+ }
+
+ auto bb = b.GetAlt3().GetBlock2();
+ switch (bb.Alt_case()) {
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1: { // bind_parameter: the id part also carries the resolved named node
+ TString named;
+ if (!NamedNodeImpl(bb.GetAlt1().GetRule_bind_parameter1(), named, *this)) {
+ return nullptr;
+ }
+ auto namedNode = GetNamedNode(named);
+ if (!namedNode) {
+ return nullptr;
+ }
+
+ ids.push_back(named);
+ ids.back().Expr = namedNode;
+ break;
+ }
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2: { // single token alternative: its text is used as the id part
+ const TString str(Token(bb.GetAlt2().GetToken1()));
+ ids.push_back(str);
+ break;
+ }
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3: { // an_id_or_type
+ ids.push_back(Id(bb.GetAlt3().GetRule_an_id_or_type1(), *this));
+ break;
+ }
+ case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ if (lastExpr) { // with a built base each dot is materialized at once; otherwise ids keep accumulating
+ lastExpr = BuildAccess(pos, ids, false);
+ ids.clear();
+ }
+
+ break;
+ }
+ case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET:
+ AltNotImplemented("unary_subexpr_suffix", b);
+ return nullptr;
+ }
+
+ isFirstElem = false;
+ }
+
+ if (!lastExpr) { // flush ids that were collected but never turned into an access node
+ lastExpr = BuildAccess(pos, ids, false);
+ ids.clear();
+ }
+
+ if (suffix.HasBlock2()) {
+ Ctx.IncrementMonCounter("sql_errors", "CollateUnarySubexpr");
+ Error() << "unary_subexpr: COLLATE is not implemented yet"; // NOTE(review): the error is reported, yet the built expression is still returned below
+ }
+
+ return lastExpr;
+}
+
+// Resolves a bind parameter that appears as an atom.
+// In SqlLambdaParams mode the parameter is turned into an atom; at most one trailing '?' is
+// accepted there, and its presence is passed as the last BuildAtom argument.
+// In any other mode trailing '?' tokens are rejected and the named node bound to the
+// parameter is returned.
+TNodePtr TSqlExpression::BindParameterRule(const TRule_bind_parameter& rule, const TTrailingQuestions& tail) {
+    TString paramName;
+    if (!NamedNodeImpl(rule, paramName, *this)) {
+        return {};
+    }
+
+    const bool declaresLambdaArg = (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams);
+    if (!declaresLambdaArg) {
+        if (tail.Count) {
+            UnexpectedQuestionToken(tail);
+            return {};
+        }
+        Ctx.IncrementMonCounter("sql_features", "NamedNodeUseAtom");
+        return GetNamedNode(paramName);
+    }
+
+    Ctx.IncrementMonCounter("sql_features", "LambdaArgument");
+    if (tail.Count > 1) {
+        Ctx.Error(tail.Pos) << "Expecting at most one '?' token here (for optional lambda parameters), but got " << tail.Count;
+        return {};
+    }
+    const bool hasQuestionMark = (tail.Count != 0);
+    return BuildAtom(Ctx.Pos(), paramName, NYql::TNodeFlags::ArbitraryContent, hasQuestionMark);
+}
+
+// Builds either a plain smart-parenthesis expression or, when the rule carries a body block,
+// a SQL lambda whose parameters are taken from that parenthesis.
+// Lambda parameters are pushed as named atoms while the body is built and popped afterwards,
+// whether or not the body succeeded. When at least one parameter is optional, the lambda is
+// wrapped into a "WithOptionalArgs" call carrying the number of optional parameters.
+TNodePtr TSqlExpression::LambdaRule(const TRule_lambda& rule) {
+    if (!rule.HasBlock2()) {
+        // no body block: this is just a parenthesized expression
+        return SmartParenthesis(rule.GetRule_smart_parenthesis1());
+    }
+
+    MaybeUnnamedSmartParenOnTop = false;
+    TNodePtr paramsTuple;
+    {
+        // column references are allowed here so that the error is postponed and reported
+        // with a better description by SqlLambdaParams
+        TColumnRefScope paramsScope(Ctx, EColumnRefState::Allow);
+        TSqlExpression paramsBuilder(Ctx, Mode);
+        paramsBuilder.SetSmartParenthesisMode(ESmartParenthesis::SqlLambdaParams);
+        paramsTuple = paramsBuilder.SmartParenthesis(rule.GetRule_smart_parenthesis1());
+    }
+    if (!paramsTuple) {
+        return {};
+    }
+
+    ui32 optionalArgumentsCount = 0;
+    TVector<TSymbolNameWithPos> params;
+    if (!SqlLambdaParams(paramsTuple, params, optionalArgumentsCount)) {
+        return {};
+    }
+
+    const auto& bodyBlock = rule.GetBlock2();
+    Token(bodyBlock.GetToken1());
+    TPosition lambdaPos(Ctx.Pos());
+    TVector<TNodePtr> bodySeq;
+    for (auto& param : params) {
+        param.Name = PushNamedAtom(param.Pos, param.Name);
+    }
+
+    // the body is built with column references denied; the scope lives until the function returns
+    TColumnRefScope bodyScope(Ctx, EColumnRefState::Deny);
+    bodyScope.SetNoColumnErrContext("in lambda function");
+    const auto& body = bodyBlock.GetBlock2();
+    const bool bodyBuilt = body.HasAlt1()
+        ? SqlLambdaExprBody(Ctx, body.GetAlt1().GetRule_expr2(), bodySeq)
+        : SqlLambdaExprBody(Ctx, body.GetAlt2().GetRule_lambda_body2(), bodySeq);
+
+    TVector<TString> paramNames;
+    for (const auto& param : params) {
+        paramNames.push_back(param.Name);
+        PopNamedNode(param.Name);
+    }
+    if (!bodyBuilt) {
+        return {};
+    }
+
+    auto lambdaNode = BuildSqlLambda(lambdaPos, std::move(paramNames), std::move(bodySeq));
+    if (optionalArgumentsCount > 0) {
+        lambdaNode = new TCallNodeImpl(lambdaPos, "WithOptionalArgs", {
+            lambdaNode,
+            BuildQuotedAtom(lambdaPos, ToString(optionalArgumentsCount), TNodeFlags::Default)
+        });
+    }
+    return lambdaNode;
+}
+
+// Translates a cast expression into a "SafeCast" call over the operand and the target type.
+// Returns an empty pointer when either the operand or the type fails to build.
+TNodePtr TSqlExpression::CastRule(const TRule_cast_expr& rule) {
+    Ctx.IncrementMonCounter("sql_features", "Cast");
+    Token(rule.GetToken1());
+    TPosition castPos(Ctx.Pos());
+
+    TSqlExpression operandBuilder(Ctx, Mode);
+    auto operand = operandBuilder.Build(rule.GetRule_expr3());
+    if (!operand) {
+        return {};
+    }
+
+    auto targetType = TypeNodeOrBind(rule.GetRule_type_name_or_bind5());
+    if (!targetType) {
+        return {};
+    }
+
+    return new TCallNodeImpl(castPos, "SafeCast", {operand, targetType});
+}
+
+// Translates a bitcast expression into a "BitCast" call over the operand and a simple type.
+// Returns an empty pointer when either the operand or the type fails to build.
+TNodePtr TSqlExpression::BitCastRule(const TRule_bitcast_expr& rule) {
+    Ctx.IncrementMonCounter("sql_features", "BitCast");
+    Token(rule.GetToken1());
+    TPosition castPos(Ctx.Pos());
+
+    TSqlExpression operandBuilder(Ctx, Mode);
+    auto operand = operandBuilder.Build(rule.GetRule_expr3());
+    if (!operand) {
+        return {};
+    }
+
+    auto targetType = TypeSimple(rule.GetRule_type_name_simple5(), true);
+    if (!targetType) {
+        return {};
+    }
+
+    return new TCallNodeImpl(castPos, "BitCast", {operand, targetType});
+}
+
+// Translates an EXISTS expression over a SELECT or VALUES statement into a "ListHasItems"
+// builtin applied to the source node. Returns null (and counts "BadSource") when the
+// source could not be built.
+TNodePtr TSqlExpression::ExistsRule(const TRule_exists_expr& rule) {
+    Ctx.IncrementMonCounter("sql_features", "Exists");
+
+    TPosition sourcePos;
+    TSourcePtr source;
+    Token(rule.GetToken2());
+
+    const auto& sourceBlock = rule.GetBlock3();
+    switch (sourceBlock.Alt_case()) {
+        case TRule_exists_expr::TBlock3::kAlt1: {
+            TSqlSelect selectBuilder(Ctx, Mode);
+            source = selectBuilder.Build(sourceBlock.GetAlt1().GetRule_select_stmt1(), sourcePos);
+            break;
+        }
+        case TRule_exists_expr::TBlock3::kAlt2: {
+            TSqlValues valuesBuilder(Ctx, Mode);
+            source = valuesBuilder.Build(sourceBlock.GetAlt2().GetRule_values_stmt1(), sourcePos);
+            break;
+        }
+        case TRule_exists_expr::TBlock3::ALT_NOT_SET:
+            AltNotImplemented("exists_expr", sourceBlock);
+    }
+
+    if (source) {
+        const bool checkExist = true;
+        return BuildBuiltinFunc(Ctx, Ctx.Pos(), "ListHasItems", {BuildSourceNode(sourcePos, std::move(source), checkExist)});
+    }
+
+    Ctx.IncrementMonCounter("sql_errors", "BadSource");
+    return nullptr;
+}
+
+// Translates a CASE expression into an "If" builtin call.
+//   case_expr: CASE expr? when_expr+ (ELSE expr)? END;
+//   when_expr: WHEN expr THEN expr;
+// The grammar makes ELSE optional, but this implementation requires it and reports
+// "ELSE is required" otherwise. The ELSE expression is built first, then the optional CASE
+// operand, then the WHEN branches in order. When an operand is present, every WHEN predicate
+// is rewritten as `operand == predicate` using a clone of the operand. The branches are folded
+// by ReduceCaseBranches and combined with the ELSE expression.
+// Returns an empty pointer if any sub-expression fails to build.
+TNodePtr TSqlExpression::CaseRule(const TRule_case_expr& rule) {
+    Ctx.IncrementMonCounter("sql_features", "Case");
+    const auto& alt = rule;
+    Token(alt.GetToken1());
+    TNodePtr elseExpr;
+    if (alt.HasBlock4()) {
+        Token(alt.GetBlock4().GetToken1());
+        TSqlExpression expr(Ctx, Mode);
+        elseExpr = expr.Build(alt.GetBlock4().GetRule_expr2());
+        if (!elseExpr) {
+            // the ELSE expression failed to build; a null node must never become an
+            // argument of the "If" builtin below
+            return {};
+        }
+    } else {
+        Ctx.IncrementMonCounter("sql_errors", "ElseIsRequired");
+        Error() << "ELSE is required";
+        return {};
+    }
+
+    TNodePtr caseExpr;
+    if (alt.HasBlock2()) {
+        TSqlExpression expr(Ctx, Mode);
+        caseExpr = expr.Build(alt.GetBlock2().GetRule_expr1());
+        if (!caseExpr) {
+            return {};
+        }
+    }
+
+    TVector<TCaseBranch> branches;
+    for (size_t i = 0; i < alt.Block3Size(); ++i) {
+        branches.emplace_back();
+        const auto& block = alt.GetBlock3(i).GetRule_when_expr1();
+        Token(block.GetToken1());
+        TSqlExpression condExpr(Ctx, Mode);
+        branches.back().Pred = condExpr.Build(block.GetRule_expr2());
+        if (caseExpr) {
+            // CASE with an operand: compare the operand with the WHEN value
+            branches.back().Pred = BuildBinaryOp(Ctx, Ctx.Pos(), "==", caseExpr->Clone(), branches.back().Pred);
+        }
+        if (!branches.back().Pred) {
+            return {};
+        }
+        Token(block.GetToken3());
+        TSqlExpression thenExpr(Ctx, Mode);
+        branches.back().Value = thenExpr.Build(block.GetRule_expr4());
+        if (!branches.back().Value) {
+            return {};
+        }
+    }
+    auto final = ReduceCaseBranches(branches.begin(), branches.end());
+    return BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { final.Pred, final.Value, elseExpr });
+}
+
+// Dispatches an atom_expr to the builder of its alternative.
+// Trailing '?' tokens are accepted only for a bind parameter. Literals are returned exactly
+// as LiteralExpr produces them; for every other alternative an empty TMaybe is returned
+// when no node was built.
+TMaybe<TExprOrIdent> TSqlExpression::AtomExpr(const TRule_atom_expr& node, const TTrailingQuestions& tail) {
+    // atom_expr:
+    //     literal_value
+    //   | bind_parameter
+    //   | lambda
+    //   | cast_expr
+    //   | exists_expr
+    //   | case_expr
+    //   | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
+    //   | value_constructor
+    //   | bitcast_expr
+    //   | list_literal
+    //   | dict_literal
+    //   | struct_literal
+    // ;
+    const auto altCase = node.Alt_case();
+    if (tail.Count && altCase != TRule_atom_expr::kAltAtomExpr2) {
+        UnexpectedQuestionToken(tail);
+        return {};
+    }
+    // the flag survives only for the lambda alternative
+    MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (altCase == TRule_atom_expr::kAltAtomExpr3);
+
+    TNodePtr built;
+    switch (altCase) {
+        case TRule_atom_expr::kAltAtomExpr1:
+            Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
+            return LiteralExpr(node.GetAlt_atom_expr1().GetRule_literal_value1());
+        case TRule_atom_expr::kAltAtomExpr2:
+            built = BindParameterRule(node.GetAlt_atom_expr2().GetRule_bind_parameter1(), tail);
+            break;
+        case TRule_atom_expr::kAltAtomExpr3:
+            built = LambdaRule(node.GetAlt_atom_expr3().GetRule_lambda1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr4:
+            built = CastRule(node.GetAlt_atom_expr4().GetRule_cast_expr1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr5:
+            built = ExistsRule(node.GetAlt_atom_expr5().GetRule_exists_expr1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr6:
+            built = CaseRule(node.GetAlt_atom_expr6().GetRule_case_expr1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr7: {
+            // an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
+            const auto& qualified = node.GetAlt_atom_expr7();
+            TString module(Id(qualified.GetRule_an_id_or_type1(), *this));
+            TPosition callPos(Ctx.Pos());
+            TString name;
+            const auto& nameBlock = qualified.GetBlock3();
+            const auto nameCase = nameBlock.Alt_case();
+            if (nameCase == TRule_atom_expr::TAlt7::TBlock3::kAlt1) {
+                name = Id(nameBlock.GetAlt1().GetRule_id_or_type1(), *this);
+            } else if (nameCase == TRule_atom_expr::TAlt7::TBlock3::kAlt2) {
+                name = Token(nameBlock.GetAlt2().GetToken1());
+                if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) {
+                    // a double-quoted token is an identifier here, same as the id_or_type case
+                    name = IdContentFromString(Ctx, name);
+                } else {
+                    module = "@" + module;
+                }
+            } else if (nameCase == TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET) {
+                Y_ABORT("Unsigned number: you should change implementation according to grammar changes");
+            }
+            built = BuildCallable(callPos, module, name, {});
+            break;
+        }
+        case TRule_atom_expr::kAltAtomExpr8:
+            built = ValueConstructor(node.GetAlt_atom_expr8().GetRule_value_constructor1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr9:
+            built = BitCastRule(node.GetAlt_atom_expr9().GetRule_bitcast_expr1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr10:
+            built = ListLiteral(node.GetAlt_atom_expr10().GetRule_list_literal1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr11:
+            built = DictLiteral(node.GetAlt_atom_expr11().GetRule_dict_literal1());
+            break;
+        case TRule_atom_expr::kAltAtomExpr12:
+            built = StructLiteral(node.GetAlt_atom_expr12().GetRule_struct_literal1());
+            break;
+        case TRule_atom_expr::ALT_NOT_SET:
+            AltNotImplemented("atom_expr", node);
+    }
+
+    if (!built) {
+        return {};
+    }
+    TExprOrIdent result;
+    result.Expr = built;
+    return result;
+}
+
+// Dispatches an in_atom_expr to the builder of its alternative.
+// Trailing '?' tokens are accepted only for a bind parameter. Literals are returned exactly
+// as LiteralExpr produces them. A parenthesized SELECT is registered as a labelled subquery
+// in the current blocks and the result refers to it. For every non-literal alternative an
+// empty TMaybe is returned when no node was built.
+TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail) {
+    // in_atom_expr:
+    //     literal_value
+    //   | bind_parameter
+    //   | lambda
+    //   | cast_expr
+    //   | case_expr
+    //   | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
+    //   | LPAREN select_stmt RPAREN
+    //   | value_constructor
+    //   | bitcast_expr
+    //   | list_literal
+    //   | dict_literal
+    //   | struct_literal
+    // ;
+    const auto altCase = node.Alt_case();
+    if (tail.Count && altCase != TRule_in_atom_expr::kAltInAtomExpr2) {
+        UnexpectedQuestionToken(tail);
+        return {};
+    }
+
+    TNodePtr built;
+    switch (altCase) {
+        case TRule_in_atom_expr::kAltInAtomExpr1:
+            Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
+            return LiteralExpr(node.GetAlt_in_atom_expr1().GetRule_literal_value1());
+        case TRule_in_atom_expr::kAltInAtomExpr2:
+            built = BindParameterRule(node.GetAlt_in_atom_expr2().GetRule_bind_parameter1(), tail);
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr3:
+            built = LambdaRule(node.GetAlt_in_atom_expr3().GetRule_lambda1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr4:
+            built = CastRule(node.GetAlt_in_atom_expr4().GetRule_cast_expr1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr5:
+            built = CaseRule(node.GetAlt_in_atom_expr5().GetRule_case_expr1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr6: {
+            // an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
+            const auto& qualified = node.GetAlt_in_atom_expr6();
+            TString module(Id(qualified.GetRule_an_id_or_type1(), *this));
+            TPosition callPos(Ctx.Pos());
+            TString name;
+            const auto& nameBlock = qualified.GetBlock3();
+            const auto nameCase = nameBlock.Alt_case();
+            if (nameCase == TRule_in_atom_expr::TAlt6::TBlock3::kAlt1) {
+                name = Id(nameBlock.GetAlt1().GetRule_id_or_type1(), *this);
+            } else if (nameCase == TRule_in_atom_expr::TAlt6::TBlock3::kAlt2) {
+                name = Token(nameBlock.GetAlt2().GetToken1());
+                if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) {
+                    // a double-quoted token is an identifier here, same as the id_or_type case
+                    name = IdContentFromString(Ctx, name);
+                } else {
+                    module = "@" + module;
+                }
+            } else if (nameCase == TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET) {
+                Y_ABORT("You should change implementation according to grammar changes");
+            }
+            built = BuildCallable(callPos, module, name, {});
+            break;
+        }
+        case TRule_in_atom_expr::kAltInAtomExpr7: {
+            const auto& subselect = node.GetAlt_in_atom_expr7();
+            Token(subselect.GetToken1());
+            // reset column reference scope (select will reenable it where needed)
+            TColumnRefScope denyScope(Ctx, EColumnRefState::Deny);
+            TSqlSelect selectBuilder(Ctx, Mode);
+            TPosition selectPos;
+            auto source = selectBuilder.Build(subselect.GetRule_select_stmt2(), selectPos);
+            if (!source) {
+                Ctx.IncrementMonCounter("sql_errors", "BadSource");
+                return {};
+            }
+            Ctx.IncrementMonCounter("sql_features", "InSubquery");
+            const auto nodeAlias = Ctx.MakeName("subquerynode");
+            const auto refLabel = Ctx.MakeName("subquery");
+            auto& currentBlocks = Ctx.GetCurrentBlocks();
+            currentBlocks.push_back(BuildSubquery(std::move(source), nodeAlias, Mode == NSQLTranslation::ESqlMode::SUBQUERY, -1, Ctx.Scoped));
+            currentBlocks.back()->SetLabel(refLabel);
+            built = BuildSubqueryRef(currentBlocks.back(), refLabel, -1);
+            break;
+        }
+        case TRule_in_atom_expr::kAltInAtomExpr8:
+            built = ValueConstructor(node.GetAlt_in_atom_expr8().GetRule_value_constructor1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr9:
+            built = BitCastRule(node.GetAlt_in_atom_expr9().GetRule_bitcast_expr1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr10:
+            built = ListLiteral(node.GetAlt_in_atom_expr10().GetRule_list_literal1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr11:
+            built = DictLiteral(node.GetAlt_in_atom_expr11().GetRule_dict_literal1());
+            break;
+        case TRule_in_atom_expr::kAltInAtomExpr12:
+            built = StructLiteral(node.GetAlt_in_atom_expr12().GetRule_struct_literal1());
+            break;
+        case TRule_in_atom_expr::ALT_NOT_SET:
+            AltNotImplemented("in_atom_expr", node);
+    }
+
+    if (!built) {
+        return {};
+    }
+    TExprOrIdent result;
+    result.Expr = built;
+    return result;
+}
+
+// Extracts lambda parameter names from the tuple built for the lambda's parenthesis.
+// Every element must be an atom whose content starts with '$'. Optional parameters must all
+// come after the non-optional ones, and names that are not anonymous must be unique.
+// On success `args` holds the names with their positions and `optionalArgumentsCount` the
+// number of optional parameters; on failure an error is reported and false is returned.
+bool TSqlExpression::SqlLambdaParams(const TNodePtr& node, TVector<TSymbolNameWithPos>& args, ui32& optionalArgumentsCount) {
+    args.clear();
+    optionalArgumentsCount = 0;
+
+    auto syntaxError = TStringBuf("Invalid lambda arguments syntax. Lambda arguments should start with '$' as named value.");
+    auto tuple = node->GetTupleNode();
+    if (!tuple) {
+        Ctx.Error(node->GetPos()) << syntaxError;
+        return false;
+    }
+
+    THashSet<TString> seenNames;
+    for (const auto& element : tuple->Elements()) {
+        auto content = element->GetAtomContent();
+        const bool isNamedValue = content && content->StartsWith("$");
+        if (!isNamedValue) {
+            Ctx.Error(element->GetPos()) << syntaxError;
+            return false;
+        }
+
+        if (element->IsOptionalArg()) {
+            ++optionalArgumentsCount;
+        } else if (optionalArgumentsCount > 0) {
+            Ctx.Error(element->GetPos()) << "Non-optional argument can not follow optional one";
+            return false;
+        }
+
+        // anonymous names are never inserted into the set, so they may repeat
+        const bool isDuplicate = !IsAnonymousName(*content) && !seenNames.insert(*content).second;
+        if (isDuplicate) {
+            Ctx.Error(element->GetPos()) << "Duplicate lambda argument parametr: '" << *content << "'.";
+            return false;
+        }
+        args.push_back(TSymbolNameWithPos{*content, element->GetPos()});
+    }
+    return true;
+}
+
+// Builds a lambda body that consists of a single expression and appends it to `exprSeq`.
+// Returns false, leaving `exprSeq` untouched, when the expression fails to build.
+bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_expr& node, TVector<TNodePtr>& exprSeq) {
+    TSqlExpression bodyBuilder(ctx, ctx.Settings.Mode);
+    TNodePtr body = bodyBuilder.Build(node);
+    if (body) {
+        exprSeq.push_back(body);
+        return true;
+    }
+    return false;
+}
+
+// Builds a block-style lambda body: a sequence of named-node and import statements followed
+// by the result expression.
+// Each named-node statement appends labelled nodes to `exprSeq`; a statement with several
+// targets is unpacked through an "EnsureTupleSize" node and one "Nth" node per target.
+// Names introduced by the statements are popped again before returning. The result expression
+// is built only when no statement failed; returns false if it is missing.
+bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq) {
+    TSqlExpression resultBuilder(ctx, ctx.Settings.Mode);
+    TVector<TString> scopedNames;
+    bool failed = false;
+
+    for (auto& stmtBlock : node.GetBlock2()) {
+        const auto& stmt = stmtBlock.GetRule_lambda_stmt1();
+        switch (stmt.Alt_case()) {
+            case TRule_lambda_stmt::kAltLambdaStmt1: {
+                TVector<TSymbolNameWithPos> targets;
+                auto value = NamedNode(stmt.GetAlt_lambda_stmt1().GetRule_named_nodes_stmt1(), targets);
+                if (!value) {
+                    failed = true;
+                    break;
+                }
+                if (value->GetSource()) {
+                    ctx.Error() << "SELECT is not supported inside lambda body";
+                    failed = true;
+                    break;
+                }
+
+                if (targets.size() > 1) {
+                    // several targets: label the tuple once, then bind each target to its element
+                    auto tupleRef = ctx.MakeName("tie");
+                    exprSeq.push_back(value->Y("EnsureTupleSize", value, value->Q(ToString(targets.size()))));
+                    exprSeq.back()->SetLabel(tupleRef);
+                    for (size_t index = 0; index < targets.size(); ++index) {
+                        auto& target = targets[index];
+                        TNodePtr element = value->Y("Nth", tupleRef, value->Q(ToString(index)));
+                        target.Name = PushNamedAtom(target.Pos, target.Name);
+                        element->SetLabel(target.Name);
+                        scopedNames.push_back(target.Name);
+                        exprSeq.push_back(element);
+                    }
+                } else {
+                    auto& target = targets.front();
+                    target.Name = PushNamedAtom(target.Pos, target.Name);
+                    value->SetLabel(target.Name);
+                    scopedNames.push_back(target.Name);
+                    exprSeq.push_back(value);
+                }
+                break;
+            }
+            case TRule_lambda_stmt::kAltLambdaStmt2: {
+                const bool imported = ImportStatement(stmt.GetAlt_lambda_stmt2().GetRule_import_stmt1(), &scopedNames);
+                if (!imported) {
+                    failed = true;
+                }
+                break;
+            }
+            case TRule_lambda_stmt::ALT_NOT_SET:
+                Y_ABORT("SampleClause: does not correspond to grammar changes");
+        }
+    }
+
+    TNodePtr resultExpr;
+    if (!failed) {
+        resultExpr = resultBuilder.Build(node.GetRule_expr4());
+    }
+
+    // names introduced by the statements are popped even when the body failed
+    for (const auto& scopedName : scopedNames) {
+        PopNamedNode(scopedName);
+    }
+
+    if (resultExpr) {
+        exprSeq.push_back(resultExpr);
+        return true;
+    }
+    return false;
+}
+
+// Builds a con_subexpr: either a bare unary_subexpr or one prefixed with a unary operator.
+// The operator token is mapped to "Not", "Plus", "Minus"/"CheckedMinus" (chosen by
+// Ctx.Scoped->PragmaCheckedOps) or "BitNot" and applied to the built operand.
+// Returns null for an unsupported operator or when the operand fails to build.
+TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQuestions& tail) {
+    // con_subexpr: unary_subexpr | unary_op unary_subexpr;
+    const auto altCase = node.Alt_case();
+    if (altCase == TRule_con_subexpr::kAltConSubexpr1) {
+        return UnaryExpr(node.GetAlt_con_subexpr1().GetRule_unary_subexpr1(), tail);
+    }
+    if (altCase == TRule_con_subexpr::ALT_NOT_SET) {
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+    if (altCase != TRule_con_subexpr::kAltConSubexpr2) {
+        return nullptr;
+    }
+
+    const auto& prefixed = node.GetAlt_con_subexpr2();
+    MaybeUnnamedSmartParenOnTop = false;
+    Ctx.IncrementMonCounter("sql_features", "UnaryOperation");
+
+    auto token = prefixed.GetRule_unary_op1().GetToken1();
+    Token(token);
+    TPosition opPos(Ctx.Pos());
+    auto tokenId = token.GetId();
+
+    TString opName;
+    if (IS_TOKEN(tokenId, NOT)) {
+        opName = "Not";
+    } else if (IS_TOKEN(tokenId, PLUS)) {
+        opName = "Plus";
+    } else if (IS_TOKEN(tokenId, MINUS)) {
+        opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMinus" : "Minus";
+    } else if (IS_TOKEN(tokenId, TILDA)) {
+        opName = "BitNot";
+    } else {
+        Ctx.IncrementMonCounter("sql_errors", "UnsupportedUnaryOperation");
+        Error() << "Unsupported unary operation: " << token.GetValue();
+        return nullptr;
+    }
+    Ctx.IncrementMonCounter("sql_unary_operations", opName);
+
+    auto operand = UnaryExpr(prefixed.GetRule_unary_subexpr2(), tail);
+    if (!operand) {
+        return operand;
+    }
+    return operand->ApplyUnaryOp(Ctx, opPos, opName);
+}
+
+TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail) {
+ // xor_subexpr: eq_subexpr cond_expr?;
+ MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock2();
+ TNodePtr res(SubExpr(node.GetRule_eq_subexpr1(), node.HasBlock2() ? TTrailingQuestions{} : tail));
+ if (!res) {
+ return {};
+ }
+ TPosition pos(Ctx.Pos());
+ if (node.HasBlock2()) {
+ auto cond = node.GetBlock2().GetRule_cond_expr1();
+ switch (cond.Alt_case()) {
+ case TRule_cond_expr::kAltCondExpr1: {
+ const auto& matchOp = cond.GetAlt_cond_expr1();
+ const bool notMatch = matchOp.HasBlock1();
+ const TCiString& opName = Token(matchOp.GetRule_match_op2().GetToken1());
+ const auto& pattern = SubExpr(cond.GetAlt_cond_expr1().GetRule_eq_subexpr3(), matchOp.HasBlock4() ? TTrailingQuestions{} : tail);
+ if (!pattern) {
+ return {};
+ }
+ TNodePtr isMatch;
+ if (opName == "like" || opName == "ilike") {
+ const TString* escapeLiteral = nullptr;
+ TNodePtr escapeNode;
+ const auto& escaper = BuildUdf(Ctx, pos, "Re2", "PatternFromLike", {});
+ TVector<TNodePtr> escaperArgs({ escaper, pattern });
+
+ if (matchOp.HasBlock4()) {
+ const auto& escapeBlock = matchOp.GetBlock4();
+ TNodePtr escapeExpr = SubExpr(escapeBlock.GetRule_eq_subexpr2(), tail);
+ if (!escapeExpr) {
+ return {};
+ }
+ escapeLiteral = escapeExpr->GetLiteral("String");
+ escapeNode = escapeExpr;
+ if (escapeLiteral) {
+ Ctx.IncrementMonCounter("sql_features", "LikeEscape");
+ if (escapeLiteral->size() != 1) {
+ Ctx.IncrementMonCounter("sql_errors", "LikeMultiCharEscape");
+ Error() << "ESCAPE clause requires single character argument";
+ return nullptr;
+ }
+ if (escapeLiteral[0] == "%" || escapeLiteral[0] == "_" || escapeLiteral[0] == "\\") {
+ Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
+ Error() << "'%', '_' and '\\' are currently not supported in ESCAPE clause, ";
+ Error() << "please choose any other character";
+ return nullptr;
+ }
+ if (!IsAscii(escapeLiteral->front())) {
+ Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
+ Error() << "Non-ASCII symbols are not supported in ESCAPE clause, ";
+ Error() << "please choose ASCII character";
+ return nullptr;
+ }
+ escaperArgs.push_back(BuildLiteralRawString(pos, *escapeLiteral));
+ } else {
+ Ctx.IncrementMonCounter("sql_errors", "LikeNotLiteralEscape");
+ Error() << "ESCAPE clause requires String literal argument";
+ return nullptr;
+ }
+ }
+
+ auto re2options = BuildUdf(Ctx, pos, "Re2", "Options", {});
+ if (opName == "ilike") {
+ Ctx.IncrementMonCounter("sql_features", "CaseInsensitiveLike");
+ }
+ auto csModeLiteral = BuildLiteralBool(pos, opName != "ilike");
+ csModeLiteral->SetLabel("CaseSensitive");
+ auto csOption = BuildStructure(pos, { csModeLiteral });
+ auto optionsApply = new TCallNodeImpl(pos, "NamedApply", { re2options, BuildTuple(pos, {}), csOption });
+
+ const TNodePtr escapedPattern = new TCallNodeImpl(pos, "Apply", { escaperArgs });
+ auto list = new TAstListNodeImpl(pos, { escapedPattern, optionsApply });
+ auto runConfig = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), list });
+
+ const TNodePtr matcher = new TCallNodeImpl(pos, "AssumeStrict", { BuildUdf(Ctx, pos, "Re2", "Match", { runConfig }) });
+ isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
+
+ bool isUtf8 = false;
+ const TString* literalPattern = pattern->GetLiteral("String");
+ if (!literalPattern) {
+ literalPattern = pattern->GetLiteral("Utf8");
+ isUtf8 = literalPattern != nullptr;
+ }
+
+ if (literalPattern) {
+ bool inEscape = false;
+ TMaybe<char> escape;
+ if (escapeLiteral) {
+ escape = escapeLiteral->front();
+ }
+
+ bool mayIgnoreCase;
+ TVector<TPatternComponent<char>> components;
+ if (isUtf8) {
+ auto splitResult = SplitPattern(UTF8ToUTF32<false>(*literalPattern), escape, inEscape);
+ for (const auto& component : splitResult) {
+ TPatternComponent<char> converted;
+ converted.IsSimple = component.IsSimple;
+ converted.Prefix = WideToUTF8(component.Prefix);
+ converted.Suffix = WideToUTF8(component.Suffix);
+ components.push_back(std::move(converted));
+ }
+ mayIgnoreCase = ToLowerUTF8(*literalPattern) == ToUpperUTF8(*literalPattern);
+ } else {
+ components = SplitPattern(*literalPattern, escape, inEscape);
+ mayIgnoreCase = WithoutAlpha(*literalPattern);
+ }
+
+ if (inEscape) {
+ Ctx.IncrementMonCounter("sql_errors", "LikeEscapeSymbolEnd");
+ Error() << "LIKE pattern should not end with escape symbol";
+ return nullptr;
+ }
+
+ if (opName == "like" || mayIgnoreCase) {
+ // TODO: expand LIKE in optimizers - we can analyze argument types there
+ YQL_ENSURE(!components.empty());
+ const auto& first = components.front();
+ if (components.size() == 1 && first.IsSimple) {
+ // no '%'s and '_'s in pattern
+ YQL_ENSURE(first.Prefix == first.Suffix);
+ isMatch = BuildBinaryOp(Ctx, pos, "==", res, BuildLiteralRawString(pos, first.Suffix, isUtf8));
+ } else if (!first.Prefix.empty()) {
+ const TString& prefix = first.Prefix;
+ TNodePtr prefixMatch;
+ if (Ctx.EmitStartsWith) {
+ prefixMatch = BuildBinaryOp(Ctx, pos, "StartsWith", res, BuildLiteralRawString(pos, prefix, isUtf8));
+ } else {
+ prefixMatch = BuildBinaryOp(Ctx, pos, ">=", res, BuildLiteralRawString(pos, prefix, isUtf8));
+ auto upperBound = isUtf8 ? NextValidUtf8(prefix) : NextLexicographicString(prefix);
+ if (upperBound) {
+ prefixMatch = BuildBinaryOp(
+ Ctx,
+ pos,
+ "And",
+ prefixMatch,
+ BuildBinaryOp(Ctx, pos, "<", res, BuildLiteralRawString(pos, TString(*upperBound), isUtf8))
+ );
+ }
+ }
+
+ if (Ctx.AnsiLike && first.IsSimple && components.size() == 2 && components.back().IsSimple) {
+ const TString& suffix = components.back().Suffix;
+ // 'prefix%suffix'
+ if (suffix.empty()) {
+ isMatch = prefixMatch;
+ } else {
+ // len(str) >= len(prefix) + len(suffix) && StartsWith(str, prefix) && EndsWith(str, suffix)
+ TNodePtr sizePred = BuildBinaryOp(Ctx, pos, ">=",
+ TNodePtr(new TCallNodeImpl(pos, "Size", { res })),
+ TNodePtr(new TLiteralNumberNode<ui32>(pos, "Uint32", ToString(prefix.size() + suffix.size()))));
+ TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8));
+ isMatch = new TCallNodeImpl(pos, "And", {
+ sizePred,
+ prefixMatch,
+ suffixMatch
+ });
+ }
+ } else {
+ isMatch = BuildBinaryOp(Ctx, pos, "And", prefixMatch, isMatch);
+ }
+ } else if (Ctx.AnsiLike && AllOf(components, [](const auto& comp) { return comp.IsSimple; })) {
+ YQL_ENSURE(first.Prefix.empty());
+ if (components.size() == 3 && components.back().Prefix.empty()) {
+ // '%foo%'
+ YQL_ENSURE(!components[1].Prefix.empty());
+ isMatch = BuildBinaryOp(Ctx, pos, "StringContains", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8));
+ } else if (components.size() == 2) {
+ // '%foo'
+ isMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8));
+ }
+ } else if (Ctx.AnsiLike && !components.back().Suffix.empty()) {
+ const TString& suffix = components.back().Suffix;
+ TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8));
+ isMatch = BuildBinaryOp(Ctx, pos, "And", suffixMatch, isMatch);
+ }
+ // TODO: more StringContains/StartsWith/EndsWith cases?
+ }
+ }
+
+ Ctx.IncrementMonCounter("sql_features", notMatch ? "NotLike" : "Like");
+
+ } else if (opName == "regexp" || opName == "rlike" || opName == "match") {
+ if (matchOp.HasBlock4()) {
+ Ctx.IncrementMonCounter("sql_errors", "RegexpEscape");
+ TString opNameUpper(opName);
+ opNameUpper.to_upper();
+ Error() << opName << " and ESCAPE clauses should not be used together";
+ return nullptr;
+ }
+
+ if (!Ctx.PragmaRegexUseRe2) {
+ Ctx.Warning(pos, TIssuesIds::CORE_LEGACY_REGEX_ENGINE) << "Legacy regex engine works incorrectly with unicode. Use PRAGMA RegexUseRe2='true';";
+ }
+
+ const auto& matcher = Ctx.PragmaRegexUseRe2 ?
+ BuildUdf(Ctx, pos, "Re2", opName == "match" ? "Match" : "Grep", {BuildTuple(pos, {pattern, BuildLiteralNull(pos)})}):
+ BuildUdf(Ctx, pos, "Pcre", opName == "match" ? "BacktrackingMatch" : "BacktrackingGrep", { pattern });
+ isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
+ if (opName != "match") {
+ Ctx.IncrementMonCounter("sql_features", notMatch ? "NotRegexp" : "Regexp");
+ } else {
+ Ctx.IncrementMonCounter("sql_features", notMatch ? "NotMatch" : "Match");
+ }
+ } else {
+ Ctx.IncrementMonCounter("sql_errors", "UnknownMatchOp");
+ AltNotImplemented("match_op", cond);
+ return nullptr;
+ }
+ return (notMatch && isMatch) ? isMatch->ApplyUnaryOp(Ctx, pos, "Not") : isMatch;
+ }
+ case TRule_cond_expr::kAltCondExpr2: {
+ // | NOT? IN COMPACT? in_expr
+ auto altInExpr = cond.GetAlt_cond_expr2();
+ const bool notIn = altInExpr.HasBlock1();
+ auto hints = BuildTuple(pos, {});
+ bool isCompact = altInExpr.HasBlock3();
+ if (!isCompact) {
+ auto sqlHints = Ctx.PullHintForToken(Ctx.TokenPosition(altInExpr.GetToken2()));
+ isCompact = AnyOf(sqlHints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
+ }
+ if (isCompact) {
+ Ctx.IncrementMonCounter("sql_features", "IsCompactHint");
+ auto sizeHint = BuildTuple(pos, { BuildQuotedAtom(pos, "isCompact", NYql::TNodeFlags::Default) });
+ hints = BuildTuple(pos, { sizeHint });
+ }
+ TSqlExpression inSubexpr(Ctx, Mode);
+ auto inRight = inSubexpr.SqlInExpr(altInExpr.GetRule_in_expr4(), tail);
+ auto isIn = BuildBuiltinFunc(Ctx, pos, "In", {res, inRight, hints});
+ Ctx.IncrementMonCounter("sql_features", notIn ? "NotIn" : "In");
+ return (notIn && isIn) ? isIn->ApplyUnaryOp(Ctx, pos, "Not") : isIn;
+ }
+ case TRule_cond_expr::kAltCondExpr3: {
+ if (tail.Count) {
+ UnexpectedQuestionToken(tail);
+ return {};
+ }
+ auto altCase = cond.GetAlt_cond_expr3().GetBlock1().Alt_case();
+ const bool notNoll =
+ altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt2 ||
+ altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4
+ ;
+
+ if (altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4 &&
+ !cond.GetAlt_cond_expr3().GetBlock1().GetAlt4().HasBlock1())
+ {
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_MISSING_IS_BEFORE_NOT_NULL) << "Missing IS keyword before NOT NULL";
+ }
+
+ auto isNull = BuildIsNullOp(pos, res);
+ Ctx.IncrementMonCounter("sql_features", notNoll ? "NotNull" : "Null");
+ return (notNoll && isNull) ? isNull->ApplyUnaryOp(Ctx, pos, "Not") : isNull;
+ }
+ case TRule_cond_expr::kAltCondExpr4: {
+ auto alt = cond.GetAlt_cond_expr4();
+ const bool symmetric = alt.HasBlock3() && IS_TOKEN(alt.GetBlock3().GetToken1().GetId(), SYMMETRIC);
+ const bool negation = alt.HasBlock1();
+ TNodePtr left = SubExpr(alt.GetRule_eq_subexpr4(), {});
+ TNodePtr right = SubExpr(alt.GetRule_eq_subexpr6(), tail);
+ if (!left || !right) {
+ return {};
+ }
+
+ const bool bothArgNull = left->IsNull() && right->IsNull();
+ const bool oneArgNull = left->IsNull() || right->IsNull();
+
+ if (res->IsNull() || bothArgNull || (symmetric && oneArgNull)) {
+ Ctx.Warning(pos, TIssuesIds::YQL_OPERATION_WILL_RETURN_NULL)
+ << "BETWEEN operation will return NULL here";
+ }
+
+ auto buildSubexpr = [&](const TNodePtr& left, const TNodePtr& right) {
+ if (negation) {
+ return BuildBinaryOpRaw(
+ pos,
+ "Or",
+ BuildBinaryOpRaw(pos, "<", res, left),
+ BuildBinaryOpRaw(pos, ">", res, right)
+ );
+ } else {
+ return BuildBinaryOpRaw(
+ pos,
+ "And",
+ BuildBinaryOpRaw(pos, ">=", res, left),
+ BuildBinaryOpRaw(pos, "<=", res, right)
+ );
+ }
+ };
+
+ if (symmetric) {
+ Ctx.IncrementMonCounter("sql_features", negation? "NotBetweenSymmetric" : "BetweenSymmetric");
+ return BuildBinaryOpRaw(
+ pos,
+ negation? "And" : "Or",
+ buildSubexpr(left, right),
+ buildSubexpr(right, left)
+ );
+ } else {
+ Ctx.IncrementMonCounter("sql_features", negation? "NotBetween" : "Between");
+ return buildSubexpr(left, right);
+ }
+ }
+ case TRule_cond_expr::kAltCondExpr5: {
+ auto alt = cond.GetAlt_cond_expr5();
+ auto getNode = [](const TRule_cond_expr::TAlt5::TBlock1& b) -> const TRule_eq_subexpr& { return b.GetRule_eq_subexpr2(); };
+ return BinOpList(node.GetRule_eq_subexpr1(), getNode, alt.GetBlock1().begin(), alt.GetBlock1().end(), tail);
+ }
+ case TRule_cond_expr::ALT_NOT_SET:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownConditionExpr");
+ AltNotImplemented("cond_expr", cond);
+ return nullptr;
+ }
+ }
+ return res;
+}
+
+// Folds the operands in [begin, end) into a tree of binary `opName` calls.
+// Two operands give a single call, three give a left-nested pair of calls, and
+// longer ranges are split in half and reduced recursively.
+// The caller must supply at least two operands (asserted in debug builds only).
+TNodePtr TSqlExpression::BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const {
+    TPosition position(Ctx.Pos());
+    const size_t operandCount = end - begin;
+    Y_DEBUG_ABORT_UNLESS(operandCount >= 2);
+    switch (operandCount) {
+        case 2:
+            return BuildBinaryOp(Ctx, position, opName, begin[0], begin[1]);
+        case 3:
+            return BuildBinaryOp(Ctx, position, opName, BuildBinaryOp(Ctx, position, opName, begin[0], begin[1]), begin[2]);
+        default: {
+            const auto middle = begin + operandCount / 2;
+            return BuildBinaryOp(Ctx, position, opName, BinOperList(opName, begin, middle), BinOperList(opName, middle, end));
+        }
+    }
+}
+
+// Collapses the branches in [begin, end) into one branch. A single branch is returned unchanged.
+// Otherwise both halves are reduced recursively; the merged predicate is an "Or" call over
+// CloneContainer() of every predicate in the range, and the merged value is
+// If(left.Pred, left.Value, right.Value).
+TSqlExpression::TCaseBranch TSqlExpression::ReduceCaseBranches(TVector<TCaseBranch>::const_iterator begin, TVector<TCaseBranch>::const_iterator end) const {
+    YQL_ENSURE(begin < end);
+    const size_t total = end - begin;
+    if (total == 1) {
+        return *begin;
+    }
+
+    const auto split = begin + total / 2;
+    auto lower = ReduceCaseBranches(begin, split);
+    auto upper = ReduceCaseBranches(split, end);
+
+    TVector<TNodePtr> predicates;
+    predicates.reserve(total);
+    auto cursor = begin;
+    while (cursor != end) {
+        predicates.push_back(cursor->Pred);
+        ++cursor;
+    }
+
+    TCaseBranch merged;
+    merged.Pred = new TCallNodeImpl(Ctx.Pos(), "Or", CloneContainer(predicates));
+    merged.Value = BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { lower.Pred, lower.Value, upper.Value });
+    return merged;
+}
+
+// Translates "node (opName operand)*" for a single fixed operator name.
+// With no extra operands the expression is just SubExpr(node, tail). Otherwise every operand
+// is translated (trailing '?' tokens go only to the last one) and the list is folded by BinOperList.
+template <typename TNode, typename TGetNode, typename TIter>
+TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
+    if (begin == end) {
+        return SubExpr(node, tail);
+    }
+    // can't have top level smart_parenthesis node if any binary operation is present
+    MaybeUnnamedSmartParenOnTop = false;
+    Ctx.IncrementMonCounter("sql_binary_operations", opName);
+
+    const size_t extraOperands = end - begin;
+    TVector<TNodePtr> operands;
+    operands.reserve(extraOperands + 1);
+    operands.push_back(SubExpr(node, {}));
+    for (auto it = begin; it != end; ++it) {
+        const bool isLast = (it + 1 == end);
+        operands.push_back(SubExpr(getNode(*it), isLast ? tail : TTrailingQuestions{}));
+    }
+    return BinOperList(opName, operands.begin(), operands.end());
+}
+
+// Generic left-to-right builder for a chain "node (op operand)*" in which every list element
+// carries its operator in Token1. Comparison and arithmetic tokens are mapped to operation
+// names, the matching "sql_binary_operations" counter is incremented, and the operands are
+// folded into nested BuildBinaryOp calls. Trailing '?' tokens (tail) are passed only to the
+// last operand. Reports an error and returns nullptr when the operator token is not recognized.
+template <typename TNode, typename TGetNode, typename TIter>
+TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
+ // The flag survives only when there are no operators in the chain.
+ MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
+ TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
+ while (begin != end) {
+ Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
+ // Token() is called before Ctx.Pos() is read, so `pos` is taken after the operator token is registered.
+ Token(begin->GetToken1());
+ TPosition pos(Ctx.Pos());
+ TString opName;
+ auto tokenId = begin->GetToken1().GetId();
+ if (IS_TOKEN(tokenId, LESS)) {
+ opName = "<";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Less");
+ } else if (IS_TOKEN(tokenId, LESS_OR_EQ)) {
+ opName = "<=";
+ Ctx.IncrementMonCounter("sql_binary_operations", "LessOrEq");
+ } else if (IS_TOKEN(tokenId, GREATER)) {
+ opName = ">";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Greater");
+ } else if (IS_TOKEN(tokenId, GREATER_OR_EQ)) {
+ opName = ">=";
+ Ctx.IncrementMonCounter("sql_binary_operations", "GreaterOrEq");
+ } else if (IS_TOKEN(tokenId, PLUS)) {
+ // Arithmetic operators switch to their Checked* form when PragmaCheckedOps is set.
+ opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedAdd" : "+MayWarn";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Plus");
+ } else if (IS_TOKEN(tokenId, MINUS)) {
+ opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedSub" : "-MayWarn";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Minus");
+ } else if (IS_TOKEN(tokenId, ASTERISK)) {
+ opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMul" : "*MayWarn";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Multiply");
+ } else if (IS_TOKEN(tokenId, SLASH)) {
+ opName = "/MayWarn";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Divide");
+ // Unless PragmaClassicDivision is set, the (non-null) left operand is wrapped in SafeCast to Double.
+ // Because of the else-if, "CheckedDiv" is chosen only when that cast was NOT applied.
+ if (!Ctx.Scoped->PragmaClassicDivision && partialResult) {
+ partialResult = new TCallNodeImpl(pos, "SafeCast", {std::move(partialResult), BuildDataType(pos, "Double")});
+ } else if (Ctx.Scoped->PragmaCheckedOps) {
+ opName = "CheckedDiv";
+ }
+ } else if (IS_TOKEN(tokenId, PERCENT)) {
+ opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMod" : "%MayWarn";
+ Ctx.IncrementMonCounter("sql_binary_operations", "Mod");
+ } else {
+ Ctx.IncrementMonCounter("sql_errors", "UnsupportedBinaryOperation");
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return nullptr;
+ }
+
+ // Only the final operand of the chain receives the trailing '?' tokens.
+ partialResult = BuildBinaryOp(Ctx, pos, opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
+ ++begin;
+ }
+
+ return partialResult;
+}
+
+// BinOpList overload for chains that start with a bit_subexpr: shift, rotate and bitwise
+// operators. Each list element stores its operator as an alternative of Block1; the alternative
+// selects the operation name ("ShiftLeft", "ShiftRight", "RotLeft", "RotRight", "BitAnd",
+// "BitOr", "BitXor"). Operands are folded left to right; trailing '?' tokens (tail) go only to
+// the last operand. Returns an empty pointer after reporting an error if an alternative holds an
+// unexpected token.
+template <typename TGetNode, typename TIter>
+TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
+ // The flag survives only when there are no operators in the chain.
+ MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
+ TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
+ while (begin != end) {
+ Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
+ TString opName;
+ switch (begin->GetBlock1().Alt_case()) {
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt1: {
+ Token(begin->GetBlock1().GetAlt1().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, SHIFT_LEFT)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ opName = "ShiftLeft";
+ Ctx.IncrementMonCounter("sql_binary_operations", "ShiftLeft");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt2: {
+ // No Token() call and no token id check in this branch, unlike kAlt1.
+ opName = "ShiftRight";
+ Ctx.IncrementMonCounter("sql_binary_operations", "ShiftRight");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt3: {
+ Token(begin->GetBlock1().GetAlt3().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, ROT_LEFT)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ opName = "RotLeft";
+ Ctx.IncrementMonCounter("sql_binary_operations", "RotLeft");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt4: {
+ // No Token() call and no token id check in this branch, unlike kAlt3.
+ opName = "RotRight";
+ Ctx.IncrementMonCounter("sql_binary_operations", "RotRight");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt5: {
+ Token(begin->GetBlock1().GetAlt5().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt5().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, AMPERSAND)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ opName = "BitAnd";
+ Ctx.IncrementMonCounter("sql_binary_operations", "BitAnd");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt6: {
+ Token(begin->GetBlock1().GetAlt6().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt6().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, PIPE)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ opName = "BitOr";
+ Ctx.IncrementMonCounter("sql_binary_operations", "BitOr");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::kAlt7: {
+ Token(begin->GetBlock1().GetAlt7().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt7().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, CARET)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ opName = "BitXor";
+ Ctx.IncrementMonCounter("sql_binary_operations", "BitXor");
+ break;
+ }
+ case TRule_neq_subexpr_TBlock2_TBlock1::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ // The node position is whatever Ctx.Pos() reports at this point; only the last operand gets `tail`.
+ partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
+ ++begin;
+ }
+
+ return partialResult;
+}
+
+// BinOpList overload for chains that start with an eq_subexpr: equality-style operators.
+// Both EQUALS and EQUALS2 map to "==", both NOT_EQUALS and NOT_EQUALS2 map to "!=", and the
+// distinct_from_op alternative maps to "IsNotDistinctFrom" when its Block2 is present and to
+// "IsDistinctFrom" otherwise. Operands are folded left to right; trailing '?' tokens (tail) go
+// only to the last operand. Returns an empty pointer after reporting an error if an alternative
+// holds an unexpected token.
+template <typename TGetNode, typename TIter>
+TNodePtr TSqlExpression::BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
+ // The flag survives only when there are no operators in the chain.
+ MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
+ TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
+ while (begin != end) {
+ Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
+ TString opName;
+ switch (begin->GetBlock1().Alt_case()) {
+ case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt1: {
+ Token(begin->GetBlock1().GetAlt1().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, EQUALS)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_binary_operations", "Equals");
+ opName = "==";
+ break;
+ }
+ case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt2: {
+ Token(begin->GetBlock1().GetAlt2().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt2().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, EQUALS2)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ // Counted separately from "Equals" but produces the same operation.
+ Ctx.IncrementMonCounter("sql_binary_operations", "Equals2");
+ opName = "==";
+ break;
+ }
+ case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt3: {
+ Token(begin->GetBlock1().GetAlt3().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, NOT_EQUALS)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals");
+ opName = "!=";
+ break;
+ }
+ case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt4: {
+ Token(begin->GetBlock1().GetAlt4().GetToken1());
+ auto tokenId = begin->GetBlock1().GetAlt4().GetToken1().GetId();
+ if (!IS_TOKEN(tokenId, NOT_EQUALS2)) {
+ Error() << "Unsupported binary operation token: " << tokenId;
+ return {};
+ }
+ // Counted separately from "NotEquals" but produces the same operation.
+ Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals2");
+ opName = "!=";
+ break;
+ }
+ case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt5: {
+ Token(begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().GetToken1());
+ opName = begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().HasBlock2() ? "IsNotDistinctFrom" : "IsDistinctFrom";
+ // The operation name doubles as the counter name here.
+ Ctx.IncrementMonCounter("sql_binary_operations", opName);
+ break;
+ }
+ case TRule_cond_expr::TAlt5::TBlock1::TBlock1::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ // The node position is whatever Ctx.Pos() reports at this point; only the last operand gets `tail`.
+ partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
+ ++begin;
+ }
+
+ return partialResult;
+}
+
+// Translates the right-hand side of IN using a separate TSqlExpression whose
+// smart-parenthesis mode is InStatement.
+TNodePtr TSqlExpression::SqlInExpr(const TRule_in_expr& node, const TTrailingQuestions& tail) {
+    TSqlExpression inExpr(Ctx, Mode);
+    inExpr.SetSmartParenthesisMode(ESmartParenthesis::InStatement);
+    return inExpr.UnaryExpr(node.GetRule_in_unary_subexpr1(), tail);
+}
+
+// Translates a parenthesized list of (optionally named) expressions.
+// Depending on the content and on SmartParenthesisMode the result is:
+//  - the single inner expression (one unnamed element, no trailing comma, tuple not expected);
+//  - a list of named nodes (top-level GROUP BY usage);
+//  - a tuple (any unnamed element, tuple expected, or empty list);
+//  - a structure (all elements named).
+// Returns an empty pointer on error.
+TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) {
+ TVector<TNodePtr> exprs;
+ Token(node.GetToken1());
+ const TPosition pos(Ctx.Pos());
+ // Block3 present means the list was written in explicit tuple form
+ // (the GROUP BY error below refers to it as a trailing comma).
+ const bool isTuple = node.HasBlock3();
+ bool expectTuple = SmartParenthesisMode == ESmartParenthesis::InStatement;
+ EExpr mode = EExpr::Regular;
+ if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
+ mode = EExpr::SqlLambdaParams;
+ expectTuple = true;
+ }
+ if (node.HasBlock2() && !NamedExprList(node.GetBlock2().GetRule_named_expr_list1(), exprs, mode)) {
+ return {};
+ }
+
+ bool topLevelGroupBy = MaybeUnnamedSmartParenOnTop && SmartParenthesisMode == ESmartParenthesis::GroupBy;
+
+ // Mixing labeled and unlabeled elements is rejected everywhere except top-level GROUP BY.
+ bool hasAliases = false;
+ bool hasUnnamed = false;
+ for (const auto& expr: exprs) {
+ if (expr->GetLabel()) {
+ hasAliases = true;
+ } else {
+ hasUnnamed = true;
+ }
+ if (hasAliases && hasUnnamed && !topLevelGroupBy) {
+ Ctx.IncrementMonCounter("sql_errors", "AnonymousStructMembers");
+ Ctx.Error(pos) << "Structure does not allow anonymous members";
+ return nullptr;
+ }
+ }
+ // Plain "(expr)" collapses to the expression itself.
+ if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) {
+ return exprs.back();
+ }
+ if (topLevelGroupBy) {
+ if (isTuple) {
+ Ctx.IncrementMonCounter("sql_errors", "SimpleTupleInGroupBy");
+ Token(node.GetBlock3().GetToken1());
+ Ctx.Error() << "Unexpected trailing comma in grouping elements list";
+ return nullptr;
+ }
+ Ctx.IncrementMonCounter("sql_features", "ListOfNamedNode");
+ return BuildListOfNamedNodes(pos, std::move(exprs));
+ }
+ // NOTE(review): the counter is chosen by hasUnnamed alone, so an empty list or an all-named
+ // list in tuple-expecting mode is counted as "SimpleStruct" although a tuple is built.
+ Ctx.IncrementMonCounter("sql_features", hasUnnamed ? "SimpleTuple" : "SimpleStruct");
+ return (hasUnnamed || expectTuple || exprs.size() == 0) ? BuildTuple(pos, exprs) : BuildStructure(pos, exprs);
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_expression.h b/yql/essentials/sql/v1/sql_expression.h
new file mode 100644
index 00000000000..64b9dd8a690
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_expression.h
@@ -0,0 +1,147 @@
+#pragma once
+
+#include "sql_translation.h"
+#include <yql/essentials/core/sql_types/yql_atom_enums.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translator for SQL expressions: turns parsed expression rules (TRule_*) into TNodePtr trees.
+// Build() is the public entry point; the private SubExpr/BinOpList/BinOper overloads handle
+// the individual grammar rules of the expression hierarchy.
+class TSqlExpression: public TSqlTranslation {
+public:
+ // Selects how SmartParenthesis() interprets a parenthesized expression list.
+ enum class ESmartParenthesis {
+ Default,
+ GroupBy,
+ InStatement,
+ SqlLambdaParams,
+ };
+
+ TSqlExpression(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {
+ }
+
+ TNodePtr Build(const TRule_expr& node);
+
+ void SetSmartParenthesisMode(ESmartParenthesis mode) {
+ SmartParenthesisMode = mode;
+ }
+
+ // Clears MaybeUnnamedSmartParenOnTop, which disables the top-level GROUP BY handling
+ // in SmartParenthesis().
+ void MarkAsNamed() {
+ MaybeUnnamedSmartParenOnTop = false;
+ }
+
+ TMaybe<TExprOrIdent> LiteralExpr(const TRule_literal_value& node);
+private:
+ // Trailing '?' tokens carried down the expression rules: how many there are and the
+ // position used when they are reported as unexpected.
+ struct TTrailingQuestions {
+ size_t Count = 0;
+ TPosition Pos;
+ };
+
+ TNodePtr BindParameterRule(const TRule_bind_parameter& rule, const TTrailingQuestions& tail);
+ TNodePtr LambdaRule(const TRule_lambda& rule);
+ TNodePtr CastRule(const TRule_cast_expr& rule);
+ TNodePtr BitCastRule(const TRule_bitcast_expr& rule);
+ TNodePtr ExistsRule(const TRule_exists_expr& rule);
+ TNodePtr CaseRule(const TRule_case_expr& rule);
+
+ TMaybe<TExprOrIdent> AtomExpr(const TRule_atom_expr& node, const TTrailingQuestions& tail);
+ TMaybe<TExprOrIdent> InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail);
+
+ // JSON API helpers (json_value / json_exists / json_query rules).
+ TNodePtr JsonInputArg(const TRule_json_common_args& node);
+ TNodePtr JsonPathSpecification(const TRule_jsonpath_spec& node);
+ TNodePtr JsonReturningTypeRule(const TRule_type_name_simple& node);
+ TNodePtr JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode);
+ void AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children);
+ void AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children);
+ void AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children);
+ TNodePtr JsonVariables(const TRule_json_common_args& node);
+ void AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children);
+ TNodePtr JsonValueExpr(const TRule_json_value& node);
+ void AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children);
+ TNodePtr JsonExistsExpr(const TRule_json_exists& node);
+ EJsonQueryWrap JsonQueryWrapper(const TRule_json_query& node);
+ EJsonQueryHandler JsonQueryHandler(const TRule_json_query_handler& node);
+ TNodePtr JsonQueryExpr(const TRule_json_query& node);
+ TNodePtr JsonApiExpr(const TRule_json_api_expr& node);
+
+ template<typename TUnaryCasualExprRule>
+ TNodePtr UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail);
+
+ template<typename TUnarySubExprRule>
+ TNodePtr UnaryExpr(const TUnarySubExprRule& node, const TTrailingQuestions& tail);
+
+ bool SqlLambdaParams(const TNodePtr& node, TVector<TSymbolNameWithPos>& args, ui32& optionalArgumentsCount);
+ bool SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq);
+ bool SqlLambdaExprBody(TContext& ctx, const TRule_expr& node, TVector<TNodePtr>& exprSeq);
+
+ // Builds the inner expression of a key_expr with a fresh TSqlExpression
+ // (default smart-parenthesis state).
+ TNodePtr KeyExpr(const TRule_key_expr& node) {
+ TSqlExpression expr(Ctx, Mode);
+ return expr.Build(node.GetRule_expr2());
+ }
+
+ // One SubExpr overload per grammar rule of the expression hierarchy.
+ TNodePtr SubExpr(const TRule_con_subexpr& node, const TTrailingQuestions& tail);
+ TNodePtr SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail);
+
+ TNodePtr SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail);
+
+ TNodePtr SubExpr(const TRule_add_subexpr& node, const TTrailingQuestions& tail);
+
+ TNodePtr SubExpr(const TRule_bit_subexpr& node, const TTrailingQuestions& tail);
+
+ TNodePtr SubExpr(const TRule_neq_subexpr& node, const TTrailingQuestions& tailExternal);
+
+ TNodePtr SubExpr(const TRule_eq_subexpr& node, const TTrailingQuestions& tail);
+
+ TNodePtr SubExpr(const TRule_or_subexpr& node, const TTrailingQuestions& tail);
+
+ TNodePtr SubExpr(const TRule_and_subexpr& node, const TTrailingQuestions& tail);
+
+ // BinOpList: chains whose operator is read from each list element (generic version plus
+ // overloads for bit_subexpr and eq_subexpr heads).
+ template <typename TNode, typename TGetNode, typename TIter>
+ TNodePtr BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail);
+
+ template <typename TGetNode, typename TIter>
+ TNodePtr BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail);
+
+ template <typename TGetNode, typename TIter>
+ TNodePtr BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail);
+
+ // Folds already translated operands [begin, end) with a single operator name.
+ TNodePtr BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const;
+
+ // NOTE(review): `block` is taken by const value, which copies the generated rule object on
+ // every call; a const reference may be intended - confirm against the definition.
+ TNodePtr RowPatternVarAccess(const TString& alias, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block);
+
+ // Predicate/value pair used when reducing a list of CASE branches.
+ struct TCaseBranch {
+ TNodePtr Pred;
+ TNodePtr Value;
+ };
+ TCaseBranch ReduceCaseBranches(TVector<TCaseBranch>::const_iterator begin, TVector<TCaseBranch>::const_iterator end) const;
+
+ // BinOper: chain with one fixed operator name (operands are folded by BinOperList).
+ template <typename TNode, typename TGetNode, typename TIter>
+ TNodePtr BinOper(const TString& operName, const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail);
+
+ TNodePtr SqlInExpr(const TRule_in_expr& node, const TTrailingQuestions& tail);
+
+ // Reports the trailing '?' tokens as an error at tail.Pos; must only be called when tail.Count > 0.
+ void UnexpectedQuestionToken(const TTrailingQuestions& tail) {
+ YQL_ENSURE(tail.Count > 0);
+ Ctx.Error(tail.Pos) << "Unexpected token '?' at the end of expression";
+ }
+
+ TNodePtr SmartParenthesis(const TRule_smart_parenthesis& node);
+
+ ESmartParenthesis SmartParenthesisMode = ESmartParenthesis::Default;
+ // Starts as true; cleared by MarkAsNamed() and as soon as a binary operation is seen.
+ // SmartParenthesis() combines it with GroupBy mode to detect a top-level grouping list.
+ bool MaybeUnnamedSmartParenOnTop = true;
+
+ THashMap<TString, TNodePtr> ExprShortcuts;
+};
+
+bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter);
+
+bool ChangefeedSettings(const TRule_changefeed_settings& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter);
+
+bool CreateChangefeed(const TRule_changefeed& node, TSqlExpression& ctx, TVector<TChangefeedDescription>& changefeeds);
+
+bool Expr(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr& node);
+
+bool ExprList(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr_list& node);
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_group_by.cpp b/yql/essentials/sql/v1/sql_group_by.cpp
new file mode 100644
index 00000000000..46ae8ee52a8
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_group_by.cpp
@@ -0,0 +1,475 @@
+#include "sql_group_by.h"
+#include "sql_expression.h"
+#include "source.h"
+#include <yql/essentials/minikql/mkql_type_ops.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+const TString TGroupByClause::AutogenerateNamePrefix = "group";
+
+// Translates a GROUP BY clause: resolves the compact flag, rejects DISTINCT, parses the
+// grouping elements, maps the optional "WITH <mode>" to an aggregation suffix and finally
+// resolves the collected grouping sets. Returns false after reporting an error.
+bool TGroupByClause::Build(const TRule_group_by_clause& node) {
+    // group_by_clause: GROUP COMPACT? BY opt_set_quantifier grouping_element_list (WITH an_id)?;
+    if (Ctx.CompactGroupBy.Defined()) {
+        // A context-wide setting overrides both the keyword and the hint.
+        CompactGroupBy = *Ctx.CompactGroupBy;
+    } else if (node.HasBlock2()) {
+        CompactGroupBy = true;
+    } else {
+        // No COMPACT keyword: fall back to a "compact" SQL hint attached to the GROUP token.
+        auto sqlHints = Ctx.PullHintForToken(Ctx.TokenPosition(node.GetToken1()));
+        CompactGroupBy = AnyOf(sqlHints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
+    }
+
+    TPosition distinctPos;
+    if (IsDistinctOptSet(node.GetRule_opt_set_quantifier4(), distinctPos)) {
+        Ctx.Error(distinctPos) << "DISTINCT is not supported in GROUP BY clause yet!";
+        Ctx.IncrementMonCounter("sql_errors", "DistinctInGroupByNotSupported");
+        return false;
+    }
+    if (!ParseList(node.GetRule_grouping_element_list5(), EGroupByFeatures::Ordinary)) {
+        return false;
+    }
+
+    if (node.HasBlock6()) {
+        TString mode = Id(node.GetBlock6().GetRule_an_id2(), *this);
+        TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), mode);
+        if (normalizeError.Defined()) {
+            Error() << normalizeError->GetMessage();
+            Ctx.IncrementMonCounter("sql_errors", "NormalizeGroupByModeError");
+            return false;
+        }
+
+        // Normalized mode name -> suffix; nullptr marks an unknown mode.
+        const auto suffixForMode = [](const TString& name) -> const char* {
+            if (name == "combine") {
+                return "Combine";
+            }
+            if (name == "combinestate") {
+                return "CombineState";
+            }
+            if (name == "mergestate") {
+                return "MergeState";
+            }
+            if (name == "finalize") {
+                return "Finalize";
+            }
+            if (name == "mergefinalize") {
+                return "MergeFinalize";
+            }
+            if (name == "mergemanyfinalize") {
+                return "MergeManyFinalize";
+            }
+            return nullptr;
+        };
+
+        const char* const suffix = suffixForMode(mode);
+        if (!suffix) {
+            Ctx.Error() << "Unsupported group by mode: " << mode;
+            Ctx.IncrementMonCounter("sql_errors", "GroupByModeUnknown");
+            return false;
+        }
+        Suffix = suffix;
+    }
+
+    return ResolveGroupByAndGrouping();
+}
+
+// Parses every grouping element of the list in order, using the same feature context for all
+// of them. Stops at the first element that fails and returns false; returns true otherwise.
+bool TGroupByClause::ParseList(const TRule_grouping_element_list& groupingListNode, EGroupByFeatures featureContext) {
+    if (!GroupingElement(groupingListNode.GetRule_grouping_element1(), featureContext)) {
+        return false;
+    }
+    // Bind by const reference: the blocks are generated rule objects and copying one per
+    // iteration is pure overhead.
+    for (const auto& block: groupingListNode.GetBlock2()) {
+        if (!GroupingElement(block.GetRule_grouping_element2(), featureContext)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Increments the "GroupBy" counter in the `field` group, plus one counter for every
+// grouping feature that is set in Features().
+void TGroupByClause::SetFeatures(const TString& field) const {
+    struct TTrackedFeature {
+        EGroupByFeatures Feature;
+        const char* Counter;
+    };
+    // Order matches the order in which the counters are incremented.
+    static const TTrackedFeature trackedFeatures[] = {
+        {EGroupByFeatures::Ordinary, "GroupByOrdinary"},
+        {EGroupByFeatures::Expression, "GroupByExpression"},
+        {EGroupByFeatures::Rollup, "GroupByRollup"},
+        {EGroupByFeatures::Cube, "GroupByCube"},
+        {EGroupByFeatures::GroupingSet, "GroupByGroupingSet"},
+        {EGroupByFeatures::Empty, "GroupByEmpty"},
+    };
+
+    Ctx.IncrementMonCounter(field, "GroupBy");
+    const auto& features = Features();
+    for (const auto& tracked: trackedFeatures) {
+        if (features.Test(tracked.Feature)) {
+            Ctx.IncrementMonCounter(field, tracked.Counter);
+        }
+    }
+}
+
+// Returns a mutable reference to the collected grouping nodes (GroupBySet).
+TVector<TNodePtr>& TGroupByClause::Content() {
+ return GroupBySet;
+}
+
+// Returns a mutable reference to the alias map stored in GroupSetContext (NodeAliases).
+TMap<TString, TNodePtr>& TGroupByClause::Aliases() {
+ return GroupSetContext->NodeAliases;
+}
+
+// Returns the stored LegacyHoppingWindowSpec pointer.
+TLegacyHoppingWindowSpecPtr TGroupByClause::GetLegacyHoppingWindow() const {
+ return LegacyHoppingWindowSpec;
+}
+
+// Returns the compact flag computed by Build() (context setting, COMPACT keyword or "compact" hint).
+bool TGroupByClause::IsCompactGroupBy() const {
+ return CompactGroupBy;
+}
+
+// Returns a copy of Suffix, which Build() sets from the optional "WITH <mode>" part of the clause.
+TString TGroupByClause::GetSuffix() const {
+ return Suffix;
+}
+
+// Builds the cartesian product of two lists of grouping sets: for every pair (left, right) the
+// result contains one list node holding the left set's items followed by the right set's items.
+// Every input node must expose a content list; otherwise an error is reported and an empty
+// TMaybe is returned.
+TMaybe<TVector<TNodePtr>> TGroupByClause::MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const {
+    TVector<TNodePtr> content;
+    // The product size is known up front, so allocate once.
+    content.reserve(lhs.size() * rhs.size());
+    for (const auto& leftNode: lhs) {
+        const auto leftPtr = leftNode->ContentListPtr();
+        if (!leftPtr) {
+            // TODO: shouldn't happen
+            Ctx.Error() << "Unable to multiply grouping sets";
+            return {};
+        }
+        for (const auto& rightNode: rhs) {
+            // Validate the right operand before any work is spent on building the combined item.
+            const auto rightPtr = rightNode->ContentListPtr();
+            if (!rightPtr) {
+                // TODO: shouldn't happen
+                Ctx.Error() << "Unable to multiply grouping sets";
+                return {};
+            }
+            TVector<TNodePtr> mulItem;
+            mulItem.reserve(leftPtr->size() + rightPtr->size());
+            mulItem.insert(mulItem.end(), leftPtr->begin(), leftPtr->end());
+            mulItem.insert(mulItem.end(), rightPtr->begin(), rightPtr->end());
+            content.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(mulItem)));
+        }
+    }
+    return content;
+}
+
+// Combines plain grouping keys with grouping-set lists (elements that expose a content list).
+// If GroupBySet contains no such list, nothing changes. Otherwise all elements are multiplied
+// together, in order, into one list of grouping sets which then replaces GroupBySet as its
+// single element. Returns false if a multiplication step fails.
+bool TGroupByClause::ResolveGroupByAndGrouping() {
+    auto listPos = std::find_if(GroupBySet.begin(), GroupBySet.end(), [](const TNodePtr& node) {
+        return node->ContentListPtr();
+    });
+    if (listPos == GroupBySet.end()) {
+        return true;
+    }
+    // Deliberate copy: the running product must not modify the node's own content list.
+    auto curContent = *(*listPos)->ContentListPtr();
+    if (listPos != GroupBySet.begin()) {
+        // Plain keys that precede the first list act as a single one-item grouping set.
+        TVector<TNodePtr> emulate(GroupBySet.begin(), listPos);
+        TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
+        auto mult = MultiplyGroupingSets(emulateContent, curContent);
+        if (!mult) {
+            return false;
+        }
+        // Move the product out of the TMaybe instead of copying the whole vector.
+        curContent = std::move(*mult);
+    }
+    for (++listPos; listPos != GroupBySet.end(); ++listPos) {
+        auto newElem = (*listPos)->ContentListPtr();
+        if (newElem) {
+            auto mult = MultiplyGroupingSets(curContent, *newElem);
+            if (!mult) {
+                return false;
+            }
+            curContent = std::move(*mult);
+        } else {
+            // A plain key is wrapped into a one-item grouping set before multiplying.
+            TVector<TNodePtr> emulate(1, *listPos);
+            TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
+            auto mult = MultiplyGroupingSets(curContent, emulateContent);
+            if (!mult) {
+                return false;
+            }
+            curContent = std::move(*mult);
+        }
+    }
+    TVector<TNodePtr> result(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(curContent)));
+    std::swap(result, GroupBySet);
+    return true;
+}
+
+// Translates a single grouping element of GROUP BY into entries of GroupBySet.
+//
+// Alternatives handled (per TRule_grouping_element):
+//   1 - ordinary grouping set: delegated to OrdinaryGroupingSet();
+//   2 - ROLLUP list: emits every non-empty prefix of the list, longest first,
+//       followed by the empty grouping;
+//   3 - CUBE list: emits every non-empty subset of the list followed by the
+//       empty grouping; rejected when the list is longer than
+//       Ctx.PragmaGroupByCubeLimit;
+//   4 - GROUPING SETS: parses the nested element list with a sub-clause and
+//       flattens its content into one collection;
+//   5 - hopping window specification: delegated to HoppingWindow().
+//
+// Returns false when any nested parse step fails or the CUBE limit is exceeded.
+bool TGroupByClause::GroupingElement(const TRule_grouping_element& node, EGroupByFeatures featureContext) {
+    TVector<TNodePtr> emptyContent;
+    switch (node.Alt_case()) {
+        case TRule_grouping_element::kAltGroupingElement1:
+            if (!OrdinaryGroupingSet(node.GetAlt_grouping_element1().GetRule_ordinary_grouping_set1(), featureContext)) {
+                return false;
+            }
+            Features().Set(EGroupByFeatures::Ordinary);
+            break;
+        case TRule_grouping_element::kAltGroupingElement2: {
+            // The sub-clause shares GroupSetContext with this clause.
+            TGroupByClause subClause(Ctx, Mode, GroupSetContext);
+            if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element2().GetRule_rollup_list1().GetRule_ordinary_grouping_set_list3(),
+                EGroupByFeatures::Rollup))
+            {
+                return false;
+            }
+            auto& content = subClause.Content();
+            TVector<TNodePtr> collection;
+            // Walk the end iterator backwards: [begin, limit) is each prefix, longest first.
+            for (auto limit = content.end(), begin = content.begin(); limit != begin; --limit) {
+                TVector<TNodePtr> grouping(begin, limit);
+                collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
+            }
+            collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
+            GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
+            Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByRollup" << content.size());
+            Features().Set(EGroupByFeatures::Rollup);
+            break;
+        }
+        case TRule_grouping_element::kAltGroupingElement3: {
+            TGroupByClause subClause(Ctx, Mode, GroupSetContext);
+            if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element3().GetRule_cube_list1().GetRule_ordinary_grouping_set_list3(),
+                EGroupByFeatures::Cube))
+            {
+                return false;
+            }
+            auto& content = subClause.Content();
+            if (content.size() > Ctx.PragmaGroupByCubeLimit) {
+                Ctx.Error() << "GROUP BY CUBE is allowed only for " << Ctx.PragmaGroupByCubeLimit << " columns, but you use " << content.size();
+                return false;
+            }
+            TVector<TNodePtr> collection;
+            // Each bit of mask selects one column. The shifts are done on an unsigned
+            // literal so that the mask arithmetic never goes through signed int.
+            for (unsigned mask = (1u << content.size()) - 1; mask > 0; --mask) {
+                TVector<TNodePtr> grouping;
+                for (unsigned index = 0; index < content.size(); ++index) {
+                    if (mask & (1u << index)) {
+                        grouping.push_back(content[content.size() - index - 1]);
+                    }
+                }
+                collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
+            }
+            collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
+            GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
+            Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByCube" << content.size());
+            Features().Set(EGroupByFeatures::Cube);
+            break;
+        }
+        case TRule_grouping_element::kAltGroupingElement4: {
+            // Bind by reference: copying the list node would duplicate the whole parse subtree.
+            const auto& listNode = node.GetAlt_grouping_element4().GetRule_grouping_sets_specification1().GetRule_grouping_element_list4();
+            TGroupByClause subClause(Ctx, Mode, GroupSetContext);
+            if (!subClause.ParseList(listNode, EGroupByFeatures::GroupingSet)) {
+                return false;
+            }
+            auto& content = subClause.Content();
+            TVector<TNodePtr> collection;
+            bool hasEmpty = false;
+            for (auto& elem: content) {
+                auto elemContent = elem->ContentListPtr();
+                if (elemContent) {
+                    if (!elemContent->empty() && elemContent->front()->ContentListPtr()) {
+                        // A list of lists: feed every inner list separately.
+                        for (auto& sub: *elemContent) {
+                            FeedCollection(sub, collection, hasEmpty);
+                        }
+                    } else {
+                        FeedCollection(elem, collection, hasEmpty);
+                    }
+                } else {
+                    // A plain node becomes a one-element grouping.
+                    TVector<TNodePtr> elemList(1, std::move(elem));
+                    collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(elemList)));
+                }
+            }
+            GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
+            Features().Set(EGroupByFeatures::GroupingSet);
+            break;
+        }
+        case TRule_grouping_element::kAltGroupingElement5: {
+            if (!HoppingWindow(node.GetAlt_grouping_element5().GetRule_hopping_window_specification1())) {
+                return false;
+            }
+            break;
+        }
+        case TRule_grouping_element::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+    return true;
+}
+
+// Appends elem to collection, but lets at most one empty-list element through:
+// hasEmpty remembers whether an empty list has already been appended.
+void TGroupByClause::FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const {
+    const auto items = elem->ContentListPtr();
+    const bool isEmptyList = items && items->empty();
+    if (isEmptyList && hasEmpty) {
+        // A second empty grouping would be a duplicate.
+        return;
+    }
+    if (isEmptyList) {
+        hasEmpty = true;
+    }
+    collection.push_back(elem);
+}
+
+// Parses one ordinary grouping set (a named expression) and appends it to GroupBySet.
+//
+// Labelled expressions are registered through AddAlias() and replaced by a column
+// reference to the label. Unlabelled expressions that are not plain columns get an
+// autogenerated label, provided AllowUnnamed() accepts them for featureContext.
+// Returns false on any parse or validation error.
+bool TGroupByClause::OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node, EGroupByFeatures featureContext) {
+    TNodePtr groupingExpr;
+    {
+        // Column references are allowed only while the expression itself is being built.
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        groupingExpr = NamedExpr(node.GetRule_named_expr1(), EExpr::GroupBy);
+    }
+    if (!groupingExpr) {
+        return false;
+    }
+
+    auto exprLabel = groupingExpr->GetLabel();
+    auto items = groupingExpr->ContentListPtr();
+    if (items) {
+        // The expression is a list: an alias on the list is accepted only for a
+        // single, unlabelled item.
+        const bool aliasOnList = exprLabel && (items->size() != 1 || items->front()->GetLabel());
+        if (aliasOnList) {
+            Ctx.Error() << "Unable to use aliases for list of named expressions";
+            Ctx.IncrementMonCounter("sql_errors", "GroupByAliasForListOfExpressions");
+            return false;
+        }
+        for (auto& item: *items) {
+            auto itemLabel = item->GetLabel();
+            if (!itemLabel) {
+                if (item->GetColumnName()) {
+                    groupingExpr->AssumeColumn();
+                    continue;
+                }
+
+                if (!AllowUnnamed(item->GetPos(), featureContext)) {
+                    return false;
+                }
+
+                itemLabel = GenerateGroupByExprName();
+                item->SetLabel(itemLabel);
+            }
+            if (!AddAlias(itemLabel, item)) {
+                return false;
+            }
+            item = BuildColumn(item->GetPos(), itemLabel);
+        }
+    } else {
+        if (!exprLabel && groupingExpr->GetColumnName()) {
+            groupingExpr->AssumeColumn();
+        }
+
+        if (!exprLabel && !groupingExpr->GetColumnName()) {
+            if (!AllowUnnamed(groupingExpr->GetPos(), featureContext)) {
+                return false;
+            }
+            exprLabel = GenerateGroupByExprName();
+            groupingExpr->SetLabel(exprLabel);
+        }
+        if (exprLabel) {
+            if (!AddAlias(exprLabel, groupingExpr)) {
+                return false;
+            }
+            groupingExpr = BuildColumn(groupingExpr->GetPos(), exprLabel);
+        }
+    }
+    GroupBySet.emplace_back(std::move(groupingExpr));
+    return true;
+}
+
+// Parses a comma-separated list of ordinary grouping sets, stopping at the first failure.
+bool TGroupByClause::OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node, EGroupByFeatures featureContext) {
+    const bool headParsed = OrdinaryGroupingSet(node.GetRule_ordinary_grouping_set1(), featureContext);
+    if (!headParsed) {
+        return false;
+    }
+    for (const auto& tail: node.GetBlock2()) {
+        const bool tailParsed = OrdinaryGroupingSet(tail.GetRule_ordinary_grouping_set2(), featureContext);
+        if (!tailParsed) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Parses a hopping window specification into LegacyHoppingWindowSpec.
+//
+// The time extractor is taken from expr3; hop, interval and delay are taken from
+// expr5, expr7 and expr9. A second specification in the same clause is an error.
+// Returns false when any of the expressions fails to build or validate.
+bool TGroupByClause::HoppingWindow(const TRule_hopping_window_specification& node) {
+    if (LegacyHoppingWindowSpec) {
+        Ctx.Error() << "Duplicate hopping window specification.";
+        return false;
+    }
+    LegacyHoppingWindowSpec = new TLegacyHoppingWindowSpec;
+    {
+        // Column references are permitted only inside the time extractor expression.
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        TSqlExpression expr(Ctx, Mode);
+        LegacyHoppingWindowSpec->TimeExtractor = expr.Build(node.GetRule_expr3());
+        if (!LegacyHoppingWindowSpec->TimeExtractor) {
+            return false;
+        }
+    }
+
+    // Builds one interval parameter. A string literal is validated as an interval and
+    // emitted as (Interval (quote <value>)); any other expression is wrapped into
+    // EvaluateExpr. Returns nullptr on error.
+    auto processIntervalParam = [&] (const TRule_expr& rule) -> TNodePtr {
+        TSqlExpression expr(Ctx, Mode);
+        auto built = expr.Build(rule);
+        if (!built) {
+            return nullptr;
+        }
+
+        auto literal = built->GetLiteral("String");
+        if (!literal) {
+            return new TAstListNodeImpl(Ctx.Pos(), {
+                new TAstAtomNodeImpl(Ctx.Pos(), "EvaluateExpr", TNodeFlags::Default),
+                built
+            });
+        }
+
+        const auto parsed = NKikimr::NMiniKQL::ValueFromString(NKikimr::NUdf::EDataSlot::Interval, *literal);
+        if (!parsed) {
+            Ctx.Error(built->GetPos()) << "Expected interval in ISO 8601 format";
+            return nullptr;
+        }
+
+        if ('T' == literal->back()) {
+            Ctx.Error(built->GetPos()) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent.";
+            return nullptr;
+        }
+
+        return new TAstListNodeImpl(Ctx.Pos(), {
+            new TAstAtomNodeImpl(Ctx.Pos(), "Interval", TNodeFlags::Default),
+            new TAstListNodeImpl(Ctx.Pos(), {
+                new TAstAtomNodeImpl(Ctx.Pos(), "quote", TNodeFlags::Default),
+                new TAstAtomNodeImpl(Ctx.Pos(), ToString(parsed.Get<i64>()), TNodeFlags::Default)
+            })
+        });
+    };
+
+    auto& spec = *LegacyHoppingWindowSpec;
+
+    spec.Hop = processIntervalParam(node.GetRule_expr5());
+    if (!spec.Hop) {
+        return false;
+    }
+
+    spec.Interval = processIntervalParam(node.GetRule_expr7());
+    if (!spec.Interval) {
+        return false;
+    }
+
+    spec.Delay = processIntervalParam(node.GetRule_expr9());
+    if (!spec.Delay) {
+        return false;
+    }
+
+    spec.DataWatermarks = Ctx.PragmaDataWatermarks;
+    return true;
+}
+
+// Tells whether an expression without an alias or column name may be used in the
+// given feature context. Only ordinary GROUP BY accepts it; for ROLLUP, CUBE and
+// GROUPING SETS an error is reported at pos and false is returned.
+bool TGroupByClause::AllowUnnamed(TPosition pos, EGroupByFeatures featureContext) {
+    if (featureContext == EGroupByFeatures::Ordinary) {
+        return true;
+    }
+
+    TStringBuf construct;
+    switch (featureContext) {
+        case EGroupByFeatures::Rollup:
+            construct = "ROLLUP";
+            break;
+        case EGroupByFeatures::Cube:
+            construct = "CUBE";
+            break;
+        case EGroupByFeatures::GroupingSet:
+            construct = "GROUPING SETS";
+            break;
+        default:
+            YQL_ENSURE(false, "Unknown feature");
+    }
+
+    Ctx.Error(pos) << "Unnamed expressions are not supported in " << construct << ". Please use '<expr> AS <name>'.";
+    Ctx.IncrementMonCounter("sql_errors", "GroupBySetNoAliasOrColumn");
+    return false;
+}
+
+// Mutable access to the feature set stored in the group-set context.
+TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() {
+    auto& sharedCtx = *GroupSetContext;
+    return sharedCtx.GroupFeatures;
+}
+
+// Read-only access to the feature set stored in the group-set context.
+const TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() const {
+    const auto& sharedCtx = *GroupSetContext;
+    return sharedCtx.GroupFeatures;
+}
+
+// Registers node under label in the alias map. A label that is already present
+// is reported as an error and leaves the map untouched.
+bool TGroupByClause::AddAlias(const TString& label, const TNodePtr& node) {
+    auto& aliases = Aliases();
+    if (!aliases.contains(label)) {
+        aliases.emplace(label, node);
+        return true;
+    }
+    Ctx.Error() << "Duplicated aliases not allowed";
+    Ctx.IncrementMonCounter("sql_errors", "GroupByDuplicateAliases");
+    return false;
+}
+
+// Produces the next autogenerated label: AutogenerateNamePrefix followed by the
+// current value of the context's unnamed-expression counter, which is then incremented.
+TString TGroupByClause::GenerateGroupByExprName() {
+    const size_t ordinal = GroupSetContext->UnnamedCount++;
+    return TStringBuilder() << AutogenerateNamePrefix << ordinal;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_group_by.h b/yql/essentials/sql/v1/sql_group_by.h
new file mode 100644
index 00000000000..83e602596c6
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_group_by.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "sql_translation.h"
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translator for the GROUP BY clause: collects grouping expressions, their
+// aliases and an optional legacy hopping window specification.
+class TGroupByClause: public TSqlTranslation {
+    // Kinds of grouping constructs. The enumerators double as bit indexes of
+    // TGroupingSetFeatures, spanning [Begin, End).
+    enum class EGroupByFeatures {
+        Begin,
+        Ordinary = Begin,
+        Expression,
+        Rollup,
+        Cube,
+        GroupingSet,
+        Empty,
+        End,
+    };
+    using TGroupingSetFeatures = TEnumBitSet<EGroupByFeatures, static_cast<int>(EGroupByFeatures::Begin), static_cast<int>(EGroupByFeatures::End)>;
+
+    // Ref-counted state that a clause passes on to the sub-clauses it creates.
+    class TGroupByClauseCtx: public TSimpleRefCount<TGroupByClauseCtx> {
+    public:
+        using TPtr = TIntrusivePtr<TGroupByClauseCtx>;
+
+        TGroupingSetFeatures GroupFeatures;   // constructs seen so far
+        TMap<TString, TNodePtr> NodeAliases;  // label -> aliased expression
+        size_t UnnamedCount = 0;              // counter behind autogenerated labels
+    };
+
+public:
+    // When groupSetContext is empty a fresh context is created for this clause.
+    TGroupByClause(TContext& ctx, NSQLTranslation::ESqlMode mode, TGroupByClauseCtx::TPtr groupSetContext = {})
+        : TSqlTranslation(ctx, mode)
+        , GroupSetContext(groupSetContext ? groupSetContext : TGroupByClauseCtx::TPtr(new TGroupByClauseCtx()))
+        , CompactGroupBy(false)
+    {}
+
+    bool Build(const TRule_group_by_clause& node);
+    bool ParseList(const TRule_grouping_element_list& groupingListNode, EGroupByFeatures featureContext);
+
+    void SetFeatures(const TString& field) const;
+    TVector<TNodePtr>& Content();
+    TMap<TString, TNodePtr>& Aliases();
+    TLegacyHoppingWindowSpecPtr GetLegacyHoppingWindow() const;
+    bool IsCompactGroupBy() const;
+    TString GetSuffix() const;
+
+private:
+    TMaybe<TVector<TNodePtr>> MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const;
+    bool ResolveGroupByAndGrouping();
+    bool GroupingElement(const TRule_grouping_element& node, EGroupByFeatures featureContext);
+    void FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const;
+    bool OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node, EGroupByFeatures featureContext);
+    bool OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node, EGroupByFeatures featureContext);
+    bool HoppingWindow(const TRule_hopping_window_specification& node);
+
+    bool AllowUnnamed(TPosition pos, EGroupByFeatures featureContext);
+
+    TGroupingSetFeatures& Features();
+    const TGroupingSetFeatures& Features() const;
+    bool AddAlias(const TString& label, const TNodePtr& node);
+    TString GenerateGroupByExprName();
+    bool IsAutogenerated(const TString* name) const;
+
+    TVector<TNodePtr> GroupBySet;
+    TGroupByClauseCtx::TPtr GroupSetContext;
+    TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec; // stream queries
+    static const TString AutogenerateNamePrefix;
+    bool CompactGroupBy;
+    TString Suffix;
+};
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_into_tables.cpp b/yql/essentials/sql/v1/sql_into_tables.cpp
new file mode 100644
index 00000000000..43cc08de0d7
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_into_tables.cpp
@@ -0,0 +1,267 @@
+#include "sql_into_tables.h"
+#include "sql_values.h"
+
+#include <util/string/join.h>
+
+using namespace NYql;
+
+namespace NSQLTranslationV1 {
+
+using NALPDefault::SQLv1LexerTokens;
+
+using namespace NSQLv1Generated;
+
+// Translates an INSERT/UPSERT/REPLACE ... INTO statement into a write-columns node.
+//
+// Steps:
+//   1. derive the write mode name from the leading keyword tokens;
+//   2. resolve the target table (cluster, binding or bind parameter);
+//   3. process table hints (TRUNCATE is consumed here) and ERASE BY columns;
+//   4. build the values source, validate the mode against the target service and
+//      assemble the options, including an optional RETURNING list.
+//
+// Returns nullptr after reporting an error through Ctx.
+TNodePtr TSqlIntoTable::Build(const TRule_into_table_stmt& node) {
+    static const TMap<TString, ESQLWriteColumnMode> str2Mode = {
+        {"InsertInto", ESQLWriteColumnMode::InsertInto},
+        {"InsertOrAbortInto", ESQLWriteColumnMode::InsertOrAbortInto},
+        {"InsertOrIgnoreInto", ESQLWriteColumnMode::InsertOrIgnoreInto},
+        {"InsertOrRevertInto", ESQLWriteColumnMode::InsertOrRevertInto},
+        {"UpsertInto", ESQLWriteColumnMode::UpsertInto},
+        {"ReplaceInto", ESQLWriteColumnMode::ReplaceInto},
+        {"InsertIntoWithTruncate", ESQLWriteColumnMode::InsertIntoWithTruncate}
+    };
+
+    auto& modeBlock = node.GetBlock1();
+
+    // Collect the keyword tokens that precede INTO.
+    TVector<TToken> modeTokens;
+    switch (modeBlock.Alt_case()) {
+        case TRule_into_table_stmt_TBlock1::AltCase::kAlt1:
+            modeTokens = {modeBlock.GetAlt1().GetToken1()};
+            break;
+        case TRule_into_table_stmt_TBlock1::AltCase::kAlt2:
+            modeTokens = {
+                modeBlock.GetAlt2().GetToken1(),
+                modeBlock.GetAlt2().GetToken2(),
+                modeBlock.GetAlt2().GetToken3()
+            };
+            break;
+        case TRule_into_table_stmt_TBlock1::AltCase::kAlt3:
+            modeTokens = {
+                modeBlock.GetAlt3().GetToken1(),
+                modeBlock.GetAlt3().GetToken2(),
+                modeBlock.GetAlt3().GetToken3()
+            };
+            break;
+        case TRule_into_table_stmt_TBlock1::AltCase::kAlt4:
+            modeTokens = {
+                modeBlock.GetAlt4().GetToken1(),
+                modeBlock.GetAlt4().GetToken2(),
+                modeBlock.GetAlt4().GetToken3()
+            };
+            break;
+        case TRule_into_table_stmt_TBlock1::AltCase::kAlt5:
+            modeTokens = {modeBlock.GetAlt5().GetToken1()};
+            break;
+        case TRule_into_table_stmt_TBlock1::AltCase::kAlt6:
+            modeTokens = {modeBlock.GetAlt6().GetToken1()};
+            break;
+        case TRule_into_table_stmt_TBlock1::AltCase::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    TVector<TString> modeStrings;
+    modeStrings.reserve(modeTokens.size());
+    TVector<TString> userModeStrings;
+    userModeStrings.reserve(modeTokens.size());
+
+    for (auto& token : modeTokens) {
+        auto tokenStr = Token(token);
+
+        // Capitalized form ("Insert") is concatenated into the key of str2Mode.
+        auto modeStr = tokenStr;
+        modeStr.to_lower();
+        modeStr.to_upper(0, 1);
+        modeStrings.push_back(modeStr);
+
+        // Upper-case form ("INSERT") is used in user-facing messages.
+        auto userModeStr = tokenStr;
+        userModeStr.to_upper();
+        userModeStrings.push_back(userModeStr);
+    }
+
+    modeStrings.push_back("Into");
+    userModeStrings.push_back("INTO");
+
+    SqlIntoModeStr = JoinRange("", modeStrings.begin(), modeStrings.end());
+    SqlIntoUserModeStr = JoinRange(" ", userModeStrings.begin(), userModeStrings.end());
+
+    const auto& intoTableRef = node.GetRule_into_simple_table_ref3();
+    const auto& tableRef = intoTableRef.GetRule_simple_table_ref1();
+    const auto& tableRefCore = tableRef.GetRule_simple_table_ref_core1();
+
+    auto service = Ctx.Scoped->CurrService;
+    auto cluster = Ctx.Scoped->CurrCluster;
+    std::pair<bool, TDeferredAtom> nameOrAt;
+    bool isBinding = false;
+    switch (tableRefCore.Alt_case()) {
+        case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore1: {
+            const auto& objectRef = tableRefCore.GetAlt_simple_table_ref_core1().GetRule_object_ref1();
+            if (objectRef.HasBlock1()) {
+                const auto& clusterExpr = objectRef.GetBlock1().GetRule_cluster_expr1();
+                bool hasAt = objectRef.GetRule_id_or_at2().HasBlock1();
+                // Only a name without '@' is tried as a binding.
+                bool result = !hasAt ?
+                    ClusterExprOrBinding(clusterExpr, service, cluster, isBinding) : ClusterExpr(clusterExpr, false, service, cluster);
+                if (!result) {
+                    return nullptr;
+                }
+            }
+
+            if (!isBinding && cluster.Empty()) {
+                Ctx.Error() << "No cluster name given and no default cluster is selected";
+                return nullptr;
+            }
+
+            auto id = Id(objectRef.GetRule_id_or_at2(), *this);
+            nameOrAt = std::make_pair(id.first, TDeferredAtom(Ctx.Pos(), id.second));
+            break;
+        }
+        case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore2: {
+            auto at = tableRefCore.GetAlt_simple_table_ref_core2().HasBlock1();
+            TString name;
+            if (!NamedNodeImpl(tableRefCore.GetAlt_simple_table_ref_core2().GetRule_bind_parameter2(), name, *this)) {
+                return nullptr;
+            }
+            auto named = GetNamedNode(name);
+            if (!named) {
+                return nullptr;
+            }
+
+            if (cluster.Empty()) {
+                Ctx.Error() << "No cluster name given and no default cluster is selected";
+                return nullptr;
+            }
+
+            TDeferredAtom table;
+            MakeTableFromExpression(Ctx.Pos(), Ctx, named, table);
+            nameOrAt = std::make_pair(at, table);
+            break;
+        }
+        case TRule_simple_table_ref_core::AltCase::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    bool withTruncate = false;
+    TTableHints tableHints;
+    if (tableRef.HasBlock2()) {
+        auto hints = TableHintsImpl(tableRef.GetBlock2().GetRule_table_hints1(), service);
+        if (!hints) {
+            Ctx.Error() << "Failed to parse table hints";
+            return nullptr;
+        }
+        // std::erase_if returns the number of removed entries, so a single pass both
+        // strips the TRUNCATE hint and tells whether it was present.
+        const auto erasedCount = std::erase_if(*hints, [](const auto& hint) { return to_upper(hint.first) == "TRUNCATE"; });
+        withTruncate = erasedCount > 0;
+        tableHints = std::move(*hints);
+    }
+
+    TVector<TString> eraseColumns;
+    if (intoTableRef.HasBlock2()) {
+        if (service != StatProviderName) {
+            Ctx.Error() << "ERASE BY is unsupported for " << service;
+            return nullptr;
+        }
+
+        PureColumnListStr(
+            intoTableRef.GetBlock2().GetRule_pure_column_list3(), *this, eraseColumns
+        );
+    }
+
+    if (withTruncate) {
+        if (SqlIntoModeStr != "InsertInto") {
+            Error() << "Unable " << SqlIntoUserModeStr << " with truncate mode";
+            return nullptr;
+        }
+        SqlIntoModeStr += "WithTruncate";
+        SqlIntoUserModeStr += " ... WITH TRUNCATE";
+    }
+    const auto iterMode = str2Mode.find(SqlIntoModeStr);
+    YQL_ENSURE(iterMode != str2Mode.end(), "Invalid sql write mode string: " << SqlIntoModeStr);
+    const auto SqlIntoMode = iterMode->second;
+
+    TPosition pos(Ctx.Pos());
+    TTableRef table(Ctx.MakeName("table"), service, cluster, nullptr);
+    if (isBinding) {
+        const TString* binding = nameOrAt.second.GetLiteral();
+        YQL_ENSURE(binding);
+        YQL_ENSURE(!nameOrAt.first);
+        if (!ApplyTableBinding(*binding, table, tableHints)) {
+            return nullptr;
+        }
+    } else {
+        table.Keys = BuildTableKey(pos, service, cluster, nameOrAt.second, {nameOrAt.first ? "@" : ""});
+    }
+
+    Ctx.IncrementMonCounter("sql_insert_clusters", table.Cluster.GetLiteral() ? *table.Cluster.GetLiteral() : "unknown");
+
+    auto values = TSqlIntoValues(Ctx, Mode).Build(node.GetRule_into_values_source4(), SqlIntoUserModeStr);
+    if (!values) {
+        return nullptr;
+    }
+    if (!ValidateServiceName(node, table, SqlIntoMode, GetPos(modeTokens[0]))) {
+        return nullptr;
+    }
+    Ctx.IncrementMonCounter("sql_features", SqlIntoModeStr);
+
+    auto options = BuildIntoTableOptions(pos, eraseColumns, tableHints);
+
+    if (node.HasBlock5()) {
+        options = options->L(options, ReturningList(node.GetBlock5().GetRule_returning_columns_list1()));
+    }
+
+    return BuildWriteColumns(pos, Ctx.Scoped, table,
+                             ToWriteColumnsMode(SqlIntoMode), std::move(values),
+                             options);
+}
+
+// Checks that the requested write mode is supported by the service of the target
+// table. On a mismatch an error is reported at pos, the "sql_errors" counter is
+// incremented and false is returned. The node argument is not used.
+bool TSqlIntoTable::ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table,
+    ESQLWriteColumnMode mode, const TPosition& pos) {
+    Y_UNUSED(node);
+    const auto serviceName = table.Service;
+    const bool isMapReduce = serviceName == YtProviderName;
+    const bool isKikimr = serviceName == KikimrProviderName || serviceName == YdbProviderName;
+    const bool isRtmr = serviceName == RtmrProviderName;
+    const bool isStat = serviceName == StatProviderName;
+
+    // Counter update shared by the "<mode> unsupported for <service>" failures.
+    const auto countUnsupported = [&] {
+        Ctx.IncrementMonCounter("sql_errors", TStringBuilder() << SqlIntoUserModeStr << "UnsupportedFor" << serviceName);
+    };
+
+    // Modes that only Kikimr/YDB tables accept; UPSERT is additionally accepted by the stat service.
+    const bool kikimrOnlyMode =
+        mode == ESQLWriteColumnMode::InsertOrAbortInto ||
+        mode == ESQLWriteColumnMode::InsertOrIgnoreInto ||
+        mode == ESQLWriteColumnMode::InsertOrRevertInto ||
+        (mode == ESQLWriteColumnMode::UpsertInto && !isStat);
+    if (!isKikimr && kikimrOnlyMode) {
+        Ctx.Error(pos) << SqlIntoUserModeStr << " is not supported for " << serviceName << " tables";
+        countUnsupported();
+        return false;
+    }
+
+    if (isMapReduce) {
+        if (mode == ESQLWriteColumnMode::ReplaceInto) {
+            Ctx.Error(pos) << "Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... for " << serviceName;
+            Ctx.IncrementMonCounter("sql_errors", "ReplaceIntoConflictUsage");
+            return false;
+        }
+        return true;
+    }
+
+    if (isKikimr) {
+        if (mode == ESQLWriteColumnMode::InsertIntoWithTruncate) {
+            Ctx.Error(pos) << "INSERT INTO WITH TRUNCATE is not supported for " << serviceName << " tables";
+            countUnsupported();
+            return false;
+        }
+        return true;
+    }
+
+    if (isRtmr) {
+        if (mode != ESQLWriteColumnMode::InsertInto) {
+            Ctx.Error(pos) << SqlIntoUserModeStr << " is unsupported for " << serviceName;
+            countUnsupported();
+            return false;
+        }
+        return true;
+    }
+
+    if (isStat && mode != ESQLWriteColumnMode::UpsertInto) {
+        Ctx.Error(pos) << SqlIntoUserModeStr << " is unsupported for " << serviceName;
+        countUnsupported();
+        return false;
+    }
+
+    return true;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_into_tables.h b/yql/essentials/sql/v1/sql_into_tables.h
new file mode 100644
index 00000000000..0e40f5669bb
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_into_tables.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "sql_translation.h"
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translator for the INTO-table statement (see Build()).
+class TSqlIntoTable: public TSqlTranslation {
+public:
+    TSqlIntoTable(TContext& ctx, NSQLTranslation::ESqlMode mode)
+        : TSqlTranslation(ctx, mode)
+    {}
+
+    // Builds the write node for the statement; nullptr means an error was reported.
+    TNodePtr Build(const TRule_into_table_stmt& node);
+
+private:
+    // Disabled declarations kept from the original source:
+    //bool BuildValuesRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow);
+    //TSourcePtr ValuesSource(const TRule_values_source& node, TVector<TString>& columnsHint);
+    //TSourcePtr IntoValuesSource(const TRule_into_values_source& node);
+
+    // Verifies that the write mode is supported by the service of the target table.
+    bool ValidateServiceName(const TRule_into_table_stmt& node, const TTableRef& table, ESQLWriteColumnMode mode,
+        const TPosition& pos);
+
+    TString SqlIntoModeStr;     // concatenated mode name, e.g. "InsertInto"
+    TString SqlIntoUserModeStr; // mode as shown in messages, e.g. "INSERT INTO"
+};
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_match_recognize.cpp b/yql/essentials/sql/v1/sql_match_recognize.cpp
new file mode 100644
index 00000000000..47e001efbb3
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_match_recognize.cpp
@@ -0,0 +1,377 @@
+#include "sql_match_recognize.h"
+#include "node.h"
+#include "sql_expression.h"
+#include <yql/essentials/core/sql_types/match_recognize.h>
+#include <algorithm>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+namespace {
+
+// Converts the column/line stored in a token into a TPosition.
+TPosition TokenPosition(const TToken& token) {
+    const auto column = token.GetColumn();
+    const auto line = token.GetLine();
+    return TPosition{column, line};
+}
+
+// Returns the name of a row pattern variable, resolved through Id() on its identifier.
+TString PatternVar(const TRule_row_pattern_variable_name& node, TSqlMatchRecognizeClause& ctx) {
+    const auto& identifier = node.GetRule_identifier1();
+    return Id(identifier, ctx);
+}
+
+} //namespace
+
+// Translates a MATCH_RECOGNIZE clause into a TMatchRecognizeBuilder.
+//
+// Every sub-clause is stored together with the position of its leading token;
+// when a sub-clause is absent, the position of the MATCH_RECOGNIZE token is used.
+// Returns an empty pointer after reporting an error when the feature is disabled
+// (Ctx.FeatureR010), when an unsupported sub-clause is used (ALL ROWS PER MATCH,
+// INITIAL/SEEK, SUBSET), or when a variable referenced by AFTER MATCH SKIP or
+// DEFINE does not occur in PATTERN.
+TMatchRecognizeBuilderPtr TSqlMatchRecognizeClause::CreateBuilder(const NSQLv1Generated::TRule_row_pattern_recognition_clause &matchRecognizeClause) {
+    const TPosition pos = TokenPosition(matchRecognizeClause.GetToken1());
+    if (!Ctx.FeatureR010) {
+        Ctx.Error(pos, TIssuesIds::CORE) << "Unexpected MATCH_RECOGNIZE";
+        return {};
+    }
+
+    TVector<TNamedFunction> partitioners;
+    TPosition partitionsPos = pos;
+    if (matchRecognizeClause.HasBlock3()) {
+        const auto& partitionClause = matchRecognizeClause.GetBlock3().GetRule_window_partition_clause1();
+        partitionsPos = TokenPosition(partitionClause.GetToken1());
+        partitioners = ParsePartitionBy(partitionClause);
+        if (!partitioners)
+            return {};
+    }
+
+    TVector<TSortSpecificationPtr> sortSpecs;
+    TPosition orderByPos = pos;
+    if (matchRecognizeClause.HasBlock4()) {
+        const auto& orderByClause = matchRecognizeClause.GetBlock4().GetRule_order_by_clause1();
+        orderByPos = TokenPosition(orderByClause.GetToken1());
+        if (!OrderByClause(orderByClause, sortSpecs)) {
+            return {};
+        }
+    }
+
+    TPosition measuresPos = pos;
+    TVector<TNamedFunction> measures;
+    if (matchRecognizeClause.HasBlock5()) {
+        const auto& measuresClause = matchRecognizeClause.GetBlock5().GetRule_row_pattern_measures1();
+        measuresPos = TokenPosition(measuresClause.GetToken1());
+        measures = ParseMeasures(measuresClause.GetRule_row_pattern_measure_list2());
+    }
+
+    TPosition rowsPerMatchPos = pos;
+    ERowsPerMatch rowsPerMatch = ERowsPerMatch::OneRow;
+    if (matchRecognizeClause.HasBlock6()) {
+        std::tie(rowsPerMatchPos, rowsPerMatch) = ParseRowsPerMatch(matchRecognizeClause.GetBlock6().GetRule_row_pattern_rows_per_match1());
+        if (ERowsPerMatch::AllRows == rowsPerMatch) {
+            //https://st.yandex-team.ru/YQL-16213
+            Ctx.Error(pos, TIssuesIds::CORE) << "ALL ROWS PER MATCH is not supported yet";
+            return {};
+        }
+    }
+
+    const auto& commonSyntax = matchRecognizeClause.GetRule_row_pattern_common_syntax7();
+
+    if (commonSyntax.HasBlock2()) {
+        const auto& initialOrSeek = commonSyntax.GetBlock2().GetRule_row_pattern_initial_or_seek1();
+        Ctx.Error(TokenPosition(initialOrSeek.GetToken1())) << "InitialOrSeek subclause is not allowed in FROM clause";
+        return {};
+    }
+
+    auto pattern = ParsePattern(commonSyntax.GetRule_row_pattern5());
+    const auto patternPos = TokenPosition(commonSyntax.GetToken3());
+
+    //this block is located before pattern block in grammar,
+    // but depends on it, so it is processed after pattern block
+    std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> skipTo {
+        pos,
+        NYql::NMatchRecognize::TAfterMatchSkipTo{
+            NYql::NMatchRecognize::EAfterMatchSkipTo::PastLastRow,
+            TString()
+        }
+    };
+    if (commonSyntax.HasBlock1()){
+        skipTo = ParseAfterMatchSkipTo(commonSyntax.GetBlock1().GetRule_row_pattern_skip_to3());
+        const auto varRequired =
+            NYql::NMatchRecognize::EAfterMatchSkipTo::ToFirst == skipTo.second.To ||
+            NYql::NMatchRecognize::EAfterMatchSkipTo::ToLast == skipTo.second.To ||
+            NYql::NMatchRecognize::EAfterMatchSkipTo::To == skipTo.second.To;
+        if (varRequired) {
+            const auto& allVars = NYql::NMatchRecognize::GetPatternVars(pattern);
+            if (allVars.find(skipTo.second.Var) == allVars.cend()) {
+                Ctx.Error(skipTo.first) << "Unknown pattern variable in AFTER MATCH";
+                return {};
+            }
+        }
+    }
+
+    TNodePtr subset;
+    TPosition subsetPos = pos;
+    if (commonSyntax.HasBlock7()) {
+        const auto& rowPatternSubset = commonSyntax.GetBlock7().GetRule_row_pattern_subset_clause1();
+        subsetPos = TokenPosition(rowPatternSubset.GetToken1());
+        Ctx.Error() << "SUBSET is not implemented yet";
+        //TODO https://st.yandex-team.ru/YQL-16225
+        return {};
+    }
+
+    // Held by value (not by const reference) so that the std::move below really moves.
+    auto definitions = ParseDefinitions(commonSyntax.GetRule_row_pattern_definition_list9());
+    const auto definitionsPos = TokenPosition(commonSyntax.GetToken8());
+
+    const auto& rowPatternVariables = GetPatternVars(pattern);
+    for (const auto& [callable, name]: definitions) {
+        if (!rowPatternVariables.contains(name)) {
+            Ctx.Error(callable->GetPos()) << "ROW PATTERN VARIABLE " << name << " is defined, but not mentioned in the PATTERN";
+            return {};
+        }
+    }
+
+    return new TMatchRecognizeBuilder{
+        pos,
+        std::pair{partitionsPos, std::move(partitioners)},
+        std::pair{orderByPos, std::move(sortSpecs)},
+        std::pair{measuresPos, std::move(measures)},
+        std::pair{rowsPerMatchPos, rowsPerMatch},
+        std::move(skipTo),
+        std::pair{patternPos, std::move(pattern)},
+        std::pair{subsetPos, std::move(subset)},
+        std::pair{definitionsPos, std::move(definitions)}
+    };
+}
+
+// Parses the PARTITION BY expression list. Each expression is paired with its
+// label, or with its column name when it has no label. An empty vector is
+// returned when the expression list fails to parse.
+TVector<TNamedFunction> TSqlMatchRecognizeClause::ParsePartitionBy(const TRule_window_partition_clause& partitionClause) {
+    TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+
+    TVector<TNodePtr> exprs;
+    const bool parsed = NamedExprList(partitionClause.GetRule_named_expr_list4(), exprs);
+    if (!parsed) {
+        return {};
+    }
+
+    TVector<TNamedFunction> result;
+    for (const auto& expr: exprs) {
+        auto name = expr->GetLabel();
+        if (!name && expr->GetColumnName()) {
+            name = *expr->GetColumnName();
+        }
+        result.push_back(TNamedFunction{expr, name});
+    }
+    return result;
+}
+
+// Parses one MEASURES item: the expression (expr1) and the name it is bound to (an_id3).
+TNamedFunction TSqlMatchRecognizeClause::ParseOneMeasure(const TRule_row_pattern_measure_definition& node) {
+    TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize);
+    auto measureExpr = TSqlExpression(Ctx, Mode).Build(node.GetRule_expr1());
+    auto measureName = Id(node.GetRule_an_id3(), *this);
+    //TODO https://st.yandex-team.ru/YQL-16186
+    //Each measure must be a lambda, that accepts 2 args:
+    // - List<InputTableColumns + _yql_Classifier, _yql_MatchNumber>
+    // - Struct that maps row pattern variables to ranges in the queue
+    return TNamedFunction{measureExpr, measureName};
+}
+
+// Parses every item of the MEASURES list, in source order.
+TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseMeasures(const TRule_row_pattern_measure_list& node) {
+    TVector<TNamedFunction> measures;
+    measures.push_back(ParseOneMeasure(node.GetRule_row_pattern_measure_definition1()));
+    for (const auto& tail: node.GetBlock2()) {
+        measures.push_back(ParseOneMeasure(tail.GetRule_row_pattern_measure_definition2()));
+    }
+    return measures;
+}
+
+// Maps the ROWS PER MATCH alternative to ERowsPerMatch and returns it together
+// with the position of the alternative's first token.
+std::pair<TPosition, ERowsPerMatch> TSqlMatchRecognizeClause::ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause) {
+    const auto alt = rowsPerMatchClause.GetAltCase();
+    if (alt == TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch1) {
+        const auto& oneRow = rowsPerMatchClause.GetAlt_row_pattern_rows_per_match1();
+        return std::pair{TokenPosition(oneRow.GetToken1()), ERowsPerMatch::OneRow};
+    }
+    if (alt == TRule_row_pattern_rows_per_match::kAltRowPatternRowsPerMatch2) {
+        const auto& allRows = rowsPerMatchClause.GetAlt_row_pattern_rows_per_match2();
+        return std::pair{TokenPosition(allRows.GetToken1()), ERowsPerMatch::AllRows};
+    }
+    // ALT_NOT_SET: the grammar changed without this code being updated.
+    Y_ABORT("You should change implementation according to grammar changes");
+}
+
+// Maps the AFTER MATCH SKIP alternative to a TAfterMatchSkipTo value and returns it
+// together with the position of the alternative's first token. For the TO FIRST,
+// TO LAST and TO alternatives the variable name is the raw value of the identifier token.
+std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> TSqlMatchRecognizeClause::ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause) {
+    namespace NMR = NYql::NMatchRecognize;
+
+    // Extracts the identifier token value from a skip-to variable name node.
+    const auto variableName = [](const auto& skipToVariable) {
+        return skipToVariable.GetRule_row_pattern_variable_name1().GetRule_identifier1().GetToken1().GetValue();
+    };
+
+    switch (skipToClause.GetAltCase()) {
+        case TRule_row_pattern_skip_to::kAltRowPatternSkipTo1: {
+            const auto& alt = skipToClause.GetAlt_row_pattern_skip_to1();
+            return std::pair{
+                TokenPosition(alt.GetToken1()),
+                NMR::TAfterMatchSkipTo{NMR::EAfterMatchSkipTo::NextRow, ""}
+            };
+        }
+        case TRule_row_pattern_skip_to::kAltRowPatternSkipTo2: {
+            const auto& alt = skipToClause.GetAlt_row_pattern_skip_to2();
+            return std::pair{
+                TokenPosition(alt.GetToken1()),
+                NMR::TAfterMatchSkipTo{NMR::EAfterMatchSkipTo::PastLastRow, ""}
+            };
+        }
+        case TRule_row_pattern_skip_to::kAltRowPatternSkipTo3: {
+            const auto& alt = skipToClause.GetAlt_row_pattern_skip_to3();
+            return std::pair{
+                TokenPosition(alt.GetToken1()),
+                NMR::TAfterMatchSkipTo{
+                    NMR::EAfterMatchSkipTo::ToFirst,
+                    variableName(alt.GetRule_row_pattern_skip_to_variable_name4())
+                }
+            };
+        }
+        case TRule_row_pattern_skip_to::kAltRowPatternSkipTo4: {
+            const auto& alt = skipToClause.GetAlt_row_pattern_skip_to4();
+            return std::pair{
+                TokenPosition(alt.GetToken1()),
+                NMR::TAfterMatchSkipTo{
+                    NMR::EAfterMatchSkipTo::ToLast,
+                    variableName(alt.GetRule_row_pattern_skip_to_variable_name4())
+                }
+            };
+        }
+        case TRule_row_pattern_skip_to::kAltRowPatternSkipTo5: {
+            const auto& alt = skipToClause.GetAlt_row_pattern_skip_to5();
+            return std::pair{
+                TokenPosition(alt.GetToken1()),
+                NMR::TAfterMatchSkipTo{
+                    NMR::EAfterMatchSkipTo::To,
+                    variableName(alt.GetRule_row_pattern_skip_to_variable_name3())
+                }
+            };
+        }
+        case TRule_row_pattern_skip_to::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Translates one alternative (term) of a row pattern into its list of factors.
+// Each factor is a primary (pattern variable, "$", "^", a parenthesized sub-pattern,
+// an exclusion "{- -}" or a PERMUTE expansion) followed by an optional quantifier.
+// Problems are reported through Ctx.Error(); when the nesting limit or the PERMUTE
+// item limit is exceeded an empty term is returned.
+NYql::NMatchRecognize::TRowPatternTerm TSqlMatchRecognizeClause::ParsePatternTerm(const TRule_row_pattern_term& node){
+ NYql::NMatchRecognize::TRowPatternTerm term;
+ for (const auto& factor: node.GetBlock1()) {
+ const auto& primaryVar = factor.GetRule_row_pattern_factor1().GetRule_row_pattern_primary1();
+ NYql::NMatchRecognize::TRowPatternPrimary primary;
+ bool output = true;
+ switch (primaryVar.GetAltCase()) {
+ case TRule_row_pattern_primary::kAltRowPatternPrimary1:
+ primary = PatternVar(primaryVar.GetAlt_row_pattern_primary1().GetRule_row_pattern_primary_variable_name1().GetRule_row_pattern_variable_name1(), *this);
+ break;
+ case TRule_row_pattern_primary::kAltRowPatternPrimary2:
+ primary = primaryVar.GetAlt_row_pattern_primary2().GetToken1().GetValue();
+ Y_ENSURE("$" == std::get<0>(primary));
+ break;
+ case TRule_row_pattern_primary::kAltRowPatternPrimary3:
+ primary = primaryVar.GetAlt_row_pattern_primary3().GetToken1().GetValue();
+ Y_ENSURE("^" == std::get<0>(primary));
+ break;
+ case TRule_row_pattern_primary::kAltRowPatternPrimary4: {
+ const auto& nested = primaryVar.GetAlt_row_pattern_primary4();
+ // Check the limit before descending so the depth counter stays balanced
+ // even when the error branch is taken.
+ if (PatternNestingLevel >= NYql::NMatchRecognize::MaxPatternNesting) {
+ Ctx.Error(TokenPosition(nested.GetToken1()))
+ << "To big nesting level in the pattern";
+ return NYql::NMatchRecognize::TRowPatternTerm{};
+ }
+ ++PatternNestingLevel;
+ primary = ParsePattern(nested.GetBlock2().GetRule_row_pattern1());
+ --PatternNestingLevel;
+ break;
+ }
+ case TRule_row_pattern_primary::kAltRowPatternPrimary5:
+ output = false;
+ // Take the position from this alternative's own first token: accessors of a
+ // non-active alternative only return a default (empty) message.
+ Ctx.Error(TokenPosition(primaryVar.GetAlt_row_pattern_primary5().GetToken1()))
+ << "ALL ROWS PER MATCH and {- -} are not supported yet"; //https://st.yandex-team.ru/YQL-16227
+ break;
+ case TRule_row_pattern_primary::kAltRowPatternPrimary6: {
+ const auto& permute = primaryVar.GetAlt_row_pattern_primary6().GetRule_row_pattern_permute1();
+ std::vector<NYql::NMatchRecognize::TRowPatternPrimary> items{ParsePattern(permute.GetRule_row_pattern3())};
+ for (const auto& p: permute.GetBlock4()) {
+ items.push_back(ParsePattern(p.GetRule_row_pattern2()));
+ }
+ //Permutations are currently syntactic sugar: they are expanded into all possible alternatives
+ if (items.size() > NYql::NMatchRecognize::MaxPermutedItems) {
+ // Point the error at the first token of the permute rule itself.
+ Ctx.Error(TokenPosition(permute.GetToken1()))
+ << "Too many items in permute";
+ return NYql::NMatchRecognize::TRowPatternTerm{};
+ }
+ // Start from the identity ordering 0..n-1 so that next_permutation enumerates every ordering.
+ std::vector<size_t> indexes(items.size());
+ std::generate(begin(indexes), end(indexes), [n = 0] () mutable { return n++; });
+ NYql::NMatchRecognize::TRowPattern permuted;
+ do {
+ NYql::NMatchRecognize::TRowPatternTerm permutation;
+ permutation.reserve(indexes.size());
+ for (const size_t index: indexes) {
+ permutation.push_back({items[index], 1, 1, true, false, false});
+ }
+ permuted.push_back(std::move(permutation));
+ } while (std::next_permutation(indexes.begin(), indexes.end()));
+ primary = permuted;
+ break;
+ }
+ case TRule_row_pattern_primary::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+ // Without a quantifier a factor matches exactly once, greedily.
+ uint64_t quantityMin = 1;
+ uint64_t quantityMax = 1;
+ constexpr uint64_t infinity = std::numeric_limits<uint64_t>::max();
+ bool greedy = true;
+ if (factor.GetRule_row_pattern_factor1().HasBlock2()) {
+ const auto& quantifier = factor.GetRule_row_pattern_factor1().GetBlock2().GetRule_row_pattern_quantifier1();
+ // In every alternative below the presence of the trailing optional block
+ // switches the quantifier from greedy to reluctant.
+ switch(quantifier.GetAltCase()){
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier1: //*
+ quantityMin = 0;
+ quantityMax = infinity;
+ greedy = !quantifier.GetAlt_row_pattern_quantifier1().HasBlock2();
+ break;
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier2: //+
+ quantityMax = infinity;
+ greedy = !quantifier.GetAlt_row_pattern_quantifier2().HasBlock2();
+ break;
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier3: //?
+ quantityMin = 0;
+ greedy = !quantifier.GetAlt_row_pattern_quantifier3().HasBlock2();
+ break;
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier4: //{ 2?, 4?}
+ // A missing lower bound means 0, a missing upper bound means "unbounded".
+ if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock2()) {
+ quantityMin = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock2().GetRule_integer1().GetToken1().GetValue());
+ }
+ else {
+ quantityMin = 0;
+ }
+ if (quantifier.GetAlt_row_pattern_quantifier4().HasBlock4()) {
+ quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier4().GetBlock4().GetRule_integer1().GetToken1().GetValue());
+ }
+ else {
+ quantityMax = infinity;
+ }
+ greedy = !quantifier.GetAlt_row_pattern_quantifier4().HasBlock6();
+
+ break;
+ case TRule_row_pattern_quantifier::kAltRowPatternQuantifier5: //{ n }
+ quantityMin = quantityMax = FromString(quantifier.GetAlt_row_pattern_quantifier5().GetRule_integer2().GetToken1().GetValue());
+ break;
+ case TRule_row_pattern_quantifier::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+ }
+ term.push_back(NYql::NMatchRecognize::TRowPatternFactor{std::move(primary), quantityMin, quantityMax, greedy, output, false});
+ }
+ return term;
+}
+
+// Builds a row pattern as the list of its alternatives: the mandatory first term
+// followed by every additional term found in the repeated block.
+NYql::NMatchRecognize::TRowPattern TSqlMatchRecognizeClause::ParsePattern(const TRule_row_pattern& node){
+ TVector<NYql::NMatchRecognize::TRowPatternTerm> alternatives;
+ const auto& firstTerm = node.GetRule_row_pattern_term1();
+ alternatives.push_back(ParsePatternTerm(firstTerm));
+ for (const auto& alternative: node.GetBlock2()) {
+ const auto& nextTerm = alternative.GetRule_row_pattern_term2();
+ alternatives.push_back(ParsePatternTerm(nextTerm));
+ }
+ return alternatives;
+}
+
+// Translates a single DEFINE entry: resolves the pattern variable name, then builds
+// its search condition while a MatchRecognize column-reference scope for that
+// variable is active.
+TNamedFunction TSqlMatchRecognizeClause::ParseOneDefinition(const TRule_row_pattern_definition& node){
+ const auto& nameRule = node.GetRule_row_pattern_definition_variable_name1().GetRule_row_pattern_variable_name1();
+ const auto& conditionRule = node.GetRule_row_pattern_definition_search_condition3().GetRule_search_condition1().GetRule_expr1();
+ const auto& definedVar = PatternVar(nameRule, *this);
+ // The scope object must outlive the expression build below.
+ TColumnRefScope scope(Ctx, EColumnRefState::MatchRecognize, true, definedVar);
+ const auto& predicate = TSqlExpression(Ctx, Mode).Build(conditionRule);
+ return TNamedFunction{predicate, definedVar};
+}
+
+// Collects all DEFINE entries of the clause, in source order.
+//TODO https://st.yandex-team.ru/YQL-16186
+//Each define must be a predicate lambda, that accepts 3 args:
+// - List<input table rows>
+// - A struct that maps row pattern variables to ranges in the queue
+// - An index of the current row
+TVector<TNamedFunction> TSqlMatchRecognizeClause::ParseDefinitions(const TRule_row_pattern_definition_list& node) {
+ TVector<TNamedFunction> definitions;
+ definitions.push_back(ParseOneDefinition(node.GetRule_row_pattern_definition1()));
+ for (const auto& extra: node.GetBlock2()) {
+ definitions.push_back(ParseOneDefinition(extra.GetRule_row_pattern_definition2()));
+ }
+ return definitions;
+}
+
+} //namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_match_recognize.h b/yql/essentials/sql/v1/sql_match_recognize.h
new file mode 100644
index 00000000000..6766acc9537
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_match_recognize.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "sql_translation.h"
+#include "match_recognize.h"
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translator for the row pattern recognition (MATCH_RECOGNIZE) clause.
+// Derives from TSqlTranslation and forwards the context and SQL mode to it.
+class TSqlMatchRecognizeClause: public TSqlTranslation {
+public:
+ TSqlMatchRecognizeClause(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {}
+ // Public entry point; presumably builds the whole clause from its parse tree
+ // using the private helpers below (implementation is in the .cpp file).
+ TMatchRecognizeBuilderPtr CreateBuilder(const TRule_row_pattern_recognition_clause& node);
+private:
+ // One helper per sub-clause; each takes the corresponding grammar rule node.
+ TVector<TNamedFunction> ParsePartitionBy(const TRule_window_partition_clause& partitionClause);
+ TNamedFunction ParseOneMeasure(const TRule_row_pattern_measure_definition& node);
+ TVector<TNamedFunction> ParseMeasures(const TRule_row_pattern_measure_list& node);
+ // The two helpers below also return the source position of the parsed clause.
+ std::pair<TPosition, ERowsPerMatch> ParseRowsPerMatch(const TRule_row_pattern_rows_per_match& rowsPerMatchClause);
+ std::pair<TPosition, NYql::NMatchRecognize::TAfterMatchSkipTo> ParseAfterMatchSkipTo(const TRule_row_pattern_skip_to& skipToClause);
+ // ParsePattern and ParsePatternTerm are mutually recursive: a parenthesized
+ // primary inside a term is parsed as a nested pattern.
+ NYql::NMatchRecognize::TRowPatternTerm ParsePatternTerm(const TRule_row_pattern_term& node);
+ NYql::NMatchRecognize::TRowPattern ParsePattern(const TRule_row_pattern& node);
+ TNamedFunction ParseOneDefinition(const TRule_row_pattern_definition& node);
+ TVector<TNamedFunction> ParseDefinitions(const TRule_row_pattern_definition_list& node);
+private:
+ // Depth of parenthesized sub-patterns currently being parsed; ParsePatternTerm
+ // checks it against NYql::NMatchRecognize::MaxPatternNesting.
+ size_t PatternNestingLevel = 0;
+};
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_match_recognize_ut.cpp b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
new file mode 100644
index 00000000000..20c5e6ab7bb
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_match_recognize_ut.cpp
@@ -0,0 +1,742 @@
+#include "sql_ut.h"
+#include "match_recognize.h"
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/core/sql_types/match_recognize.h>
+#include <yql/essentials/sql/sql.h>
+#include <util/generic/map.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/split.h>
+
+using namespace NSQLTranslation;
+
+// Runs SqlToYql on `query` prefixed with the pragma that switches the
+// FeatureR010 prototype on.
+NYql::TAstParseResult MatchRecognizeSqlToYql(const TString& query) {
+ const TString pragmaPrefix = R"(
+pragma FeatureR010="prototype";
+)";
+ const TString fullQuery = pragmaPrefix + query;
+ return SqlToYql(fullQuery);
+}
+
+// Locates the node whose child 1 is the atom "match_recognize", then the node
+// inside it whose child 1 is the atom `name`, and returns that node's child 2.
+// Both lookups are asserted, so a missing block or parameter fails the test with
+// a readable message instead of dereferencing a null pointer.
+const NYql::TAstNode* FindMatchRecognizeParam(const NYql::TAstNode* root, TString name) {
+ auto matchRecognizeBlock = FindNodeByChildAtomContent(root, 1, "match_recognize");
+ UNIT_ASSERT(matchRecognizeBlock);
+ auto paramNode = FindNodeByChildAtomContent(matchRecognizeBlock, 1, name);
+ UNIT_ASSERT_C(paramNode, "match_recognize parameter is not found: " << name);
+ return paramNode->GetChild(2);
+}
+
+// Checks that `node` has the form (quote (<size children>)): a two-element list
+// whose first child has content "quote" and whose second child is a list of
+// exactly `size` elements. Every step is also asserted via UNIT_ASSERT*, so a
+// mismatch is reported by the test framework at the exact failing check.
+bool IsQuotedListOfSize(const NYql::TAstNode* node, ui32 size) {
+ UNIT_ASSERT(node->IsListOfSize(2));
+ if (!node->IsListOfSize(2))
+ return false;
+ UNIT_ASSERT_EQUAL(node->GetChild(0)->GetContent(), "quote");
+ if (node->GetChild(0)->GetContent() != "quote")
+ return false;
+ UNIT_ASSERT_EQUAL(node->GetChild(1)->GetChildrenCount(), size);
+ return node->GetChild(1)->IsListOfSize(size);
+}
+
+// True when `node` is a three-element list that starts with the atom "lambda"
+// and whose second element is a quoted list of `numberOfArgs` items.
+bool IsLambda(const NYql::TAstNode* node, ui32 numberOfArgs) {
+ const bool looksLikeLambda = node->IsListOfSize(3)
+ && node->GetChild(0)->IsAtom()
+ && node->GetChild(0)->GetContent() == "lambda";
+ // The argument list is inspected only after the shape check has passed.
+ return looksLikeLambda && IsQuotedListOfSize(node->GetChild(1), numberOfArgs);
+}
+
+Y_UNIT_TEST_SUITE(MatchRecognize) {
+ // Smallest MATCH_RECOGNIZE query used as a baseline in this suite:
+ // a single-variable pattern with one DEFINE entry and no optional clauses.
+ auto minValidMatchRecognizeSql = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ( A )
+ DEFINE A as A
+ )
+)";
+ // The same query must be rejected by plain SqlToYql and accepted once
+ // MatchRecognizeSqlToYql prepends the enabling pragma.
+ Y_UNIT_TEST(EnabledWithPragma) {
+ UNIT_ASSERT(not SqlToYql(minValidMatchRecognizeSql).IsOk());
+ UNIT_ASSERT(MatchRecognizeSqlToYql(minValidMatchRecognizeSql).IsOk());
+ }
+
+ // The "input" parameter of the generated match_recognize block is expected
+ // to be the atom "core".
+ Y_UNIT_TEST(InputTableName) {
+ auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
+ UNIT_ASSERT(r.IsOk());
+ auto input = FindMatchRecognizeParam(r.Root, "input");
+ UNIT_ASSERT(input->IsAtom() && input->GetContent() == "core");
+ }
+
+ // Combining MATCH_RECOGNIZE with TABLESAMPLE on the same source must be rejected.
+ Y_UNIT_TEST(MatchRecognizeAndSample) {
+ auto matchRecognizeAndSample = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ( A )
+ DEFINE A as A
+ ) TABLESAMPLE BERNOULLI(1.0)
+)";
+ UNIT_ASSERT(not MatchRecognizeSqlToYql(matchRecognizeAndSample).IsOk());
+ }
+
+ // Without PARTITION BY both the key selector (its child 2) and the list of
+ // partition columns must be empty quoted lists.
+ Y_UNIT_TEST(NoPartitionBy) {
+ auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
+ UNIT_ASSERT(r.IsOk());
+ auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
+ UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 0)); //empty tuple
+ auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
+ UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 0)); //empty tuple
+ }
+
+ // Three PARTITION BY items (aliased column, aliased expression, plain column)
+ // must yield three entries both in the key selector and in the column list.
+ Y_UNIT_TEST(PartitionBy) {
+ auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PARTITION BY col1 as c1, ~CAST(col1 as Int32) as invertedC1, c2
+ PATTERN ( A )
+ DEFINE A as A
+ )
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
+ UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 3));
+ auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
+ UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 3));
+ //TODO check partitioner lambdas(alias/no alias)
+ }
+
+ // Without ORDER BY the sortTraits parameter is a single-element list
+ // whose only child has content "Void".
+ Y_UNIT_TEST(NoOrderBy) {
+ auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
+ UNIT_ASSERT(r.IsOk());
+ auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
+ UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(1));
+ UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "Void");
+ }
+
+ // Three ORDER BY keys must produce a four-element SortTraits callable whose
+ // child 2 and the body (child 2) of child 3 are both three-item quoted lists.
+ Y_UNIT_TEST(OrderBy) {
+ auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ ORDER BY col1, ~CAST(col1 as Int32), c2
+ PATTERN ( A )
+ DEFINE A as A
+ )
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
+ UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(4));
+ UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "SortTraits");
+ UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(2), 3));
+ UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(3)->GetChild(2), 3));
+ }
+ // Two MEASURES entries: the measures node must have 6 children, child 3 being
+ // the quoted list of output column names ("T", "Key") and children 4 and 5
+ // being two-argument lambdas, one per measure.
+ Y_UNIT_TEST(Measures) {
+ auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ MEASURES
+ Last(Q.dt) as T,
+ First(Y.key) as Key
+ PATTERN ( Y Q )
+ DEFINE Y as true
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto measures = FindMatchRecognizeParam(r.Root, "measures");
+ UNIT_ASSERT_VALUES_EQUAL(6, measures->GetChildrenCount());
+ const auto columnNames = measures->GetChild(3);
+ UNIT_ASSERT(IsQuotedListOfSize(columnNames, 2));
+ UNIT_ASSERT_VALUES_EQUAL("T", columnNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Key", columnNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ UNIT_ASSERT(IsLambda(measures->GetChild(4), 2));
+ UNIT_ASSERT(IsLambda(measures->GetChild(5), 2));
+ }
+ // Covers the ROWS PER MATCH clause: explicit ONE ROW, ALL ROWS (expected to
+ // fail for now) and the default when the clause is omitted.
+ Y_UNIT_TEST(RowsPerMatch) {
+ { // explicit ONE ROW PER MATCH
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ ONE ROW PER MATCH
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
+ UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent());
+ }
+ { // ALL ROWS PER MATCH is expected to be rejected
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ ALL ROWS PER MATCH
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk()); ///https://st.yandex-team.ru/YQL-16213
+ }
+ { //default
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
+ UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent());
+ }
+
+ }
+ // Covers every AFTER MATCH SKIP variant. For the variants that name a pattern
+ // variable, a variable used in PATTERN (Y) must be accepted and an unknown
+ // one (T) must make the translation fail.
+ Y_UNIT_TEST(SkipAfterMatch) {
+ { // SKIP TO NEXT ROW
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO NEXT ROW
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_NextRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ }
+ { // SKIP PAST LAST ROW
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP PAST LAST ROW
+ PATTERN (A)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_PastLastRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ }
+ { // SKIP TO FIRST <known variable>
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO FIRST Y
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToFirst", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ }
+ { // SKIP TO FIRST <unknown variable>
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO FIRST T -- unknown pattern var
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+ { // SKIP TO LAST <known variable>
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO LAST Y
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToLast", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ }
+ { // SKIP TO LAST <unknown variable>
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO LAST T -- unknown pattern var
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+ { // SKIP TO <known variable>
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO Y
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
+ UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_To", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ }
+ { // SKIP TO <unknown variable>
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ AFTER MATCH SKIP TO T -- unknown pattern var
+ PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
+ DEFINE A as A
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+ }
+ // A query with the INITIAL keyword before PATTERN is expected to be rejected.
+ Y_UNIT_TEST(row_pattern_initial) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ INITIAL
+ PATTERN (A+ B* C?)
+ DEFINE A as A
+ )
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+
+ // A query with the SEEK keyword before PATTERN is expected to be rejected.
+ Y_UNIT_TEST(row_pattern_seek) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ SEEK
+ PATTERN (A+ B* C?)
+ DEFINE A as A
+ )
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(not r.IsOk());
+ }
+
+ // A pattern with one alternative of three factors: the MatchRecognizePattern
+ // callable must have a single term holding three items.
+ Y_UNIT_TEST(PatternSimple) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN (A+ B* C?)
+ DEFINE A as A
+ )
+)";
+ const auto& r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 1);
+ const auto& term = patternCallable->GetChild(1);
+ UNIT_ASSERT(IsQuotedListOfSize(term, 3));
+ }
+
+ // Four alternatives separated by '|': the callable must have the name plus
+ // four terms, and the last term (G H I J ^) must hold five factors.
+ Y_UNIT_TEST(PatternMultiTerm) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? I J ^)
+ DEFINE A as A
+ )
+)";
+ const auto& r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 4);
+ const auto& lastTerm = patternCallable->GetChild(4);
+ UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 5));
+ }
+
+ // A parenthesized group is expected to become a nested MatchRecognizePattern
+ // stored as the primary of its factor; each factor is a quoted list of six fields.
+ Y_UNIT_TEST(PatternWithParanthesis) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN (
+ A | ($ B)+ C D
+ )
+ DEFINE A as A
+ )
+)";
+ const auto& r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 2);
+ const auto& firstTerm = patternCallable->GetChild(1);
+ UNIT_ASSERT(IsQuotedListOfSize(firstTerm, 1));
+ const auto& lastTerm = patternCallable->GetChild(2);
+ UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3));
+ // First factor of the last term is "($ B)+": its primary is the nested pattern.
+ const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0);
+ UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 6));
+ const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0);
+ UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1);
+ UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern");
+ UNIT_ASSERT(IsQuotedListOfSize(nestedPattern->GetChild(1), 2));
+ }
+
+ // A pattern made of 24 parenthesized alternatives (the orderings of A, B, C, D)
+ // must translate successfully.
+ Y_UNIT_TEST(PatternManyAlternatives) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+PATTERN (
+ (A B C D ) | (B A C D ) | (C B A D ) | (B C A D ) | (C A B D ) | (A C B D ) | (D A B C ) | (A D B C ) | (B A D C ) | (A B D C ) | (B D A C ) | (D B A C ) | (C D A B ) | (D C A B ) | (A D C B ) | (D A C B ) | (A C D B ) | (C A D B ) | (B C D A ) | (C B D A ) | (D C B A ) | (C D B A ) | (D B C A ) | (B D C A )
+ )
+ DEFINE A as A
+)
+)";
+ UNIT_ASSERT(MatchRecognizeSqlToYql(stmt).IsOk());
+ }
+
+ // Wraps the variable A into nested parentheses: exactly the maximum allowed
+ // depth must be accepted, one level more must be rejected.
+ Y_UNIT_TEST(PatternLimitedNesting) {
+ // Use the same constant the translator checks against, so the test follows
+ // the limit if it is ever changed.
+ const size_t MaxNesting = NYql::NMatchRecognize::MaxPatternNesting;
+ for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) {
+ const size_t depth = MaxNesting + extraNesting;
+ std::string pattern(depth, '(');
+ pattern.push_back('A');
+ pattern.append(depth, ')');
+ const auto stmt = TString(R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN(
+)") + pattern + R"(
+ )
+ DEFINE A as A
+ )
+)";
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ if (not extraNesting) {
+ UNIT_ASSERT(r.IsOk());
+ } else {
+ UNIT_ASSERT(not r.IsOk());
+ }
+ }
+ }
+
+ // Checks how every quantifier form is translated into the
+ // (QuantityMin, QuantityMax, Greedy) fields of a pattern factor.
+ Y_UNIT_TEST(PatternFactorQuantifiers) {
+ // Builds a query whose PATTERN consists of the given factor text only.
+ auto makeRequest = [](const TString& factor) {
+ return TString(R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN(
+)") + factor + R"(
+ )
+ DEFINE A as A
+ )
+)";
+ };
+ // Extracts the first factor of the first term from the translated AST;
+ // children 1, 2 and 3 of the factor tuple are read as min, max and greedy.
+ auto getTheFactor = [](const NYql::TAstNode* root) {
+ const auto& patternCallable = FindMatchRecognizeParam(root, "pattern");
+ const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
+ return NYql::NMatchRecognize::TRowPatternFactor{
+ TString(), //primary var or subexpression, not used in this test
+ FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin
+ FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax
+ FromString<bool>(factor->GetChild(3)->GetChild(1)->GetContent()), //Greedy
+ false, //Output, not used in this test
+ false, // Flag "Unused", not used in this test
+ };
+ };
+ {
+ //no quantifiers
+ const auto stmt = makeRequest("A");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //optional greedy(default)
+ const auto stmt = makeRequest("A?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //optional reluctant
+ const auto stmt = makeRequest("A??");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+ {
+ //+ greedy(default)
+ const auto stmt = makeRequest("A+");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //+ reluctant
+ const auto stmt = makeRequest("A+?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+ {
+ //* greedy(default)
+ const auto stmt = makeRequest("A*");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //* reluctant
+ const auto stmt = makeRequest("A*?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+ {
+ //exact n
+ const auto stmt = makeRequest("A{4}");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMax);
+ }
+ {
+ //from n to m greedy(default)
+ const auto stmt = makeRequest("A{4, 7}");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //from n to m reluctant
+ const auto stmt = makeRequest("A{4,7}?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+ {
+ //at least n greedy(default)
+ const auto stmt = makeRequest("A{4,}");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //at least n reluctant
+ const auto stmt = makeRequest("A{4,}?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+ {
+ //at most m greedy(default)
+ const auto stmt = makeRequest("A{,7}");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+ UNIT_ASSERT(factor.Greedy);
+ }
+ {
+ //at most m reluctant
+ const auto stmt = makeRequest("A{,7}?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+
+ {
+ //quantifiers on subexpression
+ const auto stmt = makeRequest("(A B+ C | D | ^){4,7}?");
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto& factor = getTheFactor(r.Root);
+ UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
+ UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
+ UNIT_ASSERT(!factor.Greedy);
+ }
+ }
+
+ // PERMUTE over five variables must be expanded into a nested pattern that
+ // lists all 5! = 120 orderings as alternatives.
+ Y_UNIT_TEST(Permute) {
+ const auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN (
+ PERMUTE(A, B, C, D, E) --5 variables produce 5! permutations
+ )
+ DEFINE A as A
+)
+)";
+ const auto& r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+
+ const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
+ // Navigate to the primary of the first factor of the first term.
+ const auto permutePattern = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1)->GetChild(0);
+ UNIT_ASSERT(permutePattern->IsListOfSize(1 + 120)); //CallableName + 5!
+ }
+
+ // Generates PERMUTE(A0, A1, ...) with 1..MaxPermutedItems+1 variables: every
+ // size up to the limit must translate, the first size above it must fail.
+ Y_UNIT_TEST(PermuteTooMuch) {
+ for (size_t n = 1; n <= NYql::NMatchRecognize::MaxPermutedItems + 1; ++n) {
+ std::vector<std::string> vars(n);
+ // The lambda's own counter `n` (init-capture) shadows the loop variable.
+ std::generate(begin(vars), end(vars), [n = 0] () mutable { return "A" + std::to_string(n++);});
+ // The variable names are joined with ", " and spliced between the two raw-string halves.
+ const auto stmt = TString(R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN (
+ PERMUTE( )" + std::accumulate(cbegin(vars) + 1, cend(vars), vars.front(),
+ [](const std::string& acc, const std::string& v) {
+ return acc + ", " + v;
+ }) +
+ R"(
+ )
+ )
+ DEFINE A0 as A0
+)
+)"
+ );
+ const auto &r = MatchRecognizeSqlToYql(stmt);
+ if (n <= NYql::NMatchRecognize::MaxPermutedItems) {
+ UNIT_ASSERT(r.IsOk());
+ } else {
+ UNIT_ASSERT(!r.IsOk());
+ }
+ }
+ }
+
+
+ // Placeholder: the SUBSET clause has no test coverage yet.
+ Y_UNIT_TEST(row_pattern_subset_clause) {
+ //TODO https://st.yandex-team.ru/YQL-16186
+ }
+
+ // Three DEFINE entries: the define node must have 7 children, child 3 being
+ // the quoted list of variable names (Y, Q, L) and children 4..6 being
+ // three-argument lambdas, one per definition.
+ Y_UNIT_TEST(Defines) {
+ auto stmt = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ( Y Q L )
+ DEFINE
+ Y as true,
+ Q as Q.V = "value",
+ L as L.V = LAST(Q.T)
+)
+)";
+ auto r = MatchRecognizeSqlToYql(stmt);
+ UNIT_ASSERT(r.IsOk());
+ const auto defines = FindMatchRecognizeParam(r.Root, "define");
+ UNIT_ASSERT_VALUES_EQUAL(7, defines->GetChildrenCount());
+ const auto varNames = defines->GetChild(3);
+ UNIT_ASSERT(IsQuotedListOfSize(varNames, 3));
+ UNIT_ASSERT_VALUES_EQUAL("Y", varNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("Q", varNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
+ UNIT_ASSERT_VALUES_EQUAL("L", varNames->GetChild(1)->GetChild(2)->GetChild(1)->GetContent());
+
+ UNIT_ASSERT(IsLambda(defines->GetChild(4), 3));
+ UNIT_ASSERT(IsLambda(defines->GetChild(5), 3));
+ UNIT_ASSERT(IsLambda(defines->GetChild(6), 3));
+ }
+
+ // DEFINE may only mention variables that occur in PATTERN: defining Q (used
+ // in the pattern) succeeds, defining Y (absent from it) must fail.
+ Y_UNIT_TEST(AbsentRowPatternVariableInDefines) {
+ // Builds a query with PATTERN ( Q ) and a single definition for `var`.
+ auto getStatement = [](const TString &var) {
+ return TStringBuilder() << R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+PATTERN ( Q )
+DEFINE
+)" << var << " AS TRUE )";
+ };
+ UNIT_ASSERT(MatchRecognizeSqlToYql(getStatement("Q")).IsOk());
+ UNIT_ASSERT(!MatchRecognizeSqlToYql(getStatement("Y")).IsOk());
+ }
+
+ // Inside the definition of L, a reference to another variable's column is
+ // accepted when wrapped in LAST(...) and rejected as a bare Q.dt.
+ Y_UNIT_TEST(CheckRequiredNavigationFunction) {
+ TString stmtPrefix = R"(
+USE plato;
+SELECT *
+FROM Input MATCH_RECOGNIZE(
+ PATTERN ( Y Q L )
+ DEFINE
+ L as L.V =
+)";
+ //Be aware that right parenthesis is added at the end of the query as required
+ UNIT_ASSERT(MatchRecognizeSqlToYql(stmtPrefix + "LAST(Q.dt) )").IsOk());
+ UNIT_ASSERT(!MatchRecognizeSqlToYql(stmtPrefix + "Q.dt )").IsOk());
+ }
+
+}
diff --git a/yql/essentials/sql/v1/sql_query.cpp b/yql/essentials/sql/v1/sql_query.cpp
new file mode 100644
index 00000000000..d0fb8737e15
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_query.cpp
@@ -0,0 +1,3446 @@
+#include "sql_query.h"
+#include "sql_expression.h"
+#include "sql_select.h"
+#include "sql_into_tables.h"
+#include "sql_values.h"
+#include "node.h"
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/sql/v1/object_processing.h>
+#include <yql/essentials/utils/yql_paths.h>
+#include <util/generic/scope.h>
+#include <util/string/join.h>
+#ifdef GetMessage
+#undef GetMessage
+#endif
+
+namespace NSQLTranslationV1 {
+
+using NALPDefault::SQLv1LexerTokens;
+using NALPDefaultAntlr4::SQLv1Antlr4Lexer;
+
+using namespace NSQLv1Generated;
+
+// Appends the column name of every set_target in `node` to `targetList`,
+// first the leading target and then the ones from the repeated block, in order.
+void FillTargetList(TTranslation& ctx, const TRule_set_target_list& node, TVector<TString>& targetList) {
+    const auto appendTarget = [&ctx, &targetList](const auto& target) {
+        targetList.push_back(ColumnNameAsSingleStr(ctx, target.GetRule_column_name1()));
+    };
+
+    appendTarget(node.GetRule_set_target2());
+    for (auto& extra : node.GetBlock3()) {
+        appendTarget(extra.GetRule_set_target2());
+    }
+}
+
+// Converts a textual package version into its numeric form.
+// The aliases "release" and "draft" map to 0 and 1 respectively; any other
+// input is handed to TryFromString, whose result is returned as is.
+bool PackageVersionFromString(const TString& s, ui32& version) {
+    const bool isRelease = (s == "release");
+    if (isRelease || s == "draft") {
+        version = isRelease ? 0 : 1;
+        return true;
+    }
+    return TryFromString(s, version);
+}
+
+// Appends `node` to the end of `blocks`.
+void TSqlQuery::AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node) {
+    blocks.push_back(node);
+}
+
+// Parses a single replication setting (`key = value`) and stores its value in
+// `out` under the lower-cased key name.
+//
+// out    - accumulated settings; receives the new entry on success.
+// in     - parsed replication_settings_entry rule.
+// ctx    - translation used to read tokens and to report errors.
+// create - true when the entry belongs to a CREATE statement; the state
+//          settings ("state", "failover_mode") are rejected in that case.
+//
+// Returns false, after reporting an error through ctx, when the key is
+// unknown, when a state setting is used with create == true, or when the same
+// key was already present in `out`. Returns true otherwise.
+static bool AsyncReplicationSettingsEntry(std::map<TString, TNodePtr>& out,
+        const TRule_replication_settings_entry& in, TTranslation& ctx, bool create)
+{
+    auto key = IdEx(in.GetRule_an_id1(), ctx);
+    auto value = BuildLiteralSmartString(ctx.Context(), ctx.Token(in.GetToken3()));
+
+    // The recognised key sets never change, so build them once rather than on every call.
+    static const TSet<TString> configSettings = {
+        "connection_string",
+        "endpoint",
+        "database",
+        "token",
+        "token_secret_name",
+        "user",
+        "password",
+        "password_secret_name",
+    };
+
+    static const TSet<TString> stateSettings = {
+        "state",
+        "failover_mode",
+    };
+
+    // Keys are matched case-insensitively; error messages keep the user's spelling.
+    const auto keyName = to_lower(key.Name);
+    if (!configSettings.count(keyName) && !stateSettings.count(keyName)) {
+        ctx.Context().Error() << "Unknown replication setting: " << key.Name;
+        return false;
+    }
+
+    if (create && stateSettings.count(keyName)) {
+        ctx.Context().Error() << key.Name << " is not supported in CREATE";
+        return false;
+    }
+
+    if (!out.emplace(keyName, value).second) {
+        ctx.Context().Error() << "Duplicate replication setting: " << key.Name;
+        // Previously this fell through to `return true`, so the caller saw
+        // success even though an error had just been reported.
+        return false;
+    }
+
+    return true;
+}
+
+// Parses every entry of a replication_settings rule into `out`.
+// Stops at the first entry that AsyncReplicationSettingsEntry rejects and
+// returns false; returns true when all entries were accepted.
+static bool AsyncReplicationSettings(std::map<TString, TNodePtr>& out,
+        const TRule_replication_settings& in, TTranslation& ctx, bool create)
+{
+    const auto parseEntry = [&out, &ctx, create](const TRule_replication_settings_entry& entry) {
+        return AsyncReplicationSettingsEntry(out, entry, ctx, create);
+    };
+
+    if (!parseEntry(in.GetRule_replication_settings_entry1())) {
+        return false;
+    }
+
+    for (auto& extra : in.GetBlock2()) {
+        if (!parseEntry(extra.GetRule_replication_settings_entry2())) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Appends one (remote, local) pair described by `in` to `out`.
+// The remote identifier is stored as written; the local identifier is combined
+// with `prefixPath` through BuildTablePath. Always returns true.
+static bool AsyncReplicationTarget(std::vector<std::pair<TString, TString>>& out, TStringBuf prefixPath,
+        const TRule_replication_target& in, TTranslation& ctx)
+{
+    const auto& remoteRef = in.GetRule_object_ref1();
+    const auto& localRef = in.GetRule_object_ref3();
+
+    // Resolve the remote identifier before the local one, as the original did.
+    const TString remoteName = Id(remoteRef.GetRule_id_or_at2(), ctx).second;
+    const TString localName = Id(localRef.GetRule_id_or_at2(), ctx).second;
+
+    out.emplace_back(remoteName, BuildTablePath(prefixPath, localName));
+    return true;
+}
+
+// Applies one ALTER replication action to `settings`.
+// Only the set-setting form of the action is read here; its settings list is
+// parsed with create == false. Returns the result of AsyncReplicationSettings.
+static bool AsyncReplicationAlterAction(std::map<TString, TNodePtr>& settings,
+        const TRule_alter_replication_action& in, TTranslation& ctx)
+{
+    // TODO(ilnaz): support other actions
+    const auto& setSetting = in.GetRule_alter_replication_set_setting1();
+    const auto& newSettings = setSetting.GetRule_replication_settings3();
+    return AsyncReplicationSettings(settings, newSettings, ctx, false);
+}
+
+bool TSqlQuery::Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& core) {
+ TString internalStatementName;
+ TString humanStatementName;
+ ParseStatementName(core, internalStatementName, humanStatementName);
+ const auto& altCase = core.Alt_case();
+ if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && (altCase >= TRule_sql_stmt_core::kAltSqlStmtCore4 &&
+ altCase != TRule_sql_stmt_core::kAltSqlStmtCore13)) {
+ Error() << humanStatementName << " statement is not supported in limited views";
+ return false;
+ }
+
+ if (Mode == NSQLTranslation::ESqlMode::SUBQUERY && (altCase >= TRule_sql_stmt_core::kAltSqlStmtCore4 &&
+ altCase != TRule_sql_stmt_core::kAltSqlStmtCore13 && altCase != TRule_sql_stmt_core::kAltSqlStmtCore6 &&
+ altCase != TRule_sql_stmt_core::kAltSqlStmtCore18)) {
+ Error() << humanStatementName << " statement is not supported in subqueries";
+ return false;
+ }
+
+ switch (altCase) {
+ case TRule_sql_stmt_core::kAltSqlStmtCore1: {
+ bool success = false;
+ TNodePtr nodeExpr = PragmaStatement(core.GetAlt_sql_stmt_core1().GetRule_pragma_stmt1(), success);
+ if (!success) {
+ return false;
+ }
+ if (nodeExpr) {
+ AddStatementToBlocks(blocks, nodeExpr);
+ }
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore2: {
+ if (Ctx.ParallelModeCount > 0) {
+ Error() << humanStatementName << " statement is not supported in parallel mode";
+ return false;
+ }
+
+ Ctx.BodyPart();
+ TSqlSelect select(Ctx, Mode);
+ TPosition pos;
+ auto source = select.Build(core.GetAlt_sql_stmt_core2().GetRule_select_stmt1(), pos);
+ if (!source) {
+ return false;
+ }
+ blocks.emplace_back(BuildSelectResult(pos, std::move(source),
+ Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW && Mode != NSQLTranslation::ESqlMode::SUBQUERY, Mode == NSQLTranslation::ESqlMode::SUBQUERY,
+ Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore3: {
+ Ctx.BodyPart();
+ TVector<TSymbolNameWithPos> names;
+ auto nodeExpr = NamedNode(core.GetAlt_sql_stmt_core3().GetRule_named_nodes_stmt1(), names);
+ if (!nodeExpr) {
+ return false;
+ }
+ TVector<TNodePtr> nodes;
+ auto subquery = nodeExpr->GetSource();
+ if (subquery && Mode == NSQLTranslation::ESqlMode::LIBRARY && Ctx.ScopeLevel == 0) {
+ for (size_t i = 0; i < names.size(); ++i) {
+ nodes.push_back(BuildInvalidSubqueryRef(subquery->GetPos()));
+ }
+ } else if (subquery) {
+ const auto alias = Ctx.MakeName("subquerynode");
+ const auto ref = Ctx.MakeName("subquery");
+ blocks.push_back(BuildSubquery(subquery, alias,
+ Mode == NSQLTranslation::ESqlMode::SUBQUERY, names.size() == 1 ? -1 : names.size(), Ctx.Scoped));
+ blocks.back()->SetLabel(ref);
+
+ for (size_t i = 0; i < names.size(); ++i) {
+ nodes.push_back(BuildSubqueryRef(blocks.back(), ref, names.size() == 1 ? -1 : i));
+ }
+ } else if (!Ctx.CompactNamedExprs || nodeExpr->GetUdfNode()) {
+ // Unlike other nodes, TUdfNode is not an independent node, but more like a set of parameters which should be
+ // applied on UDF call site. For example, TUdfNode can not be Translate()d
+ // So we can't add it to blocks and use reference, instead we store the TUdfNode itself as named node
+ // TODO: remove this special case
+ if (names.size() > 1) {
+ auto tupleRes = BuildTupleResult(nodeExpr, names.size());
+ for (size_t i = 0; i < names.size(); ++i) {
+ nodes.push_back(nodeExpr->Y("Nth", tupleRes, nodeExpr->Q(ToString(i))));
+ }
+ } else {
+ nodes.push_back(std::move(nodeExpr));
+ }
+ } else {
+ const auto ref = Ctx.MakeName("namedexprnode");
+ blocks.push_back(BuildNamedExpr(names.size() > 1 ? BuildTupleResult(nodeExpr, names.size()) : nodeExpr));
+ blocks.back()->SetLabel(ref);
+ for (size_t i = 0; i < names.size(); ++i) {
+ nodes.push_back(BuildNamedExprReference(blocks.back(), ref, names.size() == 1 ? TMaybe<size_t>() : i));
+ }
+ }
+
+ for (size_t i = 0; i < names.size(); ++i) {
+ PushNamedNode(names[i].Pos, names[i].Name, nodes[i]);
+ }
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore4: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core4().GetRule_create_table_stmt1();
+
+ bool replaceIfExists = false;
+ if (rule.HasBlock2()) { // OR REPLACE
+ replaceIfExists = true;
+ Y_DEBUG_ABORT_UNLESS(
+ (IS_TOKEN(rule.GetBlock2().GetToken1().GetId(), OR) &&
+ IS_TOKEN(rule.GetBlock2().GetToken2().GetId(), REPLACE))
+ );
+ }
+
+ const bool isCreateTableAs = rule.HasBlock15();
+ const auto& block = rule.GetBlock3();
+ ETableType tableType = ETableType::Table;
+ bool temporary = false;
+ if (block.HasAlt2() &&
+ IS_TOKEN(block.GetAlt2().GetToken1().GetId(), TABLESTORE)
+ ) {
+ tableType = ETableType::TableStore;
+ if (isCreateTableAs) {
+ Context().Error(GetPos(block.GetAlt2().GetToken1()))
+ << "CREATE TABLE AS is not supported for TABLESTORE";
+ return false;
+ }
+ } else if (block.HasAlt3() &&
+ IS_TOKEN(block.GetAlt3().GetToken1().GetId(), EXTERNAL)
+ ) {
+ tableType = ETableType::ExternalTable;
+ if (isCreateTableAs) {
+ Context().Error(GetPos(block.GetAlt3().GetToken1()))
+ << "CREATE TABLE AS is not supported for EXTERNAL TABLE";
+ return false;
+ }
+ } else if (block.HasAlt4() && IS_TOKEN(block.GetAlt4().GetToken1().GetId(), TEMP) ||
+ block.HasAlt5() && IS_TOKEN(block.GetAlt5().GetToken1().GetId(), TEMPORARY)) {
+ temporary = true;
+ }
+
+ bool existingOk = false;
+ if (rule.HasBlock4()) { // IF NOT EXISTS
+ existingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(rule.GetBlock4().GetToken1().GetId(), IF) &&
+ IS_TOKEN(rule.GetBlock4().GetToken2().GetId(), NOT) &&
+ IS_TOKEN(rule.GetBlock4().GetToken3().GetId(), EXISTS)
+ );
+ }
+
+ if (replaceIfExists && tableType != ETableType::ExternalTable) {
+ Context().Error(GetPos(rule.GetBlock2().GetToken1()))
+ << "OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE";
+ return false;
+ }
+
+ TTableRef tr;
+ if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref5(), tr)) {
+ return false;
+ }
+
+ TCreateTableParameters params{.TableType=tableType, .Temporary=temporary};
+ if (!CreateTableEntry(rule.GetRule_create_table_entry7(), params, isCreateTableAs)) {
+ return false;
+ }
+ for (auto& block: rule.GetBlock8()) {
+ if (!CreateTableEntry(block.GetRule_create_table_entry2(), params, isCreateTableAs)) {
+ return false;
+ }
+ }
+
+ if (rule.HasBlock11()) {
+ Context().Error(GetPos(rule.GetBlock11().GetRule_table_inherits1().GetToken1()))
+ << "INHERITS clause is not supported yet";
+ return false;
+ }
+
+ if (rule.HasBlock12()) {
+ if (tableType == ETableType::TableStore) {
+ Context().Error(GetPos(rule.GetBlock12().GetRule_table_partition_by1().GetToken1()))
+ << "PARTITION BY is not supported for TABLESTORE";
+ return false;
+ }
+ const auto list = rule.GetBlock12().GetRule_table_partition_by1().GetRule_pure_column_list4();
+ params.PartitionByColumns.push_back(IdEx(list.GetRule_an_id2(), *this));
+ for (auto& node : list.GetBlock3()) {
+ params.PartitionByColumns.push_back(IdEx(node.GetRule_an_id2(), *this));
+ }
+ }
+
+ if (rule.HasBlock13()) {
+ if (!CreateTableSettings(rule.GetBlock13().GetRule_with_table_settings1(), params)) {
+ return false;
+ }
+ }
+
+ if (rule.HasBlock14()) {
+ Context().Error(GetPos(rule.GetBlock14().GetRule_table_tablestore1().GetToken1()))
+ << "TABLESTORE clause is not supported yet";
+ return false;
+ }
+
+ TSourcePtr tableSource = nullptr;
+ if (isCreateTableAs) {
+ tableSource = TSqlAsValues(Ctx, Mode).Build(rule.GetBlock15().GetRule_table_as_source1().GetRule_values_source2(), "CreateTableAs");
+ if (!tableSource) {
+ return false;
+ }
+ }
+
+ if (!ValidateExternalTable(params)) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateTable(Ctx.Pos(), tr, existingOk, replaceIfExists, params, std::move(tableSource), Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore5: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core5().GetRule_drop_table_stmt1();
+ const auto& block = rule.GetBlock2();
+ ETableType tableType = ETableType::Table;
+ if (block.HasAlt2()) {
+ tableType = ETableType::TableStore;
+ }
+ if (block.HasAlt3()) {
+ tableType = ETableType::ExternalTable;
+ }
+
+ bool missingOk = false;
+ if (rule.HasBlock3()) { // IF EXISTS
+ missingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(rule.GetBlock3().GetToken1().GetId(), IF) &&
+ IS_TOKEN(rule.GetBlock3().GetToken2().GetId(), EXISTS)
+ );
+ }
+
+ TTableRef tr;
+ if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref4(), tr)) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildDropTable(Ctx.Pos(), tr, missingOk, tableType, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore6: {
+ const auto& rule = core.GetAlt_sql_stmt_core6().GetRule_use_stmt1();
+ Token(rule.GetToken1());
+ if (!ClusterExpr(rule.GetRule_cluster_expr2(), true, Ctx.Scoped->CurrService, Ctx.Scoped->CurrCluster)) {
+ return false;
+ }
+
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore7: {
+ Ctx.BodyPart();
+ TSqlIntoTable intoTable(Ctx, Mode);
+ TNodePtr block(intoTable.Build(core.GetAlt_sql_stmt_core7().GetRule_into_table_stmt1()));
+ if (!block) {
+ return false;
+ }
+ blocks.emplace_back(block);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore8: {
+ if (Ctx.ParallelModeCount > 0) {
+ Error() << humanStatementName << " statement is not supported in parallel mode";
+ return false;
+ }
+
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core8().GetRule_commit_stmt1();
+ Token(rule.GetToken1());
+ blocks.emplace_back(BuildCommitClusters(Ctx.Pos()));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore9: {
+ Ctx.BodyPart();
+ auto updateNode = Build(core.GetAlt_sql_stmt_core9().GetRule_update_stmt1());
+ if (!updateNode) {
+ return false;
+ }
+ AddStatementToBlocks(blocks, updateNode);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore10: {
+ Ctx.BodyPart();
+ auto deleteNode = Build(core.GetAlt_sql_stmt_core10().GetRule_delete_stmt1());
+ if (!deleteNode) {
+ return false;
+ }
+ blocks.emplace_back(deleteNode);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore11: {
+ if (Ctx.ParallelModeCount > 0) {
+ Error() << humanStatementName << " statement is not supported in parallel mode";
+ return false;
+ }
+
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core11().GetRule_rollback_stmt1();
+ Token(rule.GetToken1());
+ blocks.emplace_back(BuildRollbackClusters(Ctx.Pos()));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore12:
+ if (!DeclareStatement(core.GetAlt_sql_stmt_core12().GetRule_declare_stmt1())) {
+ return false;
+ }
+ break;
+ case TRule_sql_stmt_core::kAltSqlStmtCore13:
+ if (!ImportStatement(core.GetAlt_sql_stmt_core13().GetRule_import_stmt1())) {
+ return false;
+ }
+ break;
+ case TRule_sql_stmt_core::kAltSqlStmtCore14:
+ if (!ExportStatement(core.GetAlt_sql_stmt_core14().GetRule_export_stmt1())) {
+ return false;
+ }
+ break;
+ case TRule_sql_stmt_core::kAltSqlStmtCore15: {
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core15().GetRule_alter_table_stmt1();
+ const bool isTablestore = IS_TOKEN(rule.GetToken2().GetId(), TABLESTORE);
+ TTableRef tr;
+ if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref3(), tr)) {
+ return false;
+ }
+
+ TAlterTableParameters params;
+ if (isTablestore) {
+ params.TableType = ETableType::TableStore;
+ }
+ if (!AlterTableAction(rule.GetRule_alter_table_action4(), params)) {
+ return false;
+ }
+
+ for (auto& block : rule.GetBlock5()) {
+ if (!AlterTableAction(block.GetRule_alter_table_action2(), params)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterTable(Ctx.Pos(), tr, params, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore16: {
+ // alter_external_table_stmt: ALTER EXTERNAL TABLE simple_table_ref alter_external_table_action (COMMA alter_external_table_action)*
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core16().GetRule_alter_external_table_stmt1();
+ TTableRef tr;
+ if (!SimpleTableRefImpl(rule.GetRule_simple_table_ref4(), tr)) {
+ return false;
+ }
+
+ TAlterTableParameters params;
+ params.TableType = ETableType::ExternalTable;
+ if (!AlterExternalTableAction(rule.GetRule_alter_external_table_action5(), params)) {
+ return false;
+ }
+
+ for (auto& block : rule.GetBlock6()) {
+ if (!AlterExternalTableAction(block.GetRule_alter_external_table_action2(), params)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterTable(Ctx.Pos(), tr, params, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore17: {
+ Ctx.BodyPart();
+ auto node = DoStatement(core.GetAlt_sql_stmt_core17().GetRule_do_stmt1(), false);
+ if (!node) {
+ return false;
+ }
+
+ blocks.push_back(node);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore18: {
+ Ctx.BodyPart();
+ TNodePtr lambda;
+ TSymbolNameWithPos nameAndPos;
+ const auto& stmt = core.GetAlt_sql_stmt_core18().GetRule_define_action_or_subquery_stmt1();
+ const TString kind = to_lower(Ctx.Token(stmt.GetToken2()));
+ YQL_ENSURE(kind == "action" || kind == "subquery");
+ if (!DefineActionOrSubqueryStatement(stmt, nameAndPos, lambda)) {
+ return false;
+ }
+
+ if (Ctx.CompactNamedExprs) {
+ const auto ref = Ctx.MakeName("named" + kind + "node");
+ blocks.push_back(BuildNamedExpr(lambda));
+ blocks.back()->SetLabel(ref);
+ lambda = BuildNamedExprReference(blocks.back(), ref, {});
+ }
+
+ PushNamedNode(nameAndPos.Pos, nameAndPos.Name, lambda);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore19: {
+ Ctx.BodyPart();
+ auto node = IfStatement(core.GetAlt_sql_stmt_core19().GetRule_if_stmt1());
+ if (!node) {
+ return false;
+ }
+
+ blocks.push_back(node);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore20: {
+ Ctx.BodyPart();
+ auto node = ForStatement(core.GetAlt_sql_stmt_core20().GetRule_for_stmt1());
+ if (!node) {
+ return false;
+ }
+
+ blocks.push_back(node);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore21: {
+ if (Ctx.ParallelModeCount > 0) {
+ Error() << humanStatementName << " statement is not supported in parallel mode";
+ return false;
+ }
+
+ Ctx.BodyPart();
+ TSqlValues values(Ctx, Mode);
+ TPosition pos;
+ auto source = values.Build(core.GetAlt_sql_stmt_core21().GetRule_values_stmt1(), pos, {}, TPosition());
+ if (!source) {
+ return false;
+ }
+ blocks.emplace_back(BuildSelectResult(pos, std::move(source),
+ Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW && Mode != NSQLTranslation::ESqlMode::SUBQUERY, Mode == NSQLTranslation::ESqlMode::SUBQUERY,
+ Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore22: {
+ // create_user_stmt: CREATE USER role_name create_user_option?;
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core22().GetRule_create_user_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ TDeferredAtom roleName;
+ bool allowSystemRoles = false;
+ if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) {
+ return false;
+ }
+
+ TMaybe<TRoleParameters> roleParams;
+ if (node.HasBlock4()) {
+ roleParams.ConstructInPlace();
+ if (!RoleParameters(node.GetBlock4().GetRule_create_user_option1(), *roleParams)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateUser(pos, service, cluster, roleName, roleParams, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore23: {
+ // alter_user_stmt: ALTER USER role_name (WITH? create_user_option | RENAME TO role_name);
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core23().GetRule_alter_user_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ TDeferredAtom roleName;
+ {
+ bool allowSystemRoles = true;
+ if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) {
+ return false;
+ }
+ }
+
+ TNodePtr stmt;
+ switch (node.GetBlock4().Alt_case()) {
+ case TRule_alter_user_stmt_TBlock4::kAlt1: {
+ TRoleParameters roleParams;
+ if (!RoleParameters(node.GetBlock4().GetAlt1().GetRule_create_user_option2(), roleParams)) {
+ return false;
+ }
+ stmt = BuildAlterUser(pos, service, cluster, roleName, roleParams, Ctx.Scoped);
+ break;
+ }
+ case TRule_alter_user_stmt_TBlock4::kAlt2: {
+ TDeferredAtom tgtRoleName;
+ bool allowSystemRoles = false;
+ if (!RoleNameClause(node.GetBlock4().GetAlt2().GetRule_role_name3(), tgtRoleName, allowSystemRoles)) {
+ return false;
+ }
+ stmt = BuildRenameUser(pos, service, cluster, roleName, tgtRoleName,Ctx.Scoped);
+ break;
+ }
+ case TRule_alter_user_stmt_TBlock4::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ AddStatementToBlocks(blocks, stmt);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore24: {
+ // create_group_stmt: CREATE GROUP role_name (WITH USER role_name (COMMA role_name)* COMMA?)?;
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core24().GetRule_create_group_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ TDeferredAtom roleName;
+ bool allowSystemRoles = false;
+ if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) {
+ return false;
+ }
+
+ TRoleParameters roleParams;
+ if (node.HasBlock4()) {
+ auto& addDropNode = node.GetBlock4();
+ TVector<TDeferredAtom> roles;
+ bool allowSystemRoles = false;
+ roleParams.Roles.emplace_back();
+ if (!RoleNameClause(addDropNode.GetRule_role_name3(), roleParams.Roles.back(), allowSystemRoles)) {
+ return false;
+ }
+
+ for (auto& item : addDropNode.GetBlock4()) {
+ roleParams.Roles.emplace_back();
+ if (!RoleNameClause(item.GetRule_role_name2(), roleParams.Roles.back(), allowSystemRoles)) {
+ return false;
+ }
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateGroup(pos, service, cluster, roleName, roleParams, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore25: {
+ // alter_group_stmt: ALTER GROUP role_name ((ADD|DROP) USER role_name (COMMA role_name)* COMMA? | RENAME TO role_name);
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core25().GetRule_alter_group_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ TDeferredAtom roleName;
+ {
+ bool allowSystemRoles = true;
+ if (!RoleNameClause(node.GetRule_role_name3(), roleName, allowSystemRoles)) {
+ return false;
+ }
+ }
+
+ TNodePtr stmt;
+ switch (node.GetBlock4().Alt_case()) {
+ case TRule_alter_group_stmt_TBlock4::kAlt1: {
+ auto& addDropNode = node.GetBlock4().GetAlt1();
+ const bool isDrop = IS_TOKEN(addDropNode.GetToken1().GetId(), DROP);
+ TVector<TDeferredAtom> roles;
+ bool allowSystemRoles = false;
+ roles.emplace_back();
+ if (!RoleNameClause(addDropNode.GetRule_role_name3(), roles.back(), allowSystemRoles)) {
+ return false;
+ }
+
+ for (auto& item : addDropNode.GetBlock4()) {
+ roles.emplace_back();
+ if (!RoleNameClause(item.GetRule_role_name2(), roles.back(), allowSystemRoles)) {
+ return false;
+ }
+ }
+
+ stmt = BuildAlterGroup(pos, service, cluster, roleName, roles, isDrop, Ctx.Scoped);
+ break;
+ }
+ case TRule_alter_group_stmt_TBlock4::kAlt2: {
+ TDeferredAtom tgtRoleName;
+ bool allowSystemRoles = false;
+ if (!RoleNameClause(node.GetBlock4().GetAlt2().GetRule_role_name3(), tgtRoleName, allowSystemRoles)) {
+ return false;
+ }
+ stmt = BuildRenameGroup(pos, service, cluster, roleName, tgtRoleName, Ctx.Scoped);
+ break;
+ }
+ case TRule_alter_group_stmt_TBlock4::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ AddStatementToBlocks(blocks, stmt);
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore26: {
+ // drop_role_stmt: DROP (USER|GROUP) (IF EXISTS)? role_name (COMMA role_name)* COMMA?;
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core26().GetRule_drop_role_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ const bool isUser = IS_TOKEN(node.GetToken2().GetId(), USER);
+ bool missingOk = false;
+ if (node.HasBlock3()) { // IF EXISTS
+ missingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) &&
+ IS_TOKEN(node.GetBlock3().GetToken2().GetId(), EXISTS)
+ );
+ }
+
+ TVector<TDeferredAtom> roles;
+ bool allowSystemRoles = true;
+ roles.emplace_back();
+ if (!RoleNameClause(node.GetRule_role_name4(), roles.back(), allowSystemRoles)) {
+ return false;
+ }
+
+ for (auto& item : node.GetBlock5()) {
+ roles.emplace_back();
+ if (!RoleNameClause(item.GetRule_role_name2(), roles.back(), allowSystemRoles)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildDropRoles(pos, service, cluster, roles, isUser, missingOk, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore27: {
+ // create_object_stmt: CREATE OBJECT (IF NOT EXISTS)? name (TYPE type [WITH k=v,...]);
+ auto& node = core.GetAlt_sql_stmt_core27().GetRule_create_object_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ bool existingOk = false;
+ if (node.HasBlock3()) { // IF NOT EXISTS
+ existingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) &&
+ IS_TOKEN(node.GetBlock3().GetToken2().GetId(), NOT) &&
+ IS_TOKEN(node.GetBlock3().GetToken3().GetId(), EXISTS)
+ );
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ const TString& typeId = Id(node.GetRule_object_type_ref7().GetRule_an_id_or_type1(), *this);
+ std::map<TString, TDeferredAtom> kv;
+ if (node.HasBlock9()) {
+ if (!ParseObjectFeatures(kv, node.GetBlock9().GetRule_create_object_features1().GetRule_object_features2())) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), objectId, typeId, existingOk, false, std::move(kv), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore28: {
+ // alter_object_stmt: ALTER OBJECT name (TYPE type [SET k=v,...]);
+ auto& node = core.GetAlt_sql_stmt_core28().GetRule_alter_object_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ const TString& typeId = Id(node.GetRule_object_type_ref6().GetRule_an_id_or_type1(), *this);
+ std::map<TString, TDeferredAtom> kv;
+ if (!ParseObjectFeatures(kv, node.GetRule_alter_object_features8().GetRule_object_features2())) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, typeId, std::move(kv), std::set<TString>(), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore29: {
+ // drop_object_stmt: DROP OBJECT (IF EXISTS)? name (TYPE type [WITH k=v,...]);
+ auto& node = core.GetAlt_sql_stmt_core29().GetRule_drop_object_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ bool missingOk = false;
+ if (node.HasBlock3()) { // IF EXISTS
+ missingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) &&
+ IS_TOKEN(node.GetBlock3().GetToken2().GetId(), EXISTS)
+ );
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ const TString& typeId = Id(node.GetRule_object_type_ref7().GetRule_an_id_or_type1(), *this);
+ std::map<TString, TDeferredAtom> kv;
+ if (node.HasBlock9()) {
+ if (!ParseObjectFeatures(kv, node.GetBlock9().GetRule_drop_object_features1().GetRule_object_features2())) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), objectId, typeId, missingOk, std::move(kv), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore30: {
+ // create_external_data_source_stmt: CREATE (OR REPLACE)? EXTERNAL DATA SOURCE (IF NOT EXISTS)? name WITH (k=v,...);
+ auto& node = core.GetAlt_sql_stmt_core30().GetRule_create_external_data_source_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref7().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref7().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ bool replaceIfExists = false;
+ if (node.HasBlock2()) { // OR REPLACE
+ replaceIfExists = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(node.GetBlock2().GetToken1().GetId(), OR) &&
+ IS_TOKEN(node.GetBlock2().GetToken2().GetId(), REPLACE)
+ );
+ }
+
+ bool existingOk = false;
+ if (node.HasBlock6()) { // IF NOT EXISTS
+ existingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(node.GetBlock6().GetToken1().GetId(), IF) &&
+ IS_TOKEN(node.GetBlock6().GetToken2().GetId(), NOT) &&
+ IS_TOKEN(node.GetBlock6().GetToken3().GetId(), EXISTS)
+ );
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref7().GetRule_id_or_at2(), *this).second;
+ std::map<TString, TDeferredAtom> kv;
+ if (!ParseExternalDataSourceSettings(kv, node.GetRule_with_table_settings8())) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), "EXTERNAL_DATA_SOURCE", existingOk, replaceIfExists, std::move(kv), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore31: {
+ // alter_external_data_source_stmt: ALTER EXTERNAL DATA SOURCE object_ref alter_external_data_source_action (COMMA alter_external_data_source_action)*
+ Ctx.BodyPart();
+ const auto& node = core.GetAlt_sql_stmt_core31().GetRule_alter_external_data_source_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref5().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second;
+ std::map<TString, TDeferredAtom> kv;
+ std::set<TString> toReset;
+ if (!ParseExternalDataSourceSettings(kv, toReset, node.GetRule_alter_external_data_source_action6())) {
+ return false;
+ }
+
+ for (const auto& action : node.GetBlock7()) {
+ if (!ParseExternalDataSourceSettings(kv, toReset, action.GetRule_alter_external_data_source_action2())) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, "EXTERNAL_DATA_SOURCE", std::move(kv), std::move(toReset), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore32: {
+ // drop_external_data_source_stmt: DROP EXTERNAL DATA SOURCE (IF EXISTS)? name;
+ auto& node = core.GetAlt_sql_stmt_core32().GetRule_drop_external_data_source_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref6().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref6().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ bool missingOk = false;
+ if (node.HasBlock5()) { // IF EXISTS
+ missingOk = true;
+ Y_DEBUG_ABORT_UNLESS(
+ IS_TOKEN(node.GetBlock5().GetToken1().GetId(), IF) &&
+ IS_TOKEN(node.GetBlock5().GetToken2().GetId(), EXISTS)
+ );
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref6().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId), "EXTERNAL_DATA_SOURCE", missingOk, {}, context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore33: {
+ // create_replication_stmt: CREATE ASYNC REPLICATION
+ auto& node = core.GetAlt_sql_stmt_core33().GetRule_create_replication_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ const auto& cluster = node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1();
+ if (!ClusterExpr(cluster, false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ auto prefixPath = Ctx.GetPrefixPath(context.ServiceId, context.Cluster);
+
+ std::vector<std::pair<TString, TString>> targets;
+ if (!AsyncReplicationTarget(targets, prefixPath, node.GetRule_replication_target6(), *this)) {
+ return false;
+ }
+ for (auto& block : node.GetBlock7()) {
+ if (!AsyncReplicationTarget(targets, prefixPath, block.GetRule_replication_target2(), *this)) {
+ return false;
+ }
+ }
+
+ std::map<TString, TNodePtr> settings;
+ if (!AsyncReplicationSettings(settings, node.GetRule_replication_settings10(), *this, true)) {
+ return false;
+ }
+
+ const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks, BuildCreateAsyncReplication(Ctx.Pos(), BuildTablePath(prefixPath, id),
+ std::move(targets), std::move(settings), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore34: {
+ // drop_replication_stmt: DROP ASYNC REPLICATION
+ auto& node = core.GetAlt_sql_stmt_core34().GetRule_drop_replication_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ const auto& cluster = node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1();
+ if (!ClusterExpr(cluster, false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks, BuildDropAsyncReplication(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), id),
+ node.HasBlock5(), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore35: {
+ Ctx.BodyPart();
+ // create_topic_stmt: CREATE TOPIC (IF NOT EXISTS)? topic1 (CONSUMER ...)? [WITH (opt1 = val1, ...)]?
+ auto& rule = core.GetAlt_sql_stmt_core35().GetRule_create_topic_stmt1();
+ TTopicRef tr;
+ if (!TopicRefImpl(rule.GetRule_topic_ref4(), tr)) {
+ return false;
+ }
+ bool existingOk = false;
+ if (rule.HasBlock3()) { // if not exists
+ existingOk = true;
+ }
+
+ TCreateTopicParameters params;
+ params.ExistingOk = existingOk;
+ if (rule.HasBlock5()) { //create_topic_entry (consumers)
+ auto& entries = rule.GetBlock5().GetRule_create_topic_entries1();
+ auto& firstEntry = entries.GetRule_create_topic_entry2();
+ if (!CreateTopicEntry(firstEntry, params)) {
+ return false;
+ }
+ const auto& list = entries.GetBlock3();
+ for (auto& node : list) {
+ if (!CreateTopicEntry(node.GetRule_create_topic_entry2(), params)) {
+ return false;
+ }
+ }
+
+ }
+ if (rule.HasBlock6()) { // with_topic_settings
+ auto& topic_settings_node = rule.GetBlock6().GetRule_with_topic_settings1().GetRule_topic_settings3();
+ CreateTopicSettings(topic_settings_node, params.TopicSettings);
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateTopic(Ctx.Pos(), tr, params, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore36: {
+// alter_topic_stmt: ALTER TOPIC topic_ref alter_topic_action (COMMA alter_topic_action)*;
+// alter_topic_stmt: ALTER TOPIC IF EXISTS topic_ref alter_topic_action (COMMA alter_topic_action)*;
+
+ Ctx.BodyPart();
+ auto& rule = core.GetAlt_sql_stmt_core36().GetRule_alter_topic_stmt1();
+ TTopicRef tr;
+ bool missingOk = false;
+ if (rule.HasBlock3()) { // IF EXISTS
+ missingOk = true;
+ }
+ if (!TopicRefImpl(rule.GetRule_topic_ref4(), tr)) {
+ return false;
+ }
+
+ TAlterTopicParameters params;
+ params.MissingOk = missingOk;
+ auto& firstEntry = rule.GetRule_alter_topic_action5();
+ if (!AlterTopicAction(firstEntry, params)) {
+ return false;
+ }
+ const auto& list = rule.GetBlock6();
+ for (auto& node : list) {
+ if (!AlterTopicAction(node.GetRule_alter_topic_action2(), params)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterTopic(Ctx.Pos(), tr, params, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore37: {
+ // drop_topic_stmt: DROP TOPIC (IF EXISTS)? topic_ref;
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core37().GetRule_drop_topic_stmt1();
+
+ TDropTopicParameters params;
+ if (rule.HasBlock3()) { // IF EXISTS
+ params.MissingOk = true;
+ } else {
+ params.MissingOk = false;
+ }
+
+ TTopicRef tr;
+ if (!TopicRefImpl(rule.GetRule_topic_ref4(), tr)) {
+ return false;
+ }
+ AddStatementToBlocks(blocks, BuildDropTopic(Ctx.Pos(), tr, params, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore38: {
+ // GRANT permission_name_target ON an_id_schema (COMMA an_id_schema)* TO role_name (COMMA role_name)* COMMA? (WITH GRANT OPTION)?;
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core38().GetRule_grant_permissions_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ TVector<TDeferredAtom> permissions;
+ if (!PermissionNameClause(node.GetRule_permission_name_target2(), permissions, node.has_block10())) {
+ return false;
+ }
+
+ TVector<TDeferredAtom> schemaPaths;
+ schemaPaths.emplace_back(Ctx.Pos(), Id(node.GetRule_an_id_schema4(), *this));
+ for (const auto& item : node.GetBlock5()) {
+ schemaPaths.emplace_back(Ctx.Pos(), Id(item.GetRule_an_id_schema2(), *this));
+ }
+
+ TVector<TDeferredAtom> roleNames;
+ const bool allowSystemRoles = false;
+ roleNames.emplace_back();
+ if (!RoleNameClause(node.GetRule_role_name7(), roleNames.back(), allowSystemRoles)) {
+ return false;
+ }
+ for (const auto& item : node.GetBlock8()) {
+ roleNames.emplace_back();
+ if (!RoleNameClause(item.GetRule_role_name2(), roleNames.back(), allowSystemRoles)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildGrantPermissions(pos, service, cluster, permissions, schemaPaths, roleNames, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore39:
+ {
+ // REVOKE (GRANT OPTION FOR)? permission_name_target ON an_id_schema (COMMA an_id_schema)* FROM role_name (COMMA role_name)*;
+ Ctx.BodyPart();
+ auto& node = core.GetAlt_sql_stmt_core39().GetRule_revoke_permissions_stmt1();
+
+ Ctx.Token(node.GetToken1());
+ const TPosition pos = Ctx.Pos();
+
+ TString service = Ctx.Scoped->CurrService;
+ TDeferredAtom cluster = Ctx.Scoped->CurrCluster;
+ if (cluster.Empty()) {
+ Error() << "USE statement is missing - no default cluster is selected";
+ return false;
+ }
+
+ TVector<TDeferredAtom> permissions;
+ if (!PermissionNameClause(node.GetRule_permission_name_target3(), permissions, node.HasBlock2())) {
+ return false;
+ }
+
+ TVector<TDeferredAtom> schemaPaths;
+ schemaPaths.emplace_back(Ctx.Pos(), Id(node.GetRule_an_id_schema5(), *this));
+ for (const auto& item : node.GetBlock6()) {
+ schemaPaths.emplace_back(Ctx.Pos(), Id(item.GetRule_an_id_schema2(), *this));
+ }
+
+ TVector<TDeferredAtom> roleNames;
+ const bool allowSystemRoles = false;
+ roleNames.emplace_back();
+ if (!RoleNameClause(node.GetRule_role_name8(), roleNames.back(), allowSystemRoles)) {
+ return false;
+ }
+ for (const auto& item : node.GetBlock9()) {
+ roleNames.emplace_back();
+ if (!RoleNameClause(item.GetRule_role_name2(), roleNames.back(), allowSystemRoles)) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildRevokePermissions(pos, service, cluster, permissions, schemaPaths, roleNames, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore40:
+ {
+ // ALTER TABLESTORE object_ref alter_table_store_action (COMMA alter_table_store_action)*;
+ auto& node = core.GetAlt_sql_stmt_core40().GetRule_alter_table_store_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+
+ if (node.GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ const TString& typeId = "TABLESTORE";
+ std::map<TString, TDeferredAtom> kv;
+ if (!ParseTableStoreFeatures(kv, node.GetRule_alter_table_store_action4())) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, typeId, std::move(kv), std::set<TString>(), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore41:
+ {
+ // upsert_object_stmt: UPSERT OBJECT name (TYPE type [WITH k=v,...]);
+ auto& node = core.GetAlt_sql_stmt_core41().GetRule_upsert_object_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ const TString& typeId = Id(node.GetRule_object_type_ref6().GetRule_an_id_or_type1(), *this);
+ std::map<TString, TDeferredAtom> kv;
+ if (node.HasBlock8()) {
+ if (!ParseObjectFeatures(kv, node.GetBlock8().GetRule_create_object_features1().GetRule_object_features2())) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildUpsertObjectOperation(Ctx.Pos(), objectId, typeId, std::move(kv), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore42: {
+ // create_view_stmt: CREATE VIEW name WITH (k = v, ...) AS select_stmt;
+ auto& node = core.GetAlt_sql_stmt_core42().GetRule_create_view_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false,
+ context.ServiceId,
+ context.Cluster)) {
+ return false;
+ }
+ }
+
+ std::map<TString, TDeferredAtom> features;
+ if (node.HasBlock4()) {
+ if (!ParseObjectFeatures(features, node.GetBlock4().GetRule_create_object_features1().GetRule_object_features2())) {
+ return false;
+ }
+ }
+ if (!ParseViewQuery(features, node.GetRule_select_stmt6())) {
+ return false;
+ }
+
+ const TString objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ constexpr const char* TypeId = "VIEW";
+ AddStatementToBlocks(blocks,
+ BuildCreateObjectOperation(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TypeId,
+ false,
+ false,
+ std::move(features),
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore43: {
+ // drop_view_stmt: DROP VIEW name;
+ auto& node = core.GetAlt_sql_stmt_core43().GetRule_drop_view_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false,
+ context.ServiceId,
+ context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString objectId = Id(node.GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ constexpr const char* TypeId = "VIEW";
+ AddStatementToBlocks(blocks,
+ BuildDropObjectOperation(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TypeId,
+ false,
+ {},
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore44: {
+ // alter_replication_stmt: ALTER ASYNC REPLICATION
+ auto& node = core.GetAlt_sql_stmt_core44().GetRule_alter_replication_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ const auto& cluster = node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1();
+ if (!ClusterExpr(cluster, false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ std::map<TString, TNodePtr> settings;
+ if (!AsyncReplicationAlterAction(settings, node.GetRule_alter_replication_action5(), *this)) {
+ return false;
+ }
+ for (auto& block : node.GetBlock6()) {
+ if (!AsyncReplicationAlterAction(settings, block.GetRule_alter_replication_action2(), *this)) {
+ return false;
+ }
+ }
+
+ const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks, BuildAlterAsyncReplication(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), id),
+ std::move(settings), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore45: {
+ // create_resource_pool_stmt: CREATE RESOURCE POOL name WITH (k=v,...);
+ auto& node = core.GetAlt_sql_stmt_core45().GetRule_create_resource_pool_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ std::map<TString, TDeferredAtom> kv;
+ if (!ParseResourcePoolSettings(kv, node.GetRule_with_table_settings5())) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL", false, false, std::move(kv), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore46: {
+ // alter_resource_pool_stmt: ALTER RESOURCE POOL object_ref alter_resource_pool_action (COMMA alter_resource_pool_action)*
+ Ctx.BodyPart();
+ const auto& node = core.GetAlt_sql_stmt_core46().GetRule_alter_resource_pool_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ std::map<TString, TDeferredAtom> kv;
+ std::set<TString> toReset;
+ if (!ParseResourcePoolSettings(kv, toReset, node.GetRule_alter_resource_pool_action5())) {
+ return false;
+ }
+
+ for (const auto& action : node.GetBlock6()) {
+ if (!ParseResourcePoolSettings(kv, toReset, action.GetRule_alter_resource_pool_action2())) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL", std::move(kv), std::move(toReset), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore47: {
+ // drop_resource_pool_stmt: DROP RESOURCE POOL name;
+ auto& node = core.GetAlt_sql_stmt_core47().GetRule_drop_resource_pool_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref4().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL", false, {}, context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore48: {
+ // create_backup_collection_stmt: CREATE BACKUP COLLECTION name WITH (k=v,...);
+ auto& node = core.GetAlt_sql_stmt_core48().GetRule_create_backup_collection_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_backup_collection2().GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_backup_collection2().GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false,
+ context.ServiceId,
+ context.Cluster)) {
+ return false;
+ }
+ }
+
+ std::map<TString, TDeferredAtom> kv;
+ if (!ParseBackupCollectionSettings(kv, node.GetRule_backup_collection_settings6())) {
+ return false;
+ }
+
+ bool database = false;
+ TVector<TDeferredAtom> tables;
+ if (node.HasBlock3()) {
+ database = node.GetBlock3().GetRule_create_backup_collection_entries1().has_alt_create_backup_collection_entries1();
+ if (node.GetBlock3().GetRule_create_backup_collection_entries1().has_alt_create_backup_collection_entries2()) {
+ if (!ParseBackupCollectionTables(
+ tables,
+ node
+ .GetBlock3()
+ .GetRule_create_backup_collection_entries1()
+ .alt_create_backup_collection_entries2()
+ .GetRule_create_backup_collection_entries_many1()
+ .GetRule_table_list2()))
+ {
+ return false;
+ }
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_backup_collection2().GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks,
+ BuildCreateBackupCollection(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TCreateBackupCollectionParameters {
+ .Settings = std::move(kv),
+ .Database = database,
+ .Tables = tables,
+ .ExistingOk = false,
+ },
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore49: {
+ // alter_backup_collection_stmt: ALTER BACKUP COLLECTION name alter_backup_collection_action (COMMA alter_backup_collection_action)*;
+ auto& node = core.GetAlt_sql_stmt_core49().GetRule_alter_backup_collection_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_backup_collection2().GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_backup_collection2().GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false,
+ context.ServiceId,
+ context.Cluster)) {
+ return false;
+ }
+ }
+
+ std::map<TString, TDeferredAtom> kv;
+ std::set<TString> toReset;
+
+ bool addDatabase = false;
+ bool dropDatabase = false;
+ TVector<TDeferredAtom> addTables;
+ TVector<TDeferredAtom> removeTables;
+
+ switch (node.GetBlock3().Alt_case()) {
+ case TRule_alter_backup_collection_stmt_TBlock3::kAlt1: {
+ if (!ParseBackupCollectionSettings(kv, toReset, node.GetBlock3().GetAlt1().GetRule_alter_backup_collection_actions1())) {
+ return false;
+ }
+ break;
+ }
+ case TRule_alter_backup_collection_stmt_TBlock3::kAlt2: {
+ if (!ParseBackupCollectionEntries(
+ addDatabase,
+ dropDatabase,
+ addTables,
+ removeTables,
+ node.GetBlock3().GetAlt2().GetRule_alter_backup_collection_entries1()))
+ {
+ return false;
+ }
+ break;
+ }
+ case TRule_alter_backup_collection_stmt_TBlock3::ALT_NOT_SET: {} // do nothing
+ }
+
+ auto database = addDatabase ?
+ TAlterBackupCollectionParameters::EDatabase::Add :
+ dropDatabase ?
+ TAlterBackupCollectionParameters::EDatabase::Drop :
+ TAlterBackupCollectionParameters::EDatabase::Unchanged;
+
+ const TString& objectId = Id(node.GetRule_backup_collection2().GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks,
+ BuildAlterBackupCollection(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TAlterBackupCollectionParameters {
+ .Settings = std::move(kv),
+ .SettingsToReset = std::move(toReset),
+ .Database = database,
+ .TablesToAdd = addTables,
+ .TablesToDrop = removeTables,
+ .MissingOk = false,
+ },
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore50: {
+ // drop_backup_collection_stmt: DROP BACKUP COLLECTION name;
+ auto& node = core.GetAlt_sql_stmt_core50().GetRule_drop_backup_collection_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_backup_collection2().GetRule_object_ref3().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_backup_collection2().GetRule_object_ref3().GetBlock1().GetRule_cluster_expr1(),
+ false,
+ context.ServiceId,
+ context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_backup_collection2().GetRule_object_ref3().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks,
+ BuildDropBackupCollection(Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TDropBackupCollectionParameters {
+ .MissingOk = false,
+ },
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore51: {
+ // analyze_stmt: ANALYZE table_ref
+ Ctx.BodyPart();
+ const auto& rule = core.GetAlt_sql_stmt_core51().GetRule_analyze_stmt1();
+
+ if (!rule.GetRule_analyze_table_list2().GetBlock2().empty()) {
+ Error() << "ANALYZE with multitables hasn't been implemented yet";
+ return false;
+ }
+ auto analyzeTable = rule.GetRule_analyze_table_list2().GetRule_analyze_table1();
+
+ TVector<TString> columns;
+ if (analyzeTable.HasBlock2()) {
+ auto columnsNode =
+ analyzeTable.GetBlock2().GetRule_column_list2();
+
+ if (columnsNode.HasRule_column_name1()) {
+ columns.push_back(Id(columnsNode.GetRule_column_name1().GetRule_an_id2(), *this));
+ for (const auto& columnNode: columnsNode.GetBlock2()) {
+ columns.push_back(Id(columnNode.GetRule_column_name2().GetRule_an_id2(), *this));
+ }
+ }
+ }
+
+ TTableRef tr;
+ if (!SimpleTableRefImpl(rule.GetRule_analyze_table_list2().GetRule_analyze_table1().GetRule_simple_table_ref1(), tr)) {
+ return false;
+ }
+
+ auto params = TAnalyzeParams{.Table = std::make_shared<TTableRef>(tr), .Columns = std::move(columns)};
+ AddStatementToBlocks(blocks, BuildAnalyze(Ctx.Pos(), tr.Service, tr.Cluster, params, Ctx.Scoped));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore52: {
+ // create_resource_pool_classifier_stmt: CREATE RESOURCE POOL CLASSIFIER name WITH (k=v,...);
+ auto& node = core.GetAlt_sql_stmt_core52().GetRule_create_resource_pool_classifier_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref5().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second;
+ std::map<TString, TDeferredAtom> kv;
+ if (!ParseResourcePoolClassifierSettings(kv, node.GetRule_with_table_settings6())) {
+ return false;
+ }
+
+ AddStatementToBlocks(blocks, BuildCreateObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL_CLASSIFIER", false, false, std::move(kv), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore53: {
+ // alter_resource_pool_classifier_stmt: ALTER RESOURCE POOL CLASSIFIER object_ref alter_resource_pool_classifier_action (COMMA alter_resource_pool_classifier_action)*
+ Ctx.BodyPart();
+ const auto& node = core.GetAlt_sql_stmt_core53().GetRule_alter_resource_pool_classifier_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref5().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second;
+ std::map<TString, TDeferredAtom> kv;
+ std::set<TString> toReset;
+ if (!ParseResourcePoolClassifierSettings(kv, toReset, node.GetRule_alter_resource_pool_classifier_action6())) {
+ return false;
+ }
+
+ for (const auto& action : node.GetBlock7()) {
+ if (!ParseResourcePoolClassifierSettings(kv, toReset, action.GetRule_alter_resource_pool_classifier_action2())) {
+ return false;
+ }
+ }
+
+ AddStatementToBlocks(blocks, BuildAlterObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL_CLASSIFIER", std::move(kv), std::move(toReset), context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore54: {
+ // drop_resource_pool_classifier_stmt: DROP RESOURCE POOL CLASSIFIER name;
+ auto& node = core.GetAlt_sql_stmt_core54().GetRule_drop_resource_pool_classifier_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref5().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref5().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref5().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks, BuildDropObjectOperation(Ctx.Pos(), objectId, "RESOURCE_POOL_CLASSIFIER", false, {}, context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore55: {
+ // backup_stmt: BACKUP object_ref (INCREMENTAL)?;
+ auto& node = core.GetAlt_sql_stmt_core55().GetRule_backup_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref2().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref2().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ bool incremental = node.HasBlock3();
+
+ const TString& objectId = Id(node.GetRule_object_ref2().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks,
+ BuildBackup(
+ Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TBackupParameters{
+ .Incremental = incremental,
+ },
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::kAltSqlStmtCore56: {
+ // restore_stmt: RESTORE object_ref (AT STRING_VALUE)?;
+ auto& node = core.GetAlt_sql_stmt_core56().GetRule_restore_stmt1();
+ TObjectOperatorContext context(Ctx.Scoped);
+ if (node.GetRule_object_ref2().HasBlock1()) {
+ if (!ClusterExpr(node.GetRule_object_ref2().GetBlock1().GetRule_cluster_expr1(),
+ false, context.ServiceId, context.Cluster)) {
+ return false;
+ }
+ }
+
+ TString at;
+ if (node.HasBlock3()) {
+ const TString stringValue = Ctx.Token(node.GetBlock3().GetToken2());
+ const auto unescaped = StringContent(Ctx, Ctx.Pos(), stringValue);
+ if (!unescaped) {
+ return false;
+ }
+ at = unescaped->Content;
+ }
+
+ const TString& objectId = Id(node.GetRule_object_ref2().GetRule_id_or_at2(), *this).second;
+ AddStatementToBlocks(blocks,
+ BuildRestore(
+ Ctx.Pos(),
+ BuildTablePath(Ctx.GetPrefixPath(context.ServiceId, context.Cluster), objectId),
+ TRestoreParameters{
+ .At = at,
+ },
+ context));
+ break;
+ }
+ case TRule_sql_stmt_core::ALT_NOT_SET:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownStatement" + internalStatementName);
+ AltNotImplemented("sql_stmt_core", core);
+ return false;
+ }
+
+ Ctx.IncrementMonCounter("sql_features", internalStatementName);
+ return !Ctx.HasPendingErrors;
+}
+
+// Handles a DECLARE statement: validates it and registers the declared
+// parameter together with its type node.
+// Returns false once an error has been reported; a repeated declaration of the
+// same name only produces a warning and still returns true.
+bool TSqlQuery::DeclareStatement(const TRule_declare_stmt& stmt) {
+    // The optional DEFAULT literal is parsed first (so its own errors are
+    // reported), and a successfully parsed one is rejected right below.
+    TNodePtr defaultExpr;
+    if (stmt.HasBlock5()) {
+        TSqlExpression literalParser(Ctx, Mode);
+        auto parsedLiteral = literalParser.LiteralExpr(stmt.GetBlock5().GetRule_literal_value2());
+        if (!parsedLiteral) {
+            return false;
+        }
+        if (!parsedLiteral->Expr) {
+            Ctx.Error() << "Identifier is not expected here";
+            return false;
+        }
+        defaultExpr = parsedLiteral->Expr;
+    }
+    if (defaultExpr) {
+        Error() << "DEFAULT value not supported yet";
+        return false;
+    }
+
+    if (!Ctx.IsParseHeading()) {
+        Error() << "DECLARE statement should be in beginning of query, but it's possible to use PRAGMA or USE before it";
+        return false;
+    }
+
+    TString paramName;
+    if (!NamedNodeImpl(stmt.GetRule_bind_parameter2(), paramName, *this)) {
+        return false;
+    }
+    // Position is captured right after the name is parsed and reused for all
+    // diagnostics and registrations below.
+    const auto paramPos = Ctx.Pos();
+
+    const auto paramType = TypeNode(stmt.GetRule_type_name4());
+    if (!paramType) {
+        return false;
+    }
+
+    if (IsAnonymousName(paramName)) {
+        Ctx.Error(paramPos) << "Can not use anonymous name '" << paramName << "' in DECLARE statement";
+        return false;
+    }
+
+    if (Ctx.IsAlreadyDeclared(paramName)) {
+        // Duplicate declarations are not an error: warn and keep the first one.
+        Ctx.Warning(paramPos, TIssuesIds::YQL_DUPLICATE_DECLARE) << "Duplicate declaration of '" << paramName << "' will be ignored";
+        return true;
+    }
+
+    PushNamedAtom(paramPos, paramName);
+    Ctx.DeclareVariable(paramName, paramPos, paramType);
+    return true;
+}
+
+// Handles an EXPORT statement: registers every listed bind parameter as an
+// export. Accepted only in LIBRARY mode at the top level.
+// Returns false if the statement is misplaced, the bind list cannot be parsed,
+// or any export is refused by the context.
+bool TSqlQuery::ExportStatement(const TRule_export_stmt& stmt) {
+    const bool inTopLevelLibrary = Mode == NSQLTranslation::ESqlMode::LIBRARY && TopLevel;
+    if (!inTopLevelLibrary) {
+        Error() << "EXPORT statement should be used only in a library on the top level";
+        return false;
+    }
+
+    TVector<TSymbolNameWithPos> exportedNames;
+    if (!BindList(stmt.GetRule_bind_parameter_list2(), exportedNames)) {
+        return false;
+    }
+
+    // Stop at the first name the context refuses to export.
+    for (auto& exported : exportedNames) {
+        if (!Ctx.AddExport(exported.Pos, exported.Name)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Applies one ALTER TABLE action to `params`, dispatching on the grammar
+// alternative stored in `node`.
+// Returns false when the action is rejected here or its handler reports
+// failure, and true otherwise.
+bool TSqlQuery::AlterTableAction(const TRule_alter_table_action& node, TAlterTableParameters& params) {
+    if (params.RenameTo) {
+        // rename action is followed by some other actions
+        Error() << "RENAME TO can not be used together with another table action";
+        return false;
+    }
+
+    switch (node.Alt_case()) {
+        case TRule_alter_table_action::kAltAlterTableAction1:
+            // ADD COLUMN
+            return AlterTableAddColumn(node.GetAlt_alter_table_action1().GetRule_alter_table_add_column1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction2:
+            // DROP COLUMN
+            return AlterTableDropColumn(node.GetAlt_alter_table_action2().GetRule_alter_table_drop_column1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction3:
+            // ALTER COLUMN
+            return AlterTableAlterColumn(node.GetAlt_alter_table_action3().GetRule_alter_table_alter_column1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction4:
+            // ADD FAMILY
+            return AlterTableAddFamily(
+                node.GetAlt_alter_table_action4().GetRule_alter_table_add_column_family1().GetRule_family_entry2(),
+                params);
+
+        case TRule_alter_table_action::kAltAlterTableAction5:
+            // ALTER FAMILY
+            return AlterTableAlterFamily(node.GetAlt_alter_table_action5().GetRule_alter_table_alter_column_family1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction6:
+            // SET (uncompat)
+            return AlterTableSetTableSetting(
+                node.GetAlt_alter_table_action6().GetRule_alter_table_set_table_setting_uncompat1(),
+                params.TableSettings, params.TableType);
+
+        case TRule_alter_table_action::kAltAlterTableAction7:
+            // SET (compat)
+            return AlterTableSetTableSetting(
+                node.GetAlt_alter_table_action7().GetRule_alter_table_set_table_setting_compat1(),
+                params.TableSettings, params.TableType);
+
+        case TRule_alter_table_action::kAltAlterTableAction8:
+            // RESET
+            return AlterTableResetTableSetting(
+                node.GetAlt_alter_table_action8().GetRule_alter_table_reset_table_setting1(),
+                params.TableSettings, params.TableType);
+
+        case TRule_alter_table_action::kAltAlterTableAction9:
+            // ADD INDEX
+            return AlterTableAddIndex(node.GetAlt_alter_table_action9().GetRule_alter_table_add_index1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction10:
+            // DROP INDEX (handler result is not checked)
+            AlterTableDropIndex(node.GetAlt_alter_table_action10().GetRule_alter_table_drop_index1(), params);
+            return true;
+
+        case TRule_alter_table_action::kAltAlterTableAction11:
+            // RENAME TO
+            if (!params.IsEmpty()) {
+                // rename action follows some other actions
+                Error() << "RENAME TO can not be used together with another table action";
+                return false;
+            }
+            AlterTableRenameTo(node.GetAlt_alter_table_action11().GetRule_alter_table_rename_to1(), params);
+            return true;
+
+        case TRule_alter_table_action::kAltAlterTableAction12:
+            // ADD CHANGEFEED
+            return AlterTableAddChangefeed(node.GetAlt_alter_table_action12().GetRule_alter_table_add_changefeed1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction13:
+            // ALTER CHANGEFEED
+            return AlterTableAlterChangefeed(node.GetAlt_alter_table_action13().GetRule_alter_table_alter_changefeed1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction14:
+            // DROP CHANGEFEED (handler result is not checked)
+            AlterTableDropChangefeed(node.GetAlt_alter_table_action14().GetRule_alter_table_drop_changefeed1(), params);
+            return true;
+
+        case TRule_alter_table_action::kAltAlterTableAction15:
+            // RENAME INDEX TO
+            if (!params.IsEmpty()) {
+                // rename action follows some other actions
+                Error() << "RENAME INDEX TO can not be used together with another table action";
+                return false;
+            }
+            AlterTableRenameIndexTo(node.GetAlt_alter_table_action15().GetRule_alter_table_rename_index_to1(), params);
+            return true;
+
+        case TRule_alter_table_action::kAltAlterTableAction16:
+            // ALTER INDEX
+            return AlterTableAlterIndex(node.GetAlt_alter_table_action16().GetRule_alter_table_alter_index1(), params);
+
+        case TRule_alter_table_action::kAltAlterTableAction17:
+            // ALTER COLUMN id DROP NOT NULL
+            return AlterTableAlterColumnDropNotNull(
+                node.GetAlt_alter_table_action17().GetRule_alter_table_alter_column_drop_not_null1(), params);
+
+        case TRule_alter_table_action::ALT_NOT_SET:
+            AltNotImplemented("alter_table_action", node);
+            return false;
+    }
+
+    // Reached only when no case label above matched the alternative value.
+    return true;
+}
+
+ // Dispatches one ALTER EXTERNAL TABLE action to the matching handler, which
+ // accumulates its effect in `params`. Returns false when an error has been
+ // reported, a handler failed, or the alternative is not set; true otherwise.
+ bool TSqlQuery::AlterExternalTableAction(const TRule_alter_external_table_action& node, TAlterTableParameters& params) {
+     // A rename recorded by an earlier action must remain the only action.
+     if (params.RenameTo) {
+         Error() << "RENAME TO can not be used together with another table action";
+         return false;
+     }
+
+     using TAction = TRule_alter_external_table_action;
+     switch (node.Alt_case()) {
+         case TAction::kAltAlterExternalTableAction1:
+             // ADD COLUMN
+             return AlterTableAddColumn(
+                 node.GetAlt_alter_external_table_action1().GetRule_alter_table_add_column1(),
+                 params);
+
+         case TAction::kAltAlterExternalTableAction2:
+             // DROP COLUMN
+             return AlterTableDropColumn(
+                 node.GetAlt_alter_external_table_action2().GetRule_alter_table_drop_column1(),
+                 params);
+
+         case TAction::kAltAlterExternalTableAction3:
+             // SET (uncompat)
+             return AlterTableSetTableSetting(
+                 node.GetAlt_alter_external_table_action3().GetRule_alter_table_set_table_setting_uncompat1(),
+                 params.TableSettings,
+                 params.TableType);
+
+         case TAction::kAltAlterExternalTableAction4:
+             // SET (compat)
+             return AlterTableSetTableSetting(
+                 node.GetAlt_alter_external_table_action4().GetRule_alter_table_set_table_setting_compat1(),
+                 params.TableSettings,
+                 params.TableType);
+
+         case TAction::kAltAlterExternalTableAction5:
+             // RESET
+             return AlterTableResetTableSetting(
+                 node.GetAlt_alter_external_table_action5().GetRule_alter_table_reset_table_setting1(),
+                 params.TableSettings,
+                 params.TableType);
+
+         case TAction::ALT_NOT_SET:
+             AltNotImplemented("alter_external_table_action", node);
+             return false;
+     }
+     // Any value not listed above is treated as success, as before.
+     return true;
+ }
+
+ // Parses the column schema of an ADD COLUMN action and appends it to
+ // params.AddColumns. Returns false if the schema could not be built or if the
+ // column names more than one column family (an error is reported for the latter).
+ bool TSqlQuery::AlterTableAddColumn(const TRule_alter_table_add_column& node, TAlterTableParameters& params) {
+     auto schema = ColumnSchemaImpl(node.GetRule_column_schema3());
+     if (schema && schema->Families.size() <= 1) {
+         params.AddColumns.push_back(*schema);
+         return true;
+     }
+     if (schema) {
+         // The schema was parsed but lists several families.
+         Ctx.Error() << "Several column families for a single column are not yet supported";
+     }
+     return false;
+ }
+
+ // Records the column named by a DROP COLUMN action in params.DropColumns.
+ // Always returns true.
+ bool TSqlQuery::AlterTableDropColumn(const TRule_alter_table_drop_column& node, TAlterTableParameters& params) {
+     const TString columnName = Id(node.GetRule_an_id3(), *this);
+     params.DropColumns.push_back(columnName);
+     return true;
+ }
+
+ // Records an "alter column: set family" change in params.AlterColumns.
+ // The new entry carries the column name, the current context position and a
+ // single-element family list, tagged ETypeOfChange::SetFamily.
+ // Always returns true.
+ bool TSqlQuery::AlterTableAlterColumn(const TRule_alter_table_alter_column& node,
+     TAlterTableParameters& params)
+ {
+     // NOTE(review): the statement order is kept on purpose. The position is
+     // sampled after the column name is read and before the family name is read;
+     // presumably identifier parsing can move the context position - confirm
+     // before reordering.
+     const TString columnName = Id(node.GetRule_an_id3(), *this);
+     const TPosition position(Context().Pos());
+
+     const auto& relation = node.GetRule_family_relation5();
+     TVector<TIdentifier> targetFamilies;
+     targetFamilies.push_back(IdEx(relation.GetRule_an_id2(), *this));
+
+     params.AlterColumns.emplace_back(
+         position,
+         columnName,
+         nullptr,
+         false,
+         targetFamilies,
+         false,
+         nullptr,
+         TColumnSchema::ETypeOfChange::SetFamily);
+     return true;
+ }
+
+ // Builds a family entry from the parsed name and settings and, when the
+ // settings were filled successfully, appends it to params.AddColumnFamilies.
+ // Returns the result of FillFamilySettings.
+ bool TSqlQuery::AlterTableAddFamily(const TRule_family_entry& node, TAlterTableParameters& params) {
+     TFamilyEntry newFamily(IdEx(node.GetRule_an_id2(), *this));
+     const bool filled = FillFamilySettings(node.GetRule_family_settings3(), newFamily);
+     if (filled) {
+         params.AddColumnFamilies.push_back(newFamily);
+     }
+     return filled;
+ }
+
+ // Handles one "alter column family" action: sets a single setting
+ // (data, compression or compression_level; matched case-insensitively) on the
+ // entry for the named family in params.AlterColumnFamilies. Actions naming the
+ // same family within one statement share one entry, created on first use.
+ //
+ // Returns false and reports an error when:
+ //   - the setting was already set for this family in the same statement;
+ //   - the value has the wrong kind (string expected for data/compression,
+ //     integer expected for compression_level);
+ //   - the setting name is unknown.
+ bool TSqlQuery::AlterTableAlterFamily(const TRule_alter_table_alter_column_family& node,
+     TAlterTableParameters& params)
+ {
+     TFamilyEntry* entry = nullptr;
+     TIdentifier name = IdEx(node.GetRule_an_id3(), *this);
+     for (auto& family : params.AlterColumnFamilies) {
+         if (family.Name.Name == name.Name) {
+             entry = &family;
+             break;
+         }
+     }
+     if (!entry) {
+         entry = &params.AlterColumnFamilies.emplace_back(name);
+     }
+
+     TIdentifier settingName = IdEx(node.GetRule_an_id5(), *this);
+     const TRule_family_setting_value& value = node.GetRule_family_setting_value6();
+     // Lower-case the setting name once instead of once per comparison.
+     const TString loweredSetting = to_lower(settingName.Name);
+
+     if (loweredSetting == "data") {
+         if (entry->Data) {
+             Ctx.Error() << "Redefinition of 'data' setting for column family '" << name.Name
+                 << "' in one alter";
+             return false;
+         }
+         // Alternative 1 is the string token. Reading it when the parser filled
+         // the integer alternative would silently yield a default (empty) token.
+         if (!value.HasAlt_family_setting_value1()) {
+             Ctx.Error() << "DATA value should be a string literal";
+             return false;
+         }
+         const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1()));
+         entry->Data = BuildLiteralSmartString(Ctx, stringValue);
+     } else if (loweredSetting == "compression") {
+         if (entry->Compression) {
+             Ctx.Error() << "Redefinition of 'compression' setting for column family '" << name.Name
+                 << "' in one alter";
+             return false;
+         }
+         if (!value.HasAlt_family_setting_value1()) {
+             Ctx.Error() << "COMPRESSION value should be a string literal";
+             return false;
+         }
+         const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1()));
+         entry->Compression = BuildLiteralSmartString(Ctx, stringValue);
+     } else if (loweredSetting == "compression_level") {
+         if (entry->CompressionLevel) {
+             Ctx.Error() << "Redefinition of 'compression_level' setting for column family '" << name.Name << "' in one alter";
+             return false;
+         }
+         // Alternative 2 is the integer rule; reject a string value explicitly.
+         if (!value.HasAlt_family_setting_value2()) {
+             Ctx.Error() << "COMPRESSION_LEVEL value should be an integer";
+             return false;
+         }
+         entry->CompressionLevel = LiteralNumber(Ctx, value.GetAlt_family_setting_value2().GetRule_integer1());
+     } else {
+         Ctx.Error() << "Unknown table setting: " << settingName.Name;
+         return false;
+     }
+     return true;
+ }
+
+ // Single-setting SET form ("uncompat" grammar rule): stores one
+ // identifier/value pair into `tableSettings` via StoreTableSettingsEntry and
+ // returns its result. The trailing flag is passed as true, exactly as before;
+ // its meaning is defined by StoreTableSettingsEntry.
+ bool TSqlQuery::AlterTableSetTableSetting(
+     const TRule_alter_table_set_table_setting_uncompat& node, TTableSettings& tableSettings, ETableType tableType
+ ) {
+     const auto& settingValue = node.GetRule_table_setting_value3();
+     return StoreTableSettingsEntry(IdEx(node.GetRule_an_id2(), *this), settingValue, tableSettings, tableType, true);
+ }
+
+ // List SET form ("compat" grammar rule): stores the first setting entry and
+ // then every additional entry from the repeated block, in order, into
+ // `tableSettings`. Stops at the first entry that fails to store and returns
+ // false; returns true when all entries were stored.
+ bool TSqlQuery::AlterTableSetTableSetting(
+     const TRule_alter_table_set_table_setting_compat& node, TTableSettings& tableSettings, ETableType tableType
+ ) {
+     const auto storeOne = [this, &tableSettings, tableType](const TRule_alter_table_setting_entry& item) {
+         const auto& itemValue = item.GetRule_table_setting_value3();
+         return StoreTableSettingsEntry(IdEx(item.GetRule_an_id1(), *this), itemValue, tableSettings, tableType, true);
+     };
+
+     bool ok = storeOne(node.GetRule_alter_table_setting_entry3());
+     const auto& remaining = node.GetBlock4();
+     for (auto it = remaining.begin(); ok && it != remaining.end(); ++it) {
+         ok = storeOne(it->GetRule_alter_table_setting_entry2());
+     }
+     return ok;
+ }
+
+ // RESET form: resets the first named setting and then every additional named
+ // setting from the repeated block, in order, via ResetTableSettingsEntry.
+ // Stops at the first failure and returns false; returns true when every
+ // setting was reset.
+ bool TSqlQuery::AlterTableResetTableSetting(
+     const TRule_alter_table_reset_table_setting& node, TTableSettings& tableSettings, ETableType tableType
+ ) {
+     const auto resetOne = [this, &tableSettings, tableType](const TRule_an_id& settingId) {
+         return ResetTableSettingsEntry(IdEx(settingId, *this), tableSettings, tableType);
+     };
+
+     bool ok = resetOne(node.GetRule_an_id3());
+     const auto& remaining = node.GetBlock4();
+     for (auto it = remaining.begin(); ok && it != remaining.end(); ++it) {
+         ok = resetOne(it->GetRule_an_id2());
+     }
+     return ok;
+ }
+
+ // ADD INDEX: delegates to CreateTableIndex, which fills params.AddIndexes,
+ // and returns its success flag.
+ bool TSqlQuery::AlterTableAddIndex(const TRule_alter_table_add_index& node, TAlterTableParameters& params) {
+     const auto& indexRule = node.GetRule_table_index2();
+     return CreateTableIndex(indexRule, params.AddIndexes);
+ }
+
+ // DROP INDEX: appends the parsed index identifier to params.DropIndexes.
+ void TSqlQuery::AlterTableDropIndex(const TRule_alter_table_drop_index& node, TAlterTableParameters& params) {
+     auto indexId = IdEx(node.GetRule_an_id3(), *this);
+     params.DropIndexes.emplace_back(std::move(indexId));
+ }
+
+ // RENAME TO: stores the parsed target table identifier in params.RenameTo.
+ void TSqlQuery::AlterTableRenameTo(const TRule_alter_table_rename_to& node, TAlterTableParameters& params) {
+     auto newTableId = IdEx(node.GetRule_an_id_table3(), *this);
+     params.RenameTo = std::move(newTableId);
+ }
+
+ // RENAME INDEX TO: stores the (source, destination) identifier pair in
+ // params.RenameIndexTo. The source identifier is parsed before the destination.
+ void TSqlQuery::AlterTableRenameIndexTo(const TRule_alter_table_rename_index_to& node, TAlterTableParameters& params) {
+     auto oldIndexId = IdEx(node.GetRule_an_id3(), *this);
+     auto newIndexId = IdEx(node.GetRule_an_id5(), *this);
+     params.RenameIndexTo = std::make_pair(std::move(oldIndexId), std::move(newIndexId));
+ }
+
+ // ALTER INDEX: appends an entry for the named index to params.AlterIndexes and
+ // applies the requested SET / RESET action to that entry's TableSettings.
+ // The entry is appended before the action is processed, so it remains in the
+ // list even when the action fails. Returns false on failure or when the
+ // action alternative is not set.
+ bool TSqlQuery::AlterTableAlterIndex(const TRule_alter_table_alter_index& node, TAlterTableParameters& params) {
+     const auto indexId = IdEx(node.GetRule_an_id3(), *this);
+     params.AlterIndexes.emplace_back(indexId);
+     TTableSettings& settings = params.AlterIndexes.back().TableSettings;
+
+     using TIndexAction = TRule_alter_table_alter_index_action;
+     const auto& action = node.GetRule_alter_table_alter_index_action4();
+
+     switch (action.Alt_case()) {
+         case TIndexAction::kAltAlterTableAlterIndexAction1:
+             // SET setting value
+             return AlterTableSetTableSetting(
+                 action.GetAlt_alter_table_alter_index_action1().GetRule_alter_table_set_table_setting_uncompat1(),
+                 settings,
+                 params.TableType);
+
+         case TIndexAction::kAltAlterTableAlterIndexAction2:
+             // SET (setting1 = value1, ...)
+             return AlterTableSetTableSetting(
+                 action.GetAlt_alter_table_alter_index_action2().GetRule_alter_table_set_table_setting_compat1(),
+                 settings,
+                 params.TableType);
+
+         case TIndexAction::kAltAlterTableAlterIndexAction3:
+             // RESET (setting1, ...)
+             return AlterTableResetTableSetting(
+                 action.GetAlt_alter_table_alter_index_action3().GetRule_alter_table_reset_table_setting1(),
+                 settings,
+                 params.TableType);
+
+         case TIndexAction::ALT_NOT_SET:
+             AltNotImplemented("alter_table_alter_index_action", action);
+             return false;
+     }
+
+     // Any value not listed above is treated as success, as before.
+     return true;
+ }
+
+ // ALTER COLUMN ... DROP NOT NULL: records a change for the named column in
+ // params.AlterColumns, with an empty family list and the change type
+ // ETypeOfChange::DropNotNullConstraint. Always returns true.
+ bool TSqlQuery::AlterTableAlterColumnDropNotNull(const TRule_alter_table_alter_column_drop_not_null& node, TAlterTableParameters& params) {
+     // The position is sampled after the column name has been read, as before.
+     const TString columnName = Id(node.GetRule_an_id3(), *this);
+     const TPosition position(Context().Pos());
+     const TVector<TIdentifier> noFamilies;
+
+     params.AlterColumns.emplace_back(
+         position,
+         columnName,
+         nullptr,
+         false,
+         noFamilies,
+         false,
+         nullptr,
+         TColumnSchema::ETypeOfChange::DropNotNullConstraint);
+     return true;
+ }
+
+ // ADD CHANGEFEED: delegates to CreateChangefeed with a fresh expression
+ // translator; the result is collected in params.AddChangefeeds.
+ // Returns CreateChangefeed's success flag.
+ bool TSqlQuery::AlterTableAddChangefeed(const TRule_alter_table_add_changefeed& node, TAlterTableParameters& params) {
+     TSqlExpression expression(Ctx, Mode);
+     const auto& changefeedRule = node.GetRule_changefeed2();
+     return CreateChangefeed(changefeedRule, expression, params.AddChangefeeds);
+ }
+
+ // ALTER CHANGEFEED: appends an entry for the named changefeed to
+ // params.AlterChangefeeds and then either marks it disabled or parses new
+ // settings into it. The entry is appended before the action is processed.
+ // Returns false when settings parsing fails or the alternative is not set.
+ bool TSqlQuery::AlterTableAlterChangefeed(const TRule_alter_table_alter_changefeed& node, TAlterTableParameters& params) {
+     params.AlterChangefeeds.emplace_back(IdEx(node.GetRule_an_id3(), *this));
+     // Nothing else is appended below, so this reference stays valid.
+     auto& changefeed = params.AlterChangefeeds.back();
+
+     using TSettingsRule = TRule_changefeed_alter_settings;
+     const auto& alter = node.GetRule_changefeed_alter_settings4();
+
+     switch (alter.Alt_case()) {
+         case TSettingsRule::kAltChangefeedAlterSettings1:
+             // DISABLE
+             changefeed.Disable = true;
+             return true;
+
+         case TSettingsRule::kAltChangefeedAlterSettings2: {
+             // SET
+             TSqlExpression expression(Ctx, Mode);
+             return ChangefeedSettings(
+                 alter.GetAlt_changefeed_alter_settings2().GetRule_changefeed_settings3(),
+                 expression,
+                 changefeed.Settings,
+                 true);
+         }
+
+         case TSettingsRule::ALT_NOT_SET:
+             AltNotImplemented("changefeed_alter_settings", alter);
+             return false;
+     }
+
+     // Any value not listed above is treated as success, as before.
+     return true;
+ }
+
+ // DROP CHANGEFEED: appends the parsed changefeed identifier to
+ // params.DropChangefeeds.
+ void TSqlQuery::AlterTableDropChangefeed(const TRule_alter_table_drop_changefeed& node, TAlterTableParameters& params) {
+     auto changefeedId = IdEx(node.GetRule_an_id3(), *this);
+     params.DropChangefeeds.emplace_back(std::move(changefeedId));
+ }
+
+TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success) {
+ success = false;
+ const TString& prefix = OptIdPrefixAsStr(stmt.GetRule_opt_id_prefix_or_type2(), *this);
+ const TString& lowerPrefix = to_lower(prefix);
+ const TString pragma(Id(stmt.GetRule_an_id3(), *this));
+ TString normalizedPragma(pragma);
+ TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), normalizedPragma);
+ if (!normalizeError.Empty()) {
+ Error() << normalizeError->GetMessage();
+ Ctx.IncrementMonCounter("sql_errors", "NormalizePragmaError");
+ return {};
+ }
+
+ TVector<TDeferredAtom> values;
+ TVector<const TRule_pragma_value*> pragmaValues;
+ bool pragmaValueDefault = false;
+ if (stmt.GetBlock4().HasAlt1()) {
+ pragmaValues.push_back(&stmt.GetBlock4().GetAlt1().GetRule_pragma_value2());
+ }
+ else if (stmt.GetBlock4().HasAlt2()) {
+ pragmaValues.push_back(&stmt.GetBlock4().GetAlt2().GetRule_pragma_value2());
+ for (auto& additionalValue : stmt.GetBlock4().GetAlt2().GetBlock3()) {
+ pragmaValues.push_back(&additionalValue.GetRule_pragma_value2());
+ }
+ }
+
+ const bool withConfigure = prefix || normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "udf";
+ static const THashSet<TStringBuf> lexicalScopePragmas = {
+ "classicdivision",
+ "strictjoinkeytypes",
+ "disablestrictjoinkeytypes",
+ "checkedops",
+ "unicodeliterals",
+ "disableunicodeliterals",
+ "warnuntypedstringliterals",
+ "disablewarnuntypedstringliterals",
+ };
+ const bool hasLexicalScope = withConfigure || lexicalScopePragmas.contains(normalizedPragma);
+ const bool withFileAlias = normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "library" || normalizedPragma == "udf";
+ for (auto pragmaValue : pragmaValues) {
+ if (pragmaValue->HasAlt_pragma_value3()) {
+ auto value = Token(pragmaValue->GetAlt_pragma_value3().GetToken1());
+ auto parsed = StringContentOrIdContent(Ctx, Ctx.Pos(), value);
+ if (!parsed) {
+ return {};
+ }
+
+ TString prefix;
+ if (withFileAlias && (values.size() == 0)) {
+ prefix = Ctx.Settings.FileAliasPrefix;
+ }
+
+ values.push_back(TDeferredAtom(Ctx.Pos(), prefix + parsed->Content));
+ }
+ else if (pragmaValue->HasAlt_pragma_value2()
+ && pragmaValue->GetAlt_pragma_value2().GetRule_id1().HasAlt_id2()
+ && "default" == to_lower(Id(pragmaValue->GetAlt_pragma_value2().GetRule_id1(), *this)))
+ {
+ pragmaValueDefault = true;
+ }
+ else if (withConfigure && pragmaValue->HasAlt_pragma_value5()) {
+ TString bindName;
+ if (!NamedNodeImpl(pragmaValue->GetAlt_pragma_value5().GetRule_bind_parameter1(), bindName, *this)) {
+ return {};
+ }
+ auto namedNode = GetNamedNode(bindName);
+ if (!namedNode) {
+ return {};
+ }
+
+ TString prefix;
+ if (withFileAlias && (values.size() == 0)) {
+ prefix = Ctx.Settings.FileAliasPrefix;
+ }
+
+ TDeferredAtom atom;
+ MakeTableFromExpression(Ctx.Pos(), Ctx, namedNode, atom, prefix);
+ values.push_back(atom);
+ } else {
+ Error() << "Expected string" << (withConfigure ? ", named parameter" : "") << " or 'default' keyword as pragma value for pragma: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ }
+
+ if (prefix.empty()) {
+ if (!TopLevel && !hasLexicalScope) {
+ Error() << "This pragma '" << pragma << "' is not allowed to be used in actions or subqueries";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+
+ if (normalizedPragma == "refselect") {
+ Ctx.PragmaRefSelect = true;
+ Ctx.IncrementMonCounter("sql_pragma", "RefSelect");
+ } else if (normalizedPragma == "sampleselect") {
+ Ctx.PragmaSampleSelect = true;
+ Ctx.IncrementMonCounter("sql_pragma", "SampleSelect");
+ } else if (normalizedPragma == "allowdotinalias") {
+ Ctx.PragmaAllowDotInAlias = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AllowDotInAlias");
+ } else if (normalizedPragma == "udf") {
+ if ((values.size() != 1 && values.size() != 2) || pragmaValueDefault) {
+ Error() << "Expected file alias as pragma value";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ if (Ctx.Settings.FileAliasPrefix) {
+ if (values.size() == 1) {
+ values.emplace_back(TDeferredAtom(Ctx.Pos(), ""));
+ }
+
+ TString prefix;
+ if (!values[1].GetLiteral(prefix, Ctx)) {
+ Error() << "Expected literal UDF module prefix in views";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ values[1] = TDeferredAtom(Ctx.Pos(), Ctx.Settings.FileAliasPrefix + prefix);
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "udf");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "ImportUdfs", values, false);
+ } else if (normalizedPragma == "packageversion") {
+ if (values.size() != 2 || pragmaValueDefault) {
+ Error() << "Expected package name and version";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ ui32 version = 0;
+ TString versionString;
+ TString packageName;
+ if (!values[0].GetLiteral(packageName, Ctx) || !values[1].GetLiteral(versionString, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ if (!PackageVersionFromString(versionString, version)) {
+ Error() << "Unable to parse package version, possible values 0, 1, draft, release";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.SetPackageVersion(packageName, version);
+ Ctx.IncrementMonCounter("sql_pragma", "PackageVersion");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "SetPackageVersion", TVector<TDeferredAtom>{ values[0], TDeferredAtom(values[1].Build()->GetPos(), ToString(version)) }, false);
+ } else if (normalizedPragma == "file") {
+ if (values.size() < 2U || values.size() > 3U || pragmaValueDefault) {
+ Error() << "Expected file alias, url and optional token name as pragma values";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "file");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "AddFileByUrl", values, false);
+ } else if (normalizedPragma == "fileoption") {
+ if (values.size() < 3U) {
+ Error() << "Expected file alias, option key and value";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "FileOption");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "SetFileOption", values, false);
+ } else if (normalizedPragma == "folder") {
+ if (values.size() < 2U || values.size() > 3U || pragmaValueDefault) {
+ Error() << "Expected folder alias, url and optional token name as pragma values";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "folder");
+ success = true;
+ return BuildPragma(Ctx.Pos(), TString(ConfigProviderName), "AddFolderByUrl", values, false);
+ } else if (normalizedPragma == "library") {
+ if (values.size() < 1) {
+ Error() << "Expected non-empty file alias";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+ if (values.size() > 3) {
+ Error() << "Expected file alias and optional url and token name as pragma values";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+
+ TString alias;
+ if (!values.front().GetLiteral(alias, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+
+ TContext::TLibraryStuff library;
+ std::get<TPosition>(library) = values.front().Build()->GetPos();
+ if (values.size() > 1) {
+ auto& first = std::get<1U>(library);
+ first.emplace();
+ first->second = values[1].Build()->GetPos();
+ if (!values[1].GetLiteral(first->first, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+
+ TSet<TString> names;
+ SubstParameters(first->first, Nothing(), &names);
+ for (const auto& name : names) {
+ auto namedNode = GetNamedNode(name);
+ if (!namedNode) {
+ return{};
+ }
+ }
+ if (values.size() > 2) {
+ auto& second = std::get<2U>(library);
+ second.emplace();
+ second->second = values[2].Build()->GetPos();
+ if (!values[2].GetLiteral(second->first, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return{};
+ }
+ }
+ }
+
+ Ctx.Libraries[alias] = std::move(library);
+ Ctx.IncrementMonCounter("sql_pragma", "library");
+ } else if (normalizedPragma == "package") {
+ if (values.size() < 2U || values.size() > 3U) {
+ Error() << "Expected package name, url and optional token name as pragma values";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TString packageName;
+ if (!values.front().GetLiteral(packageName, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TContext::TPackageStuff package;
+ std::get<TPosition>(package) = values.front().Build()->GetPos();
+
+ auto fillLiteral = [&](auto& literal, size_t index) {
+ if (values.size() <= index) {
+ return true;
+ }
+
+ constexpr bool optional = std::is_base_of_v<
+ std::optional<TContext::TLiteralWithPosition>,
+ std::decay_t<decltype(literal)>
+ >;
+
+ TContext::TLiteralWithPosition* literalPtr;
+
+ if constexpr (optional) {
+ literal.emplace();
+ literalPtr = &*literal;
+ } else {
+ literalPtr = &literal;
+ }
+
+ literalPtr->second = values[index].Build()->GetPos();
+
+ if (!values[index].GetLiteral(literalPtr->first, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return false;
+ }
+
+ return true;
+ };
+
+ // fill url
+ auto& urlLiteral = std::get<1U>(package);
+ if (!fillLiteral(urlLiteral, 1U)) {
+ return {};
+ }
+
+ TSet<TString> names;
+ SubstParameters(urlLiteral.first, Nothing(), &names);
+ for (const auto& name : names) {
+ auto namedNode = GetNamedNode(name);
+ if (!namedNode) {
+ return {};
+ }
+ }
+
+ // fill token
+ if (!fillLiteral(std::get<2U>(package), 2U)) {
+ return {};
+ }
+
+ Ctx.Packages[packageName] = std::move(package);
+ Ctx.IncrementMonCounter("sql_pragma", "package");
+ } else if (normalizedPragma == "overridelibrary") {
+ if (values.size() != 1U) {
+ Error() << "Expected override library alias as pragma value";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TString alias;
+ if (!values.front().GetLiteral(alias, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TContext::TOverrideLibraryStuff overrideLibrary;
+ std::get<TPosition>(overrideLibrary) = values.front().Build()->GetPos();
+
+ Ctx.OverrideLibraries[alias] = std::move(overrideLibrary);
+ Ctx.IncrementMonCounter("sql_pragma", "overridelibrary");
+ } else if (normalizedPragma == "directread") {
+ Ctx.PragmaDirectRead = true;
+ Ctx.IncrementMonCounter("sql_pragma", "DirectRead");
+ } else if (normalizedPragma == "equijoin") {
+ Ctx.IncrementMonCounter("sql_pragma", "EquiJoin");
+ } else if (normalizedPragma == "autocommit") {
+ Ctx.PragmaAutoCommit = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AutoCommit");
+ } else if (normalizedPragma == "usetableprefixforeach") {
+ Ctx.PragmaUseTablePrefixForEach = true;
+ Ctx.IncrementMonCounter("sql_pragma", "UseTablePrefixForEach");
+ } else if (normalizedPragma == "tablepathprefix") {
+ TString value;
+ TMaybe<TString> arg;
+
+ if (values.size() == 1 || values.size() == 2) {
+ if (!values.front().GetLiteral(value, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ if (values.size() == 2) {
+ arg = value;
+ if (!values.back().GetLiteral(value, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ }
+
+ if (!Ctx.SetPathPrefix(value, arg)) {
+ return {};
+ }
+ } else {
+ Error() << "Expected path prefix or tuple of (Provider, PathPrefix) or"
+ << " (Cluster, PathPrefix) as pragma value";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "PathPrefix");
+ } else if (normalizedPragma == "groupbylimit") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaGroupByLimit)) {
+ Error() << "Expected unsigned integer literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "GroupByLimit");
+ } else if (normalizedPragma == "groupbycubelimit") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.PragmaGroupByCubeLimit)) {
+ Error() << "Expected unsigned integer literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "GroupByCubeLimit");
+ } else if (normalizedPragma == "simplecolumns") {
+ Ctx.SimpleColumns = true;
+ Ctx.IncrementMonCounter("sql_pragma", "SimpleColumns");
+ } else if (normalizedPragma == "disablesimplecolumns") {
+ Ctx.SimpleColumns = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableSimpleColumns");
+ } else if (normalizedPragma == "coalescejoinkeysonqualifiedall") {
+ Ctx.CoalesceJoinKeysOnQualifiedAll = true;
+ Ctx.IncrementMonCounter("sql_pragma", "CoalesceJoinKeysOnQualifiedAll");
+ } else if (normalizedPragma == "disablecoalescejoinkeysonqualifiedall") {
+ Ctx.CoalesceJoinKeysOnQualifiedAll = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableCoalesceJoinKeysOnQualifiedAll");
+ } else if (normalizedPragma == "resultrowslimit") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.ResultRowsLimit)) {
+ Error() << "Expected unsigned integer literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "ResultRowsLimit");
+ } else if (normalizedPragma == "resultsizelimit") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.ResultSizeLimit)) {
+ Error() << "Expected unsigned integer literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "ResultSizeLimit");
+ } else if (normalizedPragma == "warning") {
+ if (values.size() != 2U || values.front().Empty() || values.back().Empty()) {
+ Error() << "Expected arguments <action>, <issueId> for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TString action;
+ TString codePattern;
+ if (!values[0].GetLiteral(action, Ctx) || !values[1].GetLiteral(codePattern, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ TWarningRule rule;
+ TString parseError;
+ auto parseResult = TWarningRule::ParseFrom(codePattern, action, rule, parseError);
+ switch (parseResult) {
+ case TWarningRule::EParseResult::PARSE_OK:
+ break;
+ case TWarningRule::EParseResult::PARSE_PATTERN_FAIL:
+ case TWarningRule::EParseResult::PARSE_ACTION_FAIL:
+ Ctx.Error() << parseError;
+ return {};
+ default:
+ Y_ENSURE(false, "Unknown parse result");
+ }
+
+ Ctx.WarningPolicy.AddRule(rule);
+ if (rule.GetPattern() == "*" && rule.GetAction() == EWarningAction::ERROR) {
+ // Keep 'unused symbol' warning as warning unless explicitly set to error
+ Ctx.SetWarningPolicyFor(TIssuesIds::YQL_UNUSED_SYMBOL, EWarningAction::DEFAULT);
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "warning");
+ } else if (normalizedPragma == "greetings") {
+ if (values.size() > 1) {
+ Error() << "Multiple arguments are not expected for " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ if (values.empty()) {
+ values.emplace_back(TDeferredAtom(Ctx.Pos(), "Hello, world! And best wishes from the YQL Team!"));
+ }
+
+ TString arg;
+ if (!values.front().GetLiteral(arg, Ctx)) {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.Info(Ctx.Pos()) << arg;
+ } else if (normalizedPragma == "warningmsg") {
+ if (values.size() != 1 || !values[0].GetLiteral()) {
+ Error() << "Expected string literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_PRAGMA_WARNING_MSG) << *values[0].GetLiteral();
+ } else if (normalizedPragma == "errormsg") {
+ if (values.size() != 1 || !values[0].GetLiteral()) {
+ Error() << "Expected string literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.Error(Ctx.Pos()) << *values[0].GetLiteral();
+ } else if (normalizedPragma == "classicdivision") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.Scoped->PragmaClassicDivision)) {
+ Error() << "Expected boolean literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "ClassicDivision");
+ } else if (normalizedPragma == "checkedops") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.Scoped->PragmaCheckedOps)) {
+ Error() << "Expected boolean literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "CheckedOps");
+ } else if (normalizedPragma == "disableunordered") {
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_PRAGMA)
+ << "Use of deprecated DisableUnordered pragma. It will be dropped soon";
+ } else if (normalizedPragma == "pullupflatmapoverjoin") {
+ Ctx.PragmaPullUpFlatMapOverJoin = true;
+ Ctx.IncrementMonCounter("sql_pragma", "PullUpFlatMapOverJoin");
+ } else if (normalizedPragma == "disablepullupflatmapoverjoin") {
+ Ctx.PragmaPullUpFlatMapOverJoin = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisablePullUpFlatMapOverJoin");
+ } else if (normalizedPragma == "filterpushdownoverjoinoptionalside") {
+ Ctx.FilterPushdownOverJoinOptionalSide = true;
+ Ctx.IncrementMonCounter("sql_pragma", "FilterPushdownOverJoinOptionalSide");
+ } else if (normalizedPragma == "disablefilterpushdownoverjoinoptionalside") {
+ Ctx.FilterPushdownOverJoinOptionalSide = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableFilterPushdownOverJoinOptionalSide");
+ } else if (normalizedPragma == "rotatejointree") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.RotateJoinTree)) {
+ Error() << "Expected boolean literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ } else if (normalizedPragma == "allowunnamedcolumns") {
+ Ctx.WarnUnnamedColumns = false;
+ Ctx.IncrementMonCounter("sql_pragma", "AllowUnnamedColumns");
+ } else if (normalizedPragma == "warnunnamedcolumns") {
+ Ctx.WarnUnnamedColumns = true;
+ Ctx.IncrementMonCounter("sql_pragma", "WarnUnnamedColumns");
+ } else if (normalizedPragma == "discoverymode") {
+ Ctx.DiscoveryMode = true;
+ Ctx.IncrementMonCounter("sql_pragma", "DiscoveryMode");
+ } else if (normalizedPragma == "enablesystemcolumns") {
+ if (values.size() != 1 || !values[0].GetLiteral() || !TryFromString(*values[0].GetLiteral(), Ctx.EnableSystemColumns)) {
+ Error() << "Expected boolean literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "EnableSystemColumns");
+ } else if (normalizedPragma == "ansiinforemptyornullableitemscollections") {
+ Ctx.AnsiInForEmptyOrNullableItemsCollections = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiInForEmptyOrNullableItemsCollections");
+ } else if (normalizedPragma == "disableansiinforemptyornullableitemscollections") {
+ Ctx.AnsiInForEmptyOrNullableItemsCollections = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiInForEmptyOrNullableItemsCollections");
+ } else if (normalizedPragma == "dqengine" || normalizedPragma == "blockengine") {
+ Ctx.IncrementMonCounter("sql_pragma", "DqEngine");
+ if (values.size() != 1 || !values[0].GetLiteral()
+ || ! (*values[0].GetLiteral() == "disable" || *values[0].GetLiteral() == "auto" || *values[0].GetLiteral() == "force"))
+ {
+ Error() << "Expected `disable|auto|force' argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ const bool isDqEngine = normalizedPragma == "dqengine";
+ auto& enable = isDqEngine ? Ctx.DqEngineEnable : Ctx.BlockEngineEnable;
+ auto& force = isDqEngine ? Ctx.DqEngineForce : Ctx.BlockEngineForce;
+ if (*values[0].GetLiteral() == "disable") {
+ enable = false;
+ force = false;
+ } else if (*values[0].GetLiteral() == "force") {
+ enable = true;
+ force = true;
+ } else if (*values[0].GetLiteral() == "auto") {
+ enable = true;
+ force = false;
+ }
+ } else if (normalizedPragma == "ansirankfornullablekeys") {
+ Ctx.AnsiRankForNullableKeys = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiRankForNullableKeys");
+ } else if (normalizedPragma == "disableansirankfornullablekeys") {
+ Ctx.AnsiRankForNullableKeys = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiRankForNullableKeys");
+ } else if (normalizedPragma == "ansiorderbylimitinunionall") {
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiOrderByLimitInUnionAll");
+ } else if (normalizedPragma == "disableansiorderbylimitinunionall") {
+ Error() << "DisableAnsiOrderByLimitInUnionAll pragma is deprecated and no longer supported";
+ Ctx.IncrementMonCounter("sql_errors", "DeprecatedPragma");
+ return {};
+ } else if (normalizedPragma == "ansioptionalas") {
+ Ctx.AnsiOptionalAs = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiOptionalAs");
+ } else if (normalizedPragma == "disableansioptionalas") {
+ Ctx.AnsiOptionalAs = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiOptionalAs");
+ } else if (normalizedPragma == "warnonansialiasshadowing") {
+ Ctx.WarnOnAnsiAliasShadowing = true;
+ Ctx.IncrementMonCounter("sql_pragma", "WarnOnAnsiAliasShadowing");
+ } else if (normalizedPragma == "disablewarnonansialiasshadowing") {
+ Ctx.WarnOnAnsiAliasShadowing = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableWarnOnAnsiAliasShadowing");
+ } else if (normalizedPragma == "regexusere2") {
+ if (values.size() != 1U || !values.front().GetLiteral() || !TryFromString(*values.front().GetLiteral(), Ctx.PragmaRegexUseRe2)) {
+ Error() << "Expected 'true' or 'false' for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "RegexUseRe2");
+ } else if (normalizedPragma == "jsonqueryreturnsjsondocument") {
+ Ctx.JsonQueryReturnsJsonDocument = true;
+ Ctx.IncrementMonCounter("sql_pragma", "JsonQueryReturnsJsonDocument");
+ } else if (normalizedPragma == "disablejsonqueryreturnsjsondocument") {
+ Ctx.JsonQueryReturnsJsonDocument = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableJsonQueryReturnsJsonDocument");
+ } else if (normalizedPragma == "orderedcolumns") {
+ Ctx.OrderedColumns = true;
+ Ctx.IncrementMonCounter("sql_pragma", "OrderedColumns");
+ } else if (normalizedPragma == "disableorderedcolumns") {
+ Ctx.OrderedColumns = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableOrderedColumns");
+ } else if (normalizedPragma == "positionalunionall") {
+ Ctx.PositionalUnionAll = true;
+ // PositionalUnionAll implies OrderedColumns
+ Ctx.OrderedColumns = true;
+ Ctx.IncrementMonCounter("sql_pragma", "PositionalUnionAll");
+ } else if (normalizedPragma == "pqreadby") {
+ if (values.size() != 1 || !values[0].GetLiteral()) {
+ Error() << "Expected string literal as a single argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ // special guard to raise error on situation:
+ // use cluster1;
+ // pragma PqReadPqBy="cluster2";
+ const TString* currentClusterLiteral = Ctx.Scoped->CurrCluster.GetLiteral();
+ if (currentClusterLiteral && *values[0].GetLiteral() != "dq" && *currentClusterLiteral != *values[0].GetLiteral()) {
+ Error() << "Cluster in PqReadPqBy pragma differs from cluster specified in USE statement: " << *values[0].GetLiteral() << " != " << *currentClusterLiteral;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ Ctx.PqReadByRtmrCluster = *values[0].GetLiteral();
+ Ctx.IncrementMonCounter("sql_pragma", "PqReadBy");
+ } else if (normalizedPragma == "bogousstaringroupbyoverjoin") {
+ Ctx.BogousStarInGroupByOverJoin = true;
+ Ctx.IncrementMonCounter("sql_pragma", "BogousStarInGroupByOverJoin");
+ } else if (normalizedPragma == "strictjoinkeytypes") {
+ Ctx.Scoped->StrictJoinKeyTypes = true;
+ Ctx.IncrementMonCounter("sql_pragma", "StrictJoinKeyTypes");
+ } else if (normalizedPragma == "disablestrictjoinkeytypes") {
+ Ctx.Scoped->StrictJoinKeyTypes = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableStrictJoinKeyTypes");
+ } else if (normalizedPragma == "unicodeliterals") {
+ Ctx.Scoped->UnicodeLiterals = true;
+ Ctx.IncrementMonCounter("sql_pragma", "UnicodeLiterals");
+ } else if (normalizedPragma == "disableunicodeliterals") {
+ Ctx.Scoped->UnicodeLiterals = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableUnicodeLiterals");
+ } else if (normalizedPragma == "warnuntypedstringliterals") {
+ Ctx.Scoped->WarnUntypedStringLiterals = true;
+ Ctx.IncrementMonCounter("sql_pragma", "WarnUntypedStringLiterals");
+ } else if (normalizedPragma == "disablewarnuntypedstringliterals") {
+ Ctx.Scoped->WarnUntypedStringLiterals = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableWarnUntypedStringLiterals");
+ } else if (normalizedPragma == "unorderedsubqueries") {
+ Ctx.UnorderedSubqueries = true;
+ Ctx.IncrementMonCounter("sql_pragma", "UnorderedSubqueries");
+ } else if (normalizedPragma == "disableunorderedsubqueries") {
+ Ctx.UnorderedSubqueries = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableUnorderedSubqueries");
+ } else if (normalizedPragma == "datawatermarks") {
+ if (values.size() != 1 || !values[0].GetLiteral()
+ || ! (*values[0].GetLiteral() == "enable" || *values[0].GetLiteral() == "disable"))
+ {
+ Error() << "Expected `enable|disable' argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ if (*values[0].GetLiteral() == "enable") {
+ Ctx.PragmaDataWatermarks = true;
+ } else if (*values[0].GetLiteral() == "disable") {
+ Ctx.PragmaDataWatermarks = false;
+ }
+
+ Ctx.IncrementMonCounter("sql_pragma", "DataWatermarks");
+ } else if (normalizedPragma == "flexibletypes") {
+ Ctx.FlexibleTypes = true;
+ Ctx.IncrementMonCounter("sql_pragma", "FlexibleTypes");
+ } else if (normalizedPragma == "disableflexibletypes") {
+ Ctx.FlexibleTypes = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableFlexibleTypes");
+ } else if (normalizedPragma == "ansicurrentrow") {
+ Ctx.AnsiCurrentRow = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiCurrentRow");
+ } else if (normalizedPragma == "disableansicurrentrow") {
+ Ctx.AnsiCurrentRow = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiCurrentRow");
+ } else if (normalizedPragma == "emitaggapply") {
+ Ctx.EmitAggApply = true;
+ Ctx.IncrementMonCounter("sql_pragma", "EmitAggApply");
+ } else if (normalizedPragma == "disableemitaggapply") {
+ Ctx.EmitAggApply = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableEmitAggApply");
+ } else if (normalizedPragma == "useblocks") {
+ Ctx.UseBlocks = true;
+ Ctx.IncrementMonCounter("sql_pragma", "UseBlocks");
+ } else if (normalizedPragma == "disableuseblocks") {
+ Ctx.UseBlocks = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableUseBlocks");
+ } else if (normalizedPragma == "ansilike") {
+ Ctx.AnsiLike = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiLike");
+ } else if (normalizedPragma == "disableansilike") {
+ Ctx.AnsiLike = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiLike");
+ } else if (normalizedPragma == "unorderedresult") {
+ Ctx.UnorderedResult = true;
+ Ctx.IncrementMonCounter("sql_pragma", "UnorderedResult");
+ } else if (normalizedPragma == "disableunorderedresult") {
+ Ctx.UnorderedResult = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableUnorderedResult");
+ } else if (normalizedPragma == "featurer010") {
+ if (values.size() == 1 && values[0].GetLiteral()) {
+ const auto& value = *values[0].GetLiteral();
+ if ("prototype" == value)
+ Ctx.FeatureR010 = true;
+ else {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ }
+ else {
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ Ctx.IncrementMonCounter("sql_pragma", "FeatureR010");
+ } else if (normalizedPragma == "compactgroupby") {
+ Ctx.CompactGroupBy = true;
+ Ctx.IncrementMonCounter("sql_pragma", "CompactGroupBy");
+ } else if (normalizedPragma == "disablecompactgroupby") {
+ Ctx.CompactGroupBy = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableCompactGroupBy");
+ } else if (normalizedPragma == "costbasedoptimizer") {
+ Ctx.IncrementMonCounter("sql_pragma", "CostBasedOptimizer");
+ if (values.size() == 1 && values[0].GetLiteral()) {
+ Ctx.CostBasedOptimizer = to_lower(*values[0].GetLiteral());
+ }
+ if (values.size() != 1 || !values[0].GetLiteral()
+ || ! (Ctx.CostBasedOptimizer == "disable" || Ctx.CostBasedOptimizer == "pg" || Ctx.CostBasedOptimizer == "native"))
+ {
+ Error() << "Expected `disable|pg|native' argument for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ } else if (normalizedPragma == "compactnamedexprs") {
+ Ctx.CompactNamedExprs = true;
+ Ctx.IncrementMonCounter("sql_pragma", "CompactNamedExprs");
+ } else if (normalizedPragma == "disablecompactnamedexprs") {
+ Ctx.CompactNamedExprs = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableCompactNamedExprs");
+ } else if (normalizedPragma == "validateunusedexprs") {
+ Ctx.ValidateUnusedExprs = true;
+ Ctx.IncrementMonCounter("sql_pragma", "ValidateUnusedExprs");
+ } else if (normalizedPragma == "disablevalidateunusedexprs") {
+ Ctx.ValidateUnusedExprs = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableValidateUnusedExprs");
+ } else if (normalizedPragma == "ansiimplicitcrossjoin") {
+ Ctx.AnsiImplicitCrossJoin = true;
+ Ctx.IncrementMonCounter("sql_pragma", "AnsiImplicitCrossJoin");
+ } else if (normalizedPragma == "disableansiimplicitcrossjoin") {
+ Ctx.AnsiImplicitCrossJoin = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableAnsiImplicitCrossJoin");
+ } else if (normalizedPragma == "distinctoverwindow") {
+ Ctx.DistinctOverWindow = true;
+ Ctx.IncrementMonCounter("sql_pragma", "DistinctOverWindow");
+ } else if (normalizedPragma == "disabledistinctoverwindow") {
+ Ctx.DistinctOverWindow = false;
+ Ctx.IncrementMonCounter("sql_pragma", "DisableDistinctOverWindow");
+ } else {
+ Error() << "Unknown pragma: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "UnknownPragma");
+ return {};
+ }
+ } else {
+ if (lowerPrefix == "yson") {
+ if (!TopLevel) {
+ Error() << "This pragma '" << pragma << "' is not allowed to be used in actions";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ if (normalizedPragma == "fast") {
+ Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_PRAGMA)
+ << "Use of deprecated yson.Fast pragma. It will be dropped soon";
+ success = true;
+ return {};
+ } else if (normalizedPragma == "autoconvert") {
+ Ctx.PragmaYsonAutoConvert = true;
+ success = true;
+ return {};
+ } else if (normalizedPragma == "strict") {
+ if (values.size() == 0U) {
+ Ctx.PragmaYsonStrict = true;
+ success = true;
+ } else if (values.size() == 1U && values.front().GetLiteral() && TryFromString(*values.front().GetLiteral(), Ctx.PragmaYsonStrict)) {
+ success = true;
+ } else {
+ Error() << "Expected 'true', 'false' or no parameter for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ }
+ return {};
+ } else if (normalizedPragma == "disablestrict") {
+ if (values.size() == 0U) {
+ Ctx.PragmaYsonStrict = false;
+ success = true;
+ return {};
+ }
+ bool pragmaYsonDisableStrict;
+ if (values.size() == 1U && values.front().GetLiteral() && TryFromString(*values.front().GetLiteral(), pragmaYsonDisableStrict)) {
+ Ctx.PragmaYsonStrict = !pragmaYsonDisableStrict;
+ success = true;
+ } else {
+ Error() << "Expected 'true', 'false' or no parameter for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ }
+ return {};
+ } else if (normalizedPragma == "casttostring" || normalizedPragma == "disablecasttostring") {
+ const bool allow = normalizedPragma == "casttostring";
+ if (values.size() == 0U) {
+ Ctx.YsonCastToString = allow;
+ success = true;
+ return {};
+ }
+ bool pragmaYsonCastToString;
+ if (values.size() == 1U && values.front().GetLiteral() && TryFromString(*values.front().GetLiteral(), pragmaYsonCastToString)) {
+ Ctx.PragmaYsonStrict = allow ? pragmaYsonCastToString : !pragmaYsonCastToString;
+ success = true;
+ } else {
+ Error() << "Expected 'true', 'false' or no parameter for: " << pragma;
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ }
+ return {};
+ } else {
+ Error() << "Unknown pragma: '" << pragma << "'";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+
+ } else if (std::find(Providers.cbegin(), Providers.cend(), lowerPrefix) == Providers.cend()) {
+ if (!Ctx.HasCluster(prefix)) {
+ Error() << "Unknown pragma prefix: " << prefix << ", please use cluster name or one of provider " <<
+ JoinRange(", ", Providers.cbegin(), Providers.cend());
+ Ctx.IncrementMonCounter("sql_errors", "UnknownPragma");
+ return {};
+ }
+ }
+
+ if (normalizedPragma != "flags" && normalizedPragma != "packageversion") {
+ if (values.size() > 1) {
+ Error() << "Expected at most one value in the pragma";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ } else {
+ if (pragmaValueDefault || values.size() < 1) {
+ Error() << "Expected at least one value in the pragma";
+ Ctx.IncrementMonCounter("sql_errors", "BadPragmaValue");
+ return {};
+ }
+ }
+
+ success = true;
+ Ctx.IncrementMonCounter("sql_pragma", pragma);
+ return BuildPragma(Ctx.Pos(), lowerPrefix, normalizedPragma, values, pragmaValueDefault);
+ }
+ success = true;
+ return {};
+}
+
+ // Translates a DELETE statement into a query node.
+ //
+ // Two statement shapes are handled:
+ //   * an optional filter expression  -> BuildDelete over a filtered table source;
+ //   * a "DELETE ON" values source    -> BuildWriteColumns in DeleteOn mode.
+ // An optional RETURNING list is forwarded to the builder as `options`.
+ // Only tables served by the Kikimr provider are accepted.
+ // Returns nullptr when the table reference, the filter or the values source
+ // cannot be translated.
+ TNodePtr TSqlQuery::Build(const TRule_delete_stmt& stmt) {
+     TTableRef table;
+     if (!SimpleTableRefImpl(stmt.GetRule_simple_table_ref3(), table)) {
+         return nullptr;
+     }
+
+     if (!(table.Service == KikimrProviderName)) {
+         Ctx.Error(GetPos(stmt.GetToken1())) << "DELETE is unsupported for " << table.Service;
+         return nullptr;
+     }
+
+     // The source is created before the rest of the statement is parsed,
+     // so it captures the position current at this point.
+     TSourcePtr tableSource = BuildTableSource(Ctx.Pos(), table);
+
+     TNodePtr options = nullptr;
+     if (stmt.HasBlock5()) {
+         auto returning = ReturningList(stmt.GetBlock5().GetRule_returning_columns_list1());
+         options = returning->Y(returning);
+     }
+
+     if (stmt.HasBlock4()) {
+         const auto& tail = stmt.GetBlock4();
+         const auto tailKind = tail.Alt_case();
+
+         if (tailKind == TRule_delete_stmt_TBlock4::ALT_NOT_SET) {
+             return nullptr;
+         }
+
+         if (tailKind == TRule_delete_stmt_TBlock4::kAlt2) {
+             // DELETE ... ON <values>: rows to delete come from a values source.
+             auto rows = TSqlIntoValues(Ctx, Mode).Build(tail.GetAlt2().GetRule_into_values_source2(), "DELETE ON");
+             if (!rows) {
+                 return nullptr;
+             }
+             return BuildWriteColumns(Ctx.Pos(), Ctx.Scoped, table, EWriteColumnMode::DeleteOn, std::move(rows), options);
+         }
+
+         if (tailKind == TRule_delete_stmt_TBlock4::kAlt1) {
+             // Filter expression: column references are allowed while it is parsed.
+             TColumnRefScope columnScope(Ctx, EColumnRefState::Allow);
+             TSqlExpression exprBuilder(Ctx, Mode);
+             auto predicate = exprBuilder.Build(tail.GetAlt1().GetRule_expr2());
+             if (!predicate) {
+                 return nullptr;
+             }
+             tableSource->AddFilter(Ctx, predicate);
+         }
+     }
+
+     return BuildDelete(Ctx.Pos(), Ctx.Scoped, table, std::move(tableSource), options);
+ }
+
+ // Translates an UPDATE statement into a query node.
+ //
+ // Two statement shapes are handled:
+ //   * SET clause with an optional filter -> BuildUpdateColumns;
+ //   * "UPDATE ON" values source          -> BuildWriteColumns in UpdateOn mode.
+ // An optional RETURNING list is forwarded to the builder as `options`.
+ // Only tables served by the Kikimr provider are accepted.
+ // Returns nullptr when any part of the statement cannot be translated.
+ TNodePtr TSqlQuery::Build(const TRule_update_stmt& stmt) {
+     TTableRef table;
+     if (!SimpleTableRefImpl(stmt.GetRule_simple_table_ref2(), table)) {
+         return nullptr;
+     }
+
+     const bool isKikimr = table.Service == KikimrProviderName;
+
+     if (!isKikimr) {
+         Ctx.Error(GetPos(stmt.GetToken1())) << "UPDATE is unsupported for " << table.Service;
+         return nullptr;
+     }
+
+     TNodePtr options = nullptr;
+     if (stmt.HasBlock4()) {
+         options = ReturningList(stmt.GetBlock4().GetRule_returning_columns_list1());
+         options = options->Y(options);
+     }
+
+     switch (stmt.GetBlock3().Alt_case()) {
+         case TRule_update_stmt_TBlock3::kAlt1: {
+             const auto& alt = stmt.GetBlock3().GetAlt1();
+             TSourcePtr values = Build(alt.GetRule_set_clause_choice2());
+             if (!values) {
+                 // The SET clause failed to translate; do not hand a null
+                 // source to BuildUpdateColumns (mirrors the UPDATE ON branch).
+                 return nullptr;
+             }
+             auto source = BuildTableSource(Ctx.Pos(), table);
+
+             if (alt.HasBlock3()) {
+                 // Column references are allowed while the filter is parsed.
+                 TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+                 TSqlExpression sqlExpr(Ctx, Mode);
+                 auto whereExpr = sqlExpr.Build(alt.GetBlock3().GetRule_expr2());
+                 if (!whereExpr) {
+                     return nullptr;
+                 }
+                 source->AddFilter(Ctx, whereExpr);
+             }
+
+             return BuildUpdateColumns(Ctx.Pos(), Ctx.Scoped, table, std::move(values), std::move(source), options);
+         }
+
+         case TRule_update_stmt_TBlock3::kAlt2: {
+             const auto& alt = stmt.GetBlock3().GetAlt2();
+
+             auto values = TSqlIntoValues(Ctx, Mode).Build(alt.GetRule_into_values_source2(), "UPDATE ON");
+             if (!values) {
+                 return nullptr;
+             }
+
+             return BuildWriteColumns(Ctx.Pos(), Ctx.Scoped, table, EWriteColumnMode::UpdateOn, std::move(values), options);
+         }
+
+         case TRule_update_stmt_TBlock3::ALT_NOT_SET:
+             return nullptr;
+     }
+
+     // Every enumerator returns above; this keeps the function well-defined
+     // should Alt_case() ever yield a value outside the enumeration.
+     return nullptr;
+ }
+
+ // Dispatches a SET clause to the builder matching its alternative:
+ // a list of single-column assignments or a multiple-column assignment.
+ // Reports "not implemented" and returns nullptr when no alternative is set.
+ TSourcePtr TSqlQuery::Build(const TRule_set_clause_choice& stmt) {
+     const auto choice = stmt.Alt_case();
+
+     if (choice == TRule_set_clause_choice::kAltSetClauseChoice1) {
+         const auto& clauseList = stmt.GetAlt_set_clause_choice1().GetRule_set_clause_list1();
+         return Build(clauseList);
+     }
+
+     if (choice == TRule_set_clause_choice::kAltSetClauseChoice2) {
+         const auto& assignment = stmt.GetAlt_set_clause_choice2().GetRule_multiple_column_assignment1();
+         return Build(assignment);
+     }
+
+     // TRule_set_clause_choice::ALT_NOT_SET
+     AltNotImplemented("set_clause_choice", stmt);
+     return nullptr;
+ }
+
+ // Parses one `target = expr` SET clause.
+ // Appends the target column name to `targetList` and the translated
+ // expression to `values`. The column name is appended even when the
+ // expression fails to translate. Returns false on expression failure.
+ bool TSqlQuery::FillSetClause(const TRule_set_clause& node, TVector<TString>& targetList, TVector<TNodePtr>& values) {
+     const auto& target = node.GetRule_set_target1();
+     targetList.push_back(ColumnNameAsSingleStr(*this, target.GetRule_column_name1()));
+
+     // Column references are allowed inside the assigned expression.
+     TColumnRefScope columnScope(Ctx, EColumnRefState::Allow);
+     TSqlExpression exprBuilder(Ctx, Mode);
+     if (Expr(exprBuilder, values, node.GetRule_expr3())) {
+         return true;
+     }
+     return false;
+ }
+
+ // Builds the update-values source for a comma-separated list of
+ // `target = expr` clauses. The position reported for the resulting node is
+ // the one current before any clause is parsed.
+ // Returns nullptr as soon as one clause fails to translate.
+ TSourcePtr TSqlQuery::Build(const TRule_set_clause_list& stmt) {
+     const TPosition startPos(Ctx.Pos());
+     TVector<TString> columns;
+     TVector<TNodePtr> exprs;
+
+     // First clause is mandatory, the rest come as repeated blocks.
+     bool parsed = FillSetClause(stmt.GetRule_set_clause1(), columns, exprs);
+     if (parsed) {
+         for (const auto& tail : stmt.GetBlock2()) {
+             if (!FillSetClause(tail.GetRule_set_clause2(), columns, exprs)) {
+                 parsed = false;
+                 break;
+             }
+         }
+     }
+     if (!parsed) {
+         return nullptr;
+     }
+
+     Y_DEBUG_ABORT_UNLESS(columns.size() == exprs.size());
+     return BuildUpdateValues(startPos, columns, exprs);
+ }
+
+ // Builds the values source for a multiple-column assignment,
+ // i.e. a target column list assigned from a simple values source.
+ //
+ // The values source is either an expression list (-> BuildUpdateValues)
+ // or a SELECT statement (-> BuildWriteValues). Returns nullptr when the
+ // right-hand side cannot be translated or no alternative is set.
+ TSourcePtr TSqlQuery::Build(const TRule_multiple_column_assignment& stmt) {
+     TVector<TString> targetList;
+     FillTargetList(*this, stmt.GetRule_set_target_list1(), targetList);
+     // Bind by reference: a plain `auto` here would deep-copy the AST subtree.
+     const auto& simpleValuesNode = stmt.GetRule_simple_values_source4();
+     const TPosition pos(Ctx.Pos());
+     switch (simpleValuesNode.Alt_case()) {
+         case TRule_simple_values_source::kAltSimpleValuesSource1: {
+             TVector<TNodePtr> values;
+             TSqlExpression sqlExpr(Ctx, Mode);
+             if (!ExprList(sqlExpr, values, simpleValuesNode.GetAlt_simple_values_source1().GetRule_expr_list1())) {
+                 return nullptr;
+             }
+             return BuildUpdateValues(pos, targetList, values);
+         }
+         case TRule_simple_values_source::kAltSimpleValuesSource2: {
+             TSqlSelect select(Ctx, Mode);
+             TPosition selectPos;
+             auto source = select.Build(simpleValuesNode.GetAlt_simple_values_source2().GetRule_select_stmt1(), selectPos);
+             if (!source) {
+                 return nullptr;
+             }
+             return BuildWriteValues(pos, "UPDATE", targetList, std::move(source));
+         }
+         case TRule_simple_values_source::ALT_NOT_SET:
+             Ctx.IncrementMonCounter("sql_errors", "UnknownSimpleValuesSourceAlt");
+             AltNotImplemented("simple_values_source", simpleValuesNode);
+             return nullptr;
+     }
+
+     // Every enumerator returns above; this keeps the function well-defined
+     // should Alt_case() ever yield a value outside the enumeration.
+     return nullptr;
+ }
+
+ // Translates a whole parsed SQL query into a query node.
+ //
+ // Steps:
+ //   1. In QUERY mode, declares every externally supplied named expression
+ //      from Ctx.Settings.DeclaredNamedExprs as a weak variable.
+ //   2. Translates each statement of the statement list into `blocks`.
+ //   3. In SUBQUERY / LIMITED_VIEW mode, requires exactly one select-like
+ //      statement with nothing following it.
+ //   4. Appends a cluster commit when end-of-query commit is requested and
+ //      auto-commit is off.
+ // Returns nullptr on the first error.
+ TNodePtr TSqlQuery::Build(const TSQLv1ParserAST& ast) {
+     if (Mode == NSQLTranslation::ESqlMode::QUERY) {
+         // inject externally declared named expressions
+         // (iterate by reference: both name and type are strings)
+         for (const auto& [name, type] : Ctx.Settings.DeclaredNamedExprs) {
+             if (name.empty()) {
+                 Error() << "Empty names for externally declared expressions are not allowed";
+                 return nullptr;
+             }
+             TString varName = "$" + name;
+             if (IsAnonymousName(varName)) {
+                 Error() << "Externally declared name '" << name << "' is anonymous";
+                 return nullptr;
+             }
+
+             // Parsed here only to validate the type string early.
+             auto parsed = ParseType(type, *Ctx.Pool, Ctx.Issues, Ctx.Pos());
+             if (!parsed) {
+                 Error() << "Failed to parse type for externally declared name '" << name << "'";
+                 return nullptr;
+             }
+
+             TNodePtr typeNode = BuildBuiltinFunc(Ctx, Ctx.Pos(), "ParseType", { BuildLiteralRawString(Ctx.Pos(), type) });
+             PushNamedAtom(Ctx.Pos(), varName);
+             // no duplicates are possible at this stage
+             bool isWeak = true;
+             Ctx.DeclareVariable(varName, {}, typeNode, isWeak);
+             // avoid 'Symbol is not used' warning for externally declared expression
+             YQL_ENSURE(GetNamedNode(varName));
+         }
+     }
+
+     const auto& query = ast.GetRule_sql_query();
+     TVector<TNodePtr> blocks;
+     Ctx.PushCurrentBlocks(&blocks);
+     Y_DEFER {
+         Ctx.PopCurrentBlocks();
+     };
+     if (query.Alt_case() == TRule_sql_query::kAltSqlQuery1) {
+         const auto& statements = query.GetAlt_sql_query1().GetRule_sql_stmt_list1();
+         if (!Statement(blocks, statements.GetRule_sql_stmt2().GetRule_sql_stmt_core2())) {
+             return nullptr;
+         }
+         // Iterate by reference: copying a block would deep-copy the
+         // statement's whole AST subtree.
+         for (const auto& block : statements.GetBlock3()) {
+             if (!Statement(blocks, block.GetRule_sql_stmt2().GetRule_sql_stmt_core2())) {
+                 return nullptr;
+             }
+         }
+     }
+
+     // Count select-like statements and detect anything that follows one.
+     ui32 topLevelSelects = 0;
+     bool hasTailOps = false;
+     for (const auto& block : blocks) {
+         if (block->SubqueryAlias()) {
+             continue;
+         }
+
+         if (block->HasSelectResult()) {
+             ++topLevelSelects;
+         } else if (topLevelSelects) {
+             hasTailOps = true;
+         }
+     }
+
+     if ((Mode == NSQLTranslation::ESqlMode::SUBQUERY || Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) && (topLevelSelects != 1 || hasTailOps)) {
+         Error() << "Strictly one select/process/reduce statement is expected at the end of "
+             << (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW ? "view" : "subquery");
+         return nullptr;
+     }
+
+     if (!Ctx.PragmaAutoCommit && Ctx.Settings.EndOfQueryCommit && IsQueryMode(Mode)) {
+         AddStatementToBlocks(blocks, BuildCommitClusters(Ctx.Pos()));
+     }
+
+     auto result = BuildQuery(Ctx.Pos(), blocks, true, Ctx.Scoped);
+     WarnUnusedNodes();
+     return result;
+ }
+
+ // Translates an explicit sequence of statement cores into a query node.
+ //
+ // Follows the same steps as the TSQLv1ParserAST overload (external named
+ // expressions in QUERY mode, per-statement translation, the single-select
+ // check for SUBQUERY / LIMITED_VIEW, optional end-of-query commit), except
+ // that it does not call WarnUnusedNodes() afterwards.
+ // NOTE(review): confirm the missing WarnUnusedNodes() call is intentional.
+ // Returns nullptr on the first error.
+ TNodePtr TSqlQuery::Build(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& statements) {
+     if (Mode == NSQLTranslation::ESqlMode::QUERY) {
+         // inject externally declared named expressions
+         // (iterate by reference: both name and type are strings)
+         for (const auto& [name, type] : Ctx.Settings.DeclaredNamedExprs) {
+             if (name.empty()) {
+                 Error() << "Empty names for externally declared expressions are not allowed";
+                 return nullptr;
+             }
+             TString varName = "$" + name;
+             if (IsAnonymousName(varName)) {
+                 Error() << "Externally declared name '" << name << "' is anonymous";
+                 return nullptr;
+             }
+
+             // Parsed here only to validate the type string early.
+             auto parsed = ParseType(type, *Ctx.Pool, Ctx.Issues, Ctx.Pos());
+             if (!parsed) {
+                 Error() << "Failed to parse type for externally declared name '" << name << "'";
+                 return nullptr;
+             }
+
+             TNodePtr typeNode = BuildBuiltinFunc(Ctx, Ctx.Pos(), "ParseType", { BuildLiteralRawString(Ctx.Pos(), type) });
+             PushNamedAtom(Ctx.Pos(), varName);
+             // no duplicates are possible at this stage
+             bool isWeak = true;
+             Ctx.DeclareVariable(varName, {}, typeNode, isWeak);
+             // avoid 'Symbol is not used' warning for externally declared expression
+             YQL_ENSURE(GetNamedNode(varName));
+         }
+     }
+
+     TVector<TNodePtr> blocks;
+     Ctx.PushCurrentBlocks(&blocks);
+     Y_DEFER {
+         Ctx.PopCurrentBlocks();
+     };
+     for (const auto& statement : statements) {
+         if (!Statement(blocks, statement)) {
+             return nullptr;
+         }
+     }
+
+     // Count select-like statements and detect anything that follows one.
+     ui32 topLevelSelects = 0;
+     bool hasTailOps = false;
+     for (const auto& block : blocks) {
+         if (block->SubqueryAlias()) {
+             continue;
+         }
+
+         if (block->HasSelectResult()) {
+             ++topLevelSelects;
+         } else if (topLevelSelects) {
+             hasTailOps = true;
+         }
+     }
+
+     if ((Mode == NSQLTranslation::ESqlMode::SUBQUERY || Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) && (topLevelSelects != 1 || hasTailOps)) {
+         Error() << "Strictly one select/process/reduce statement is expected at the end of "
+             << (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW ? "view" : "subquery");
+         return nullptr;
+     }
+
+     if (!Ctx.PragmaAutoCommit && Ctx.Settings.EndOfQueryCommit && IsQueryMode(Mode)) {
+         AddStatementToBlocks(blocks, BuildCommitClusters(Ctx.Pos()));
+     }
+
+     auto result = BuildQuery(Ctx.Pos(), blocks, true, Ctx.Scoped);
+     return result;
+ }
+ namespace {
+
+     // Describes one column schema as a set of feature atoms in `result`:
+     //   "NAME"     - the column name,
+     //   "TYPE"     - the simple type name,
+     //   "NOT_NULL" - "true", written only when the column is not nullable.
+     // All atoms carry the position `pos`.
+     // Returns false when the constraints cannot be obtained, when the type
+     // is a bind or not the second `type_name` alternative, or when the type
+     // name comes out empty. `result` is left untouched in those cases.
+     static bool BuildColumnFeatures(std::map<TString, TDeferredAtom>& result, const TRule_column_schema& columnSchema, const NYql::TPosition& pos, TSqlTranslation& translation) {
+         const TString name(Id(columnSchema.GetRule_an_id_schema1(), translation));
+
+         const auto columnConstraints = ColumnConstraints(columnSchema, translation);
+         if (!columnConstraints) {
+             return false;
+         }
+
+         TString typeName;
+         const auto& typeOrBind = columnSchema.GetRule_type_name_or_bind2();
+         switch (typeOrBind.Alt_case()) {
+             case TRule_type_name_or_bind::kAltTypeNameOrBind1: {
+                 const auto& parsedType = typeOrBind.GetAlt_type_name_or_bind1().GetRule_type_name1();
+                 if (parsedType.Alt_case() != TRule_type_name::kAltTypeName2) {
+                     return false;
+                 }
+                 // NOTE(review): Alt2 of the block is read without checking
+                 // which alternative of the block is actually set - confirm
+                 // that other alternatives cannot reach this point.
+                 const auto& simpleType = parsedType.GetAlt_type_name2().GetBlock1().GetAlt2().GetRule_type_name_simple1();
+                 typeName = Id(simpleType.GetRule_an_id_pure1(), translation);
+                 if (typeName.empty()) {
+                     return false;
+                 }
+                 break;
+             }
+             case TRule_type_name_or_bind::kAltTypeNameOrBind2:
+                 return false;
+             case TRule_type_name_or_bind::ALT_NOT_SET:
+                 Y_ABORT("You should change implementation according to grammar changes");
+         }
+
+         result["NAME"] = TDeferredAtom(pos, name);
+         YQL_ENSURE(typeName, "Unknown column type");
+         result["TYPE"] = TDeferredAtom(pos, typeName);
+         if (!columnConstraints->Nullable) {
+             result["NOT_NULL"] = TDeferredAtom(pos, "true");
+         }
+         return true;
+     }
+ }
+
+ // Converts one ALTER TABLESTORE action into feature atoms in `result`.
+ //   * ADD COLUMN  -> column features plus "ACTION" = "NEW_COLUMN";
+ //   * DROP COLUMN -> "NAME" plus "ACTION" = "DROP_COLUMN".
+ // Returns false when the column description or name cannot be obtained.
+ bool TSqlQuery::ParseTableStoreFeatures(std::map<TString, TDeferredAtom> & result, const TRule_alter_table_store_action & actions) {
+     switch (actions.Alt_case()) {
+         case TRule_alter_table_store_action::kAltAlterTableStoreAction1: {
+             // ADD COLUMN
+             const auto& addColumn = actions.GetAlt_alter_table_store_action1().GetRule_alter_table_add_column1();
+             const bool described = BuildColumnFeatures(result, addColumn.GetRule_column_schema3(), Ctx.Pos(), *this);
+             if (!described) {
+                 return false;
+             }
+             result["ACTION"] = TDeferredAtom(Ctx.Pos(), "NEW_COLUMN");
+             return true;
+         }
+         case TRule_alter_table_store_action::kAltAlterTableStoreAction2: {
+             // DROP COLUMN
+             const auto& dropColumn = actions.GetAlt_alter_table_store_action2().GetRule_alter_table_drop_column1();
+             const TString droppedName = Id(dropColumn.GetRule_an_id3(), *this);
+             if (!droppedName) {
+                 return false;
+             }
+             result["NAME"] = TDeferredAtom(Ctx.Pos(), droppedName);
+             result["ACTION"] = TDeferredAtom(Ctx.Pos(), "DROP_COLUMN");
+             return true;
+         }
+         case TRule_alter_table_store_action::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+     return true;
+ }
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_query.h b/yql/essentials/sql/v1/sql_query.h
new file mode 100644
index 00000000000..99e1a9c4efd
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_query.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include "sql_translation.h"
+
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+#include <util/string/split.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+ // Translator of top-level SQL v1 statements (queries, DML, ALTER TABLE
+ // actions, pragmas) into query nodes.
+ class TSqlQuery: public TSqlTranslation {
+ public:
+     // `topLevel` is stored as-is in TopLevel; some pragmas are rejected when
+     // it is false.
+     TSqlQuery(TContext& ctx, NSQLTranslation::ESqlMode mode, bool topLevel)
+         : TSqlTranslation(ctx, mode)
+         , TopLevel(topLevel)
+     {
+     }
+
+     // Build a query node from a whole parsed query or from a list of
+     // statement cores. Both return nullptr on error.
+     TNodePtr Build(const TSQLv1ParserAST& ast);
+     TNodePtr Build(const std::vector<::NSQLv1Generated::TRule_sql_stmt_core>& ast);
+
+     bool Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& core);
+ private:
+     bool DeclareStatement(const TRule_declare_stmt& stmt);
+     bool ExportStatement(const TRule_export_stmt& stmt);
+     bool AlterTableAction(const TRule_alter_table_action& node, TAlterTableParameters& params);
+     bool AlterExternalTableAction(const TRule_alter_external_table_action& node, TAlterTableParameters& params);
+     bool AlterTableAddColumn(const TRule_alter_table_add_column& node, TAlterTableParameters& params);
+     bool AlterTableDropColumn(const TRule_alter_table_drop_column& node, TAlterTableParameters& params);
+     bool AlterTableAlterColumn(const TRule_alter_table_alter_column& node, TAlterTableParameters& params);
+     bool AlterTableAddFamily(const TRule_family_entry& node, TAlterTableParameters& params);
+     bool AlterTableAlterFamily(const TRule_alter_table_alter_column_family& node, TAlterTableParameters& params);
+     bool AlterTableSetTableSetting(const TRule_alter_table_set_table_setting_uncompat& node, TTableSettings& tableSettings, ETableType tableType);
+     bool AlterTableSetTableSetting(const TRule_alter_table_set_table_setting_compat& node, TTableSettings& tableSettings, ETableType tableType);
+     bool AlterTableResetTableSetting(const TRule_alter_table_reset_table_setting& node, TTableSettings& tableSettings, ETableType tableType);
+     bool AlterTableAddIndex(const TRule_alter_table_add_index& node, TAlterTableParameters& params);
+     void AlterTableDropIndex(const TRule_alter_table_drop_index& node, TAlterTableParameters& params);
+     void AlterTableRenameTo(const TRule_alter_table_rename_to& node, TAlterTableParameters& params);
+     bool AlterTableAddChangefeed(const TRule_alter_table_add_changefeed& node, TAlterTableParameters& params);
+     bool AlterTableAlterChangefeed(const TRule_alter_table_alter_changefeed& node, TAlterTableParameters& params);
+     void AlterTableDropChangefeed(const TRule_alter_table_drop_changefeed& node, TAlterTableParameters& params);
+     void AlterTableRenameIndexTo(const TRule_alter_table_rename_index_to& node, TAlterTableParameters& params);
+     bool AlterTableAlterIndex(const TRule_alter_table_alter_index& node, TAlterTableParameters& params);
+     // Sets `success` to report the outcome; the returned node may be empty
+     // even on success.
+     TNodePtr PragmaStatement(const TRule_pragma_stmt& stmt, bool& success);
+     void AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node);
+     bool ParseTableStoreFeatures(std::map<TString, TDeferredAtom> & result, const TRule_alter_table_store_action & actions);
+     bool AlterTableAlterColumnDropNotNull(const TRule_alter_table_alter_column_drop_not_null& node, TAlterTableParameters& params);
+
+     TNodePtr Build(const TRule_delete_stmt& stmt);
+
+     TNodePtr Build(const TRule_update_stmt& stmt);
+     TSourcePtr Build(const TRule_set_clause_choice& stmt);
+     bool FillSetClause(const TRule_set_clause& node, TVector<TString>& targetList, TVector<TNodePtr>& values);
+     TSourcePtr Build(const TRule_set_clause_list& stmt);
+     TSourcePtr Build(const TRule_multiple_column_assignment& stmt);
+
+     // Derives two statement names from the alternative description of `node`.
+     //
+     // The description is split on '_' and its last part is dropped. Each
+     // remaining part has its first character upper-cased and is concatenated
+     // into `internalStatementName`; the fully upper-cased parts, joined with
+     // single spaces, go into `humanStatementName`. Both outputs are cleared
+     // first. With the non-ANTLR4 parser only the text after the first ": "
+     // of the description is used.
+     //
+     // An unexpected description (no ": " separator, or fewer than two parts)
+     // still trips the debug assertions, but no longer causes undefined
+     // behaviour in release builds.
+     template<class TNode>
+     void ParseStatementName(const TNode& node, TString& internalStatementName, TString& humanStatementName) {
+         internalStatementName.clear();
+         humanStatementName.clear();
+         const auto& descr = AltDescription(node);
+         TVector<TString> parts;
+         if (!Ctx.Settings.Antlr4Parser) {
+             const auto pos = descr.find(": ");
+             Y_DEBUG_ABORT_UNLESS(pos != TString::npos);
+             if (pos != TString::npos) {
+                 Split(TString(descr.begin() + pos + 2, descr.end()), "_", parts);
+             } else {
+                 // No prefix to strip: fall back to the whole description
+                 // instead of forming an iterator past the end of the string.
+                 Split(descr, "_", parts);
+             }
+         } else {
+             Split(descr, "_", parts);
+         }
+         Y_DEBUG_ABORT_UNLESS(parts.size() > 1);
+         if (!parts.empty()) {
+             // Drop the trailing part of the description.
+             parts.pop_back();
+         }
+         for (auto& part: parts) {
+             // Capitalize only the first character for the internal name.
+             part.to_upper(0, 1);
+             internalStatementName += part;
+             if (!humanStatementName.empty()) {
+                 humanStatementName += ' ';
+             }
+             humanStatementName += to_upper(part);
+         }
+     }
+
+     const bool TopLevel;
+ };
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_select.cpp b/yql/essentials/sql/v1/sql_select.cpp
new file mode 100644
index 00000000000..4a06f8e51b1
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_select.cpp
@@ -0,0 +1,1470 @@
+#include "sql_select.h"
+#include "sql_call_expr.h"
+#include "sql_expression.h"
+#include "sql_group_by.h"
+#include "sql_values.h"
+#include "sql_match_recognize.h"
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+namespace {
+
+// Returns true when every sort specification in `container` orders by an expression
+// that reports a column name (vacuously true for an empty container).
+bool IsColumnsOnly(const TVector<TSortSpecificationPtr>& container) {
+    bool columnsOnly = true;
+    for (auto it = container.begin(); columnsOnly && it != container.end(); ++it) {
+        columnsOnly = (*it)->OrderExpr->GetColumnName() != nullptr;
+    }
+    return columnsOnly;
+}
+
+// Translates the SQL hints attached to the token at `pos` into join link settings.
+//
+// Recognized hints (case-insensitive): "merge", "streamlookup", "map", "grace" select a
+// join strategy; "compact" sets the Compact flag. Any other hint only produces a
+// YQL_UNUSED_HINT warning and is otherwise ignored.
+//
+// `linkSettings` is reset before processing. Returns false (after reporting an error)
+// when more than one strategy hint is present, whether duplicated or conflicting.
+bool CollectJoinLinkSettings(TPosition pos, TJoinLinkSettings& linkSettings, TContext& ctx) {
+    linkSettings = {};
+    auto hints = ctx.PullHintForToken(pos);
+    for (const auto& hint: hints) {
+        const auto canonizedName = to_lower(hint.Name);
+        auto newStrategy = TJoinLinkSettings::EStrategy::Default;
+        if (canonizedName == "merge") {
+            newStrategy = TJoinLinkSettings::EStrategy::SortedMerge;
+        } else if (canonizedName == "streamlookup") {
+            newStrategy = TJoinLinkSettings::EStrategy::StreamLookup;
+        } else if (canonizedName == "map") {
+            newStrategy = TJoinLinkSettings::EStrategy::ForceMap;
+        } else if (canonizedName == "grace") {
+            newStrategy = TJoinLinkSettings::EStrategy::ForceGrace;
+        } else if (canonizedName == "compact") {
+            linkSettings.Compact = true;
+            continue;
+        } else {
+            ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Unsupported join hint: " << hint.Name;
+            // An unsupported hint carries no strategy. Without this `continue` it would be
+            // compared as Default against an already chosen strategy and be misreported as
+            // "Conflicting join strategy hints".
+            continue;
+        }
+
+        if (TJoinLinkSettings::EStrategy::Default == linkSettings.Strategy) {
+            linkSettings.Strategy = newStrategy;
+        } else if (newStrategy == linkSettings.Strategy) {
+            ctx.Error() << "Duplicate join strategy hint";
+            return false;
+        } else {
+            ctx.Error() << "Conflicting join strategy hints";
+            return false;
+        }
+    }
+    return true;
+}
+
+} // namespace
+
+// Configures one join link, described by `block`, on the join object held by `join`.
+//
+// Determines the join kind from the join_op rule, collects the hints attached to the join
+// token, builds the optional ON/USING key expression and hands everything to SetupJoin().
+// Reports an error and returns false for: implicit cross join when AnsiImplicitCrossJoin is
+// off, NATURAL join, OUTER after anything but LEFT/RIGHT/FULL, a cross join with a
+// constraint or with ANY, and a non-cross join without a constraint.
+//
+// `anyPos` is the position of the ANY keyword that applies to this link, if any.
+bool TSqlSelect::JoinOp(ISource* join, const TRule_join_source::TBlock3& block, TMaybe<TPosition> anyPos) {
+    // block: (join_op (ANY)? flatten_source join_constraint?)
+    // join_op:
+    //    COMMA
+    //  | (NATURAL)? ((LEFT (ONLY | SEMI)? | RIGHT (ONLY | SEMI)? | EXCLUSION | FULL)? (OUTER)? | INNER | CROSS) JOIN
+    //;
+    const auto& node = block.GetRule_join_op1();
+    TString joinOp("Inner");
+    TJoinLinkSettings linkSettings;
+    switch (node.Alt_case()) {
+        case TRule_join_op::kAltJoinOp1: {
+            joinOp = "Cross";
+            if (!Ctx.AnsiImplicitCrossJoin) {
+                Error() << "Cartesian product of tables is disabled. Please use "
+                    "explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin";
+                return false;
+            }
+            // Bound by reference: the parse tree outlives this call, no copy is needed.
+            const auto& alt = node.GetAlt_join_op1();
+            if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken1()), linkSettings, Ctx)) {
+                return false;
+            }
+            Ctx.IncrementMonCounter("sql_join_operations", "CartesianProduct");
+            break;
+        }
+        case TRule_join_op::kAltJoinOp2: {
+            const auto& alt = node.GetAlt_join_op2();
+            if (alt.HasBlock1()) {
+                Ctx.IncrementMonCounter("sql_join_operations", "Natural");
+                Error() << "Natural join is not implemented yet";
+                return false;
+            }
+            if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken3()), linkSettings, Ctx)) {
+                return false;
+            }
+            const auto& kind = alt.GetBlock2();
+            switch (kind.Alt_case()) {
+                case TRule_join_op::TAlt2::TBlock2::kAlt1: {
+                    const auto& sided = kind.GetAlt1();
+                    if (sided.HasBlock1()) {
+                        // Named `side` so it no longer shadows the `block` parameter.
+                        const auto& side = sided.GetBlock1();
+                        switch (side.Alt_case()) {
+                            case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt1:
+                                // left
+                                joinOp = Token(side.GetAlt1().GetToken1());
+                                if (side.GetAlt1().HasBlock2()) {
+                                    joinOp += " " + Token(side.GetAlt1().GetBlock2().GetToken1());
+                                }
+                                break;
+                            case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt2:
+                                // right
+                                joinOp = Token(side.GetAlt2().GetToken1());
+                                if (side.GetAlt2().HasBlock2()) {
+                                    joinOp += " " + Token(side.GetAlt2().GetBlock2().GetToken1());
+                                }
+
+                                break;
+                            case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt3:
+                                // exclusion
+                                joinOp = Token(side.GetAlt3().GetToken1());
+                                break;
+                            case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt4:
+                                // full
+                                joinOp = Token(side.GetAlt4().GetToken1());
+                                break;
+                            case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::ALT_NOT_SET:
+                                Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
+                                AltNotImplemented("join_op", node);
+                                return false;
+                        }
+                    }
+                    if (sided.HasBlock2()) {
+                        // OUTER was written: it is only valid right after LEFT, RIGHT or FULL.
+                        TString normalizedOp = sided.HasBlock1() ? joinOp : "";
+                        normalizedOp.to_upper();
+                        if (!(normalizedOp == "LEFT" || normalizedOp == "RIGHT" || normalizedOp == "FULL")) {
+                            // Result is unused; the call is kept from the original code,
+                            // presumably to move the current position to OUTER before Error() - confirm.
+                            Token(sided.GetBlock2().GetToken1());
+                            Error() << "Invalid join type: " << normalizedOp << (normalizedOp.empty() ? "" : " ") << "OUTER JOIN. "
+                                    << "OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL";
+                            Ctx.IncrementMonCounter("sql_errors", "BadJoinType");
+                            return false;
+                        }
+                    }
+                    break;
+                }
+                case TRule_join_op::TAlt2::TBlock2::kAlt2:
+                    joinOp = Token(kind.GetAlt2().GetToken1());
+                    break;
+                case TRule_join_op::TAlt2::TBlock2::kAlt3:
+                    joinOp = Token(kind.GetAlt3().GetToken1());
+                    break;
+                case TRule_join_op::TAlt2::TBlock2::ALT_NOT_SET:
+                    Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
+                    AltNotImplemented("join_op", node);
+                    return false;
+            }
+            Ctx.IncrementMonCounter("sql_features", "Join");
+            Ctx.IncrementMonCounter("sql_join_operations", joinOp);
+            break;
+        }
+        case TRule_join_op::ALT_NOT_SET:
+            Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation2");
+            AltNotImplemented("join_op", node);
+            return false;
+    }
+    joinOp = NormalizeJoinOp(joinOp);
+    if (linkSettings.Strategy != TJoinLinkSettings::EStrategy::Default && joinOp == "Cross") {
+        Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_UNUSED_HINT) << "Non-default join strategy will not be used for CROSS JOIN";
+        linkSettings.Strategy = TJoinLinkSettings::EStrategy::Default;
+    }
+
+    TNodePtr joinKeyExpr;
+    if (block.HasBlock4()) {
+        if (joinOp == "Cross") {
+            Error() << "Cross join should not have ON or USING expression";
+            Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
+            return false;
+        }
+
+        joinKeyExpr = JoinExpr(join, block.GetBlock4().GetRule_join_constraint1());
+        if (!joinKeyExpr) {
+            Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
+            return false;
+        }
+    }
+    else {
+        if (joinOp != "Cross") {
+            Error() << "Expected ON or USING expression";
+            Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
+            return false;
+        }
+    }
+
+    if (joinOp == "Cross" && anyPos) {
+        Ctx.Error(*anyPos) << "ANY should not be used with Cross JOIN";
+        Ctx.IncrementMonCounter("sql_errors", "BadJoinAny");
+        return false;
+    }
+
+    Y_DEBUG_ABORT_UNLESS(join->GetJoin());
+    join->GetJoin()->SetupJoin(joinOp, joinKeyExpr, linkSettings);
+
+    return true;
+}
+
+// Builds the join key expression for a join_constraint rule.
+//
+// The first alternative builds its expression rule with column references allowed; the
+// second collects a column name list and lets the join object build the keys from it.
+// Returns nullptr when the names cannot be parsed or the alternative is not set; the
+// expression alternative returns whatever the expression builder produced.
+TNodePtr TSqlSelect::JoinExpr(ISource* join, const TRule_join_constraint& node) {
+    switch (node.Alt_case()) {
+        case TRule_join_constraint::kAltJoinConstraint1: {
+            auto& alt = node.GetAlt_join_constraint1();
+            Token(alt.GetToken1());
+            TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+            TSqlExpression expr(Ctx, Mode);
+            return expr.Build(alt.GetRule_expr2());
+        }
+        case TRule_join_constraint::kAltJoinConstraint2: {
+            auto& alt = node.GetAlt_join_constraint2();
+            Token(alt.GetToken1());
+            TVector<TDeferredAtom> names;
+            if (!PureColumnOrNamedListStr(alt.GetRule_pure_column_or_named_list2(), *this, names)) {
+                return nullptr;
+            }
+
+            Y_DEBUG_ABORT_UNLESS(join->GetJoin());
+            return join->GetJoin()->BuildJoinKeys(Ctx, names);
+        }
+        case TRule_join_constraint::ALT_NOT_SET:
+            Ctx.IncrementMonCounter("sql_errors", "UnknownJoinConstraint");
+            AltNotImplemented("join_constraint", node);
+            break;
+    }
+    return nullptr;
+}
+
+// Parses the argument of FLATTEN BY into plain columns and computed expressions.
+//
+// Both output vectors are cleared first. An item that is a column without a source, or
+// whose source equals `sourceLabel`, goes to `flattenByColumns` as is. Any other item must
+// carry a label: a column with that label is appended to `flattenByColumns` and the
+// expression itself to `flattenByExprs`. Returns false after reporting an error.
+bool TSqlSelect::FlattenByArg(const TString& sourceLabel, TVector<TNodePtr>& flattenByColumns, TVector<TNodePtr>& flattenByExprs,
+                              const TRule_flatten_by_arg& node)
+{
+    // flatten_by_arg:
+    //     named_column
+    //  |  LPAREN named_expr_list COMMA? RPAREN
+    // ;
+
+    flattenByColumns.clear();
+    flattenByExprs.clear();
+
+    switch (node.Alt_case()) {
+        case TRule_flatten_by_arg::kAltFlattenByArg1: {
+            TVector<TNodePtr> parsed;
+            if (!NamedColumn(parsed, node.GetAlt_flatten_by_arg1().GetRule_named_column1())) {
+                return false;
+            }
+            YQL_ENSURE(parsed.size() == 1);
+            auto& column = parsed.back();
+            auto columnNamePtr = column->GetColumnName();
+            YQL_ENSURE(columnNamePtr && *columnNamePtr);
+
+            auto sourcePtr = column->GetSourceName();
+            const bool hasForeignSource = sourcePtr && !sourcePtr->empty() && *sourcePtr != sourceLabel;
+            if (hasForeignSource) {
+                // select * from T as s flatten by x.y as z
+                if (!column->GetLabel()) {
+                    Ctx.Error(column->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed";
+                    return false;
+                }
+                flattenByColumns.emplace_back(BuildColumn(column->GetPos(), column->GetLabel()));
+
+                TVector<INode::TIdPart> accessPath;
+                accessPath.push_back(BuildColumn(column->GetPos()));
+                accessPath.push_back(*sourcePtr);
+                accessPath.push_back(*columnNamePtr);
+                auto access = BuildAccess(column->GetPos(), accessPath, false);
+                access->SetLabel(column->GetLabel());
+                flattenByExprs.emplace_back(std::move(access));
+            } else {
+                // select * from T flatten by x
+                // select * from T as s flatten by x
+                // select * from T as s flatten by s.x
+                flattenByColumns.emplace_back(std::move(column));
+            }
+
+            break;
+        }
+        case TRule_flatten_by_arg::kAltFlattenByArg2: {
+            TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+            TVector<TNodePtr> items;
+            if (!NamedExprList(node.GetAlt_flatten_by_arg2().GetRule_named_expr_list2(), items) || Ctx.HasPendingErrors) {
+                return false;
+            }
+            for (auto& item : items) {
+                YQL_ENSURE(!item->ContentListPtr());
+
+                auto sourcePtr = item->GetSourceName();
+                const bool isEmptySource = !sourcePtr || sourcePtr->empty();
+                auto columnNamePtr = item->GetColumnName();
+                if (columnNamePtr && (isEmptySource || *sourcePtr == sourceLabel)) {
+                    item->AssumeColumn();
+                    flattenByColumns.emplace_back(std::move(item));
+                    continue;
+                }
+
+                auto itemLabel = item->GetLabel();
+                if (!itemLabel) {
+                    Ctx.Error(item->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed";
+                    return false;
+                }
+                flattenByColumns.emplace_back(BuildColumn(item->GetPos(), itemLabel));
+                flattenByExprs.emplace_back(std::move(item));
+            }
+            break;
+        }
+        case TRule_flatten_by_arg::ALT_NOT_SET:
+            Ctx.IncrementMonCounter("sql_errors", "UnknownFlattenByArg");
+            AltNotImplemented("flatten_by_arg", node);
+            return false;
+    }
+    return true;
+}
+
+// Builds a source for the flatten_source rule: a named single source optionally followed
+// by a FLATTEN clause.
+//
+// For the FLATTEN BY form the mode defaults to "auto" unless a mode token is present
+// (lower-cased), and the parsed columns/expressions are registered on the source.
+// For the other form the source is marked via MarkFlattenColumns().
+// Returns nullptr on any error, including an unset grammar alternative.
+TSourcePtr TSqlSelect::FlattenSource(const TRule_flatten_source& node) {
+    auto source = NamedSingleSource(node.GetRule_named_single_source1(), true);
+    if (!source) {
+        return nullptr;
+    }
+    if (node.HasBlock2()) {
+        // Bound by reference: copying the parse-tree blocks is unnecessary.
+        const auto& flatten = node.GetBlock2();
+        const auto& flatten2 = flatten.GetBlock2();
+        switch (flatten2.Alt_case()) {
+            case TRule_flatten_source::TBlock2::TBlock2::kAlt1: {
+                TString mode = "auto";
+                if (flatten2.GetAlt1().HasBlock1()) {
+                    mode = to_lower(Token(flatten2.GetAlt1().GetBlock1().GetToken1()));
+                }
+
+                TVector<TNodePtr> flattenByColumns;
+                TVector<TNodePtr> flattenByExprs;
+                if (!FlattenByArg(source->GetLabel(), flattenByColumns, flattenByExprs, flatten2.GetAlt1().GetRule_flatten_by_arg3())) {
+                    return nullptr;
+                }
+
+                Ctx.IncrementMonCounter("sql_features", "FlattenByColumns");
+                if (!source->AddExpressions(Ctx, flattenByColumns, EExprSeat::FlattenBy)) {
+                    return nullptr;
+                }
+
+                if (!source->AddExpressions(Ctx, flattenByExprs, EExprSeat::FlattenByExpr)) {
+                    return nullptr;
+                }
+
+                source->SetFlattenByMode(mode);
+                break;
+            }
+            case TRule_flatten_source::TBlock2::TBlock2::kAlt2: {
+                Ctx.IncrementMonCounter("sql_features", "FlattenColumns");
+                source->MarkFlattenColumns();
+                break;
+            }
+
+            case TRule_flatten_source::TBlock2::TBlock2::ALT_NOT_SET:
+                Ctx.IncrementMonCounter("sql_errors", "UnknownOrdinaryNamedColumn");
+                AltNotImplemented("flatten_source", flatten2);
+                // Fail like every other unset-alternative branch in this file instead of
+                // returning a source whose FLATTEN clause was silently dropped.
+                return nullptr;
+        }
+    }
+    return source;
+}
+
+// Builds the source for a join_source rule.
+//
+// With no join links the single flattened source is returned as is (a leading ANY is an
+// error in that case). Otherwise all participating sources are built first, combined with
+// BuildEquiJoin(), and then every link is configured through JoinOp().
+// Returns nullptr after reporting an error.
+TSourcePtr TSqlSelect::JoinSource(const TRule_join_source& node) {
+    // join_source: (ANY)? flatten_source (join_op (ANY)? flatten_source join_constraint?)*;
+    const bool hasJoins = node.Block3Size() != 0;
+    if (node.HasBlock1() && !hasJoins) {
+        Error() << "ANY is not allowed without JOIN";
+        return nullptr;
+    }
+
+    TSourcePtr source(FlattenSource(node.GetRule_flatten_source2()));
+    if (!source) {
+        return nullptr;
+    }
+    if (!hasJoins) {
+        return source;
+    }
+
+    TPosition joinPos(Ctx.Pos());
+    TVector<TSourcePtr> inputs;
+    TVector<TMaybe<TPosition>> anyPositions;
+    TVector<bool> anyFlags;
+
+    // Leftmost input and its optional ANY.
+    inputs.emplace_back(std::move(source));
+    anyPositions.emplace_back(node.HasBlock1() ? Ctx.TokenPosition(node.GetBlock1().GetToken1()) : TMaybe<TPosition>());
+    anyFlags.push_back(bool(anyPositions.back()));
+
+    for (auto& link : node.GetBlock3()) {
+        inputs.emplace_back(FlattenSource(link.GetRule_flatten_source3()));
+        if (!inputs.back()) {
+            Ctx.IncrementMonCounter("sql_errors", "NoJoinWith");
+            return nullptr;
+        }
+
+        anyPositions.emplace_back(link.HasBlock2() ? Ctx.TokenPosition(link.GetBlock2().GetToken1()) : TMaybe<TPosition>());
+        anyFlags.push_back(bool(anyPositions.back()));
+    }
+
+    source = BuildEquiJoin(joinPos, std::move(inputs), std::move(anyFlags), Ctx.Scoped->StrictJoinKeyTypes);
+
+    size_t inputIdx = 1;
+    for (auto& link : node.GetBlock3()) {
+        YQL_ENSURE(inputIdx < anyPositions.size());
+        // The first link prefers the ANY of the leftmost input; every link otherwise
+        // falls back to the ANY of its own right-hand input.
+        const bool useLeftAny = inputIdx == 1 && anyPositions[0].Defined();
+        const TMaybe<TPosition> linkAny = useLeftAny ? anyPositions[0] : anyPositions[inputIdx];
+
+        if (!JoinOp(source.Get(), link, linkAny)) {
+            Ctx.IncrementMonCounter("sql_errors", "NoJoinOp");
+            return nullptr;
+        }
+        ++inputIdx;
+    }
+
+    return source;
+}
+
+// Parses one result_column rule and appends the resulting node to `terms`.
+//
+// An asterisk (optionally qualified) becomes a "*" column node. An expression is built
+// with column references allowed and may receive a label; a label written without AS is
+// accepted only when Ctx.AnsiOptionalAs is set and is marked as implicit.
+// Returns false after reporting an error.
+bool TSqlSelect::SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node) {
+    // result_column:
+    //     opt_id_prefix ASTERISK
+    //   | expr ((AS an_id) | an_id_pure)?
+    // ;
+    switch (node.Alt_case()) {
+        case TRule_result_column::kAltResultColumn1: {
+            // Bound by reference: the parse tree outlives this call.
+            const auto& alt = node.GetAlt_result_column1();
+
+            Token(alt.GetToken2());
+            auto idAsteriskQualify = OptIdPrefixAsStr(alt.GetRule_opt_id_prefix1(), *this);
+            Ctx.IncrementMonCounter("sql_features", idAsteriskQualify ? "QualifyAsterisk" : "Asterisk");
+            terms.push_back(BuildColumn(Ctx.Pos(), "*", idAsteriskQualify));
+            break;
+        }
+        case TRule_result_column::kAltResultColumn2: {
+            // A by-value copy here would duplicate the whole expression subtree per term.
+            const auto& alt = node.GetAlt_result_column2();
+            TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+            TSqlExpression expr(Ctx, Mode);
+            TNodePtr term(expr.Build(alt.GetRule_expr1()));
+            if (!term) {
+                Ctx.IncrementMonCounter("sql_errors", "NoTerm");
+                return false;
+            }
+            if (alt.HasBlock2()) {
+                const auto& aliasBlock = alt.GetBlock2();
+                TString label;
+                bool implicitLabel = false;
+                switch (aliasBlock.Alt_case()) {
+                    case TRule_result_column_TAlt2_TBlock2::kAlt1:
+                        label = Id(aliasBlock.GetAlt1().GetRule_an_id_or_type2(), *this);
+                        break;
+                    case TRule_result_column_TAlt2_TBlock2::kAlt2:
+                        label = Id(aliasBlock.GetAlt2().GetRule_an_id_as_compat1(), *this);
+                        if (!Ctx.AnsiOptionalAs) {
+                            // AS is mandatory
+                            Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility";
+                            return false;
+                        }
+                        implicitLabel = true;
+                        break;
+                    case TRule_result_column_TAlt2_TBlock2::ALT_NOT_SET:
+                        Y_ABORT("You should change implementation according to grammar changes");
+                }
+                term->SetLabel(label, Ctx.Pos());
+                term->MarkImplicitLabel(implicitLabel);
+            }
+            terms.push_back(term);
+            break;
+        }
+        case TRule_result_column::ALT_NOT_SET:
+            Ctx.IncrementMonCounter("sql_errors", "UnknownResultColumn");
+            AltNotImplemented("result_column", node);
+            return false;
+    }
+    return true;
+}
+
+// Checks the projection list for naming problems.
+//
+// Rejects: a '.' inside a label unless Ctx.PragmaAllowDotInAlias is set; duplicate labels;
+// a plain '*' combined with other items; the same qualified asterisk used twice; and an
+// unlabeled column whose (optionally source-qualified) name collides with a used name.
+// Returns false after reporting the first problem found.
+bool TSqlSelect::ValidateSelectColumns(const TVector<TNodePtr>& terms) {
+    TSet<TString> labels;
+    TSet<TString> asteriskSources;
+    for (const auto& term: terms) {
+        const auto& label = term->GetLabel();
+        if (!Ctx.PragmaAllowDotInAlias && label.find('.') != TString::npos) {
+            Ctx.Error(term->GetPos()) << "Unable to use '.' in column name. Invalid column name: " << label;
+            return false;
+        }
+        if (!label.empty()) {
+            if (!labels.insert(label).second) {
+                Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << label;
+                return false;
+            }
+        }
+
+        // GetSourceName() is treated as nullable elsewhere in this file (FlattenByArg),
+        // so a missing source is handled as an empty one instead of being dereferenced.
+        const auto* sourcePtr = term->GetSourceName();
+        const TString source = sourcePtr ? *sourcePtr : TString();
+
+        if (term->IsAsterisk()) {
+            if (source.empty() && terms.ysize() > 1) {
+                Ctx.Error(term->GetPos()) << "Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).";
+                return false;
+            } else if (!asteriskSources.insert(source).second) {
+                Ctx.Error(term->GetPos()) << "Unable to use twice same quialified asterisk. Invalid source: " << source;
+                return false;
+            }
+        } else if (label.empty()) {
+            const auto* column = term->GetColumnName();
+            if (column && !column->empty()) {
+                const auto usedName = source.empty() ? *column : source + '.' + *column;
+                if (!labels.insert(usedName).second) {
+                    Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << usedName;
+                    return false;
+                }
+            }
+        }
+    }
+    return true;
+}
+
+// Builds a source for the single_source rule.
+//
+// Alternative 1 is a table reference: AsTableImpl() is tried first, then TableRefImpl(),
+// whose ready-made source is used if present, otherwise a table source is built.
+// Alternative 2 is a nested select wrapped into an inner source.
+// Alternative 3 is a VALUES statement; `derivedColumns` / `derivedColumnsPos` are
+// forwarded only in this case.
+// Returns a null source on error.
+TSourcePtr TSqlSelect::SingleSource(const TRule_single_source& node, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos, bool unorderedSubquery) {
+    switch (node.Alt_case()) {
+        case TRule_single_source::kAltSingleSource1: {
+            const auto& alt = node.GetAlt_single_source1();
+            const auto& table_ref = alt.GetRule_table_ref1();
+
+            if (auto maybeSource = AsTableImpl(table_ref)) {
+                // A defined but null source means the error was already reported;
+                // either way the held value is what the caller gets.
+                return *maybeSource;
+            }
+
+            TTableRef table;
+            if (!TableRefImpl(table_ref, table, unorderedSubquery)) {
+                return nullptr;
+            }
+
+            if (table.Source) {
+                return table.Source;
+            }
+
+            TPosition pos(Ctx.Pos());
+            Ctx.IncrementMonCounter("sql_select_clusters", table.Cluster.GetLiteral() ? *table.Cluster.GetLiteral() : "unknown");
+            return BuildTableSource(pos, table);
+        }
+        case TRule_single_source::kAltSingleSource2: {
+            const auto& alt = node.GetAlt_single_source2();
+            Token(alt.GetToken1());
+            TSqlSelect innerSelect(Ctx, Mode);
+            TPosition pos;
+            auto source = innerSelect.Build(alt.GetRule_select_stmt2(), pos);
+            if (!source) {
+                return nullptr;
+            }
+            return BuildInnerSource(pos, BuildSourceNode(pos, std::move(source)), Ctx.Scoped->CurrService, Ctx.Scoped->CurrCluster);
+        }
+        case TRule_single_source::kAltSingleSource3: {
+            const auto& alt = node.GetAlt_single_source3();
+            TPosition pos;
+            return TSqlValues(Ctx, Mode).Build(alt.GetRule_values_stmt2(), pos, derivedColumns, derivedColumnsPos);
+        }
+        case TRule_single_source::ALT_NOT_SET:
+            AltNotImplemented("single_source", node);
+            Ctx.IncrementMonCounter("sql_errors", "UnknownSingleSource");
+            return nullptr;
+    }
+    // Defensive: keeps the function well-defined should Alt_case() ever yield a value
+    // outside the enumerators handled above.
+    return nullptr;
+}
+
+// Builds a single source together with its optional decorations: a derived column list
+// (accepted for VALUES only), a row pattern recognition clause, an alias, and a
+// SAMPLE / TABLESAMPLE clause. A sample clause and a row pattern recognition clause are
+// mutually exclusive. Returns a null source after reporting an error.
+TSourcePtr TSqlSelect::NamedSingleSource(const TRule_named_single_source& node, bool unorderedSubquery) {
+    // named_single_source: single_source match_recognize_clause? (((AS an_id) | an_id_as_compat) pure_column_list?)? (sample_clause | tablesample_clause)?;
+    const auto& sourceRule = node.GetRule_single_source1();
+
+    TVector<TString> derivedColumns;
+    TPosition derivedColumnsPos;
+    if (node.HasBlock3() && node.GetBlock3().HasBlock2()) {
+        const auto& columnList = node.GetBlock3().GetBlock2().GetRule_pure_column_list1();
+        Token(columnList.GetToken1());
+        derivedColumnsPos = Ctx.Pos();
+
+        if (sourceRule.Alt_case() != TRule_single_source::kAltSingleSource3) {
+            Error() << "Derived column list is only supported for VALUES";
+            return nullptr;
+        }
+
+        PureColumnListStr(columnList, *this, derivedColumns);
+    }
+
+    auto result = SingleSource(sourceRule, derivedColumns, derivedColumnsPos, unorderedSubquery);
+    if (!result) {
+        return nullptr;
+    }
+
+    if (node.HasBlock2()) {
+        if (node.HasBlock4()) {
+            //CAN/CSA-ISO/IEC 9075-2:18 7.6 <table reference>
+            //4) TF shall not simply contain both a <sample clause> and a <row pattern recognition clause and name>.
+            Ctx.Error() << "Source shall not simply contain both a sample clause and a row pattern recognition clause";
+            return {};
+        }
+        auto recognizeClause = TSqlMatchRecognizeClause(Ctx, Mode);
+        auto recognizeBuilder = recognizeClause.CreateBuilder(node.GetBlock2().GetRule_row_pattern_recognition_clause1());
+        result->SetMatchRecognize(recognizeBuilder);
+    }
+
+    if (node.HasBlock3()) {
+        const auto& aliasBlock = node.GetBlock3().GetBlock1();
+        TString alias;
+        switch (aliasBlock.Alt_case()) {
+            case TRule_named_single_source_TBlock3_TBlock1::kAlt1:
+                alias = Id(aliasBlock.GetAlt1().GetRule_an_id2(), *this);
+                break;
+            case TRule_named_single_source_TBlock3_TBlock1::kAlt2:
+                alias = Id(aliasBlock.GetAlt2().GetRule_an_id_as_compat1(), *this);
+                if (!Ctx.AnsiOptionalAs) {
+                    // AS is mandatory
+                    Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility";
+                    return {};
+                }
+                break;
+            case TRule_named_single_source_TBlock3_TBlock1::ALT_NOT_SET:
+                Y_ABORT("You should change implementation according to grammar changes");
+        }
+        result->SetLabel(alias);
+    }
+
+    if (!node.HasBlock4()) {
+        return result;
+    }
+
+    // Sampling: both clause kinds produce a rate expression; TABLESAMPLE additionally
+    // carries a mode and an optional REPEATABLE seed.
+    ESampleClause sampleClause;
+    ESampleMode sampleMode;
+    TSqlExpression expr(Ctx, Mode);
+    TNodePtr rateNode;
+    TNodePtr seedNode;
+    const auto& sampleBlock = node.GetBlock4();
+    TPosition samplePos;
+    switch (sampleBlock.Alt_case()) {
+        case TRule_named_single_source::TBlock4::kAlt1: {
+            const auto& clause = sampleBlock.GetAlt1().GetRule_sample_clause1();
+            sampleClause = ESampleClause::Sample;
+            sampleMode = ESampleMode::Bernoulli;
+            rateNode = expr.Build(clause.GetRule_expr2());
+            if (!rateNode) {
+                return nullptr;
+            }
+            samplePos = GetPos(clause.GetToken1());
+            Ctx.IncrementMonCounter("sql_features", "SampleClause");
+            break;
+        }
+        case TRule_named_single_source::TBlock4::kAlt2: {
+            const auto& clause = sampleBlock.GetAlt2().GetRule_tablesample_clause1();
+            sampleClause = ESampleClause::TableSample;
+            const auto& modeToken = clause.GetRule_sampling_mode2().GetToken1();
+            const TCiString& modeName = Token(modeToken);
+            if (modeName == "system") {
+                sampleMode = ESampleMode::System;
+            } else if (modeName == "bernoulli") {
+                sampleMode = ESampleMode::Bernoulli;
+            } else {
+                Ctx.Error(GetPos(modeToken)) << "Unsupported sampling mode: " << modeName;
+                Ctx.IncrementMonCounter("sql_errors", "UnsupportedSamplingMode");
+                return nullptr;
+            }
+            rateNode = expr.Build(clause.GetRule_expr4());
+            if (!rateNode) {
+                return nullptr;
+            }
+            if (clause.HasBlock6()) {
+                seedNode = expr.Build(clause.GetBlock6().GetRule_repeatable_clause1().GetRule_expr3());
+                if (!seedNode) {
+                    return nullptr;
+                }
+            }
+            samplePos = GetPos(clause.GetToken1());
+            Ctx.IncrementMonCounter("sql_features", "SampleClause");
+            break;
+        }
+        case TRule_named_single_source::TBlock4::ALT_NOT_SET:
+            Y_ABORT("SampleClause: does not corresond to grammar changes");
+    }
+
+    if (!result->SetSamplingOptions(Ctx, samplePos, sampleClause, sampleMode, rateNode, seedNode)) {
+        Ctx.IncrementMonCounter("sql_errors", "IncorrectSampleClause");
+        return nullptr;
+    }
+    return result;
+}
+
+// Parses a column_name rule (optional source prefix plus identifier) and appends the
+// resulting column node to `keys`. Returns false if the identifier comes out empty; an
+// error is reported here only when none is pending yet.
+bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node) {
+    const auto prefix = OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), *this);
+    const auto name = Id(node.GetRule_an_id2(), *this);
+    if (!name.empty()) {
+        keys.push_back(BuildColumn(Ctx.Pos(), name, prefix));
+        return true;
+    }
+
+    // TODO: Id() should return TMaybe<TString>
+    if (!Ctx.HasPendingErrors) {
+        Ctx.Error() << "Empty column name is not allowed";
+    }
+    return false;
+}
+
+// Parses a without_column_name rule and appends the resulting column node to `keys`.
+// The source name stays empty for the unqualified alternative. Returns false if the
+// column identifier comes out empty; an error is reported here only when none is pending.
+bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_without_column_name& node) {
+    // without_column_name: (an_id DOT an_id) | an_id_without;
+    TString qualifier;
+    TString name;
+    switch (node.Alt_case()) {
+        case TRule_without_column_name::kAltWithoutColumnName1: {
+            const auto& qualified = node.GetAlt_without_column_name1();
+            qualifier = Id(qualified.GetRule_an_id1(), *this);
+            name = Id(qualified.GetRule_an_id3(), *this);
+            break;
+        }
+        case TRule_without_column_name::kAltWithoutColumnName2:
+            name = Id(node.GetAlt_without_column_name2().GetRule_an_id_without1(), *this);
+            break;
+        case TRule_without_column_name::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    if (!name.empty()) {
+        keys.push_back(BuildColumn(Ctx.Pos(), name, qualifier));
+        return true;
+    }
+
+    // TODO: Id() should return TMaybe<TString>
+    if (!Ctx.HasPendingErrors) {
+        Ctx.Error() << "Empty column name is not allowed";
+    }
+    return false;
+}
+
+// Parses a comma-separated column list and appends one column node per entry to `keys`.
+//
+// Works for TRule_column_list and for the rule type whose entries are without_column_name
+// rules; the accessor names are chosen at compile time. Stops and returns false at the
+// first entry that fails to parse.
+template<typename TRule>
+bool TSqlSelect::ColumnList(TVector<TNodePtr>& keys, const TRule& node) {
+    constexpr bool isPlainColumnList = std::is_same_v<TRule, TRule_column_list>;
+
+    bool result;
+    if constexpr (isPlainColumnList) {
+        result = ColumnName(keys, node.GetRule_column_name1());
+    } else {
+        result = ColumnName(keys, node.GetRule_without_column_name1());
+    }
+
+    if (!result) {
+        return false;
+    }
+
+    // Iterate by const reference: each element is a parse-tree message and copying it
+    // per iteration is pure overhead.
+    for (const auto& b: node.GetBlock2()) {
+        Token(b.GetToken1());
+        if constexpr (isPlainColumnList) {
+            result = ColumnName(keys, b.GetRule_column_name2());
+        } else {
+            result = ColumnName(keys, b.GetRule_without_column_name2());
+        }
+        if (!result) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Parses a named_column rule: appends the column to `columnList` and, when the optional
+// alias block is present, sets that alias as the label of the appended node.
+// Returns false if the column name cannot be parsed.
+bool TSqlSelect::NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node) {
+    const bool parsed = ColumnName(columnList, node.GetRule_column_name1());
+    if (parsed && node.HasBlock2()) {
+        const auto alias = Id(node.GetBlock2().GetRule_an_id2(), *this);
+        columnList.back()->SetLabel(alias);
+    }
+    return parsed;
+}
+
+// Builds the source for a PROCESS statement.
+//
+// Several inputs are combined with BuildMuxSource(). Without USING the inputs are passed
+// through as is. With USING the call expression is built (external call or UDF), optional
+// WHERE is applied as a filter, HAVING is rejected, WITH is accepted for external calls
+// only and ASSUME ORDER BY for non-external calls only.
+// `selectPos` is set to the statement start if it was not set yet.
+// Returns a null source after reporting an error.
+TSourcePtr TSqlSelect::ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos) {
+    // PROCESS STREAM? named_single_source (COMMA named_single_source)* (USING using_call_expr (AS an_id)?
+    // (WITH external_call_settings)?
+    // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?)?
+
+    Token(node.GetToken1());
+    TPosition startPos(Ctx.Pos());
+
+    if (!selectPos) {
+        selectPos = startPos;
+    }
+
+    const bool hasUsing = node.HasBlock5();
+    // Subqueries are read as unordered exactly when a USING part is present.
+    const bool unorderedSubquery = hasUsing;
+    TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source3(), unorderedSubquery));
+    if (!source) {
+        return nullptr;
+    }
+    if (node.GetBlock4().size()) {
+        TVector<TSourcePtr> inputs(1, source);
+        for (auto& extra : node.GetBlock4()) {
+            inputs.push_back(NamedSingleSource(extra.GetRule_named_single_source2(), unorderedSubquery));
+            if (!inputs.back()) {
+                return nullptr;
+            }
+        }
+        auto muxPos = source->GetPos();
+        source = BuildMuxSource(muxPos, std::move(inputs));
+    }
+
+    const bool processStream = node.HasBlock2();
+
+    if (!hasUsing) {
+        return BuildProcess(startPos, std::move(source), nullptr, false, {}, false, processStream, settings, {});
+    }
+
+    const auto& usingBlock = node.GetBlock5();
+    if (usingBlock.HasBlock5()) {
+        TSqlExpression expr(Ctx, Mode);
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        TNodePtr where = expr.Build(usingBlock.GetBlock5().GetRule_expr2());
+        if (!where || !source->AddFilter(Ctx, where)) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", "ProcessWhere");
+    } else {
+        Ctx.IncrementMonCounter("sql_features", processStream ? "ProcessStream" : "Process");
+    }
+
+    if (usingBlock.HasBlock6()) {
+        Ctx.Error() << "PROCESS does not allow HAVING yet! You may request it on yql@ maillist.";
+        return nullptr;
+    }
+
+    TSqlCallExpr call(Ctx, Mode);
+    if (!call.Init(usingBlock.GetRule_using_call_expr2())) {
+        return nullptr;
+    }
+    call.IncCounters();
+
+    // At most one TableRows() placeholder may appear among the call arguments.
+    bool listCall = false;
+    auto args = call.GetArgs();
+    for (auto& arg : args) {
+        if (dynamic_cast<TTableRows*>(arg.Get())) {
+            if (listCall) {
+                Ctx.Error() << "Only one TableRows() argument is allowed.";
+                return nullptr;
+            }
+            listCall = true;
+        }
+    }
+
+    if (usingBlock.HasBlock4()) {
+        if (!call.IsExternal()) {
+            Ctx.Error() << "PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block";
+            return nullptr;
+        }
+        if (!call.ConfigureExternalCall(usingBlock.GetBlock4().GetRule_external_call_settings2())) {
+            return nullptr;
+        }
+    }
+
+    TSqlCallExpr finalCall(call, args);
+    TNodePtr with(finalCall.IsExternal() ? finalCall.BuildCall() : finalCall.BuildUdf(/* forReduce = */ false));
+    if (!with) {
+        return {};
+    }
+    args = finalCall.GetArgs();
+    if (call.IsExternal()) {
+        listCall = true;
+    }
+
+    if (usingBlock.HasBlock3()) {
+        with->SetLabel(Id(usingBlock.GetBlock3().GetRule_an_id2(), *this));
+    }
+
+    TVector<TSortSpecificationPtr> assumeOrderBy;
+    if (usingBlock.HasBlock7()) {
+        if (call.IsExternal()) {
+            Ctx.Error() << "PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block";
+            return nullptr;
+        }
+        if (!OrderByClause(usingBlock.GetBlock7().GetRule_order_by_clause2(), assumeOrderBy)) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr");
+    }
+
+    return BuildProcess(startPos, std::move(source), with, finalCall.IsExternal(), std::move(args), listCall, processStream, settings, assumeOrderBy);
+}
+
+// Builds the source for a REDUCE statement.
+//
+// Several inputs are combined with BuildMuxSource(). Collects the optional PRESORT
+// specification, the ON key columns, optional WHERE (applied as a source filter) and
+// HAVING expressions, the UDF call and an optional ASSUME ORDER BY, then delegates to
+// BuildReduce(). `selectPos` is set to the statement start if it was not set yet.
+// Returns a null source after reporting an error.
+TSourcePtr TSqlSelect::ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos) {
+    // REDUCE named_single_source (COMMA named_single_source)* (PRESORT sort_specification_list)?
+    // ON column_list USING ALL? using_call_expr (AS an_id)?
+    // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?
+    Token(node.GetToken1());
+    TPosition startPos(Ctx.Pos());
+    if (!selectPos) {
+        selectPos = startPos;
+    }
+
+    TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source2(), true));
+    if (!source) {
+        return {};
+    }
+    if (node.GetBlock3().size()) {
+        TVector<TSourcePtr> inputs(1, source);
+        for (auto& extra : node.GetBlock3()) {
+            inputs.push_back(NamedSingleSource(extra.GetRule_named_single_source2(), true));
+            if (!inputs.back()) {
+                return nullptr;
+            }
+        }
+        auto muxPos = source->GetPos();
+        source = BuildMuxSource(muxPos, std::move(inputs));
+    }
+
+    TVector<TSortSpecificationPtr> presort;
+    if (node.HasBlock4() && !SortSpecificationList(node.GetBlock4().GetRule_sort_specification_list2(), presort)) {
+        return {};
+    }
+
+    TVector<TNodePtr> keys;
+    if (!ColumnList(keys, node.GetRule_column_list6())) {
+        return nullptr;
+    }
+
+    if (node.HasBlock11()) {
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        TSqlExpression expr(Ctx, Mode);
+        TNodePtr where = expr.Build(node.GetBlock11().GetRule_expr2());
+        if (!where || !source->AddFilter(Ctx, where)) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", "ReduceWhere");
+    } else {
+        Ctx.IncrementMonCounter("sql_features", "Reduce");
+    }
+
+    TNodePtr having;
+    if (node.HasBlock12()) {
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        TSqlExpression expr(Ctx, Mode);
+        having = expr.Build(node.GetBlock12().GetRule_expr2());
+        if (!having) {
+            return nullptr;
+        }
+    }
+
+    TSqlCallExpr call(Ctx, Mode);
+    if (!call.Init(node.GetRule_using_call_expr9())) {
+        return nullptr;
+    }
+    call.IncCounters();
+
+    // At most one TableRows() placeholder may appear among the call arguments.
+    bool listCall = false;
+    auto args = call.GetArgs();
+    for (auto& arg : args) {
+        if (dynamic_cast<TTableRows*>(arg.Get())) {
+            if (listCall) {
+                Ctx.Error() << "Only one TableRows() argument is allowed.";
+                return nullptr;
+            }
+            listCall = true;
+        }
+    }
+
+    TSqlCallExpr finalCall(call, args);
+
+    TNodePtr udf(finalCall.BuildUdf(/* forReduce = */ true));
+    if (!udf) {
+        return {};
+    }
+
+    if (node.HasBlock10()) {
+        udf->SetLabel(Id(node.GetBlock10().GetRule_an_id2(), *this));
+    }
+
+    // Block8 is the optional ALL keyword after USING.
+    const auto reduceMode = node.HasBlock8() ? ReduceMode::ByAll : ReduceMode::ByPartition;
+
+    TVector<TSortSpecificationPtr> assumeOrderBy;
+    if (node.HasBlock13()) {
+        if (!OrderByClause(node.GetBlock13().GetRule_order_by_clause2(), assumeOrderBy)) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr");
+    }
+
+    return BuildReduce(startPos, reduceMode, std::move(source), std::move(presort), std::move(keys), std::move(args), udf, having,
+        settings, assumeOrderBy, listCall);
+}
+
+// Translates a select_core grammar rule into a select source.
+//
+// node      - parsed select_core rule.
+// settings  - write settings forwarded unchanged to BuildSelectCore.
+// selectPos - position of the enclosing select; set to the current token position when still unset.
+// placement - position of this select inside a select_op chain; empty when it is not part of a chain.
+// selectOpOrderBy, selectOpAssumeOrderBy - reset on entry. They receive the ORDER BY specification when it
+//     must not be applied by this select (see NeedPassLimitOrderByToUnderlyingSelect).
+//
+// Returns the built source, or nullptr when any clause fails to translate.
+TSourcePtr TSqlSelect::SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos,
+    TMaybe<TSelectKindPlacement> placement, TVector<TSortSpecificationPtr>& selectOpOrderBy, bool& selectOpAssumeOrderBy)
+{
+    // (FROM join_source)? SELECT STREAM? opt_set_quantifier result_column (COMMA result_column)* COMMA? (WITHOUT column_list)? (FROM join_source)? (WHERE expr)?
+    // group_by_clause? (HAVING expr)? window_clause? ext_order_by_clause?
+    selectOpOrderBy = {};
+    selectOpAssumeOrderBy = false;
+    // Move the context position to the first token of the statement: the leading FROM if present, SELECT otherwise.
+    if (node.HasBlock1()) {
+        Token(node.GetBlock1().GetToken1());
+    } else {
+        Token(node.GetToken2());
+    }
+
+    TPosition startPos(Ctx.Pos());
+    if (!selectPos) {
+        selectPos = Ctx.Pos();
+    }
+
+    // Hints attached to the select token: "unique" and "distinct" carry column sets, anything else is reported as unused.
+    const auto hints = Ctx.PullHintForToken(selectPos);
+    TColumnsSets uniqueSets, distinctSets;
+    for (const auto& hint : hints) {
+        if (const auto& name = to_lower(hint.Name); name == "unique")
+            uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
+        else if (name == "distinct") {
+            // A distinct hint is registered in both sets.
+            uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
+            distinctSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
+        } else {
+            Ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Hint " << hint.Name << " will not be used";
+        }
+    }
+
+    const bool distinct = IsDistinctOptSet(node.GetRule_opt_set_quantifier4());
+    if (distinct) {
+        Ctx.IncrementMonCounter("sql_features", "DistinctInSelect");
+    }
+
+    // Start from a fake source; it is replaced below when a FROM clause is present.
+    TSourcePtr source(BuildFakeSource(selectPos, /* missingFrom = */ true, Mode == NSQLTranslation::ESqlMode::SUBQUERY));
+    // FROM may be written before SELECT (Block1) or after the result columns (Block9), but not in both places.
+    if (node.HasBlock1() && node.HasBlock9()) {
+        Token(node.GetBlock9().GetToken1());
+        Ctx.IncrementMonCounter("sql_errors", "DoubleFrom");
+        Ctx.Error() << "Only one FROM clause is allowed";
+        return nullptr;
+    }
+    if (node.HasBlock1()) {
+        source = JoinSource(node.GetBlock1().GetRule_join_source2());
+        Ctx.IncrementMonCounter("sql_features", "FromInFront");
+    } else if (node.HasBlock9()) {
+        source = JoinSource(node.GetBlock9().GetRule_join_source2());
+    }
+    if (!source) {
+        return nullptr;
+    }
+
+    const bool selectStream = node.HasBlock3();
+    TVector<TNodePtr> without;
+    if (node.HasBlock8()) {
+        if (!ColumnList(without, node.GetBlock8().GetRule_without_column_list2())) {
+            return nullptr;
+        }
+    }
+    if (node.HasBlock10()) {
+        // WHERE: bind the block by reference, the parsed message does not need to be copied.
+        const auto& block = node.GetBlock10();
+        Token(block.GetToken1());
+        TNodePtr where;
+        {
+            // Column references are allowed only while the filter expression is being built.
+            TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+            TSqlExpression expr(Ctx, Mode);
+            where = expr.Build(block.GetRule_expr2());
+        }
+        if (!where) {
+            Ctx.IncrementMonCounter("sql_errors", "WhereInvalid");
+            return nullptr;
+        }
+        if (!source->AddFilter(Ctx, where)) {
+            Ctx.IncrementMonCounter("sql_errors", "WhereNotSupportedBySource");
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", "Where");
+    }
+
+    /// \todo merge groupByExpr and groupBy in one
+    TVector<TNodePtr> groupByExpr, groupBy;
+    TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec;
+    bool compactGroupBy = false;
+    TString groupBySuffix;
+    if (node.HasBlock11()) {
+        TGroupByClause clause(Ctx, Mode);
+        if (!clause.Build(node.GetBlock11().GetRule_group_by_clause1())) {
+            return nullptr;
+        }
+        // A hopping window may come from the legacy specification or from one of the aliased expressions.
+        bool hasHopping = (bool)clause.GetLegacyHoppingWindow();
+        for (const auto& exprAlias: clause.Aliases()) {
+            YQL_ENSURE(exprAlias.first == exprAlias.second->GetLabel());
+            groupByExpr.emplace_back(exprAlias.second);
+            hasHopping |= (bool)dynamic_cast<THoppingWindow*>(exprAlias.second.Get());
+        }
+        groupBy = std::move(clause.Content());
+        clause.SetFeatures("sql_features");
+        legacyHoppingWindowSpec = clause.GetLegacyHoppingWindow();
+        compactGroupBy = clause.IsCompactGroupBy();
+        groupBySuffix = clause.GetSuffix();
+
+        if (source->IsStream() && !hasHopping) {
+            Ctx.Error() << "Streaming group by query must have a hopping window specification.";
+            return nullptr;
+        }
+    }
+
+    TNodePtr having;
+    if (node.HasBlock12()) {
+        TSqlExpression expr(Ctx, Mode);
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        having = expr.Build(node.GetBlock12().GetRule_expr2());
+        if (!having) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", "Having");
+    }
+
+    TWinSpecs windowSpec;
+    if (node.HasBlock13()) {
+        if (source->IsStream()) {
+            Ctx.Error() << "WINDOW is not allowed in streaming queries";
+            return nullptr;
+        }
+        if (!WindowClause(node.GetBlock13().GetRule_window_clause1(), windowSpec)) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", "WindowClause");
+    }
+
+    bool assumeSorted = false;
+    TVector<TSortSpecificationPtr> orderBy;
+    if (node.HasBlock14()) {
+        auto& orderBlock = node.GetBlock14().GetRule_ext_order_by_clause1();
+        assumeSorted = orderBlock.HasBlock1();
+
+        Token(orderBlock.GetRule_order_by_clause2().GetToken1());
+
+        if (source->IsStream()) {
+            Ctx.Error() << "ORDER BY is not allowed in streaming queries";
+            return nullptr;
+        }
+
+        if (!ValidateLimitOrderByWithSelectOp(placement, "ORDER BY")) {
+            return nullptr;
+        }
+
+        if (!OrderByClause(orderBlock.GetRule_order_by_clause2(), orderBy)) {
+            return nullptr;
+        }
+        Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(orderBy)
+            ? (assumeSorted ? "AssumeOrderBy" : "OrderBy")
+            : (assumeSorted ? "AssumeOrderByExpr" : "OrderByExpr")
+        );
+
+        // Hand the ordering to the caller through the out-parameters. After the swap the local values
+        // are the reset ones (empty / false), so this select is built without ORDER BY.
+        if (!NeedPassLimitOrderByToUnderlyingSelect(placement)) {
+            selectOpOrderBy.swap(orderBy);
+            std::swap(selectOpAssumeOrderBy, assumeSorted);
+        }
+    }
+
+    TVector<TNodePtr> terms;
+    {
+        // Registers the window specifications in Ctx.WinSpecsScopes while the result columns are translated
+        // and removes them again when the scope is left.
+        class TScopedWinSpecs {
+        public:
+            TScopedWinSpecs(TContext& ctx, TWinSpecs& specs)
+                : Ctx(ctx)
+            {
+                Ctx.WinSpecsScopes.push_back(std::ref(specs));
+            }
+            ~TScopedWinSpecs() {
+                Ctx.WinSpecsScopes.pop_back();
+            }
+        private:
+            TContext& Ctx;
+        };
+
+
+        TScopedWinSpecs scoped(Ctx, windowSpec);
+        if (!SelectTerm(terms, node.GetRule_result_column5())) {
+            return nullptr;
+        }
+        // Iterate by reference: each element is a parsed message that is only read here.
+        for (const auto& block: node.GetBlock6()) {
+            if (!SelectTerm(terms, block.GetRule_result_column2())) {
+                return nullptr;
+            }
+        }
+
+    }
+    if (!ValidateSelectColumns(terms)) {
+        return nullptr;
+    }
+    return BuildSelectCore(Ctx, startPos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy, having,
+        std::move(windowSpec), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
+}
+
+// Translates one named window definition and stores it in winSpecs under its name.
+// Returns false when the name is already taken or the specification fails to translate.
+bool TSqlSelect::WindowDefinition(const TRule_window_definition& rule, TWinSpecs& winSpecs) {
+    const auto& nameRule = rule.GetRule_new_window_name1().GetRule_window_name1().GetRule_an_id_window1();
+    const TString windowName = Id(nameRule, *this);
+    if (winSpecs.contains(windowName)) {
+        Ctx.Error() << "Unable to declare window with same name: " << windowName;
+        return false;
+    }
+
+    const auto& details = rule.GetRule_window_specification3().GetRule_window_specification_details2();
+    auto spec = WindowSpecification(details);
+    if (spec) {
+        winSpecs.emplace(windowName, std::move(spec));
+        return true;
+    }
+    return false;
+}
+
+// Translates every window definition of a WINDOW clause into winSpecs.
+// Stops and returns false at the first definition that fails to translate.
+bool TSqlSelect::WindowClause(const TRule_window_clause& rule, TWinSpecs& winSpecs) {
+    // Bind by reference: the definition list is only read, so the parsed message need not be copied.
+    const auto& windowList = rule.GetRule_window_definition_list2();
+    if (!WindowDefinition(windowList.GetRule_window_definition1(), winSpecs)) {
+        return false;
+    }
+    for (const auto& block: windowList.GetBlock2()) {
+        if (!WindowDefinition(block.GetRule_window_definition2(), winSpecs)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Translates the sort specification list of an ORDER BY clause into orderBy.
+bool TSqlTranslation::OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy) {
+    const auto& sortList = node.GetRule_sort_specification_list3();
+    return SortSpecificationList(sortList, orderBy);
+}
+
+// Checks that a LIMIT / ORDER BY clause (named by `what`) is allowed for a select at the given placement.
+// Outside of a select_op chain (placement is empty) the clause is always allowed; inside a chain it is
+// allowed only on the last select. Reports an error and returns false otherwise.
+bool TSqlSelect::ValidateLimitOrderByWithSelectOp(TMaybe<TSelectKindPlacement> placement, TStringBuf what) {
+    const bool insideChainButNotLast = placement.Defined() && !placement->IsLastInSelectOp;
+    if (insideChainButNotLast) {
+        Ctx.Error() << what << " within UNION ALL is only allowed after last subquery";
+        return false;
+    }
+    return true;
+}
+
+// Tells whether LIMIT / ORDER BY must be applied by the select itself.
+// That is the case unless the select is the last member of a select_op chain.
+bool TSqlSelect::NeedPassLimitOrderByToUnderlyingSelect(TMaybe<TSelectKindPlacement> placement) {
+    const bool lastInChain = placement.Defined() && placement->IsLastInSelectOp;
+    return !lastInChain;
+}
+
+// Translates a select_kind_partial rule: a select_kind followed by an optional LIMIT block.
+//
+// node      - parsed rule.
+// selectPos - forwarded to the nested SelectKind call.
+// placement - position inside a select_op chain; empty when the select is not part of a chain.
+//
+// Returns an empty result after an error. On success res.Source is wrapped by BuildSelect; the skip/take
+// node is either applied to that select or returned in res.SelectOpSkipTake, depending on
+// NeedPassLimitOrderByToUnderlyingSelect(placement).
+TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos,
+    TMaybe<TSelectKindPlacement> placement)
+{
+    auto res = SelectKind(node.GetRule_select_kind1(), selectPos, placement);
+    if (!res) {
+        return {};
+    }
+    TPosition startPos(Ctx.Pos());
+    /// LIMIT INTEGER block
+    TNodePtr skipTake;
+    if (node.HasBlock2()) {
+        // Bind by reference: the parsed LIMIT block is only read, copying the message is unnecessary.
+        const auto& block = node.GetBlock2();
+
+        Token(block.GetToken1());
+        TPosition pos(Ctx.Pos());
+
+        if (!ValidateLimitOrderByWithSelectOp(placement, "LIMIT")) {
+            return {};
+        }
+
+        TSqlExpression takeExpr(Ctx, Mode);
+        auto take = takeExpr.Build(block.GetRule_expr2());
+        if (!take) {
+            return {};
+        }
+
+        TNodePtr skip;
+        if (block.HasBlock3()) {
+            TSqlExpression skipExpr(Ctx, Mode);
+            skip = skipExpr.Build(block.GetBlock3().GetRule_expr2());
+            if (!skip) {
+                return {};
+            }
+            if (Token(block.GetBlock3().GetToken1()) == ",") {
+                // LIMIT skip, take
+                // With the comma form the first expression is the skip, so the two nodes trade places.
+                skip.Swap(take);
+                Ctx.IncrementMonCounter("sql_features", "LimitSkipTake");
+            } else {
+                Ctx.IncrementMonCounter("sql_features", "LimitOffset");
+            }
+        }
+
+        auto st = BuildSkipTake(pos, skip, take);
+        if (NeedPassLimitOrderByToUnderlyingSelect(placement)) {
+            skipTake = st;
+        } else {
+            res.SelectOpSkipTake = st;
+        }
+
+        Ctx.IncrementMonCounter("sql_features", "Limit");
+    }
+
+    res.Source = BuildSelect(startPos, std::move(res.Source), skipTake);
+    return res;
+}
+
+// Translates a select_kind rule: optional DISCARD, one of PROCESS / REDUCE / SELECT cores, optional INTO RESULT.
+//
+// Inside a select_op chain (placement is defined) DISCARD is accepted only on the first select and
+// INTO RESULT only on the last one; both are then returned in res.Settings and the core is built with
+// default write settings. Returns an empty result after reporting an error.
+TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind& node, TPosition& selectPos,
+    TMaybe<TSelectKindPlacement> placement)
+{
+    const bool isDiscard = node.HasBlock1();
+    const bool isLabeled = node.HasBlock3();
+    const bool labelForbiddenByMode =
+        Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW || Mode == NSQLTranslation::ESqlMode::SUBQUERY;
+
+    if (isLabeled && labelForbiddenByMode) {
+        Ctx.Error() << "INTO RESULT is not allowed in current mode";
+        return {};
+    }
+
+    if (isDiscard && isLabeled) {
+        Ctx.Error() << "DISCARD and INTO RESULT cannot be used at the same time";
+        return {};
+    }
+
+    if (isDiscard && !selectPos) {
+        selectPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
+    }
+
+    TWriteSettings coreSettings;
+    coreSettings.Discard = isDiscard;
+    if (isLabeled) {
+        coreSettings.Label = PureColumnOrNamed(node.GetBlock3().GetRule_pure_column_or_named3(), *this);
+    }
+
+    TSelectKindResult result;
+    if (placement.Defined()) {
+        // Reject misplaced modifiers first, DISCARD before INTO RESULT.
+        if (!placement->IsFirstInSelectOp && coreSettings.Discard) {
+            auto discardPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
+            Ctx.Error(discardPos) << "DISCARD within UNION ALL is only allowed before first subquery";
+            return {};
+        }
+        if (!placement->IsLastInSelectOp && !coreSettings.Label.Empty()) {
+            auto labelPos = Ctx.TokenPosition(node.GetBlock3().GetToken1());
+            Ctx.Error(labelPos) << "INTO RESULT within UNION ALL is only allowed after last subquery";
+            return {};
+        }
+
+        // The modifiers belong to the whole chain: report them to the caller ...
+        if (placement->IsFirstInSelectOp) {
+            result.Settings.Discard = coreSettings.Discard;
+        }
+        if (placement->IsLastInSelectOp) {
+            result.Settings.Label = coreSettings.Label;
+        }
+
+        // ... and build the core itself with default settings.
+        coreSettings = {};
+    }
+
+    const auto& core = node.GetBlock2();
+    switch (core.Alt_case()) {
+        case TRule_select_kind_TBlock2::kAlt1:
+            result.Source = ProcessCore(core.GetAlt1().GetRule_process_core1(), coreSettings, selectPos);
+            break;
+        case TRule_select_kind_TBlock2::kAlt2:
+            result.Source = ReduceCore(core.GetAlt2().GetRule_reduce_core1(), coreSettings, selectPos);
+            break;
+        case TRule_select_kind_TBlock2::kAlt3: {
+            result.Source = SelectCore(core.GetAlt3().GetRule_select_core1(), coreSettings, selectPos,
+                placement, result.SelectOpOrderBy, result.SelectOpAssumeOrderBy);
+            break;
+        }
+        case TRule_select_kind_TBlock2::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    return result;
+}
+
+// Translates a select_kind_parenthesis rule by delegating to the select_kind_partial overload.
+// The first alternative keeps the caller's placement; the second alternative is translated
+// with an empty placement.
+TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos,
+    TMaybe<TSelectKindPlacement> placement)
+{
+    if (node.Alt_case() != TRule_select_kind_parenthesis::kAltSelectKindParenthesis1) {
+        const auto& inner = node.GetAlt_select_kind_parenthesis2().GetRule_select_kind_partial2();
+        return SelectKind(inner, selectPos, {});
+    }
+
+    const auto& partial = node.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1();
+    return SelectKind(partial, selectPos, placement);
+}
+
+// Combines the already translated first select with the remaining members of a select_op chain.
+//
+// node  - select statement rule; its Block2 holds the (select_op, select) pairs that follow the first select.
+// pos   - position of the first select; used for the union and for the wrapping select.
+// first - result of translating the first select; its source is consumed.
+//
+// Selects joined with the same quantifier (presence of Block2 in the select_op rule) are collected and
+// combined by one BuildUnion call. When the quantifier changes, everything collected so far is folded
+// into a single source that becomes the first operand of the next group. ORDER BY / LIMIT / label
+// reported by the last select are applied to the final union. Returns nullptr after an error.
+template<typename TRule>
+TSourcePtr TSqlSelect::Build(const TRule& node, TPosition pos, TSelectKindResult&& first) {
+    if (node.GetBlock2().empty()) {
+        return std::move(first.Source);
+    }
+
+    // Bind by reference: the repeated field is only read, deep-copying every parsed select is unnecessary.
+    const auto& blocks = node.GetBlock2();
+
+    TPosition unionPos = pos; // Position of first select
+    TVector<TSortSpecificationPtr> orderBy;
+    bool assumeOrderBy = false;
+    TNodePtr skipTake;
+    TWriteSettings outermostSettings;
+    outermostSettings.Discard = first.Settings.Discard;
+
+    TVector<TSourcePtr> sources{ std::move(first.Source)};
+    bool currentQuantifier = false;
+
+    for (int i = 0; i < blocks.size(); ++i) {
+        const auto& b = blocks[i];
+        const bool second = (i == 0);
+        const bool last = (i + 1 == blocks.size());
+        TSelectKindPlacement placement;
+        placement.IsLastInSelectOp = last;
+
+        TSelectKindResult next = SelectKind(b.GetRule_select_kind_parenthesis2(), pos, placement);
+        if (!next) {
+            return nullptr;
+        }
+
+        if (last) {
+            // The ordering is not read from `next` again, so it can be moved out instead of copied.
+            orderBy = std::move(next.SelectOpOrderBy);
+            assumeOrderBy = next.SelectOpAssumeOrderBy;
+            skipTake = next.SelectOpSkipTake;
+            outermostSettings.Label = next.Settings.Label;
+        }
+
+        switch (b.GetRule_select_op1().Alt_case()) {
+            case TRule_select_op::kAltSelectOp1:
+                break;
+            case TRule_select_op::kAltSelectOp2:
+            case TRule_select_op::kAltSelectOp3:
+                Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet";
+                return nullptr;
+            case TRule_select_op::ALT_NOT_SET:
+                Y_ABORT("You should change implementation according to grammar changes");
+        }
+
+        const bool quantifier = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2();
+
+        // The quantifier changed: fold what has been collected into one source and start a new group.
+        if (!second && quantifier != currentQuantifier) {
+            auto source = BuildUnion(pos, std::move(sources), currentQuantifier, {});
+            sources.clear();
+            sources.emplace_back(std::move(source));
+        }
+
+        sources.emplace_back(std::move(next.Source));
+        currentQuantifier = quantifier;
+    }
+
+    auto result = BuildUnion(pos, std::move(sources), currentQuantifier, outermostSettings);
+
+    if (orderBy) {
+        // Apply the trailing ORDER BY by wrapping the union into a "SELECT *" over it.
+        TVector<TNodePtr> groupByExpr;
+        TVector<TNodePtr> groupBy;
+        bool compactGroupBy = false;
+        TString groupBySuffix = "";
+        TNodePtr having;
+        TWinSpecs winSpecs;
+        TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec;
+        bool distinct = false;
+        TVector<TNodePtr> without;
+        bool stream = false;
+
+        TVector<TNodePtr> terms;
+        terms.push_back(BuildColumn(unionPos, "*", ""));
+
+        result = BuildSelectCore(Ctx, unionPos, std::move(result), groupByExpr, groupBy, compactGroupBy, groupBySuffix,
+            assumeOrderBy, orderBy, having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms),
+            distinct, std::move(without), stream, outermostSettings, {}, {});
+
+        result = BuildSelect(unionPos, std::move(result), skipTake);
+    } else if (skipTake) {
+        result = BuildSelect(unionPos, std::move(result), skipTake);
+    }
+
+    return result;
+}
+
+// Translates a select_stmt rule into a source; returns nullptr when the first select fails to translate.
+// When further selects follow (Block2 is not empty) the first one is translated with a placement
+// marked as first in the select_op chain.
+TSourcePtr TSqlSelect::Build(const TRule_select_stmt& node, TPosition& selectPos) {
+    TMaybe<TSelectKindPlacement> firstPlacement;
+    const bool hasSelectOps = !node.GetBlock2().empty();
+    if (hasSelectOps) {
+        firstPlacement.ConstructInPlace();
+        firstPlacement->IsFirstInSelectOp = true;
+    }
+
+    auto firstSelect = SelectKind(node.GetRule_select_kind_parenthesis1(), selectPos, firstPlacement);
+    if (firstSelect) {
+        return Build(node, selectPos, std::move(firstSelect));
+    }
+    return nullptr;
+}
+
+// Translates a select_unparenthesized_stmt rule into a source; returns nullptr when the first select
+// fails to translate. When further selects follow (Block2 is not empty) the first one is translated
+// with a placement marked as first in the select_op chain.
+TSourcePtr TSqlSelect::Build(const TRule_select_unparenthesized_stmt& node, TPosition& selectPos) {
+    TMaybe<TSelectKindPlacement> firstPlacement;
+    const bool hasSelectOps = !node.GetBlock2().empty();
+    if (hasSelectOps) {
+        firstPlacement.ConstructInPlace();
+        firstPlacement->IsFirstInSelectOp = true;
+    }
+
+    auto firstSelect = SelectKind(node.GetRule_select_kind_partial1(), selectPos, firstPlacement);
+    if (firstSelect) {
+        return Build(node, selectPos, std::move(firstSelect));
+    }
+    return nullptr;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_select.h b/yql/essentials/sql/v1/sql_select.h
new file mode 100644
index 00000000000..fd6f0bece52
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_select.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "sql_translation.h"
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translator for SELECT / PROCESS / REDUCE statements: turns the parsed grammar rules into sources.
+class TSqlSelect: public TSqlTranslation {
+public:
+    TSqlSelect(TContext& ctx, NSQLTranslation::ESqlMode mode)
+        : TSqlTranslation(ctx, mode)
+    {
+    }
+
+    // Entry points. Both return a null source on error; selectPos is passed by reference and may be updated.
+    TSourcePtr Build(const TRule_select_stmt& node, TPosition& selectPos);
+    TSourcePtr Build(const TRule_select_unparenthesized_stmt& node, TPosition& selectPos);
+
+private:
+    // Result columns and column lists.
+    bool SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node);
+    bool ValidateSelectColumns(const TVector<TNodePtr>& terms);
+    bool ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node);
+    bool ColumnName(TVector<TNodePtr>& keys, const TRule_without_column_name& node);
+    template<typename TRule>
+    bool ColumnList(TVector<TNodePtr>& keys, const TRule& node);
+    bool NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node);
+
+    // Sources and joins.
+    TSourcePtr SingleSource(const TRule_single_source& node, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos, bool unorderedSubquery);
+    TSourcePtr NamedSingleSource(const TRule_named_single_source& node, bool unorderedSubquery);
+    bool FlattenByArg(const TString& sourceLabel, TVector<TNodePtr>& flattenByColumns, TVector<TNodePtr>& flattenByExprs, const TRule_flatten_by_arg& node);
+    TSourcePtr FlattenSource(const TRule_flatten_source& node);
+    TSourcePtr JoinSource(const TRule_join_source& node);
+    bool JoinOp(ISource* join, const TRule_join_source::TBlock3& block, TMaybe<TPosition> anyPos);
+    TNodePtr JoinExpr(ISource*, const TRule_join_constraint& node);
+
+    // Statement cores.
+    TSourcePtr ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos);
+    TSourcePtr ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos);
+
+    // Position of a select inside a select_op chain.
+    struct TSelectKindPlacement {
+        bool IsFirstInSelectOp = false;
+        bool IsLastInSelectOp = false;
+    };
+
+    // selectOpOrderBy / selectOpAssumeOrderBy are out-parameters describing an ORDER BY clause
+    // that the caller has to apply.
+    TSourcePtr SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos,
+        TMaybe<TSelectKindPlacement> placement, TVector<TSortSpecificationPtr>& selectOpOrderBy, bool& selectOpAssumeOrderBy);
+
+    bool WindowDefinition(const TRule_window_definition& node, TWinSpecs& winSpecs);
+    bool WindowClause(const TRule_window_clause& node, TWinSpecs& winSpecs);
+
+    // Outcome of translating one select of a select_op chain.
+    struct TSelectKindResult {
+        TSourcePtr Source;
+        TWriteSettings Settings;
+
+        // ORDER BY / LIMIT that belong to the whole chain rather than to this select.
+        TVector<TSortSpecificationPtr> SelectOpOrderBy;
+        bool SelectOpAssumeOrderBy = false;
+        TNodePtr SelectOpSkipTake;
+
+        // A result is valid when it carries a source.
+        inline explicit operator bool() const {
+            return static_cast<bool>(Source);
+        }
+    };
+
+    bool ValidateLimitOrderByWithSelectOp(TMaybe<TSelectKindPlacement> placement, TStringBuf what);
+    bool NeedPassLimitOrderByToUnderlyingSelect(TMaybe<TSelectKindPlacement> placement);
+
+    // Shared tail of both public Build overloads: combines `first` with the rest of the chain.
+    template<typename TRule>
+    TSourcePtr Build(const TRule& node, TPosition pos, TSelectKindResult&& first);
+
+
+    TSelectKindResult SelectKind(const TRule_select_kind& node, TPosition& selectPos, TMaybe<TSelectKindPlacement> placement);
+    TSelectKindResult SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos, TMaybe<TSelectKindPlacement> placement);
+    TSelectKindResult SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos, TMaybe<TSelectKindPlacement> placement);
+};
+
+} //namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp
new file mode 100644
index 00000000000..4bcfb7de847
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_translation.cpp
@@ -0,0 +1,5149 @@
+#include "sql_translation.h"
+#include "sql_expression.h"
+#include "sql_call_expr.h"
+#include "sql_query.h"
+#include "sql_values.h"
+#include "sql_select.h"
+#include "source.h"
+
+#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
+#include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
+#include <yql/essentials/sql/settings/partitioning.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
+
+#include <util/generic/scope.h>
+#include <util/string/join.h>
+
+#include <library/cpp/protobuf/util/simple_reflection.h>
+
+namespace {
+
+using namespace NSQLTranslationV1;
+
+// Walks the message tree depth-first: for every message-typed field value of msg the callback is
+// invoked first, then the value's own fields are visited recursively.
+template <typename Callback>
+void VisitAllFields(const NProtoBuf::Message& msg, Callback& callback) {
+    const auto* descriptor = msg.GetDescriptor();
+    for (int fieldIdx = 0; fieldIdx < descriptor->field_count(); ++fieldIdx) {
+        NProtoBuf::TConstField field(msg, descriptor->field(fieldIdx));
+        if (!field.IsMessage()) {
+            continue;
+        }
+        for (size_t valueIdx = 0; valueIdx < field.Size(); ++valueIdx) {
+            const auto& child = *field.Get<NProtoBuf::Message>(valueIdx);
+            callback(child);
+            VisitAllFields(child, callback);
+        }
+    }
+}
+
+// Callback for VisitAllFields: appends the value of every visited TToken message to Tokens,
+// separating consecutive values with a single space. Other messages are ignored.
+struct TTokenCollector {
+    void operator()(const NProtoBuf::Message& message) {
+        const auto* token = dynamic_cast<const NSQLv1Generated::TToken*>(&message);
+        if (token == nullptr) {
+            return;
+        }
+        if (!Tokens.empty()) {
+            Tokens << ' ';
+        }
+        Tokens << token->GetValue();
+    }
+
+    TStringBuilder Tokens;
+};
+
+// Returns the values of all tokens found in the select statement, joined with spaces.
+TString CollectTokens(const TRule_select_stmt& selectStatement) {
+    TTokenCollector collector;
+    VisitAllFields(selectStatement, collector);
+    return collector.Tokens;
+}
+
+// Parses recreationQuery, then builds, initializes and translates it against ctx.
+// An empty query is a no-op that succeeds. Returns false when any of these steps fails.
+bool RecreateContext(
+    TContext& ctx, const NSQLTranslation::TTranslationSettings& settings, const TString& recreationQuery
+) {
+    if (recreationQuery.empty()) {
+        return true;
+    }
+
+    const TString queryName = "context recreation query";
+    const auto* ast = NSQLTranslationV1::SqlAST(
+        recreationQuery, queryName, ctx.Issues,
+        settings.MaxErrors, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena
+    );
+    if (ast == nullptr) {
+        return false;
+    }
+
+    TSqlQuery queryTranslator(ctx, ctx.Settings.Mode, true);
+    auto node = queryTranslator.Build(static_cast<const TSQLv1ParserAST&>(*ast));
+    if (!node) {
+        return false;
+    }
+
+    return node->Init(ctx, nullptr) && node->Translate(ctx);
+}
+
+// Translates the select statement of a view in a context of its own, created from the parent's settings.
+// The context is first prepared by running contextRecreationQuery, then switched to LIMITED_VIEW mode.
+// On failure the issues collected by the separate context are added to the parent context and
+// nullptr is returned.
+TNodePtr BuildViewSelect(
+    const TRule_select_stmt& selectStatement,
+    TContext& parentContext,
+    const TString& contextRecreationQuery
+) {
+    TIssues issues;
+    TContext context(parentContext.Settings, {}, issues);
+
+    // Common failure path: surface the collected issues in the parent context.
+    const auto fail = [&parentContext, &issues]() -> TNodePtr {
+        parentContext.Issues.AddIssues(issues);
+        return nullptr;
+    };
+
+    if (!RecreateContext(context, context.Settings, contextRecreationQuery)) {
+        return fail();
+    }
+    issues.Clear();
+
+    // Holds (among other things) subquery references.
+    // These references need to be passed to the parent context
+    // to be able to compile view queries with subqueries.
+    context.PushCurrentBlocks(&parentContext.GetCurrentBlocks());
+
+    context.Settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW;
+
+    TSqlSelect selectTranslator(context, context.Settings.Mode);
+    TPosition pos = parentContext.Pos();
+    auto source = selectTranslator.Build(selectStatement, pos);
+    if (!source) {
+        return fail();
+    }
+
+    auto node = BuildSelectResult(
+        pos,
+        std::move(source),
+        false,
+        false,
+        context.Scoped
+    );
+    if (!node) {
+        return fail();
+    }
+    return node;
+}
+
+}
+
+namespace NSQLTranslationV1 {
+
+using NALPDefault::SQLv1LexerTokens;
+using NALPDefaultAntlr4::SQLv1Antlr4Lexer;
+
+using namespace NSQLv1Generated;
+
+// Builds an identifier from a `keyword` rule by dispatching on the chosen alternative.
+//
+// keyword:
+//     keyword_compat
+//   | keyword_expr_uncompat
+//   | keyword_table_uncompat
+//   | keyword_select_uncompat
+//   | keyword_alter_uncompat
+//   | keyword_in_uncompat
+//   | keyword_window_uncompat
+//   | keyword_hint_uncompat
+// ;
+TIdentifier GetKeywordId(TTranslation& ctx, const TRule_keyword& node) {
+    switch (node.Alt_case()) {
+        case TRule_keyword::kAltKeyword1: {
+            const auto& alt = node.GetAlt_keyword1();
+            return GetIdentifier(ctx, alt.GetRule_keyword_compat1());
+        }
+        case TRule_keyword::kAltKeyword2: {
+            const auto& alt = node.GetAlt_keyword2();
+            return GetIdentifier(ctx, alt.GetRule_keyword_expr_uncompat1());
+        }
+        case TRule_keyword::kAltKeyword3: {
+            const auto& alt = node.GetAlt_keyword3();
+            return GetIdentifier(ctx, alt.GetRule_keyword_table_uncompat1());
+        }
+        case TRule_keyword::kAltKeyword4: {
+            const auto& alt = node.GetAlt_keyword4();
+            return GetIdentifier(ctx, alt.GetRule_keyword_select_uncompat1());
+        }
+        case TRule_keyword::kAltKeyword5: {
+            const auto& alt = node.GetAlt_keyword5();
+            return GetIdentifier(ctx, alt.GetRule_keyword_alter_uncompat1());
+        }
+        case TRule_keyword::kAltKeyword6: {
+            const auto& alt = node.GetAlt_keyword6();
+            return GetIdentifier(ctx, alt.GetRule_keyword_in_uncompat1());
+        }
+        case TRule_keyword::kAltKeyword7: {
+            const auto& alt = node.GetAlt_keyword7();
+            return GetIdentifier(ctx, alt.GetRule_keyword_window_uncompat1());
+        }
+        case TRule_keyword::kAltKeyword8: {
+            const auto& alt = node.GetAlt_keyword8();
+            return GetIdentifier(ctx, alt.GetRule_keyword_hint_uncompat1());
+        }
+        case TRule_keyword::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id` rule to a string.
+// id: identifier | keyword;
+TString Id(const TRule_id& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id::kAltId1: {
+            const auto& identifier = node.GetAlt_id1().GetRule_identifier1();
+            return Id(identifier, ctx);
+        }
+        case TRule_id::kAltId2: {
+            const auto& keyword = node.GetAlt_id2().GetRule_keyword1();
+            return GetKeyword(ctx, keyword);
+        }
+        case TRule_id::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id_or_type` rule to a string: either a nested `id` rule or the token of a `type_id` rule.
+TString Id(const TRule_id_or_type& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id_or_type::kAltIdOrType1: {
+            const auto& id = node.GetAlt_id_or_type1().GetRule_id1();
+            return Id(id, ctx);
+        }
+        case TRule_id_or_type::kAltIdOrType2: {
+            const auto& typeToken = node.GetAlt_id_or_type2().GetRule_type_id1().GetToken1();
+            return ctx.Identifier(typeToken);
+        }
+        case TRule_id_or_type::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id_as_compat` rule to a string: either an identifier or the token of a
+// `keyword_as_compat` rule.
+TString Id(const TRule_id_as_compat& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id_as_compat::kAltIdAsCompat1: {
+            const auto& identifier = node.GetAlt_id_as_compat1().GetRule_identifier1();
+            return Id(identifier, ctx);
+        }
+        case TRule_id_as_compat::kAltIdAsCompat2: {
+            const auto& keywordToken = node.GetAlt_id_as_compat2().GetRule_keyword_as_compat1().GetToken1();
+            return ctx.Token(keywordToken);
+        }
+        case TRule_id_as_compat::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `an_id_as_compat` rule to a string: either a nested `id_as_compat` rule or a single
+// token whose text is passed through IdContentFromString.
+TString Id(const TRule_an_id_as_compat& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_an_id_as_compat::kAltAnIdAsCompat1: {
+            const auto& inner = node.GetAlt_an_id_as_compat1().GetRule_id_as_compat1();
+            return Id(inner, ctx);
+        }
+        case TRule_an_id_as_compat::kAltAnIdAsCompat2: {
+            const auto& tokenText = ctx.Token(node.GetAlt_an_id_as_compat2().GetToken1());
+            return IdContentFromString(ctx.Context(), tokenText);
+        }
+        case TRule_an_id_as_compat::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id_schema` rule to a string by dispatching on the chosen alternative.
+//
+// id_schema:
+//     identifier
+//   | keyword_compat
+//   | keyword_expr_uncompat
+//   // | keyword_table_uncompat
+//   | keyword_select_uncompat
+//   // | keyword_alter_uncompat
+//   | keyword_in_uncompat
+//   | keyword_window_uncompat
+//   | keyword_hint_uncompat
+// ;
+TString Id(const TRule_id_schema& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id_schema::kAltIdSchema1: {
+            const auto& alt = node.GetAlt_id_schema1();
+            return Id(alt.GetRule_identifier1(), ctx);
+        }
+        case TRule_id_schema::kAltIdSchema2: {
+            const auto& alt = node.GetAlt_id_schema2();
+            return GetKeyword(ctx, alt.GetRule_keyword_compat1());
+        }
+        case TRule_id_schema::kAltIdSchema3: {
+            const auto& alt = node.GetAlt_id_schema3();
+            return GetKeyword(ctx, alt.GetRule_keyword_expr_uncompat1());
+        }
+        case TRule_id_schema::kAltIdSchema4: {
+            const auto& alt = node.GetAlt_id_schema4();
+            return GetKeyword(ctx, alt.GetRule_keyword_select_uncompat1());
+        }
+        case TRule_id_schema::kAltIdSchema5: {
+            const auto& alt = node.GetAlt_id_schema5();
+            return GetKeyword(ctx, alt.GetRule_keyword_in_uncompat1());
+        }
+        case TRule_id_schema::kAltIdSchema6: {
+            const auto& alt = node.GetAlt_id_schema6();
+            return GetKeyword(ctx, alt.GetRule_keyword_window_uncompat1());
+        }
+        case TRule_id_schema::kAltIdSchema7: {
+            const auto& alt = node.GetAlt_id_schema7();
+            return GetKeyword(ctx, alt.GetRule_keyword_hint_uncompat1());
+        }
+        case TRule_id_schema::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `an_id_or_type` rule to a string; the STRING_VALUE alternative is passed through
+// IdContentFromString.
+// an_id_or_type: id_or_type | STRING_VALUE;
+TString Id(const TRule_an_id_or_type& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_an_id_or_type::kAltAnIdOrType1: {
+            const auto& inner = node.GetAlt_an_id_or_type1().GetRule_id_or_type1();
+            return Id(inner, ctx);
+        }
+        case TRule_an_id_or_type::kAltAnIdOrType2: {
+            const auto& tokenText = ctx.Token(node.GetAlt_an_id_or_type2().GetToken1());
+            return IdContentFromString(ctx.Context(), tokenText);
+        }
+        case TRule_an_id_or_type::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id_or_at` rule to a pair: whether the optional leading block (Block1) is present,
+// and the string form of the nested `an_id_or_type` rule.
+std::pair<bool, TString> Id(const TRule_id_or_at& node, TTranslation& ctx) {
+    const bool hasPrefixBlock = node.HasBlock1();
+    TString name = Id(node.GetRule_an_id_or_type2(), ctx);
+    return std::make_pair(hasPrefixBlock, std::move(name));
+}
+
+// Converts an `id_table` rule to a string by dispatching on the chosen alternative.
+//
+// id_table:
+//     identifier
+//   | keyword_compat
+//   | keyword_expr_uncompat
+//   // | keyword_table_uncompat
+//   | keyword_select_uncompat
+//   // | keyword_alter_uncompat
+//   | keyword_in_uncompat
+//   | keyword_window_uncompat
+//   | keyword_hint_uncompat
+// ;
+TString Id(const TRule_id_table& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id_table::kAltIdTable1: {
+            const auto& alt = node.GetAlt_id_table1();
+            return Id(alt.GetRule_identifier1(), ctx);
+        }
+        case TRule_id_table::kAltIdTable2: {
+            const auto& alt = node.GetAlt_id_table2();
+            return GetKeyword(ctx, alt.GetRule_keyword_compat1());
+        }
+        case TRule_id_table::kAltIdTable3: {
+            const auto& alt = node.GetAlt_id_table3();
+            return GetKeyword(ctx, alt.GetRule_keyword_expr_uncompat1());
+        }
+        case TRule_id_table::kAltIdTable4: {
+            const auto& alt = node.GetAlt_id_table4();
+            return GetKeyword(ctx, alt.GetRule_keyword_select_uncompat1());
+        }
+        case TRule_id_table::kAltIdTable5: {
+            const auto& alt = node.GetAlt_id_table5();
+            return GetKeyword(ctx, alt.GetRule_keyword_in_uncompat1());
+        }
+        case TRule_id_table::kAltIdTable6: {
+            const auto& alt = node.GetAlt_id_table6();
+            return GetKeyword(ctx, alt.GetRule_keyword_window_uncompat1());
+        }
+        case TRule_id_table::kAltIdTable7: {
+            const auto& alt = node.GetAlt_id_table7();
+            return GetKeyword(ctx, alt.GetRule_keyword_hint_uncompat1());
+        }
+        case TRule_id_table::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `an_id_table` rule to a string; the STRING_VALUE alternative is passed through
+// IdContentFromString.
+// an_id_table: id_table | STRING_VALUE;
+TString Id(const TRule_an_id_table& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_an_id_table::kAltAnIdTable1: {
+            const auto& inner = node.GetAlt_an_id_table1().GetRule_id_table1();
+            return Id(inner, ctx);
+        }
+        case TRule_an_id_table::kAltAnIdTable2: {
+            const auto& tokenText = ctx.Token(node.GetAlt_an_id_table2().GetToken1());
+            return IdContentFromString(ctx.Context(), tokenText);
+        }
+        case TRule_an_id_table::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id_table_or_type` rule to a string: either a nested `an_id_table` rule or the token
+// of a `type_id` rule.
+TString Id(const TRule_id_table_or_type& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id_table_or_type::kAltIdTableOrType1: {
+            const auto& tableId = node.GetAlt_id_table_or_type1().GetRule_an_id_table1();
+            return Id(tableId, ctx);
+        }
+        case TRule_id_table_or_type::kAltIdTableOrType2: {
+            const auto& typeToken = node.GetAlt_id_table_or_type2().GetRule_type_id1().GetToken1();
+            return ctx.Identifier(typeToken);
+        }
+        case TRule_id_table_or_type::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Converts an `id_expr` rule to a string by dispatching on the chosen alternative.
+//
+// id_expr:
+//     identifier
+//   | keyword_compat
+//   // | keyword_expr_uncompat
+//   // | keyword_table_uncompat
+//   // | keyword_select_uncompat
+//   | keyword_alter_uncompat
+//   | keyword_in_uncompat
+//   | keyword_window_uncompat
+//   | keyword_hint_uncompat
+// ;
+TString Id(const TRule_id_expr& node, TTranslation& ctx) {
+    switch (node.Alt_case()) {
+        case TRule_id_expr::kAltIdExpr1: {
+            const auto& alt = node.GetAlt_id_expr1();
+            return Id(alt.GetRule_identifier1(), ctx);
+        }
+        case TRule_id_expr::kAltIdExpr2: {
+            const auto& alt = node.GetAlt_id_expr2();
+            return GetKeyword(ctx, alt.GetRule_keyword_compat1());
+        }
+        case TRule_id_expr::kAltIdExpr3: {
+            const auto& alt = node.GetAlt_id_expr3();
+            return GetKeyword(ctx, alt.GetRule_keyword_alter_uncompat1());
+        }
+        case TRule_id_expr::kAltIdExpr4: {
+            const auto& alt = node.GetAlt_id_expr4();
+            return GetKeyword(ctx, alt.GetRule_keyword_in_uncompat1());
+        }
+        case TRule_id_expr::kAltIdExpr5: {
+            const auto& alt = node.GetAlt_id_expr5();
+            return GetKeyword(ctx, alt.GetRule_keyword_window_uncompat1());
+        }
+        case TRule_id_expr::kAltIdExpr6: {
+            const auto& alt = node.GetAlt_id_expr6();
+            return GetKeyword(ctx, alt.GetRule_keyword_hint_uncompat1());
+        }
+        case TRule_id_expr::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+ // Tells whether an id_expr node is a backtick-quoted identifier.
+ // Only the `identifier` alternative (identifier: ID_PLAIN | ID_QUOTED) can be quoted;
+ // every keyword alternative yields false.
+ bool IsQuotedId(const TRule_id_expr& node, TTranslation& ctx) {
+     if (node.Alt_case() == TRule_id_expr::kAltIdExpr1) {
+         const auto& text = ctx.Token(node.GetAlt_id_expr1().GetRule_identifier1().GetToken1());
+         return text.StartsWith('`');
+     }
+     return false;
+ }
+
+ // Returns the textual form of an id_expr_in node. Same as id_expr except that
+ // keyword_in_uncompat is not accepted in this position.
+ //
+ //id_expr_in:
+ //      identifier
+ //    | keyword_compat
+ // // | keyword_expr_uncompat
+ // // | keyword_table_uncompat
+ // // | keyword_select_uncompat
+ //    | keyword_alter_uncompat
+ // // | keyword_in_uncompat
+ //    | keyword_window_uncompat
+ //    | keyword_hint_uncompat
+ //;
+ TString Id(const TRule_id_expr_in& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_id_expr_in::kAltIdExprIn1: {
+             const auto& ident = node.GetAlt_id_expr_in1().GetRule_identifier1();
+             return Id(ident, ctx);
+         }
+         case TRule_id_expr_in::kAltIdExprIn2: {
+             const auto& kw = node.GetAlt_id_expr_in2().GetRule_keyword_compat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_expr_in::kAltIdExprIn3: {
+             const auto& kw = node.GetAlt_id_expr_in3().GetRule_keyword_alter_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_expr_in::kAltIdExprIn4: {
+             const auto& kw = node.GetAlt_id_expr_in4().GetRule_keyword_window_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_expr_in::kAltIdExprIn5: {
+             const auto& kw = node.GetAlt_id_expr_in5().GetRule_keyword_hint_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_expr_in::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual form of an id_window node. Every keyword group except
+ // keyword_window_uncompat is accepted in this position.
+ //
+ //id_window:
+ //      identifier
+ //    | keyword_compat
+ //    | keyword_expr_uncompat
+ //    | keyword_table_uncompat
+ //    | keyword_select_uncompat
+ //    | keyword_alter_uncompat
+ //    | keyword_in_uncompat
+ // // | keyword_window_uncompat
+ //    | keyword_hint_uncompat
+ //;
+ TString Id(const TRule_id_window& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_id_window::kAltIdWindow1: {
+             const auto& ident = node.GetAlt_id_window1().GetRule_identifier1();
+             return Id(ident, ctx);
+         }
+         case TRule_id_window::kAltIdWindow2: {
+             const auto& kw = node.GetAlt_id_window2().GetRule_keyword_compat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::kAltIdWindow3: {
+             const auto& kw = node.GetAlt_id_window3().GetRule_keyword_expr_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::kAltIdWindow4: {
+             const auto& kw = node.GetAlt_id_window4().GetRule_keyword_table_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::kAltIdWindow5: {
+             const auto& kw = node.GetAlt_id_window5().GetRule_keyword_select_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::kAltIdWindow6: {
+             const auto& kw = node.GetAlt_id_window6().GetRule_keyword_alter_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::kAltIdWindow7: {
+             const auto& kw = node.GetAlt_id_window7().GetRule_keyword_in_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::kAltIdWindow8: {
+             const auto& kw = node.GetAlt_id_window8().GetRule_keyword_hint_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_window::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual form of an id_without node. keyword_expr_uncompat and
+ // keyword_select_uncompat are not accepted in this position.
+ //
+ //id_without:
+ //      identifier
+ //    | keyword_compat
+ // // | keyword_expr_uncompat
+ //    | keyword_table_uncompat
+ // // | keyword_select_uncompat
+ //    | keyword_alter_uncompat
+ //    | keyword_in_uncompat
+ //    | keyword_window_uncompat
+ //    | keyword_hint_uncompat
+ //;
+ TString Id(const TRule_id_without& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_id_without::kAltIdWithout1: {
+             const auto& ident = node.GetAlt_id_without1().GetRule_identifier1();
+             return Id(ident, ctx);
+         }
+         case TRule_id_without::kAltIdWithout2: {
+             const auto& kw = node.GetAlt_id_without2().GetRule_keyword_compat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_without::kAltIdWithout3: {
+             const auto& kw = node.GetAlt_id_without3().GetRule_keyword_table_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_without::kAltIdWithout4: {
+             const auto& kw = node.GetAlt_id_without4().GetRule_keyword_alter_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_without::kAltIdWithout5: {
+             const auto& kw = node.GetAlt_id_without5().GetRule_keyword_in_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_without::kAltIdWithout6: {
+             const auto& kw = node.GetAlt_id_without6().GetRule_keyword_window_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_without::kAltIdWithout7: {
+             const auto& kw = node.GetAlt_id_without7().GetRule_keyword_hint_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_without::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual form of an id_hint node. Every keyword group except
+ // keyword_hint_uncompat is accepted in this position.
+ //
+ //id_hint:
+ //      identifier
+ //    | keyword_compat
+ //    | keyword_expr_uncompat
+ //    | keyword_table_uncompat
+ //    | keyword_select_uncompat
+ //    | keyword_alter_uncompat
+ //    | keyword_in_uncompat
+ //    | keyword_window_uncompat
+ // // | keyword_hint_uncompat
+ //;
+ TString Id(const TRule_id_hint& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_id_hint::kAltIdHint1: {
+             const auto& ident = node.GetAlt_id_hint1().GetRule_identifier1();
+             return Id(ident, ctx);
+         }
+         case TRule_id_hint::kAltIdHint2: {
+             const auto& kw = node.GetAlt_id_hint2().GetRule_keyword_compat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::kAltIdHint3: {
+             const auto& kw = node.GetAlt_id_hint3().GetRule_keyword_expr_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::kAltIdHint4: {
+             const auto& kw = node.GetAlt_id_hint4().GetRule_keyword_table_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::kAltIdHint5: {
+             const auto& kw = node.GetAlt_id_hint5().GetRule_keyword_select_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::kAltIdHint6: {
+             const auto& kw = node.GetAlt_id_hint6().GetRule_keyword_alter_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::kAltIdHint7: {
+             const auto& kw = node.GetAlt_id_hint7().GetRule_keyword_in_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::kAltIdHint8: {
+             const auto& kw = node.GetAlt_id_hint8().GetRule_keyword_window_uncompat1();
+             return GetKeyword(ctx, kw);
+         }
+         case TRule_id_hint::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id node.
+ // Grammar: an_id: id | STRING_VALUE;
+ TString Id(const TRule_an_id& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id::kAltAnId1: {
+             const auto& plain = node.GetAlt_an_id1();
+             return Id(plain.GetRule_id1(), ctx);
+         }
+         case TRule_an_id::kAltAnId2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id_schema node.
+ // Grammar: an_id_schema: id_schema | STRING_VALUE;
+ TString Id(const TRule_an_id_schema& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id_schema::kAltAnIdSchema1: {
+             const auto& plain = node.GetAlt_an_id_schema1();
+             return Id(plain.GetRule_id_schema1(), ctx);
+         }
+         case TRule_an_id_schema::kAltAnIdSchema2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id_schema2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id_schema::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id_expr node.
+ // Grammar: an_id_expr: id_expr | STRING_VALUE;
+ TString Id(const TRule_an_id_expr& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id_expr::kAltAnIdExpr1: {
+             const auto& plain = node.GetAlt_an_id_expr1();
+             return Id(plain.GetRule_id_expr1(), ctx);
+         }
+         case TRule_an_id_expr::kAltAnIdExpr2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id_expr2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id_expr::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id_window node.
+ // Grammar: an_id_window: id_window | STRING_VALUE;
+ TString Id(const TRule_an_id_window& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id_window::kAltAnIdWindow1: {
+             const auto& plain = node.GetAlt_an_id_window1();
+             return Id(plain.GetRule_id_window1(), ctx);
+         }
+         case TRule_an_id_window::kAltAnIdWindow2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id_window2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id_window::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id_without node.
+ // Grammar: an_id_without: id_without | STRING_VALUE;
+ TString Id(const TRule_an_id_without& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id_without::kAltAnIdWithout1: {
+             const auto& plain = node.GetAlt_an_id_without1();
+             return Id(plain.GetRule_id_without1(), ctx);
+         }
+         case TRule_an_id_without::kAltAnIdWithout2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id_without2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id_without::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id_hint node.
+ // Grammar: an_id_hint: id_hint | STRING_VALUE;
+ TString Id(const TRule_an_id_hint& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id_hint::kAltAnIdHint1: {
+             const auto& plain = node.GetAlt_an_id_hint1();
+             return Id(plain.GetRule_id_hint1(), ctx);
+         }
+         case TRule_an_id_hint::kAltAnIdHint2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id_hint2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id_hint::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Returns the textual name carried by an an_id_pure node.
+ // Grammar: an_id_pure: identifier | STRING_VALUE;
+ TString Id(const TRule_an_id_pure& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_an_id_pure::kAltAnIdPure1: {
+             const auto& plain = node.GetAlt_an_id_pure1();
+             return Id(plain.GetRule_identifier1(), ctx);
+         }
+         case TRule_an_id_pure::kAltAnIdPure2: {
+             // STRING_VALUE alternative: the name is taken from the string token.
+             const auto& quoted = node.GetAlt_an_id_pure2();
+             return IdContentFromString(ctx.Context(), ctx.Token(quoted.GetToken1()));
+         }
+         case TRule_an_id_pure::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Builds a view description from a view_name node. The first alternative carries
+ // an explicit an_id name; the second yields an empty name with the second
+ // TViewDescription field set to true.
+ TViewDescription Id(const TRule_view_name& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_view_name::kAltViewName1: {
+             const auto& named = node.GetAlt_view_name1();
+             return {Id(named.GetRule_an_id1(), ctx)};
+         }
+         case TRule_view_name::kAltViewName2: {
+             return {"", true};
+         }
+         case TRule_view_name::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Extracts the full name ("$" + symbol) of a bind parameter into `name`.
+ // Grammar: bind_parameter: DOLLAR (an_id_or_type | TRUE | FALSE);
+ // Returns false (after reporting an error) when the symbol part is empty;
+ // `name` is left untouched in that case.
+ bool NamedNodeImpl(const TRule_bind_parameter& node, TString& name, TTranslation& ctx) {
+     const auto& symbolBlock = node.GetBlock2();
+     TString symbol;
+     switch (symbolBlock.Alt_case()) {
+         case TRule_bind_parameter::TBlock2::kAlt1:
+             symbol = Id(symbolBlock.GetAlt1().GetRule_an_id_or_type1(), ctx);
+             break;
+         case TRule_bind_parameter::TBlock2::kAlt2:
+             // TRUE keyword used as a symbol name
+             symbol = ctx.Token(symbolBlock.GetAlt2().GetToken1());
+             break;
+         case TRule_bind_parameter::TBlock2::kAlt3:
+             // FALSE keyword used as a symbol name
+             symbol = ctx.Token(symbolBlock.GetAlt3().GetToken1());
+             break;
+         case TRule_bind_parameter::TBlock2::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+
+     // The DOLLAR token is read before the emptiness check, as in the original order.
+     auto prefix = ctx.Token(node.GetToken1());
+     if (symbol.empty()) {
+         ctx.Error() << "Empty symbol name is not allowed";
+         return false;
+     }
+
+     name = prefix + symbol;
+     return true;
+ }
+
+ // Returns the identifier prefix stored in an opt_id_prefix node,
+ // or `defaultStr` when the optional prefix is absent.
+ TString OptIdPrefixAsStr(const TRule_opt_id_prefix& node, TTranslation& ctx, const TString& defaultStr) {
+     if (node.HasBlock1()) {
+         const auto& prefixBlock = node.GetBlock1();
+         return Id(prefixBlock.GetRule_an_id1(), ctx);
+     }
+     return defaultStr;
+ }
+
+ // Returns the identifier-or-type prefix stored in an opt_id_prefix_or_type node,
+ // or `defaultStr` when the optional prefix is absent.
+ TString OptIdPrefixAsStr(const TRule_opt_id_prefix_or_type& node, TTranslation& ctx, const TString& defaultStr) {
+     if (node.HasBlock1()) {
+         const auto& prefixBlock = node.GetBlock1();
+         return Id(prefixBlock.GetRule_an_id_or_type1(), ctx);
+     }
+     return defaultStr;
+ }
+
+ // Appends every column name of a pure_column_list node to `outList`:
+ // the leading an_id first, then each an_id from the repeated tail block.
+ void PureColumnListStr(const TRule_pure_column_list& node, TTranslation& ctx, TVector<TString>& outList) {
+     const auto appendName = [&](const auto& idNode) {
+         outList.push_back(Id(idNode, ctx));
+     };
+
+     appendName(node.GetRule_an_id2());
+     for (const auto& tail : node.GetBlock3()) {
+         appendName(tail.GetRule_an_id2());
+     }
+ }
+
+ // Extracts the name of an optionally-marked bind parameter.
+ // Grammar: opt_bind_parameter: bind_parameter QUESTION?;
+ // `isOptional` is reset to false up front and only set from the QUESTION block
+ // when the underlying bind parameter was parsed successfully.
+ bool NamedNodeImpl(const TRule_opt_bind_parameter& node, TString& name, bool& isOptional, TTranslation& ctx) {
+     isOptional = false;
+     const bool parsed = NamedNodeImpl(node.GetRule_bind_parameter1(), name, ctx);
+     if (parsed) {
+         isOptional = node.HasBlock2();
+     }
+     return parsed;
+ }
+
+ // Converts a pure_column_or_named node into a deferred atom.
+ // A bind parameter is looked up as a named node; a plain identifier becomes a
+ // literal atom at the current position. A default-constructed atom is returned
+ // when the bind parameter cannot be parsed or resolved.
+ TDeferredAtom PureColumnOrNamed(const TRule_pure_column_or_named& node, TTranslation& ctx) {
+     switch (node.Alt_case()) {
+         case TRule_pure_column_or_named::kAltPureColumnOrNamed1: {
+             const auto& bindParam = node.GetAlt_pure_column_or_named1().GetRule_bind_parameter1();
+             TString paramName;
+             if (!NamedNodeImpl(bindParam, paramName, ctx)) {
+                 return {};
+             }
+
+             auto resolved = ctx.GetNamedNode(paramName);
+             if (resolved) {
+                 return TDeferredAtom(resolved, ctx.Context());
+             }
+             return {};
+         }
+
+         case TRule_pure_column_or_named::kAltPureColumnOrNamed2: {
+             const auto& column = node.GetAlt_pure_column_or_named2();
+             return TDeferredAtom(ctx.Context().Pos(), Id(column.GetRule_an_id1(), ctx));
+         }
+         case TRule_pure_column_or_named::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // Appends the atoms of a pure_column_or_named_list node to `outList`.
+ // Stops and returns false at the first element that produces an empty atom;
+ // that empty atom is still left at the back of `outList`.
+ bool PureColumnOrNamedListStr(const TRule_pure_column_or_named_list& node, TTranslation& ctx, TVector<TDeferredAtom>& outList) {
+     const auto appendAtom = [&](const TRule_pure_column_or_named& item) {
+         outList.push_back(PureColumnOrNamed(item, ctx));
+         return !outList.back().Empty();
+     };
+
+     if (!appendAtom(node.GetRule_pure_column_or_named2())) {
+         return false;
+     }
+
+     for (const auto& tail : node.GetBlock3()) {
+         if (!appendAtom(tail.GetRule_pure_column_or_named2())) {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ // Parses a table_index rule and appends the resulting description to `indexes`.
+ //
+ // Steps, in order:
+ //   1. index name (an_id2);
+ //   2. index kind: GLOBAL [UNIQUE] [SYNC|ASYNC] (LOCAL is reported as not implemented);
+ //   3. optional subtype (only VECTOR_KMEANS_TREE is accepted, and only on a GLOBAL [SYNC] index);
+ //   4. optional WITH settings (only accepted for the vector k-means tree index);
+ //   5. index columns and optional data (cover) columns.
+ //
+ // Returns false after reporting an error through Ctx / AltNotImplemented. The
+ // partially filled description stays at the back of `indexes` in that case.
+ //
+ // Fix: `GLOBAL UNIQUE` written without an explicit SYNC/ASYNC token used to leave
+ // the type untouched, silently dropping UNIQUE. It now maps to GlobalSyncUnique,
+ // the same type an explicit `GLOBAL UNIQUE SYNC` produces.
+ // NOTE(review): this relies on a freshly constructed TIndexDescription defaulting
+ // to the plain GLOBAL SYNC type -- confirm against its declaration.
+ bool TSqlTranslation::CreateTableIndex(const TRule_table_index& node, TVector<TIndexDescription>& indexes) {
+     indexes.emplace_back(IdEx(node.GetRule_an_id2(), *this));
+     // Nothing else is appended to `indexes` below, so this reference stays valid.
+     auto& index = indexes.back();
+
+     const auto& indexType = node.GetRule_table_index_type3().GetBlock1();
+     switch (indexType.Alt_case()) {
+         // "GLOBAL"
+         case TRule_table_index_type_TBlock1::kAlt1: {
+             // Bind by reference: copying the parsed message is unnecessary.
+             const auto& globalIndex = indexType.GetAlt1().GetRule_global_index1();
+             const bool uniqIndex = globalIndex.HasBlock2();
+             if (globalIndex.HasBlock3()) {
+                 const TString token = to_lower(Ctx.Token(globalIndex.GetBlock3().GetToken1()));
+                 if (token == "sync") {
+                     index.Type = uniqIndex
+                         ? TIndexDescription::EType::GlobalSyncUnique
+                         : TIndexDescription::EType::GlobalSync;
+                 } else if (token == "async") {
+                     if (uniqIndex) {
+                         AltNotImplemented("unique", indexType);
+                         return false;
+                     }
+                     index.Type = TIndexDescription::EType::GlobalAsync;
+                 } else {
+                     Y_ABORT("You should change implementation according to grammar changes");
+                 }
+             } else if (uniqIndex) {
+                 // No SYNC/ASYNC token: keep the implicit sync mode but honour UNIQUE.
+                 index.Type = TIndexDescription::EType::GlobalSyncUnique;
+             }
+             break;
+         }
+         // "LOCAL"
+         case TRule_table_index_type_TBlock1::kAlt2:
+             AltNotImplemented("local", indexType);
+             return false;
+         case TRule_table_index_type_TBlock1::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+
+     if (node.GetRule_table_index_type3().HasBlock2()) {
+         const TString subType = to_upper(IdEx(node.GetRule_table_index_type3().GetBlock2().GetRule_index_subtype2().GetRule_an_id1(), *this).Name);
+         if (subType == "VECTOR_KMEANS_TREE") {
+             if (index.Type != TIndexDescription::EType::GlobalSync) {
+                 Ctx.Error() << subType << " index can only be GLOBAL [SYNC]";
+                 return false;
+             }
+
+             index.Type = TIndexDescription::EType::GlobalVectorKmeansTree;
+         } else {
+             Ctx.Error() << subType << " index subtype is not supported";
+             return false;
+         }
+     }
+
+     // WITH
+     if (node.HasBlock10()) {
+         if (index.Type == TIndexDescription::EType::GlobalVectorKmeansTree) {
+             // Switch the settings variant to vector settings before the entries are filled in.
+             auto& vectorSettings = index.IndexSettings.emplace<TVectorIndexSettings>();
+             if (!CreateIndexSettings(node.GetBlock10().GetRule_with_index_settings1(), index.Type, index.IndexSettings)) {
+                 return false;
+             }
+             if (!vectorSettings.Validate(Ctx)) {
+                 return false;
+             }
+         } else {
+             AltNotImplemented("with", indexType);
+             return false;
+         }
+     }
+
+     index.IndexColumns.emplace_back(IdEx(node.GetRule_an_id_schema6(), *this));
+     for (const auto& block : node.GetBlock7()) {
+         index.IndexColumns.emplace_back(IdEx(block.GetRule_an_id_schema2(), *this));
+     }
+
+     if (node.HasBlock9()) {
+         const auto& block = node.GetBlock9();
+         index.DataColumns.emplace_back(IdEx(block.GetRule_an_id_schema3(), *this));
+         for (const auto& inner : block.GetBlock4()) {
+             index.DataColumns.emplace_back(IdEx(inner.GetRule_an_id_schema2(), *this));
+         }
+     }
+
+     return true;
+ }
+
+ // Applies every `name = value` entry of a WITH (...) index settings clause to
+ // `indexSettings`. Processing stops at the first entry that fails; false is
+ // returned in that case.
+ bool TSqlTranslation::CreateIndexSettings(const TRule_with_index_settings& settingsNode,
+         TIndexDescription::EType indexType,
+         TIndexDescription::TIndexSettings& indexSettings) {
+     const auto applyEntry = [&](const auto& entry) {
+         return CreateIndexSettingEntry(
+             IdEx(entry.GetRule_an_id1(), *this),
+             entry.GetRule_index_setting_value3(),
+             indexType,
+             indexSettings);
+     };
+
+     if (!applyEntry(settingsNode.GetRule_index_setting_entry3())) {
+         return false;
+     }
+     for (const auto& tail : settingsNode.GetBlock4()) {
+         if (!applyEntry(tail.GetRule_index_setting_entry2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Reads an index setting value and converts it to T with TryFromString.
+ //
+ // Returns {ok, value, text}:
+ //   ok    - whether the conversion succeeded;
+ //   value - the converted value (value-initialized on failure);
+ //   text  - the text to show in diagnostics.
+ //
+ // Only the id_or_type and STRING_VALUE alternatives can be converted here.
+ // Fix: an integer or bool literal (the alternatives read by the ui64 and bool
+ // specializations) used to reach Y_ABORT, so a setting such as an enum-valued
+ // option given a number aborted the process on user input. Such input is now
+ // reported as an ordinary conversion failure carrying the literal text.
+ template<typename T>
+ std::tuple<bool, T, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) {
+     T value{};
+     // id_or_type
+     if (node.HasAlt_index_setting_value1()) {
+         const TString stringValue = to_lower(IdEx(node.GetAlt_index_setting_value1().GetRule_id_or_type1(), *this).Name);
+         if (!TryFromString<T>(stringValue, value)) {
+             return {false, value, stringValue};
+         }
+         return {true, value, stringValue};
+     }
+     // STRING_VALUE
+     else if (node.HasAlt_index_setting_value2()) {
+         const TString stringValue = to_lower(Token(node.GetAlt_index_setting_value2().GetToken1()));
+         const auto unescaped = StringContent(Ctx, Ctx.Pos(), stringValue);
+         if (!unescaped) {
+             return {false, value, stringValue};
+         }
+         if (!TryFromString<T>(unescaped->Content, value)) {
+             return {false, value, stringValue};
+         }
+         return {true, value, unescaped->Content};
+     }
+     // integer: a valid token for the grammar, but not convertible to T here
+     else if (node.HasAlt_index_setting_value3()) {
+         return {false, value, Token(node.GetAlt_index_setting_value3().GetRule_integer1().GetToken1())};
+     }
+     // bool_value: same as above
+     else if (node.HasAlt_index_setting_value4()) {
+         return {false, value, Token(node.GetAlt_index_setting_value4().GetRule_bool_value1().GetToken1())};
+     } else {
+         Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+ // ui64 specialization: reads an index setting that must be an integer literal.
+ //
+ // Returns {ok, value, text}, where `text` is what the user wrote.
+ // Fix: the integer alternative used to be read unconditionally. For any other
+ // alternative the accessor yields an unset message, so the caller's error
+ // message carried no value text. The active alternative is now checked first
+ // and the text actually written is returned with the failure.
+ template<>
+ std::tuple<bool, ui64, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) {
+     ui64 value = 0;
+     if (!node.HasAlt_index_setting_value3()) {
+         TString written;
+         if (node.HasAlt_index_setting_value1()) {
+             written = IdEx(node.GetAlt_index_setting_value1().GetRule_id_or_type1(), *this).Name;
+         } else if (node.HasAlt_index_setting_value2()) {
+             written = Token(node.GetAlt_index_setting_value2().GetToken1());
+         } else if (node.HasAlt_index_setting_value4()) {
+             written = Token(node.GetAlt_index_setting_value4().GetRule_bool_value1().GetToken1());
+         }
+         return {false, value, written};
+     }
+
+     const auto& intNode = node.GetAlt_index_setting_value3().GetRule_integer1();
+     const TString stringValue = Token(intNode.GetToken1());
+     TString suffix; // filled by ParseNumbers; not used here
+     if (!ParseNumbers(Ctx, stringValue, value, suffix)) {
+         return {false, value, stringValue};
+     }
+     return {true, value, stringValue};
+ }
+
+ // bool specialization: reads an index setting that must be a bool literal.
+ //
+ // Returns {ok, value, text}, where `text` is what the user wrote (lower-cased
+ // for an actual bool literal, as before).
+ // Fix: the bool alternative used to be read unconditionally, so any other kind
+ // of value failed with empty text. The active alternative is now checked first.
+ // A stray empty statement (`;;`) is also removed.
+ template<>
+ std::tuple<bool, bool, TString> TSqlTranslation::GetIndexSettingValue(const TRule_index_setting_value& node) {
+     bool value = false;
+     if (!node.HasAlt_index_setting_value4()) {
+         TString written;
+         if (node.HasAlt_index_setting_value1()) {
+             written = IdEx(node.GetAlt_index_setting_value1().GetRule_id_or_type1(), *this).Name;
+         } else if (node.HasAlt_index_setting_value2()) {
+             written = Token(node.GetAlt_index_setting_value2().GetToken1());
+         } else if (node.HasAlt_index_setting_value3()) {
+             written = Token(node.GetAlt_index_setting_value3().GetRule_integer1().GetToken1());
+         }
+         return {false, value, written};
+     }
+
+     const TString stringValue = to_lower(Token(node.GetAlt_index_setting_value4().GetRule_bool_value1().GetToken1()));
+     if (!TryFromString<bool>(stringValue, value)) {
+         return {false, value, stringValue};
+     }
+     return {true, value, stringValue};
+ }
+
+ // Applies one `name = value` index setting to `indexSettings`.
+ //
+ // Settings are only defined for the vector k-means tree index; for any other
+ // index type every setting name is reported as unknown. Setting names are
+ // matched case-insensitively. Numeric settings are parsed as ui64 and rejected
+ // when they exceed Max<ui32>().
+ // Returns false after reporting an error through Ctx.
+ bool TSqlTranslation::CreateIndexSettingEntry(const TIdentifier &id,
+         const TRule_index_setting_value& node,
+         TIndexDescription::EType indexType,
+         TIndexDescription::TIndexSettings& indexSettings) {
+     if (indexType != TIndexDescription::EType::GlobalVectorKmeansTree) {
+         Ctx.Error() << "Unknown index setting: " << id.Name;
+         return false;
+     }
+
+     TVectorIndexSettings& vectorSettings = std::get<TVectorIndexSettings>(indexSettings);
+     const TString settingName = to_lower(id.Name);
+
+     if (settingName == "distance") {
+         const auto [ok, parsed, text] = GetIndexSettingValue<TVectorIndexSettings::EDistance>(node);
+         if (!ok) {
+             Ctx.Error() << "Invalid distance: " << text;
+             return false;
+         }
+         vectorSettings.Distance = parsed;
+         return true;
+     }
+
+     if (settingName == "similarity") {
+         const auto [ok, parsed, text] = GetIndexSettingValue<TVectorIndexSettings::ESimilarity>(node);
+         if (!ok) {
+             Ctx.Error() << "Invalid similarity: " << text;
+             return false;
+         }
+         vectorSettings.Similarity = parsed;
+         return true;
+     }
+
+     if (settingName == "vector_type") {
+         const auto [ok, parsed, text] = GetIndexSettingValue<TVectorIndexSettings::EVectorType>(node);
+         if (!ok) {
+             Ctx.Error() << "Invalid vector_type: " << text;
+             return false;
+         }
+         vectorSettings.VectorType = parsed;
+         return true;
+     }
+
+     if (settingName == "vector_dimension") {
+         const auto [ok, parsed, text] = GetIndexSettingValue<ui64>(node);
+         if (!ok || parsed > Max<ui32>()) {
+             Ctx.Error() << "Invalid vector_dimension: " << text;
+             return false;
+         }
+         vectorSettings.VectorDimension = parsed;
+         return true;
+     }
+
+     if (settingName == "clusters") {
+         const auto [ok, parsed, text] = GetIndexSettingValue<ui64>(node);
+         if (!ok || parsed > Max<ui32>()) {
+             Ctx.Error() << "Invalid clusters: " << text;
+             return false;
+         }
+         vectorSettings.Clusters = parsed;
+         return true;
+     }
+
+     if (settingName == "levels") {
+         const auto [ok, parsed, text] = GetIndexSettingValue<ui64>(node);
+         if (!ok || parsed > Max<ui32>()) {
+             Ctx.Error() << "Invalid levels: " << text;
+             return false;
+         }
+         vectorSettings.Levels = parsed;
+         return true;
+     }
+
+     Ctx.Error() << "Unknown index setting: " << id.Name;
+     return false;
+ }
+
+ // Pairs a table name with its view description.
+ // `nameWithAt.first` is the "has AT" flag: when set, the view is replaced by
+ // "@" and the AnonymousTable feature counter is incremented.
+ std::pair<TString, TViewDescription> TableKeyImpl(const std::pair<bool, TString>& nameWithAt, TViewDescription view, TTranslation& ctx) {
+     const bool hasAt = nameWithAt.first;
+     const TString& tableName = nameWithAt.second;
+     if (hasAt) {
+         view = {"@"};
+         ctx.Context().IncrementMonCounter("sql_features", "AnonymousTable");
+     }
+
+     return std::make_pair(tableName, view);
+ }
+
+ // Extracts (table name, view) from a table_key node. An explicit view clause
+ // increments the View feature counter; the AT flag is then handled by the
+ // pair-based TableKeyImpl overload.
+ std::pair<TString, TViewDescription> TableKeyImpl(const TRule_table_key& node, TTranslation& ctx, bool hasAt) {
+     auto tableName = Id(node.GetRule_id_table_or_type1(), ctx);
+     TViewDescription viewDesc;
+     if (node.HasBlock2()) {
+         const auto& viewBlock = node.GetBlock2();
+         viewDesc = Id(viewBlock.GetRule_view_name2(), ctx);
+         ctx.Context().IncrementMonCounter("sql_features", "View");
+     }
+
+     return TableKeyImpl(std::make_pair(hasAt, tableName), viewDesc, ctx);
+ }
+
+ /// Splits a column_name node into its prefix and its column identifier.
+ /// The column identifier is written to `id`.
+ /// \return optional prefix
+ TString ColumnNameAsStr(TTranslation& ctx, const TRule_column_name& node, TString& id) {
+     id = Id(node.GetRule_an_id2(), ctx);
+     TString prefix = OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), ctx);
+     return prefix;
+ }
+
+ // Renders a column_name node as a single string: "prefix.column" when a
+ // prefix is present, otherwise just "column".
+ TString ColumnNameAsSingleStr(TTranslation& ctx, const TRule_column_name& node) {
+     TString column;
+     const TString qualifier = ColumnNameAsStr(ctx, node, column);
+     if (!qualifier) {
+         return column;
+     }
+     return qualifier + '.' + column;
+ }
+
+ // Builds the table hints implied by context pragmas: an empty "infer_schema"
+ // entry when PragmaInferSchema is set and an empty "direct_read" entry when
+ // PragmaDirectRead is set.
+ TTableHints GetContextHints(TContext& ctx) {
+     TTableHints result;
+
+     if (ctx.PragmaInferSchema) {
+         result["infer_schema"] = {};
+     }
+
+     if (ctx.PragmaDirectRead) {
+         result["direct_read"] = {};
+     }
+
+     return result;
+ }
+
+ // Returns the default hints for a table function, chosen by the prefix of its
+ // name (compared through TCiString):
+ //   range* / like* / regexp* / filter*  -> ignore_non_existing
+ //   each*                               -> ignore_non_existing + warn_non_existing
+ // Any other name yields no hints.
+ TTableHints GetTableFuncHints(TStringBuf funcName) {
+     const TCiString name(funcName);
+     TTableHints hints;
+
+     const bool isMultiTableFunc = name.StartsWith("range")
+         || name.StartsWith("like")
+         || name.StartsWith("regexp")
+         || name.StartsWith("filter");
+     const bool isEach = !isMultiTableFunc && name.StartsWith("each");
+
+     if (isMultiTableFunc || isEach) {
+         hints.emplace("ignore_non_existing", TVector<TNodePtr>{});
+     }
+     if (isEach) {
+         hints.emplace("warn_non_existing", TVector<TNodePtr>{});
+     }
+
+     return hints;
+ }
+
+
+ // Builds the expression of a named_expr node and, when the node carries a
+ // label block, labels a clone of the built node with it.
+ // `exprMode` selects the smart-parenthesis mode for GroupBy and SqlLambdaParams;
+ // other modes leave the expression builder at its default.
+ // Returns nullptr (and bumps the NamedExprInvalid error counter) when the
+ // expression cannot be built.
+ TNodePtr TSqlTranslation::NamedExpr(const TRule_named_expr& node, EExpr exprMode) {
+     const bool hasLabel = node.HasBlock2();
+
+     TSqlExpression builder(Ctx, Mode);
+     if (exprMode == EExpr::GroupBy) {
+         builder.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::GroupBy);
+     } else if (exprMode == EExpr::SqlLambdaParams) {
+         builder.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::SqlLambdaParams);
+     }
+     if (hasLabel) {
+         builder.MarkAsNamed();
+     }
+
+     TNodePtr built(builder.Build(node.GetRule_expr1()));
+     if (!built) {
+         Ctx.IncrementMonCounter("sql_errors", "NamedExprInvalid");
+         return nullptr;
+     }
+
+     if (!hasLabel) {
+         return built;
+     }
+
+     // The label is set on a clone rather than on the node returned by Build.
+     built = SafeClone(built);
+     built->SetLabel(Id(node.GetBlock2().GetRule_an_id_or_type2(), *this));
+     return built;
+ }
+
+ // Builds every expression of a named_expr_list node and appends it to `exprs`.
+ // Stops and returns false at the first expression that fails to build; the
+ // null node for that expression is still left at the back of `exprs`.
+ bool TSqlTranslation::NamedExprList(const TRule_named_expr_list& node, TVector<TNodePtr>& exprs, EExpr exprMode) {
+     const auto appendExpr = [&](const TRule_named_expr& item) {
+         exprs.emplace_back(NamedExpr(item, exprMode));
+         return static_cast<bool>(exprs.back());
+     };
+
+     if (!appendExpr(node.GetRule_named_expr1())) {
+         return false;
+     }
+     for (const auto& tail : node.GetBlock2()) {
+         if (!appendExpr(tail.GetRule_named_expr2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Collects the names of a bind_parameter_list into `bindNames`, each paired
+ // with the context position at the time it was read. `bindNames` is cleared
+ // first. Returns false at the first parameter whose name cannot be extracted.
+ bool TSqlTranslation::BindList(const TRule_bind_parameter_list& node, TVector<TSymbolNameWithPos>& bindNames) {
+     bindNames.clear();
+
+     TString paramName;
+     const auto appendParam = [&](const TRule_bind_parameter& param) {
+         if (!NamedNodeImpl(param, paramName, *this)) {
+             return false;
+         }
+         bindNames.emplace_back(TSymbolNameWithPos{paramName, Ctx.Pos()});
+         return true;
+     };
+
+     if (!appendParam(node.GetRule_bind_parameter1())) {
+         return false;
+     }
+     for (const auto& tail : node.GetBlock2()) {
+         if (!appendParam(tail.GetRule_bind_parameter2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Collects the argument names of an action/subquery definition into
+ // `bindNames` and counts the optional ones (those followed by '?') in
+ // `optionalArgsCount`. Both outputs are reset first.
+ // Optional arguments must form a suffix of the list: a non-optional argument
+ // after an optional one is an error. Returns false on any error.
+ bool TSqlTranslation::ActionOrSubqueryArgs(const TRule_action_or_subquery_args& node, TVector<TSymbolNameWithPos>& bindNames, ui32& optionalArgsCount) {
+     bindNames.clear();
+     optionalArgsCount = 0;
+
+     TString argName;
+     bool argIsOptional = false;
+     const auto appendArg = [&](const TRule_opt_bind_parameter& arg) {
+         if (!NamedNodeImpl(arg, argName, argIsOptional, *this)) {
+             return false;
+         }
+
+         if (argIsOptional) {
+             optionalArgsCount++;
+         } else if (optionalArgsCount > 0) {
+             // Never triggers for the first argument: the counter is still zero there.
+             Context().Error() << "Non-optional argument can not follow optional one";
+             return false;
+         }
+         bindNames.emplace_back(TSymbolNameWithPos{argName, Ctx.Pos()});
+         return true;
+     };
+
+     if (!appendArg(node.GetRule_opt_bind_parameter1())) {
+         return false;
+     }
+     for (const auto& tail : node.GetBlock2()) {
+         if (!appendArg(tail.GetRule_opt_bind_parameter2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Appends the components of a module_path node to `path`. When the optional
+ // leading block is present, an empty string is appended first. `path` is not
+ // cleared. Always returns true.
+ bool TSqlTranslation::ModulePath(const TRule_module_path& node, TVector<TString>& path) {
+     if (node.HasBlock1()) {
+         path.emplace_back(TString());
+     }
+
+     path.emplace_back(Id(node.GetRule_an_id2(), *this));
+     for (const auto& component : node.GetBlock3()) {
+         path.emplace_back(Id(component.GetRule_an_id2(), *this));
+     }
+
+     return true;
+ }
+
+ // Collects the (name, alias) pairs of a named_bind_parameter_list into the
+ // parallel vectors `names` and `aliases`; both are cleared first. Returns false
+ // at the first parameter that cannot be parsed.
+ bool TSqlTranslation::NamedBindList(const TRule_named_bind_parameter_list& node, TVector<TSymbolNameWithPos>& names,
+     TVector<TSymbolNameWithPos>& aliases)
+ {
+     names.clear();
+     aliases.clear();
+
+     TSymbolNameWithPos currentName;
+     TSymbolNameWithPos currentAlias;
+     const auto appendParam = [&](const TRule_named_bind_parameter& param) {
+         if (!NamedBindParam(param, currentName, currentAlias)) {
+             return false;
+         }
+         names.push_back(currentName);
+         aliases.push_back(currentAlias);
+         return true;
+     };
+
+     if (!appendParam(node.GetRule_named_bind_parameter1())) {
+         return false;
+     }
+     for (const auto& tail : node.GetBlock2()) {
+         if (!appendParam(tail.GetRule_named_bind_parameter2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Parses one named bind parameter: a mandatory name and an optional alias
+ // (the second bind parameter in the optional block). Both outputs are reset
+ // first; each parsed symbol gets the context position at the time it was read.
+ // Returns false when either bind parameter cannot be parsed.
+ bool TSqlTranslation::NamedBindParam(const TRule_named_bind_parameter& node, TSymbolNameWithPos& name, TSymbolNameWithPos& alias) {
+     alias = {};
+     name = alias;
+
+     if (!NamedNodeImpl(node.GetRule_bind_parameter1(), name.Name, *this)) {
+         return false;
+     }
+     name.Pos = Ctx.Pos();
+
+     if (!node.HasBlock2()) {
+         return true;
+     }
+
+     if (!NamedNodeImpl(node.GetBlock2().GetRule_bind_parameter2(), alias.Name, *this)) {
+         return false;
+     }
+     alias.Pos = Ctx.Pos();
+     return true;
+ }
+
+ // Parses one table function argument: an optional AT marker, the argument
+ // expression, and an optional view clause. The expression is built inside a
+ // TColumnRefScope with EColumnRefState::AsStringLiteral.
+ // Returns Nothing() when the expression cannot be built.
+ TMaybe<TTableArg> TSqlTranslation::TableArgImpl(const TRule_table_arg& node) {
+     TTableArg arg;
+     arg.HasAt = node.HasBlock1();
+
+     // The scope object stays alive until the function returns, as in the original.
+     TColumnRefScope scope(Ctx, EColumnRefState::AsStringLiteral);
+     arg.Expr = NamedExpr(node.GetRule_named_expr2());
+     if (!arg.Expr) {
+         return Nothing();
+     }
+
+     if (node.HasBlock3()) {
+         const auto& viewBlock = node.GetBlock3();
+         arg.View = Id(viewBlock.GetRule_view_name2(), *this);
+         Context().IncrementMonCounter("sql_features", "View");
+     }
+
+     return arg;
+ }
+
+ // Convenience overload: resolves a cluster expression with bindings disallowed.
+ // The binding flag produced by the full overload is discarded.
+ bool TSqlTranslation::ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, TString& service, TDeferredAtom& cluster) {
+     const bool bindingsAllowed = false;
+     bool ignoredIsBinding; // written by the full overload before any read
+     return ClusterExpr(node, allowWildcard, bindingsAllowed, service, cluster, ignoredIsBinding);
+ }
+
+ // Resolves a cluster expression that may also be the special `bindings`
+ // pseudo-cluster. Wildcards are not allowed; `isBinding` reports whether the
+ // expression was recognised as a binding reference.
+ bool TSqlTranslation::ClusterExprOrBinding(const TRule_cluster_expr& node, TString& service, TDeferredAtom& cluster, bool& isBinding) {
+     const bool wildcardAllowed = false;
+     const bool bindingsAllowed = true;
+     return ClusterExpr(node, wildcardAllowed, bindingsAllowed, service, cluster, isBinding);
+ }
+
+ // Resolves a cluster_expr node into a service name and a cluster atom.
+ //
+ // Outputs are reset first (`service` empty, `cluster` empty, `isBinding` false).
+ //  - An explicit service prefix is lower-cased, must be one of the known
+ //    provider names, and disables binding recognition.
+ //  - A literal cluster name equal to "bindings" (case-insensitive) is handled
+ //    according to Ctx.Settings.BindingsMode when bindings are allowed.
+ //  - Any other literal cluster name is looked up via Ctx.GetClusterProvider and
+ //    must agree with the explicit service, if one was given.
+ //  - A non-literal cluster requires an explicit service.
+ //  - The wildcard alternative is accepted only when `allowWildcard` is set and
+ //    leaves `cluster` empty.
+ // Returns false after reporting an error through Ctx.
+ bool TSqlTranslation::ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, bool allowBinding, TString& service,
+     TDeferredAtom& cluster, bool& isBinding)
+ {
+     service = "";
+     cluster = TDeferredAtom();
+     isBinding = false;
+
+     if (node.HasBlock1()) {
+         service = to_lower(Id(node.GetBlock1().GetRule_an_id1(), *this));
+         allowBinding = false;
+         const bool knownService = service == YtProviderName
+             || service == KikimrProviderName
+             || service == RtmrProviderName
+             || service == StatProviderName;
+         if (!knownService) {
+             Ctx.Error() << "Unknown service: " << service;
+             return false;
+         }
+     }
+
+     const auto& clusterBlock = node.GetBlock2();
+     switch (clusterBlock.Alt_case()) {
+         case TRule_cluster_expr::TBlock2::kAlt1: {
+             auto atom = PureColumnOrNamed(clusterBlock.GetAlt1().GetRule_pure_column_or_named1(), *this);
+             if (atom.Empty()) {
+                 return false;
+             }
+
+             if (!atom.GetLiteral()) {
+                 // Non-literal cluster: the provider cannot be looked up by name here,
+                 // so the service must have been given explicitly.
+                 if (!service) {
+                     Ctx.Error() << "Cluster service is not set";
+                     return false;
+                 }
+                 cluster = atom;
+                 return true;
+             }
+
+             TString clusterName = *atom.GetLiteral();
+             if (allowBinding && to_lower(clusterName) == "bindings") {
+                 switch (Ctx.Settings.BindingsMode) {
+                     case NSQLTranslation::EBindingsMode::DISABLED:
+                         Ctx.Error(Ctx.Pos(), TIssuesIds::YQL_DISABLED_BINDINGS) << "Please remove 'bindings.' from your query, the support for this syntax has ended";
+                         Ctx.IncrementMonCounter("sql_errors", "DisabledBinding");
+                         return false;
+                     case NSQLTranslation::EBindingsMode::ENABLED:
+                         isBinding = true;
+                         break;
+                     case NSQLTranslation::EBindingsMode::DROP_WITH_WARNING:
+                         Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_DEPRECATED_BINDINGS) << "Please remove 'bindings.' from your query, the support for this syntax will be dropped soon";
+                         Ctx.IncrementMonCounter("sql_errors", "DeprecatedBinding");
+                         [[fallthrough]];
+                     case NSQLTranslation::EBindingsMode::DROP:
+                         // Treat `bindings.` as if it were not written: use the current scope.
+                         service = Context().Scoped->CurrService;
+                         cluster = Context().Scoped->CurrCluster;
+                         break;
+                 }
+
+                 return true;
+             }
+
+             TString normalizedClusterName;
+             auto provider = Ctx.GetClusterProvider(clusterName, normalizedClusterName);
+             if (!provider) {
+                 Ctx.Error() << "Unknown cluster: " << clusterName;
+                 return false;
+             }
+
+             if (service && *provider != service) {
+                 Ctx.Error() << "Mismatch of cluster " << clusterName << " service, expected: "
+                     << *provider << ", got: " << service;
+                 return false;
+             }
+
+             if (!service) {
+                 service = *provider;
+             }
+
+             cluster = TDeferredAtom(Ctx.Pos(), normalizedClusterName);
+             return true;
+         }
+         case TRule_cluster_expr::TBlock2::kAlt2: {
+             if (allowWildcard) {
+                 return true;
+             }
+
+             Ctx.Error() << "Cluster wildcards allowed only in USE statement";
+             return false;
+         }
+         case TRule_cluster_expr::TBlock2::ALT_NOT_SET:
+             Y_ABORT("You should change implementation according to grammar changes");
+     }
+ }
+
+
+ // Resolves a table binding by name and applies it to a table reference.
+ //
+ // On success `tr` gets the binding's service, cluster and a table key built
+ // from the binding path with an empty view, and `hints` receives:
+ //   - "user_schema" (type + column order nodes) when the binding has a schema;
+ //   - one entry per binding attribute, each value wrapped into a quoted atom.
+ // Returns false after reporting the error produced by ExtractBindingInfo.
+ bool TSqlTranslation::ApplyTableBinding(const TString& binding, TTableRef& tr, TTableHints& hints) {
+     NSQLTranslation::TBindingInfo info;
+     if (const auto& problem = ExtractBindingInfo(Context().Settings, binding, info)) {
+         Ctx.Error() << problem;
+         return false;
+     }
+
+     if (info.Schema) {
+         TNodePtr schemaAtom = BuildQuotedAtom(Ctx.Pos(), info.Schema);
+
+         TNodePtr typeNode = new TCallNodeImpl(Ctx.Pos(), "SqlTypeFromYson", { schemaAtom });
+         TNodePtr columnOrderNode = new TCallNodeImpl(Ctx.Pos(), "SqlColumnOrderFromYson", { schemaAtom });
+
+         hints["user_schema"] = { typeNode, columnOrderNode };
+     }
+
+     for (auto& [attrName, attrValues] : info.Attributes) {
+         TVector<TNodePtr> atoms;
+         for (auto& item : attrValues) {
+             atoms.push_back(BuildQuotedAtom(Ctx.Pos(), item));
+         }
+         hints[attrName] = std::move(atoms);
+     }
+
+     tr.Service = info.ClusterType;
+     tr.Cluster = TDeferredAtom(Ctx.Pos(), info.Cluster);
+
+     // Bindings never carry a view: the key is built with an empty view name.
+     const TString emptyView = "";
+     tr.Keys = BuildTableKey(Ctx.Pos(), tr.Service, tr.Cluster, TDeferredAtom(Ctx.Pos(), info.Path), {emptyView});
+
+     return true;
+ }
+
+ bool TSqlTranslation::TableRefImpl(const TRule_table_ref& node, TTableRef& result, bool unorderedSubquery) {
+ // Translates a table_ref parse node into result: the bind_parameter alternative sets only result.Source, the other two assign a key-based TTableRef. Returns false on failure.
+ // Grammar: table_ref: (cluster_expr DOT)? AT?
+ // (table_key | an_id_expr LPAREN (table_arg (COMMA table_arg)*)? RPAREN |
+ // bind_parameter (LPAREN expr_list? RPAREN)? (VIEW an_id)?)
+ // table_hints?;
+ if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW && node.HasBlock1()) { // an explicit cluster prefix is rejected in LIMITED_VIEW mode
+ Ctx.Error() << "Cluster should not be used in limited view";
+ return false;
+ }
+ auto service = Context().Scoped->CurrService; // start from the scoped defaults; Block1 (if present) may overwrite them below
+ auto cluster = Context().Scoped->CurrCluster;
+ const bool hasAt = node.HasBlock2(); // Block2 is the optional AT of the grammar above
+ bool isBinding = false;
+ if (node.HasBlock1()) {
+ const auto& clusterExpr = node.GetBlock1().GetRule_cluster_expr1();
+ bool result = !hasAt ? // NOTE: this local bool shadows the TTableRef& result parameter inside this scope
+ ClusterExprOrBinding(clusterExpr, service, cluster, isBinding) : ClusterExpr(clusterExpr, false, service, cluster);
+ if (!result) {
+ return false;
+ }
+ }
+
+ TTableRef tr(Context().MakeName("table"), service, cluster, nullptr);
+ TPosition pos(Context().Pos());
+ TTableHints hints = GetContextHints(Ctx); // context hints are the base; binding/function and explicit hints are merged on top later
+ TTableHints tableHints;
+
+ TMaybe<TString> keyFunc; // set only by the table-function alternative (kAlt2)
+
+ auto& block = node.GetBlock3();
+ switch (block.Alt_case()) {
+ case TRule_table_ref::TBlock3::kAlt1: { // table_key
+ if (!isBinding && cluster.Empty()) {
+ Ctx.Error() << "No cluster name given and no default cluster is selected";
+ return false;
+ }
+
+ auto pair = TableKeyImpl(block.GetAlt1().GetRule_table_key1(), *this, hasAt);
+ if (isBinding) {
+ TString binding = pair.first;
+ auto view = pair.second;
+ if (!view.ViewName.empty()) {
+ YQL_ENSURE(view != TViewDescription{"@"});
+ Ctx.Error() << "VIEW is not supported for table bindings";
+ return false;
+ }
+
+ if (!ApplyTableBinding(binding, tr, tableHints)) {
+ return false;
+ }
+ } else {
+ tr.Keys = BuildTableKey(pos, service, cluster, TDeferredAtom(pos, pair.first), pair.second);
+ }
+ break;
+ }
+ case TRule_table_ref::TBlock3::kAlt2: { // an_id_expr LPAREN ... RPAREN
+ if (cluster.Empty()) {
+ Ctx.Error() << "No cluster name given and no default cluster is selected";
+ return false;
+ }
+
+ auto& alt = block.GetAlt2();
+ keyFunc = Id(alt.GetRule_an_id_expr1(), *this);
+ TVector<TTableArg> args;
+ if (alt.HasBlock3()) {
+ auto& argsBlock = alt.GetBlock3();
+ auto arg = TableArgImpl(argsBlock.GetRule_table_arg1());
+ if (!arg) {
+ return false;
+ }
+
+ args.push_back(std::move(*arg));
+ for (auto& b : argsBlock.GetBlock2()) {
+ arg = TableArgImpl(b.GetRule_table_arg2());
+ if (!arg) {
+ return false;
+ }
+
+ args.push_back(std::move(*arg));
+ }
+ }
+ tableHints = GetTableFuncHints(*keyFunc);
+ tr.Keys = BuildTableKeys(pos, service, cluster, *keyFunc, args);
+ break;
+ }
+ case TRule_table_ref::TBlock3::kAlt3: { // bind_parameter ...; every path of this case returns directly
+ auto& alt = block.GetAlt3();
+ Ctx.IncrementMonCounter("sql_features", "NamedNodeUseSource");
+ TString named;
+ if (!NamedNodeImpl(alt.GetRule_bind_parameter1(), named, *this)) {
+ return false;
+ }
+ if (hasAt) { // AT + bind parameter: the named node supplies an anonymous table name
+ if (alt.HasBlock2()) {
+ Ctx.Error() << "Subquery must not be used as anonymous table name";
+ return false;
+ }
+
+ if (alt.HasBlock3()) {
+ Ctx.Error() << "View is not supported for anonymous tables";
+ return false;
+ }
+
+ if (node.HasBlock4()) {
+ Ctx.Error() << "Hints are not supported for anonymous tables";
+ return false;
+ }
+
+ auto namedNode = GetNamedNode(named);
+ if (!namedNode) {
+ return false;
+ }
+
+ auto source = TryMakeSourceFromExpression(Ctx.Pos(), Ctx, service, cluster, namedNode, "@");
+ if (!source) {
+ Ctx.Error() << "Cannot infer cluster and table name";
+ return false;
+ }
+
+ result.Source = source;
+ return true;
+ }
+ auto nodePtr = GetNamedNode(named);
+ if (!nodePtr) {
+ Ctx.IncrementMonCounter("sql_errors", "NamedNodeSourceError");
+ return false;
+ }
+ if (alt.HasBlock2()) { // parenthesized form: build an Apply call on the named node
+ if (alt.HasBlock3()) {
+ Ctx.Error() << "View is not supported for subqueries";
+ return false;
+ }
+
+ if (node.HasBlock4()) {
+ Ctx.Error() << "Hints are not supported for subqueries";
+ return false;
+ }
+
+ TVector<TNodePtr> values; // list layout: Apply, <named node>, world, then the optional call arguments
+ values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "Apply", TNodeFlags::Default));
+ values.push_back(nodePtr);
+ values.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "world", TNodeFlags::Default));
+
+ TSqlExpression sqlExpr(Ctx, Mode);
+ if (alt.GetBlock2().HasBlock2() && !ExprList(sqlExpr, values, alt.GetBlock2().GetBlock2().GetRule_expr_list1())) {
+ return false;
+ }
+
+ TNodePtr apply = new TAstListNodeImpl(Ctx.Pos(), std::move(values));
+ if (unorderedSubquery && Ctx.UnorderedSubqueries) { // wrap only when both the caller and the context flag allow it
+ apply = new TCallNodeImpl(Ctx.Pos(), "UnorderedSubquery", { apply });
+ }
+ result.Source = BuildNodeSource(Ctx.Pos(), apply);
+ return true;
+ }
+
+ TTableHints hints; // NOTE: shadows the outer hints; this case never reaches the merge after the switch
+ TTableHints contextHints = GetContextHints(Ctx);
+ auto ret = BuildInnerSource(Ctx.Pos(), nodePtr, service, cluster);
+ if (alt.HasBlock3()) {
+ auto view = Id(alt.GetBlock3().GetRule_view_name2(), *this);
+ Ctx.IncrementMonCounter("sql_features", "View");
+ bool result = view.PrimaryFlag // NOTE: shadows the result parameter inside this scope
+ ? ret->SetPrimaryView(Ctx, Ctx.Pos())
+ : ret->SetViewName(Ctx, Ctx.Pos(), view.ViewName);
+ if (!result) {
+ return false;
+ }
+ }
+
+ if (node.HasBlock4()) {
+ auto tmp = TableHintsImpl(node.GetBlock4().GetRule_table_hints1(), service, keyFunc.GetOrElse("")); // keyFunc is never set in this case, so "" is passed
+ if (!tmp) {
+ return false;
+ }
+
+ hints = *tmp;
+ }
+
+ if (hints || contextHints) {
+ if (!ret->SetTableHints(Ctx, Ctx.Pos(), hints, contextHints)) {
+ return false;
+ }
+ }
+
+ result.Source = ret;
+ return true;
+ }
+ case TRule_table_ref::TBlock3::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ MergeHints(hints, tableHints); // only the kAlt1 / kAlt2 paths reach this point
+
+ if (node.HasBlock4()) {
+ auto tmp = TableHintsImpl(node.GetBlock4().GetRule_table_hints1(), service, keyFunc.GetOrElse(""));
+ if (!tmp) {
+ Ctx.Error() << "Failed to parse table hints";
+ return false;
+ }
+
+ MergeHints(hints, *tmp);
+ }
+
+ if (!hints.empty()) {
+ tr.Options = BuildInputOptions(pos, hints);
+ }
+
+ if (!tr.Keys) { // no key node was produced above; no error is added here (presumably the key builder reported it - confirm)
+ return false;
+ }
+
+ result = tr;
+ return true;
+ }
+
+ // Recognizes the AS_TABLE(arg) pseudo table function.
+ // Returns Nothing() when the table_ref is not an AS_TABLE call, a TMaybe holding a
+ // null source when it is AS_TABLE but invalid (an error may already be reported),
+ // and a TMaybe holding the built node source on success.
+ TMaybe<TSourcePtr> TSqlTranslation::AsTableImpl(const TRule_table_ref& node) {
+     const auto& sourceBlock = node.GetBlock3();
+     if (sourceBlock.Alt_case() != TRule_table_ref::TBlock3::kAlt2) {
+         return Nothing();
+     }
+
+     auto& call = sourceBlock.GetAlt2();
+     TCiString funcName(Id(call.GetRule_an_id_expr1(), *this));
+     // TCiString compares case-insensitively.
+     if (!(funcName == "as_table")) {
+         return Nothing();
+     }
+
+     // "Recognized but failed" marker: a defined TMaybe wrapping a null pointer.
+     const TMaybe<TSourcePtr> failure(nullptr);
+
+     if (node.HasBlock1()) {
+         Ctx.Error() << "Cluster shouldn't be specified for AS_TABLE source";
+         return failure;
+     }
+
+     const bool singleArgument = call.HasBlock3() && call.GetBlock3().GetBlock2().empty();
+     if (!singleArgument) {
+         Ctx.Error() << "Expected single argument for AS_TABLE source";
+         return failure;
+     }
+
+     if (node.HasBlock4()) {
+         Ctx.Error() << "No hints expected for AS_TABLE source";
+         return failure;
+     }
+
+     auto argument = TableArgImpl(call.GetBlock3().GetRule_table_arg1());
+     if (!argument) {
+         return failure;
+     }
+
+     if (argument->Expr->GetSource()) {
+         Ctx.Error() << "AS_TABLE shouldn't be used for table sources";
+         return failure;
+     }
+
+     return BuildNodeSource(Ctx.Pos(), argument->Expr, true);
+ }
+
+ // Extracts the nullability flag and the optional default expression from the
+ // opt_column_constraints part of a column_schema node.
+ // Returns an empty TMaybe when the default expression fails to build.
+ TMaybe<TColumnConstraints> ColumnConstraints(const TRule_column_schema& node, TTranslation& ctx) {
+     TNodePtr defaultExpr = nullptr;
+     bool nullable = true;
+
+     // Bind by reference: the accessor exposes a sub-message of `node`, copying it is wasted work.
+     const auto& constraintsNode = node.GetRule_opt_column_constraints4();
+     if (constraintsNode.HasBlock1()) {
+         // The column is nullable unless the nested Block1 is present (presumably the NOT token - confirm against the grammar).
+         nullable = !constraintsNode.GetBlock1().HasBlock1();
+     }
+     if (constraintsNode.HasBlock2()) {
+         TSqlExpression expr(ctx.Context(), ctx.Context().Settings.Mode);
+         defaultExpr = expr.Build(constraintsNode.GetBlock2().GetRule_expr2());
+         if (!defaultExpr) {
+             return {};
+         }
+     }
+
+     return TColumnConstraints(defaultExpr, nullable);
+ }
+
+ // Builds a TColumnSchema from a column_schema node: name, type (serial alias or a
+ // regular type/bind), constraints and the optional column family.
+ // Returns an empty TMaybe when the constraints or the type cannot be built.
+ TMaybe<TColumnSchema> TSqlTranslation::ColumnSchemaImpl(const TRule_column_schema& node) {
+     const TString columnName(Id(node.GetRule_an_id_schema1(), *this));
+     const TPosition columnPos(Context().Pos());
+
+     // Serial aliases are tried first; a non-null node marks the column as serial.
+     TNodePtr typeNode = SerialTypeNode(node.GetRule_type_name_or_bind2());
+     const bool isSerial = (typeNode != nullptr);
+
+     const auto columnConstraints = ColumnConstraints(node, *this);
+     if (!columnConstraints) {
+         return {};
+     }
+
+     if (!isSerial) {
+         typeNode = TypeNodeOrBind(node.GetRule_type_name_or_bind2());
+         if (!typeNode) {
+             return {};
+         }
+     }
+
+     TVector<TIdentifier> familyIds;
+     if (node.HasBlock3()) {
+         const auto& relation = node.GetBlock3().GetRule_family_relation1();
+         familyIds.push_back(IdEx(relation.GetRule_an_id2(), *this));
+     }
+
+     return TColumnSchema(columnPos, columnName, typeNode, columnConstraints->Nullable, familyIds, isSerial, columnConstraints->DefaultExpr);
+ }
+
+ // Maps a PostgreSQL-style serial type alias to the matching integer DataType node:
+ //   bigserial / serial8   -> Int64
+ //   serial / serial4      -> Int32
+ //   smallserial / serial2 -> Int16
+ // Returns nullptr when the node is not a plain simple type name or the name is not
+ // a serial alias; no error is reported here.
+ TNodePtr TSqlTranslation::SerialTypeNode(const TRule_type_name_or_bind& node) {
+     if (node.Alt_case() != TRule_type_name_or_bind::kAltTypeNameOrBind1) {
+         return nullptr;
+     }
+
+     // Captured before Id() is called below, matching the original statement order.
+     TPosition pos = Ctx.Pos();
+
+     // Sub-messages are bound by const reference: they live inside `node`, so copying
+     // whole parse-tree subtrees only to inspect them is unnecessary.
+     const auto& typeNameNode = node.GetAlt_type_name_or_bind1().GetRule_type_name1();
+     if (typeNameNode.Alt_case() != TRule_type_name::kAltTypeName2) {
+         return nullptr;
+     }
+
+     const auto& alt = typeNameNode.GetAlt_type_name2();
+     const auto& block = alt.GetBlock1();
+     if (block.Alt_case() != TRule_type_name::TAlt2::TBlock1::kAlt2) {
+         return nullptr;
+     }
+
+     const auto& simpleType = block.GetAlt2().GetRule_type_name_simple1();
+     const TString name = Id(simpleType.GetRule_an_id_pure1(), *this);
+     if (name.empty()) {
+         return nullptr;
+     }
+
+     const auto res = to_lower(name);
+     if (res == "bigserial" || res == "serial8") {
+         return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Int64", TNodeFlags::Default) });
+     } else if (res == "serial" || res == "serial4") {
+         return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Int32", TNodeFlags::Default) });
+     } else if (res == "smallserial" || res == "serial2") {
+         return new TCallNodeImpl(pos, "DataType", { BuildQuotedAtom(pos, "Int16", TNodeFlags::Default) });
+     }
+
+     return nullptr;
+ }
+
+ // Applies one `name = value` column family setting to `family`.
+ // Supported names (case-insensitive): DATA and COMPRESSION take a string literal,
+ // COMPRESSION_LEVEL takes an integer. Reports an error and returns false for an
+ // unknown name or for a value of the wrong kind.
+ bool TSqlTranslation::FillFamilySettingsEntry(const TRule_family_settings_entry& settingNode, TFamilyEntry& family) {
+     TIdentifier id = IdEx(settingNode.GetRule_an_id1(), *this);
+     const TRule_family_setting_value& value = settingNode.GetRule_family_setting_value3();
+     const TString settingName = to_lower(id.Name);
+
+     // The alternative must be checked before its accessor is used: reading an
+     // alternative that is not set silently yields a default (empty) message.
+     const bool isString = value.Alt_case() == TRule_family_setting_value::kAltFamilySettingValue1;
+     const bool isInteger = value.Alt_case() == TRule_family_setting_value::kAltFamilySettingValue2;
+
+     if (settingName == "data" || settingName == "compression") {
+         if (!isString) {
+             Ctx.Error() << to_upper(id.Name) << " value should be a string literal";
+             return false;
+         }
+         const TString stringValue(Ctx.Token(value.GetAlt_family_setting_value1().GetToken1()));
+         if (settingName == "data") {
+             family.Data = BuildLiteralSmartString(Ctx, stringValue);
+         } else {
+             family.Compression = BuildLiteralSmartString(Ctx, stringValue);
+         }
+     } else if (settingName == "compression_level") {
+         if (!isInteger) {
+             Ctx.Error() << to_upper(id.Name) << " value should be an integer";
+             return false;
+         }
+         family.CompressionLevel = LiteralNumber(Ctx, value.GetAlt_family_setting_value2().GetRule_integer1());
+     } else {
+         Ctx.Error() << "Unknown table setting: " << id.Name;
+         return false;
+     }
+     return true;
+ }
+
+ // Applies every entry of a family_settings list to `family`, stopping at the
+ // first entry that fails. An empty list is accepted.
+ bool TSqlTranslation::FillFamilySettings(const TRule_family_settings& settingsNode, TFamilyEntry& family) {
+     // family_settings: LPAREN (family_settings_entry (COMMA family_settings_entry)*)? RPAREN;
+     if (!settingsNode.HasBlock2()) {
+         return true;
+     }
+
+     auto& entries = settingsNode.GetBlock2();
+     if (!FillFamilySettingsEntry(entries.GetRule_family_settings_entry1(), family)) {
+         return false;
+     }
+     for (auto& tail : entries.GetBlock2()) {
+         const bool applied = FillFamilySettingsEntry(tail.GetRule_family_settings_entry2(), family);
+         if (!applied) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+
+
+ // Translates one create_table_entry into `params`: a typed column, a table
+ // constraint (PRIMARY KEY / PARTITION BY / ORDER BY), an index, a column family,
+ // a changefeed, or - for CREATE TABLE AS only - an untyped column name.
+ // Returns false on failure.
+ bool TSqlTranslation::CreateTableEntry(const TRule_create_table_entry& node, TCreateTableParameters& params, const bool isCreateTableAs)
+ {
+     // Appends the leading identifier and the COMMA-separated tail of a constraint to `out`.
+     const auto collectIds = [this](const auto& rule, auto& out) {
+         out.push_back(IdEx(rule.GetRule_an_id4(), *this));
+         for (auto& tail : rule.GetBlock5()) {
+             out.push_back(IdEx(tail.GetRule_an_id2(), *this));
+         }
+     };
+
+     // Appends one ORDER BY column with its direction; false on an unsupported direction token.
+     const auto appendOrderColumn = [this, &params](const TRule_column_order_by_specification& spec) {
+         bool descending = false;
+         if (spec.HasBlock2()) {
+             auto& token = spec.GetBlock2().GetToken1();
+             auto tokenId = token.GetId();
+             if (IS_TOKEN(tokenId, DESC)) {
+                 descending = true;
+             } else if (!IS_TOKEN(tokenId, ASC)) {
+                 Ctx.Error() << "Unsupported direction token: " << token.GetId();
+                 return false;
+             }
+         }
+         params.OrderByColumns.push_back(std::make_pair(IdEx(spec.GetRule_an_id1(), *this), descending));
+         return true;
+     };
+
+     switch (node.Alt_case()) {
+         case TRule_create_table_entry::kAltCreateTableEntry1:
+         {
+             // column_schema
+             if (isCreateTableAs) {
+                 Ctx.Error() << "Column types are not supported for CREATE TABLE AS";
+                 return false;
+             }
+             auto schema = ColumnSchemaImpl(node.GetAlt_create_table_entry1().GetRule_column_schema1());
+             if (!schema) {
+                 return false;
+             }
+             if (schema->Families.size() > 1) {
+                 Ctx.Error() << "Several column families for a single column are not yet supported";
+                 return false;
+             }
+             params.Columns.push_back(*schema);
+             break;
+         }
+         case TRule_create_table_entry::kAltCreateTableEntry2:
+         {
+             // table_constraint
+             auto& constraint = node.GetAlt_create_table_entry2().GetRule_table_constraint1();
+             switch (constraint.Alt_case()) {
+                 case TRule_table_constraint::kAltTableConstraint1: {
+                     if (!params.PkColumns.empty()) {
+                         Ctx.Error() << "PRIMARY KEY statement must be specified only once";
+                         return false;
+                     }
+                     collectIds(constraint.GetAlt_table_constraint1(), params.PkColumns);
+                     break;
+                 }
+                 case TRule_table_constraint::kAltTableConstraint2: {
+                     if (!params.PartitionByColumns.empty()) {
+                         Ctx.Error() << "PARTITION BY statement must be specified only once";
+                         return false;
+                     }
+                     collectIds(constraint.GetAlt_table_constraint2(), params.PartitionByColumns);
+                     break;
+                 }
+                 case TRule_table_constraint::kAltTableConstraint3: {
+                     if (!params.OrderByColumns.empty()) {
+                         Ctx.Error() << "ORDER BY statement must be specified only once";
+                         return false;
+                     }
+                     auto& orderBy = constraint.GetAlt_table_constraint3();
+                     if (!appendOrderColumn(orderBy.GetRule_column_order_by_specification4())) {
+                         return false;
+                     }
+                     for (auto& tail : orderBy.GetBlock5()) {
+                         if (!appendOrderColumn(tail.GetRule_column_order_by_specification2())) {
+                             return false;
+                         }
+                     }
+                     break;
+                 }
+                 default:
+                     AltNotImplemented("table_constraint", constraint);
+                     return false;
+             }
+             break;
+         }
+         case TRule_create_table_entry::kAltCreateTableEntry3:
+         {
+             // table_index
+             auto& indexRule = node.GetAlt_create_table_entry3().GetRule_table_index1();
+             if (!CreateTableIndex(indexRule, params.Indexes)) {
+                 return false;
+             }
+             break;
+         }
+         case TRule_create_table_entry::kAltCreateTableEntry4:
+         {
+             // family_entry
+             if (isCreateTableAs) {
+                 Ctx.Error() << "Column families are not supported for CREATE TABLE AS";
+                 return false;
+             }
+             auto& familyRule = node.GetAlt_create_table_entry4().GetRule_family_entry1();
+             TFamilyEntry family(IdEx(familyRule.GetRule_an_id2(), *this));
+             if (!FillFamilySettings(familyRule.GetRule_family_settings3(), family)) {
+                 return false;
+             }
+             params.ColumnFamilies.push_back(family);
+             break;
+         }
+         case TRule_create_table_entry::kAltCreateTableEntry5:
+         {
+             // changefeed
+             auto& changefeedRule = node.GetAlt_create_table_entry5().GetRule_changefeed1();
+             TSqlExpression expr(Ctx, Mode);
+             if (!CreateChangefeed(changefeedRule, expr, params.Changefeeds)) {
+                 return false;
+             }
+             break;
+         }
+         case TRule_create_table_entry::kAltCreateTableEntry6:
+         {
+             // an_id_schema: a bare column name, allowed only for CREATE TABLE AS
+             if (!isCreateTableAs) {
+                 Ctx.Error() << "Column requires a type";
+                 return false;
+             }
+             const TString name(Id(node.GetAlt_create_table_entry6().GetRule_an_id_schema1(), *this));
+             const TPosition pos(Context().Pos());
+
+             params.Columns.push_back(TColumnSchema(pos, name, nullptr, true, {}, false, nullptr));
+             break;
+         }
+         default:
+             AltNotImplemented("create_table_entry", node);
+             return false;
+     }
+     return true;
+ }
+
+namespace {
+ // Stores the setting value into `to` when it is the identifier alternative;
+ // returns false (without reporting an error) for any other alternative.
+ bool StoreId(const TRule_table_setting_value& from, TMaybe<TIdentifier>& to, TTranslation& ctx) {
+     if (from.Alt_case() != TRule_table_setting_value::kAltTableSettingValue1) {
+         return false;
+     }
+     // id
+     to = IdEx(from.GetAlt_table_setting_value1().GetRule_id1(), ctx);
+     return true;
+ }
+
+ // Stores the setting value into `to` as a literal string node when it is the
+ // STRING_VALUE alternative; returns false (without reporting an error) otherwise.
+ bool StoreString(const TRule_table_setting_value& from, TNodePtr& to, TContext& ctx) {
+     if (from.Alt_case() != TRule_table_setting_value::kAltTableSettingValue2) {
+         return false;
+     }
+     // STRING_VALUE
+     const TString rawToken(ctx.Token(from.GetAlt_table_setting_value2().GetToken1()));
+     to = BuildLiteralSmartString(ctx, rawToken);
+     return true;
+ }
+
+ // Stores the unescaped content of a STRING_VALUE setting into `to` as a deferred atom.
+ // Unlike the TNodePtr overload, this one reports its own errors, prefixed with `errorPrefix`.
+ bool StoreString(const TRule_table_setting_value& from, TDeferredAtom& to, TContext& ctx, const TString& errorPrefix = {}) {
+     if (from.Alt_case() != TRule_table_setting_value::kAltTableSettingValue2) {
+         ctx.Error() << errorPrefix << " value should be a string literal";
+         return false;
+     }
+
+     // STRING_VALUE
+     const TString rawToken(ctx.Token(from.GetAlt_table_setting_value2().GetToken1()));
+     auto content = StringContent(ctx, ctx.Pos(), rawToken);
+     if (!content) {
+         ctx.Error() << errorPrefix << " value cannot be unescaped";
+         return false;
+     }
+     to = TDeferredAtom(ctx.Pos(), content->Content);
+     return true;
+ }
+
+ // Stores the setting value into `to` as a literal number node when it is the
+ // integer alternative; returns false (without reporting an error) otherwise.
+ bool StoreInt(const TRule_table_setting_value& from, TNodePtr& to, TContext& ctx) {
+     if (from.Alt_case() != TRule_table_setting_value::kAltTableSettingValue3) {
+         return false;
+     }
+     // integer
+     to = LiteralNumber(ctx, from.GetAlt_table_setting_value3().GetRule_integer1());
+     return true;
+ }
+
+ // Stores an integer setting value into `to` as a deferred atom.
+ // Unlike the TNodePtr overload, this one reports its own error, prefixed with `errorPrefix`.
+ bool StoreInt(const TRule_table_setting_value& from, TDeferredAtom& to, TContext& ctx, const TString& errorPrefix = {}) {
+     if (from.Alt_case() != TRule_table_setting_value::kAltTableSettingValue3) {
+         ctx.Error() << errorPrefix << " value should be an integer";
+         return false;
+     }
+     // integer
+     to = TDeferredAtom(LiteralNumber(ctx, from.GetAlt_table_setting_value3().GetRule_integer1()), ctx);
+     return true;
+ }
+
+ // Parses one parenthesized list of literal keys and appends it to `to` as a single
+ // split boundary. Every key must be a literal expression; an empty key or an
+ // identifier is reported as an error and nothing is appended.
+ bool StoreSplitBoundary(const TRule_literal_value_list& boundary, TVector<TVector<TNodePtr>>& to,
+         TSqlExpression& expr, TContext& ctx) {
+     TVector<TNodePtr> keys;
+
+     // Parses a single literal_value and appends its expression to `keys`.
+     const auto appendKey = [&](const auto& literalRule) {
+         auto parsed = expr.LiteralExpr(literalRule);
+         if (!parsed) {
+             ctx.Error() << "Empty key in partition at keys";
+             return false;
+         }
+         if (!parsed->Expr) {
+             ctx.Error() << "Identifier is not expected in partition at keys";
+             return false;
+         }
+         keys.emplace_back(parsed->Expr);
+         return true;
+     };
+
+     if (!appendKey(boundary.GetRule_literal_value2())) {
+         return false;
+     }
+     for (auto& tail : boundary.GetBlock3()) {
+         if (!appendKey(tail.GetRule_literal_value2())) {
+             return false;
+         }
+     }
+
+     to.push_back(keys);
+     return true;
+ }
+
+ // Parses a split_boundaries setting value and appends the boundaries to `to`.
+ // Two forms are accepted:
+ //   - a list of key lists, e.g. ((10), (1000, "abc")): each inner list is one boundary;
+ //   - a flat key list, e.g. (10, 1000): each key is its own single-key boundary.
+ // Returns false for any other value kind (no error reported here) or when a key is
+ // empty or an identifier (error reported).
+ bool StoreSplitBoundaries(const TRule_table_setting_value& from, TVector<TVector<TNodePtr>>& to,
+         TSqlExpression& expr, TContext& ctx) {
+     switch (from.Alt_case()) {
+     case TRule_table_setting_value::kAltTableSettingValue4: {
+         // split_boundaries
+         const auto& boundariesNode = from.GetAlt_table_setting_value4().GetRule_split_boundaries1();
+         switch (boundariesNode.Alt_case()) {
+         case TRule_split_boundaries::kAltSplitBoundaries1: {
+             // literal_value_list (COMMA literal_value_list)*
+             auto& complexBoundaries = boundariesNode.GetAlt_split_boundaries1();
+
+             auto& first_boundary = complexBoundaries.GetRule_literal_value_list2();
+             if (!StoreSplitBoundary(first_boundary, to, expr, ctx)) {
+                 return false;
+             }
+
+             for (auto& boundary : complexBoundaries.GetBlock3()) {
+                 if (!StoreSplitBoundary(boundary.GetRule_literal_value_list2(), to, expr, ctx)) {
+                     return false;
+                 }
+             }
+             break;
+         }
+         case TRule_split_boundaries::kAltSplitBoundaries2: {
+             // literal_value_list
+             auto& simpleBoundaries = boundariesNode.GetAlt_split_boundaries2().GetRule_literal_value_list1();
+             auto first_key = expr.LiteralExpr(simpleBoundaries.GetRule_literal_value2());
+             if (!first_key) {
+                 ctx.Error() << "Empty key in partition at keys";
+                 return false;
+             }
+             if (!first_key->Expr) {
+                 ctx.Error() << "Identifier is not expected in partition at keys";
+                 return false;
+             }
+             to.push_back(TVector<TNodePtr>(1, first_key->Expr));
+             for (auto& key : simpleBoundaries.GetBlock3()) {
+                 auto keyExprOrIdent = expr.LiteralExpr(key.GetRule_literal_value2());
+                 if (!keyExprOrIdent) {
+                     ctx.Error() << "Empty key in partition at keys";
+                     return false;
+                 }
+                 // Check the key parsed in this iteration. The previous code re-checked
+                 // first_key here, so an identifier in the tail was stored as a null node.
+                 if (!keyExprOrIdent->Expr) {
+                     ctx.Error() << "Identifier is not expected in partition at keys";
+                     return false;
+                 }
+                 to.push_back(
+                     TVector<TNodePtr>(1, keyExprOrIdent->Expr)
+                 );
+             }
+             break;
+         }
+         default:
+             return false;
+         }
+         break;
+     }
+     default:
+         return false;
+     }
+     return true;
+ }
+
+ // Parses a TTL setting value (expression, column name, optional column unit)
+ // and stores it into `to`. The expression must be an Interval node. Returns false for
+ // any other value kind or on a build failure; only the Interval and unit checks report
+ // an error here.
+ bool StoreTtlSettings(const TRule_table_setting_value& from, TResetableSetting<TTtlSettings, void>& to,
+         TSqlExpression& expr, TContext& ctx, TTranslation& txc) {
+     if (from.Alt_case() != TRule_table_setting_value::kAltTableSettingValue5) {
+         return false;
+     }
+
+     const auto& ttlRule = from.GetAlt_table_setting_value5();
+     auto ttlColumn = IdEx(ttlRule.GetRule_an_id3(), txc);
+     auto intervalNode = expr.Build(ttlRule.GetRule_expr1());
+     if (!intervalNode) {
+         return false;
+     }
+
+     if (intervalNode->GetOpName() != "Interval") {
+         ctx.Error() << "Literal of Interval type is expected for TTL";
+         return false;
+     }
+
+     TMaybe<TTtlSettings::EUnit> unitValue;
+     if (ttlRule.HasBlock4()) {
+         const TString unit = to_lower(ctx.Token(ttlRule.GetBlock4().GetToken2()));
+         // Construct the value first so TryFromString has a target to write into.
+         unitValue.ConstructInPlace();
+         if (!TryFromString<TTtlSettings::EUnit>(unit, *unitValue)) {
+             ctx.Error() << "Invalid unit: " << unit;
+             return false;
+         }
+     }
+
+     to.Set(TTtlSettings(ttlColumn, intervalNode, unitValue));
+     return true;
+ }
+
+ // One component of a split LIKE-style pattern (see SplitPattern).
+ // Prefix collects the plain characters seen before the first any-char wildcard,
+ // Suffix the plain characters seen after the last one. While IsSimple is true the
+ // component contains no wildcard and Prefix equals Suffix.
+ template<typename TChar>
+ struct TPatternComponent {
+     TBasicString<TChar> Prefix;
+     TBasicString<TChar> Suffix;
+     bool IsSimple = true;
+
+     // Records a literal character.
+     void AppendPlain(TChar c) {
+         Suffix.push_back(c);
+         if (IsSimple) {
+             Prefix.push_back(c);
+         }
+     }
+
+     // Records an any-char wildcard: the suffix restarts and the prefix is frozen.
+     void AppendAnyChar() {
+         Suffix.clear();
+         IsSimple = false;
+     }
+ };
+
+ // Splits `pattern` into components separated by '%' (consecutive '%' count as one
+ // separator); '_' marks an any-char position inside a component. A character that
+ // follows the optional `escape` character is always taken literally.
+ // On return `inEscape` is true when the pattern ends right after an escape character.
+ template<typename TChar>
+ TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& pattern, TMaybe<char> escape, bool& inEscape) {
+     inEscape = false;
+     TVector<TPatternComponent<TChar>> parts;
+     TPatternComponent<TChar> part;
+     bool afterPercent = false;
+     for (const TChar ch : pattern) {
+         if (inEscape) {
+             // Escaped character: literal, whatever it is.
+             part.AppendPlain(ch);
+             inEscape = false;
+             afterPercent = false;
+             continue;
+         }
+         if (escape && ch == static_cast<TChar>(*escape)) {
+             inEscape = true;
+             continue;
+         }
+         if (ch == '%') {
+             // Only the first '%' of a run closes the current component.
+             if (!afterPercent) {
+                 parts.push_back(std::move(part));
+             }
+             part = {};
+             afterPercent = true;
+             continue;
+         }
+         if (ch == '_') {
+             part.AppendAnyChar();
+         } else {
+             part.AppendPlain(ch);
+         }
+         afterPercent = false;
+     }
+     parts.push_back(std::move(part));
+     return parts;
+ }
+}
+
+ // Dispatches a single table setting to the handler for the given table type:
+ // external tables have their own settings, regular tables and table stores share one.
+ // `value` may be null only when `reset` is true (the handlers enforce this).
+ bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value,
+         TTableSettings& settings, ETableType tableType, bool alter, bool reset) {
+     switch (tableType) {
+     case ETableType::ExternalTable:
+         return StoreExternalTableSettingsEntry(id, value, settings, alter, reset);
+     case ETableType::Table:
+     case ETableType::TableStore:
+         return StoreTableSettingsEntry(id, value, settings, alter, reset);
+     }
+     // An out-of-range table type would otherwise flow off the end of a non-void
+     // function (undefined behavior); fail loudly instead.
+     Y_ABORT("Unexpected table type");
+ }
+
+ // Stores or resets one setting of an external table.
+ // DATA_SOURCE (no reset) and LOCATION are handled explicitly; every other name
+ // is appended to ExternalSourceParameters as a string-valued parameter.
+ // `value` may be null only when `reset` is true, and reset implies alter.
+ bool TSqlTranslation::StoreExternalTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value,
+         TTableSettings& settings, bool alter, bool reset) {
+     YQL_ENSURE(value || reset);
+     YQL_ENSURE(!reset || reset && alter);
+
+     const TString name = to_lower(id.Name);
+     const auto notString = [&]() {
+         Ctx.Error() << to_upper(id.Name) << " value should be a string literal";
+         return false;
+     };
+
+     if (name == "data_source") {
+         if (reset) {
+             Ctx.Error() << to_upper(id.Name) << " reset is not supported";
+             return false;
+         }
+         // The TDeferredAtom overload of StoreString reports its own errors.
+         TDeferredAtom dataSource;
+         if (!StoreString(*value, dataSource, Ctx, to_upper(id.Name))) {
+             return false;
+         }
+         TString service = Context().Scoped->CurrService;
+         TDeferredAtom cluster = Context().Scoped->CurrCluster;
+         TNodePtr pathNode = new TAstListNodeImpl(Ctx.Pos());
+         pathNode->Add("String", Ctx.GetPrefixedPath(service, cluster, dataSource));
+         settings.DataSourcePath = pathNode;
+         return true;
+     }
+
+     if (name == "location") {
+         if (reset) {
+             settings.Location.Reset();
+             return true;
+         }
+         TNodePtr location;
+         if (!StoreString(*value, location, Ctx)) {
+             return notString();
+         }
+         settings.Location.Set(location);
+         return true;
+     }
+
+     // Any other name is a free-form external source parameter.
+     auto& parameter = settings.ExternalSourceParameters.emplace_back();
+     if (reset) {
+         parameter.Reset(id);
+         return true;
+     }
+     TNodePtr parameterValue;
+     if (!StoreString(*value, parameterValue, Ctx)) {
+         return notString();
+     }
+     parameter.Set(std::pair<TIdentifier, TNodePtr>{id, std::move(parameterValue)});
+     return true;
+ }
+
+ // Cross-checks table settings: PARTITION_COUNT is accepted only together with
+ // STORE = COLUMN (store name compared case-insensitively).
+ bool TSqlTranslation::ValidateTableSettings(const TTableSettings& settings) {
+     if (!settings.PartitionCount) {
+         return true;
+     }
+
+     const bool isColumnStore = settings.StoreType && to_lower(settings.StoreType->Name) == "column";
+     if (isColumnStore) {
+         return true;
+     }
+
+     Ctx.Error() << " PARTITION_COUNT can be used only with STORE=COLUMN";
+     return false;
+ }
+
+ // Stores or resets one setting of a regular table / table store.
+ // The setting name is matched case-insensitively. Only TTL and TIERING support
+ // reset; UNIFORM_PARTITIONS and PARTITION_AT_KEYS are rejected in any ALTER.
+ // After a successful store the combined settings are re-validated.
+ // `value` may be null only when `reset` is true, and reset implies alter.
+ bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value,
+         TTableSettings& settings, bool alter, bool reset) {
+     YQL_ENSURE(value || reset);
+     YQL_ENSURE(!reset || reset && alter);
+
+     const TString name = to_lower(id.Name);
+
+     // Error reporters; each one emits its message and yields false.
+     const auto resetUnsupported = [&]() {
+         Ctx.Error() << to_upper(id.Name) << " reset is not supported";
+         return false;
+     };
+     const auto alterUnsupported = [&]() {
+         Ctx.Error() << to_upper(id.Name) << " alter is not supported";
+         return false;
+     };
+     const auto notString = [&]() {
+         Ctx.Error() << to_upper(id.Name) << " value should be a string literal";
+         return false;
+     };
+     const auto notInteger = [&]() {
+         Ctx.Error() << to_upper(id.Name) << " value should be an integer";
+         return false;
+     };
+     const auto notIdentifier = [&]() {
+         Ctx.Error() << to_upper(id.Name) << " value should be an identifier";
+         return false;
+     };
+
+     if (name == "compaction_policy") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreString(*value, settings.CompactionPolicy, Ctx)) {
+             return notString();
+         }
+     } else if (name == "auto_partitioning_by_size") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreId(*value, settings.AutoPartitioningBySize, *this)) {
+             return notIdentifier();
+         }
+     } else if (name == "auto_partitioning_partition_size_mb") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreInt(*value, settings.PartitionSizeMb, Ctx)) {
+             return notInteger();
+         }
+     } else if (name == "auto_partitioning_by_load") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreId(*value, settings.AutoPartitioningByLoad, *this)) {
+             return notIdentifier();
+         }
+     } else if (name == "auto_partitioning_min_partitions_count") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreInt(*value, settings.MinPartitions, Ctx)) {
+             return notInteger();
+         }
+     } else if (name == "auto_partitioning_max_partitions_count") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreInt(*value, settings.MaxPartitions, Ctx)) {
+             return notInteger();
+         }
+     } else if (name == "partition_count") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreInt(*value, settings.PartitionCount, Ctx)) {
+             return notInteger();
+         }
+     } else if (name == "uniform_partitions") {
+         // Rejected for any ALTER; reset implies alter, so `value` is non-null below.
+         if (alter) {
+             return alterUnsupported();
+         }
+         if (!StoreInt(*value, settings.UniformPartitions, Ctx)) {
+             return notInteger();
+         }
+     } else if (name == "partition_at_keys") {
+         if (alter) {
+             return alterUnsupported();
+         }
+         TSqlExpression expr(Ctx, Mode);
+         if (!StoreSplitBoundaries(*value, settings.PartitionAtKeys, expr, Ctx)) {
+             Ctx.Error() << to_upper(id.Name) << " value should be a list of keys. "
+                 << "Example1: (10, 1000) Example2: ((10), (1000, \"abc\"))";
+             return false;
+         }
+     } else if (name == "key_bloom_filter") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreId(*value, settings.KeyBloomFilter, *this)) {
+             return notIdentifier();
+         }
+     } else if (name == "read_replicas_settings") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreString(*value, settings.ReadReplicasSettings, Ctx)) {
+             return notString();
+         }
+     } else if (name == "ttl") {
+         if (reset) {
+             settings.TtlSettings.Reset();
+         } else {
+             TSqlExpression expr(Ctx, Mode);
+             if (!StoreTtlSettings(*value, settings.TtlSettings, expr, Ctx, *this)) {
+                 Ctx.Error() << "Invalid TTL settings";
+                 return false;
+             }
+         }
+     } else if (name == "tiering") {
+         if (reset) {
+             settings.Tiering.Reset();
+         } else {
+             TNodePtr tieringNode;
+             if (!StoreString(*value, tieringNode, Ctx)) {
+                 return notString();
+             }
+             settings.Tiering.Set(tieringNode);
+         }
+     } else if (name == "store") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreId(*value, settings.StoreType, *this)) {
+             return notIdentifier();
+         }
+     } else if (name == "partition_by_hash_function") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreString(*value, settings.PartitionByHashFunction, Ctx)) {
+             return notString();
+         }
+     } else if (name == "store_external_blobs") {
+         if (reset) {
+             return resetUnsupported();
+         }
+         if (!StoreId(*value, settings.StoreExternalBlobs, *this)) {
+             return notIdentifier();
+         }
+     } else {
+         Ctx.Error() << "Unknown table setting: " << id.Name;
+         return false;
+     }
+
+     return ValidateTableSettings(settings);
+ }
+
+ // Convenience overload for storing (never resetting) a setting from a value reference.
+ bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value& value,
+         TTableSettings& settings, ETableType tableType, bool alter) {
+     const bool reset = false;
+     return StoreTableSettingsEntry(id, &value, settings, tableType, alter, reset);
+ }
+
+ // Resets a setting: forwards to the pointer overload with no value and alter = reset = true.
+ bool TSqlTranslation::ResetTableSettingsEntry(const TIdentifier& id, TTableSettings& settings, ETableType tableType) {
+     const bool alter = true;
+     const bool reset = true;
+     return StoreTableSettingsEntry(id, nullptr, settings, tableType, alter, reset);
+ }
+
+ // Stores every entry of a WITH (...) table settings list into params.TableSettings,
+ // stopping at the first entry that fails.
+ bool TSqlTranslation::CreateTableSettings(const TRule_with_table_settings& settingsNode, TCreateTableParameters& params) {
+     // Stores a single `name = value` entry for the table type being created.
+     const auto storeEntry = [this, &params](const auto& entry) {
+         return StoreTableSettingsEntry(IdEx(entry.GetRule_an_id1(), *this), entry.GetRule_table_setting_value3(),
+             params.TableSettings, params.TableType);
+     };
+
+     if (!storeEntry(settingsNode.GetRule_table_settings_entry3())) {
+         return false;
+     }
+     for (auto& tail : settingsNode.GetBlock4()) {
+         if (!storeEntry(tail.GetRule_table_settings_entry2())) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Stores or resets one topic consumer setting (IMPORTANT, READ_FROM or
+ // SUPPORTED_CODECS; the name is matched case-insensitively).
+ // A setting that is already present in `settings` is rejected as a duplicate.
+ // `value` may be null only when `reset` is true.
+ bool StoreConsumerSettingsEntry(
+     const TIdentifier& id, const TRule_topic_consumer_setting_value* value, TSqlExpression& ctx,
+     TTopicConsumerSettings& settings,
+     bool reset
+ ) {
+     YQL_ENSURE(value || reset);
+
+     // The value expression is built up front, before the name is examined.
+     TNodePtr parsedValue;
+     if (value) {
+         parsedValue = ctx.Build(value->GetRule_expr1());
+         if (!parsedValue) {
+             ctx.Error() << "invalid value for setting: " << id.Name;
+             return false;
+         }
+     }
+
+     const TString name = to_lower(id.Name);
+     const auto duplicate = [&]() {
+         ctx.Error() << to_upper(id.Name) << " specified multiple times in ALTER CONSUMER statements for single consumer";
+         return false;
+     };
+
+     if (name == "important") {
+         if (settings.Important) {
+             return duplicate();
+         }
+         if (reset) {
+             ctx.Error() << to_upper(id.Name) << " reset is not supported";
+             return false;
+         }
+         const bool isBoolLiteral = parsedValue->IsLiteral() && parsedValue->GetLiteralType() == "Bool";
+         if (!isBoolLiteral) {
+             ctx.Error() << to_upper(id.Name) << " value should be boolean";
+             return false;
+         }
+         settings.Important = parsedValue;
+         return true;
+     }
+
+     if (name == "read_from") {
+         if (settings.ReadFromTs) {
+             return duplicate();
+         }
+         if (reset) {
+             settings.ReadFromTs.Reset();
+         } else {
+             // TODO: validate the value
+             settings.ReadFromTs.Set(parsedValue);
+         }
+         return true;
+     }
+
+     if (name == "supported_codecs") {
+         if (settings.SupportedCodecs) {
+             return duplicate();
+         }
+         if (reset) {
+             settings.SupportedCodecs.Reset();
+             return true;
+         }
+         const bool isStringLiteral = parsedValue->IsLiteral() && parsedValue->GetLiteralType() == "String";
+         if (!isStringLiteral) {
+             ctx.Error() << to_upper(id.Name) << " value should be a string literal";
+             return false;
+         }
+         settings.SupportedCodecs.Set(parsedValue);
+         return true;
+     }
+
+     ctx.Error() << to_upper(id.Name) << ": unknown option for consumer";
+     return false;
+ }
+
+// Extracts the consumer identifier from a `topic_consumer_ref` rule.
+TIdentifier TSqlTranslation::GetTopicConsumerId(const TRule_topic_consumer_ref& node) {
+    const auto& consumerIdRule = node.GetRule_an_id_pure1();
+    return IdEx(consumerIdRule, *this);
+}
+
+// Stores every entry of a `topic_consumer_settings` rule into `settings` (assignment, never reset).
+// A single expression translator is shared by all entries. Returns false as soon as one entry
+// is rejected by StoreConsumerSettingsEntry.
+bool TSqlTranslation::CreateConsumerSettings(
+    const TRule_topic_consumer_settings& node, TTopicConsumerSettings& settings
+) {
+    TSqlExpression expr(Ctx, Mode);
+    const auto storeEntry = [&](const auto& entry) -> bool {
+        return StoreConsumerSettingsEntry(
+            IdEx(entry.GetRule_an_id1(), *this),
+            &entry.GetRule_topic_consumer_setting_value3(),
+            expr, settings, false
+        );
+    };
+
+    if (!storeEntry(node.GetRule_topic_consumer_settings_entry1())) {
+        return false;
+    }
+    for (auto& tail : node.GetBlock2()) {
+        if (!storeEntry(tail.GetRule_topic_consumer_settings_entry2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Appends a consumer description built from a `topic_create_consumer_entry` rule to `consumers`
+// and, when the entry carries a settings block, fills in the new consumer's settings.
+// The consumer stays appended even when its settings fail to translate (false is returned then).
+bool TSqlTranslation::CreateTopicConsumer(
+    const TRule_topic_create_consumer_entry& node,
+    TVector<TTopicConsumerDescription>& consumers
+) {
+    consumers.emplace_back(IdEx(node.GetRule_an_id2(), *this));
+
+    if (!node.HasBlock3()) {
+        return true;
+    }
+
+    auto& settingsRule = node.GetBlock3().GetRule_topic_consumer_with_settings1().GetRule_topic_consumer_settings3();
+    return CreateConsumerSettings(settingsRule, consumers.back().Settings);
+}
+
+// Applies one ALTER CONSUMER entry to `alterConsumer`:
+//   alternative 1 - assigns the listed consumer settings;
+//   alternative 2 - resets every listed consumer setting.
+// Any other alternative (including ALT_NOT_SET) is reported as an unknown action.
+bool TSqlTranslation::AlterTopicConsumerEntry(
+    const TRule_alter_topic_alter_consumer_entry& node, TTopicConsumerDescription& alterConsumer
+) {
+    using TEntry = TRule_alter_topic_alter_consumer_entry;
+
+    switch (node.Alt_case()) {
+        case TEntry::kAltAlterTopicAlterConsumerEntry1: {
+            auto& setRule = node.GetAlt_alter_topic_alter_consumer_entry1().GetRule_topic_alter_consumer_set1();
+            return CreateConsumerSettings(setRule.GetRule_topic_consumer_settings3(), alterConsumer.Settings);
+        }
+        case TEntry::kAltAlterTopicAlterConsumerEntry2: {
+            auto& resetRule = node.GetAlt_alter_topic_alter_consumer_entry2().GetRule_topic_alter_consumer_reset1();
+            TSqlExpression expr(Ctx, Mode);
+            // A reset carries no value, hence the null value pointer.
+            const auto resetSetting = [&](const auto& idRule) -> bool {
+                return StoreConsumerSettingsEntry(
+                    IdEx(idRule, *this),
+                    nullptr,
+                    expr, alterConsumer.Settings, true
+                );
+            };
+
+            if (!resetSetting(resetRule.GetRule_an_id3())) {
+                return false;
+            }
+            for (auto& item : resetRule.GetBlock4()) {
+                if (!resetSetting(item.GetRule_an_id2())) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        default:
+            Ctx.Error() << "unknown alter consumer action";
+            return false;
+    }
+}
+
+// Handles one `alter_topic_alter_consumer` rule.
+// The consumer is looked up in `alterConsumers` by its lower-cased name; a description is inserted
+// when there is none yet, otherwise the already stored one is reused. The alter entry is then
+// applied to that description.
+bool TSqlTranslation::AlterTopicConsumer(
+    const TRule_alter_topic_alter_consumer& node,
+    THashMap<TString, TTopicConsumerDescription>& alterConsumers
+) {
+    auto consumerId = GetTopicConsumerId(node.GetRule_topic_consumer_ref3());
+    TString key = to_lower(consumerId.Name);
+
+    auto insertion = alterConsumers.insert(std::make_pair(
+        key, TTopicConsumerDescription(std::move(consumerId))
+    ));
+    auto& description = insertion.first->second;
+
+    return AlterTopicConsumerEntry(node.GetRule_alter_topic_alter_consumer_entry4(), description);
+}
+
+// Handles one `create_topic_entry` rule by registering its consumer in `params.Consumers`.
+bool TSqlTranslation::CreateTopicEntry(const TRule_create_topic_entry& node, TCreateTopicParameters& params) {
+    // A switch() will be needed here if create_topic_entry ever gets more than one kind of statement.
+    return CreateTopicConsumer(node.GetRule_topic_create_consumer_entry1(), params.Consumers);
+}
+
+// Applies one topic setting (or its reset) to `settings`.
+//
+//   id       - setting name, matched case-insensitively.
+//   value    - parsed value rule; may be null only when `reset` is true.
+//   ctx      - expression translator used to build the value node and to report errors.
+//   settings - topic settings being filled in.
+//   reset    - true to reset the setting instead of assigning a value.
+//
+// Settings fall into three groups that differ only in how the value is validated:
+//   integer literal  - min_active_partitions, partition_count_limit / max_active_partitions,
+//                      retention_storage_mb, partition_write_speed_bytes_per_second,
+//                      partition_write_burst_bytes, auto_partitioning_up_utilization_percent,
+//                      auto_partitioning_down_utilization_percent;
+//   string literal   - metering_mode, supported_codecs, auto_partitioning_strategy;
+//   Interval         - retention_period, auto_partitioning_stabilization_window.
+//
+// Returns false after reporting an error through `ctx`, true otherwise.
+static bool StoreTopicSettingsEntry(
+    const TIdentifier& id, const TRule_topic_setting_value* value, TSqlExpression& ctx,
+    TTopicSettings& settings, bool reset
+) {
+    YQL_ENSURE(value || reset);
+    TNodePtr valueExprNode;
+    if (value) {
+        valueExprNode = ctx.Build(value->GetRule_expr1());
+        if (!valueExprNode) {
+            ctx.Error() << "invalid value for setting: " << id.Name;
+            return false;
+        }
+    }
+
+    // Lower-case the name once instead of once per comparison.
+    const auto name = to_lower(id.Name);
+
+    // The helpers below reset `field`, or validate the value and assign it.
+    // When `reset` is false, `valueExprNode` is non-null (guaranteed by the checks above).
+    const auto storeInteger = [&](auto& field) -> bool {
+        if (reset) {
+            field.Reset();
+            return true;
+        }
+        if (!valueExprNode->IsIntegerLiteral()) {
+            ctx.Error() << to_upper(id.Name) << " value should be an integer";
+            return false;
+        }
+        field.Set(valueExprNode);
+        return true;
+    };
+    const auto storeString = [&](auto& field) -> bool {
+        if (reset) {
+            field.Reset();
+            return true;
+        }
+        if (!valueExprNode->IsLiteral() || valueExprNode->GetLiteralType() != "String") {
+            ctx.Error() << to_upper(id.Name) << " value should be string";
+            return false;
+        }
+        field.Set(valueExprNode);
+        return true;
+    };
+    // `subject` names the setting in the error message, so that each Interval setting reports
+    // itself rather than "retention".
+    const auto storeInterval = [&](auto& field, const char* subject) -> bool {
+        if (reset) {
+            field.Reset();
+            return true;
+        }
+        if (valueExprNode->GetOpName() != "Interval") {
+            ctx.Error() << "Literal of Interval type is expected for " << subject;
+            return false;
+        }
+        field.Set(valueExprNode);
+        return true;
+    };
+
+    if (name == "min_active_partitions") {
+        return storeInteger(settings.MinPartitions);
+    }
+    if (name == "partition_count_limit" || name == "max_active_partitions") {
+        return storeInteger(settings.MaxPartitions);
+    }
+    if (name == "retention_period") {
+        return storeInterval(settings.RetentionPeriod, "retention");
+    }
+    if (name == "retention_storage_mb") {
+        return storeInteger(settings.RetentionStorage);
+    }
+    if (name == "partition_write_speed_bytes_per_second") {
+        return storeInteger(settings.PartitionWriteSpeed);
+    }
+    if (name == "partition_write_burst_bytes") {
+        return storeInteger(settings.PartitionWriteBurstSpeed);
+    }
+    if (name == "metering_mode") {
+        return storeString(settings.MeteringMode);
+    }
+    if (name == "supported_codecs") {
+        return storeString(settings.SupportedCodecs);
+    }
+    if (name == "auto_partitioning_stabilization_window") {
+        return storeInterval(settings.AutoPartitioningStabilizationWindow, "auto partitioning stabilization window");
+    }
+    if (name == "auto_partitioning_up_utilization_percent") {
+        return storeInteger(settings.AutoPartitioningUpUtilizationPercent);
+    }
+    if (name == "auto_partitioning_down_utilization_percent") {
+        return storeInteger(settings.AutoPartitioningDownUtilizationPercent);
+    }
+    if (name == "auto_partitioning_strategy") {
+        return storeString(settings.AutoPartitioningStrategy);
+    }
+
+    ctx.Error() << "unknown topic setting: " << id.Name;
+    return false;
+}
+
+// Applies one ALTER TOPIC action to `params`.
+//
+// Grammar:
+//   alter_topic_action:
+//       alter_topic_add_consumer
+//     | alter_topic_alter_consumer
+//     | alter_topic_drop_consumer
+//     | alter_topic_set_settings
+//     | alter_topic_reset_settings
+//
+// Returns false when the action fails to translate or when the alternative is not recognised.
+bool TSqlTranslation::AlterTopicAction(const TRule_alter_topic_action& node, TAlterTopicParameters& params) {
+    using TAction = TRule_alter_topic_action;
+
+    switch (node.Alt_case()) {
+        case TAction::kAltAlterTopicAction1: { // alter_topic_add_consumer
+            auto& addRule = node.GetAlt_alter_topic_action1().GetRule_alter_topic_add_consumer1();
+            return CreateTopicConsumer(addRule.GetRule_topic_create_consumer_entry2(), params.AddConsumers);
+        }
+
+        case TAction::kAltAlterTopicAction2: { // alter_topic_alter_consumer
+            auto& alterRule = node.GetAlt_alter_topic_action2().GetRule_alter_topic_alter_consumer1();
+            return AlterTopicConsumer(alterRule, params.AlterConsumers);
+        }
+
+        case TAction::kAltAlterTopicAction3: { // alter_topic_drop_consumer
+            auto& dropRule = node.GetAlt_alter_topic_action3().GetRule_alter_topic_drop_consumer1();
+            params.DropConsumers.emplace_back(GetTopicConsumerId(dropRule.GetRule_topic_consumer_ref3()));
+            return true;
+        }
+
+        case TAction::kAltAlterTopicAction4: { // alter_topic_set_settings
+            auto& setRule = node.GetAlt_alter_topic_action4().GetRule_alter_topic_set_settings1();
+            return CreateTopicSettings(setRule.GetRule_topic_settings3(), params.TopicSettings);
+        }
+
+        case TAction::kAltAlterTopicAction5: { // alter_topic_reset_settings
+            auto& resetRule = node.GetAlt_alter_topic_action5().GetRule_alter_topic_reset_settings1();
+            TSqlExpression expr(Ctx, Mode);
+            // A reset carries no value, hence the null value pointer.
+            const auto resetSetting = [&](const auto& idRule) -> bool {
+                return StoreTopicSettingsEntry(
+                    IdEx(idRule, *this),
+                    nullptr, expr,
+                    params.TopicSettings, true
+                );
+            };
+
+            if (!resetSetting(resetRule.GetRule_an_id3())) {
+                return false;
+            }
+            for (auto& item : resetRule.GetBlock4()) {
+                if (!resetSetting(item.GetRule_an_id_pure2())) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        default:
+            Ctx.Error() << "unknown alter topic action";
+            return false;
+    }
+}
+
+// Stores every entry of a `topic_settings` rule into `settings` (assignment, never reset).
+// A single expression translator is shared by all entries. Returns false as soon as one entry
+// is rejected by StoreTopicSettingsEntry.
+bool TSqlTranslation::CreateTopicSettings(const TRule_topic_settings& node, TTopicSettings& settings) {
+    TSqlExpression expr(Ctx, Mode);
+    const auto storeEntry = [&](const auto& entry) -> bool {
+        return StoreTopicSettingsEntry(
+            IdEx(entry.GetRule_an_id1(), *this),
+            &entry.GetRule_topic_setting_value3(),
+            expr, settings, false
+        );
+    };
+
+    if (!storeEntry(node.GetRule_topic_settings_entry1())) {
+        return false;
+    }
+    for (auto& tail : node.GetBlock2()) {
+        if (!storeEntry(tail.GetRule_topic_settings_entry2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Translates an `integer_or_bind` rule into an atom node.
+//   alternative 1 - an integer token: parsed with ParseNumbers and emitted as a quoted atom
+//                   holding the parsed value rendered through ToString;
+//   alternative 2 - a bind parameter: the named node is resolved and turned into an atom.
+// Returns an empty pointer on failure.
+TNodePtr TSqlTranslation::IntegerOrBind(const TRule_integer_or_bind& node) {
+    switch (node.Alt_case()) {
+        case TRule_integer_or_bind::kAltIntegerOrBind1: {
+            const auto& integerToken = node.GetAlt_integer_or_bind1().GetRule_integer1().GetToken1();
+            const TString literal = Ctx.Token(integerToken);
+            ui64 parsed;
+            TString suffix; // filled by ParseNumbers, not used here
+            if (ParseNumbers(Ctx, literal, parsed, suffix)) {
+                return BuildQuotedAtom(Ctx.Pos(), ToString(parsed), TNodeFlags::ArbitraryContent);
+            }
+            return {};
+        }
+        case TRule_integer_or_bind::kAltIntegerOrBind2: {
+            TString bindName;
+            if (!NamedNodeImpl(node.GetAlt_integer_or_bind2().GetRule_bind_parameter1(), bindName, *this)) {
+                return {};
+            }
+            auto boundNode = GetNamedNode(bindName);
+            if (!boundNode) {
+                return {};
+            }
+            auto boundAtom = MakeAtomFromExpression(Ctx.Pos(), Ctx, boundNode);
+            return boundAtom.Build();
+        }
+        case TRule_integer_or_bind::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Translates a `type_name_tag` rule into an atom node.
+//   alternative 1 - an identifier;
+//   alternative 2 - a token whose content is extracted with StringContentOrIdContent;
+//   alternative 3 - a bind parameter resolved to a named node.
+// Returns an empty pointer on failure.
+TNodePtr TSqlTranslation::TypeNameTag(const TRule_type_name_tag& node) {
+    switch (node.Alt_case()) {
+        case TRule_type_name_tag::kAltTypeNameTag1: {
+            // The identifier is read before the position is sampled, as separate statements.
+            auto identifier = Id(node.GetAlt_type_name_tag1().GetRule_id1(), *this);
+            auto identifierAtom = TDeferredAtom(Ctx.Pos(), identifier);
+            return identifierAtom.Build();
+        }
+        case TRule_type_name_tag::kAltTypeNameTag2: {
+            auto rawToken = Token(node.GetAlt_type_name_tag2().GetToken1());
+            auto content = StringContentOrIdContent(Ctx, Ctx.Pos(), rawToken);
+            if (!content) {
+                return {};
+            }
+            auto contentAtom = TDeferredAtom(Ctx.Pos(), content->Content);
+            return contentAtom.Build();
+        }
+        case TRule_type_name_tag::kAltTypeNameTag3: {
+            TString bindName;
+            if (!NamedNodeImpl(node.GetAlt_type_name_tag3().GetRule_bind_parameter1(), bindName, *this)) {
+                return {};
+            }
+            auto boundNode = GetNamedNode(bindName);
+            if (!boundNode) {
+                return {};
+            }
+            TDeferredAtom boundAtom;
+            MakeTableFromExpression(Ctx.Pos(), Ctx, boundNode, boundAtom);
+            return boundAtom.Build();
+        }
+        case TRule_type_name_tag::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Builds a simple type node from the identifier of a `type_name_simple` rule.
+// Returns an empty pointer when the identifier comes back empty.
+TNodePtr TSqlTranslation::TypeSimple(const TRule_type_name_simple& node, bool onlyDataAllowed) {
+    const TString typeName = Id(node.GetRule_an_id_pure1(), *this);
+    if (!typeName.empty()) {
+        return BuildSimpleType(Ctx, Ctx.Pos(), typeName, onlyDataAllowed);
+    }
+    return {};
+}
+
+// Builds `(DataType 'Decimal <first> <second>)` from a `type_name_decimal` rule.
+// Both parameters are integers or bind parameters (presumably precision and scale - the names
+// are not visible here). Returns an empty pointer when either parameter fails to translate.
+TNodePtr TSqlTranslation::TypeDecimal(const TRule_type_name_decimal& node) {
+    // The position is sampled before the parameters are translated.
+    auto pos = Ctx.Pos();
+
+    auto firstParam = IntegerOrBind(node.GetRule_integer_or_bind3());
+    if (!firstParam) {
+        return {};
+    }
+    auto secondParam = IntegerOrBind(node.GetRule_integer_or_bind5());
+    if (!secondParam) {
+        return {};
+    }
+
+    auto typeNameAtom = BuildQuotedAtom(pos, "Decimal", TNodeFlags::Default);
+    return new TCallNodeImpl(pos, "DataType", { typeNameAtom, firstParam, secondParam });
+}
+
+// Wraps `node` into `optionalCount` nested OptionalType calls, all positioned at the node's own
+// position. A null `node` is returned unchanged.
+TNodePtr TSqlTranslation::AddOptionals(const TNodePtr& node, size_t optionalCount) {
+    if (!node) {
+        return node;
+    }
+
+    const TPosition pos = node->GetPos();
+    TNodePtr wrapped = node;
+    for (size_t remaining = optionalCount; remaining > 0; --remaining) {
+        wrapped = new TCallNodeImpl(pos, "OptionalType", { wrapped });
+    }
+    return wrapped;
+}
+
+
+// Translates a `callable_arg_list` rule into one AST list per argument.
+//
+// Each argument list holds the type node, optionally followed by the argument name atom and by
+// the "1" flag atom that is emitted when the argument has its trailing block (Block2 of
+// callable_arg). An unnamed argument that carries the flag gets exactly one empty name atom, so
+// that the flag always lands in the third position.
+//
+//   argList          - the parsed argument list (first argument plus a repeated block).
+//   namedArgsStarted - true when an earlier argument was already named (possibly in a previous
+//                      list of the same callable); an unnamed argument is then rejected.
+//
+// Returns the argument lists paired with the updated "named arguments started" state, or an
+// empty TMaybe when an argument fails to translate or an unnamed argument follows a named one.
+TMaybe<std::pair<TVector<TNodePtr>, bool>> TSqlTranslation::CallableArgList(const TRule_callable_arg_list& argList, bool namedArgsStarted) {
+    auto pos = Ctx.Pos();
+    auto flags = TNodeFlags::Default;
+
+    // Builds the AST list for one argument and keeps `namedArgsStarted` up to date.
+    // `alwaysEmitName` keeps the historical shapes: the first argument of a list omits the empty
+    // name atom when it has neither a name nor the flag, the following arguments always emit it.
+    const auto buildArg = [&](const auto& arg, bool alwaysEmitName) -> TNodePtr {
+        const auto& varArg = arg.GetRule_variant_arg1();
+        const bool hasName = varArg.HasBlock1();
+        const bool hasFlag = arg.HasBlock2();
+
+        TVector<TNodePtr> items;
+        auto typeNode = TypeNodeOrBind(varArg.GetRule_type_name_or_bind2());
+        if (!typeNode) {
+            return {};
+        }
+        items.push_back(typeNode);
+
+        if (hasName) {
+            // Remember the name for every argument, not only the first one, so that later
+            // unnamed arguments are rejected and the caller receives the right state.
+            namedArgsStarted = true;
+            auto tag = TypeNameTag(varArg.GetBlock1().GetRule_type_name_tag1());
+            if (!tag) {
+                return {};
+            }
+            items.push_back(tag);
+        } else {
+            if (namedArgsStarted) {
+                Ctx.Error() << "Expected named argument, previous argument was named";
+                return {};
+            }
+            if (alwaysEmitName || hasFlag) {
+                // Single placeholder name; the flag (if any) follows it.
+                items.push_back(BuildQuotedAtom(pos, "", flags));
+            }
+        }
+
+        if (hasFlag) {
+            items.push_back(BuildQuotedAtom(pos, "1", flags));
+        }
+        return new TAstListNodeImpl(pos, items);
+    };
+
+    TVector<TNodePtr> result;
+
+    auto firstArg = buildArg(argList.GetRule_callable_arg1(), /* alwaysEmitName */ false);
+    if (!firstArg) {
+        return {};
+    }
+    result.push_back(firstArg);
+
+    for (auto& block : argList.GetBlock2()) {
+        auto nextArg = buildArg(block.GetRule_callable_arg2(), /* alwaysEmitName */ true);
+        if (!nextArg) {
+            return {};
+        }
+        result.push_back(nextArg);
+    }
+
+    return std::make_pair(result, namedArgsStarted);
+}
+
+// Translates a `type_name_or_bind` rule: either a type name (delegated to TypeNode) or a bind
+// parameter, which is resolved to its named node. Returns an empty pointer on failure.
+TNodePtr TSqlTranslation::TypeNodeOrBind(const TRule_type_name_or_bind& node) {
+    switch (node.Alt_case()) {
+        case TRule_type_name_or_bind::kAltTypeNameOrBind1:
+            return TypeNode(node.GetAlt_type_name_or_bind1().GetRule_type_name1());
+        case TRule_type_name_or_bind::kAltTypeNameOrBind2: {
+            TString bindName;
+            const auto& bindRule = node.GetAlt_type_name_or_bind2().GetRule_bind_parameter1();
+            if (NamedNodeImpl(bindRule, bindName, *this)) {
+                return GetNamedNode(bindName);
+            }
+            return {};
+        }
+        case TRule_type_name_or_bind::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Translates a `type_name` rule into a type node.
+//
+// Grammar:
+//   type_name:
+//       type_name_composite
+//     | (type_name_decimal | type_name_simple) QUESTION*;
+//
+// For the second alternative every trailing QUESTION adds one OptionalType wrapper.
+TNodePtr TSqlTranslation::TypeNode(const TRule_type_name& node) {
+    if (node.Alt_case() == TRule_type_name::kAltTypeName1) {
+        return TypeNode(node.GetAlt_type_name1().GetRule_type_name_composite1());
+    }
+
+    using TKind = TRule_type_name::TAlt2::TBlock1;
+
+    auto& scalarAlt = node.GetAlt_type_name2();
+    auto& kindBlock = scalarAlt.GetBlock1();
+
+    TNodePtr baseType;
+    switch (kindBlock.Alt_case()) {
+        case TKind::kAlt1:
+            baseType = TypeDecimal(kindBlock.GetAlt1().GetRule_type_name_decimal1());
+            break;
+        case TKind::kAlt2:
+            baseType = TypeSimple(kindBlock.GetAlt2().GetRule_type_name_simple1(), false);
+            break;
+        case TKind::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    return AddOptionals(baseType, scalarAlt.GetBlock2().size());
+}
+
+TNodePtr TSqlTranslation::TypeNode(const TRule_type_name_composite& node) {
+ // Builds the type AST for a composite type name; an empty result means a nested type or tag failed.
+ // Every trailing QUESTION token (Block2) wraps the result into one more OptionalType.
+ // Grammar:
+ // type_name_composite:
+ // ( type_name_optional | type_name_tuple
+ // | type_name_struct | type_name_variant
+ // | type_name_list | type_name_stream
+ // | type_name_flow | type_name_dict
+ // | type_name_set | type_name_enum
+ // | type_name_resource | type_name_tagged
+ // | type_name_callable
+ // ) QUESTION*;
+ // Alternatives kAlt1..kAlt13 of Block1 follow the order listed above.
+ // All nodes built here share the position sampled once at the start (pos).
+ // Nested types are translated through TypeNodeOrBind, names through TypeNameTag.
+ TNodePtr result;
+ TPosition pos = Ctx.Pos();
+ auto flags = TNodeFlags::Default;
+
+ auto wrapOneParamType = [&] (const TRule_type_name_or_bind& param, const char* type) -> TNodePtr { // (<type> <param>), or null when param fails
+ auto node = TypeNodeOrBind(param);
+ return node ? new TAstListNodeImpl(pos, { BuildAtom(pos, type, flags), node }) : nullptr;
+ };
+ auto makeVoid = [&] () -> TNodePtr { // (VoidType)
+ return new TAstListNodeImpl(pos, { BuildAtom(pos, "VoidType", flags) });
+ };
+ auto makeQuote = [&] (const TNodePtr& node) -> TNodePtr { // (quote <node>)
+ return new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), node });
+ };
+
+ auto& block = node.GetBlock1();
+ switch (block.Alt_case()) {
+ case TRule_type_name_composite_TBlock1::kAlt1: { // type_name_optional -> (OptionalType <type>)
+ auto& optionalType = block.GetAlt1().GetRule_type_name_optional1();
+ result = wrapOneParamType(optionalType.GetRule_type_name_or_bind3(), "OptionalType");
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt2: { // type_name_tuple -> (TupleType <type>...)
+ auto& tupleType = block.GetAlt2().GetRule_type_name_tuple1();
+ TVector<TNodePtr> items;
+ items.push_back(BuildAtom(pos, "TupleType", flags));
+
+ switch (tupleType.GetBlock2().Alt_case()) {
+ case TRule_type_name_tuple::TBlock2::kAlt1: {
+ if (tupleType.GetBlock2().GetAlt1().HasBlock2()) { // element list present; otherwise the tuple stays empty
+ auto typeNode = TypeNodeOrBind(tupleType.GetBlock2().GetAlt1().GetBlock2().GetRule_type_name_or_bind1());
+ if (!typeNode) {
+ return {};
+ }
+ items.push_back(typeNode);
+ for (auto& arg : tupleType.GetBlock2().GetAlt1().GetBlock2().GetBlock2()) {
+ auto typeNode = TypeNodeOrBind(arg.GetRule_type_name_or_bind2());
+ if (!typeNode) {
+ return {};
+ }
+ items.push_back(typeNode);
+ }
+ }
+ [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME
+ }
+ case TRule_type_name_tuple::TBlock2::kAlt2: // no elements to add
+ break;
+ case TRule_type_name_tuple::TBlock2::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ result = new TAstListNodeImpl(pos, items);
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt3: { // type_name_struct -> (StructType '(<tag> <type>)...)
+ auto& structType = block.GetAlt3().GetRule_type_name_struct1();
+ TVector<TNodePtr> items;
+ items.push_back(BuildAtom(pos, "StructType", flags));
+
+ switch (structType.GetBlock2().Alt_case()) {
+ case TRule_type_name_struct::TBlock2::kAlt1: {
+ if (structType.GetBlock2().GetAlt1().HasBlock2()) { // member list present; otherwise the struct stays empty
+ auto& structArg = structType.GetBlock2().GetAlt1().GetBlock2().GetRule_struct_arg1();
+ auto typeNode = TypeNodeOrBind(structArg.GetRule_type_name_or_bind3()); // the member type is translated before its tag
+ if (!typeNode) {
+ return {};
+ }
+ auto tag = TypeNameTag(structArg.GetRule_type_name_tag1());
+ if (!tag) {
+ return {};
+ }
+
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
+ for (auto& arg : structType.GetBlock2().GetAlt1().GetBlock2().GetBlock2()) {
+ auto typeNode = TypeNodeOrBind(arg.GetRule_struct_arg2().GetRule_type_name_or_bind3());
+ if (!typeNode) {
+ return {};
+ }
+ auto tag = TypeNameTag(arg.GetRule_struct_arg2().GetRule_type_name_tag1());
+ if (!tag) {
+ return {};
+ }
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
+ }
+ }
+ [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME
+ }
+ case TRule_type_name_struct::TBlock2::kAlt2: // no members to add
+ break;
+ case TRule_type_name_struct::TBlock2::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ result = new TAstListNodeImpl(pos, items);
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt4: { // type_name_variant -> (VariantType <StructType or TupleType>)
+ auto& variantType = block.GetAlt4().GetRule_type_name_variant1();
+ TVector<TNodePtr> items;
+ bool overStruct = false; // decided by the first argument: named -> struct, unnamed -> tuple
+ auto& variantArg = variantType.GetRule_variant_arg3();
+ auto typeNode = TypeNodeOrBind(variantArg.GetRule_type_name_or_bind2());
+ if (!typeNode) {
+ return {};
+ }
+ if (variantArg.HasBlock1()) {
+ items.push_back(BuildAtom(pos, "StructType", flags));
+ overStruct = true;
+ auto tag = TypeNameTag(variantArg.GetBlock1().GetRule_type_name_tag1());
+ if (!tag) {
+ return {};
+ }
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
+ } else {
+ items.push_back(BuildAtom(pos, "TupleType", flags));
+ items.push_back(typeNode);
+ }
+
+ for (auto& arg : variantType.GetBlock4()) { // the remaining arguments must match the first one: all named or all unnamed
+ auto typeNode = TypeNodeOrBind(arg.GetRule_variant_arg2().GetRule_type_name_or_bind2());
+ if (!typeNode) {
+ return {};
+ }
+ if (overStruct) {
+ if (!arg.GetRule_variant_arg2().HasBlock1()) {
+ Ctx.Error() << "Variant over struct and tuple mixture";
+ return {};
+ }
+ auto tag = TypeNameTag(arg.GetRule_variant_arg2().GetBlock1().GetRule_type_name_tag1());
+ if (!tag) {
+ return {};
+ }
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, typeNode })));
+ } else {
+ if (arg.GetRule_variant_arg2().HasBlock1()) {
+ Ctx.Error() << "Variant over struct and tuple mixture";
+ return {};
+ }
+ items.push_back(typeNode);
+ }
+ }
+ typeNode = new TAstListNodeImpl(pos, items); // the underlying struct/tuple type
+ result = new TAstListNodeImpl(pos, { BuildAtom(pos, "VariantType", flags), typeNode });
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt5: { // type_name_list -> (ListType <type>)
+ auto& listType = block.GetAlt5().GetRule_type_name_list1();
+ result = wrapOneParamType(listType.GetRule_type_name_or_bind3(), "ListType");
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt6: { // type_name_stream -> (StreamType <type>)
+ auto& streamType = block.GetAlt6().GetRule_type_name_stream1();
+ result = wrapOneParamType(streamType.GetRule_type_name_or_bind3(), "StreamType");
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt7: { // type_name_flow -> (FlowType <type>)
+ auto& flowType = block.GetAlt7().GetRule_type_name_flow1();
+ result = wrapOneParamType(flowType.GetRule_type_name_or_bind3(), "FlowType");
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt8: { // type_name_dict -> (DictType <type3> <type5>)
+ auto& dictType = block.GetAlt8().GetRule_type_name_dict1();
+ TVector<TNodePtr> items;
+ items.push_back(BuildAtom(pos, "DictType", flags));
+ auto typeNode = TypeNodeOrBind(dictType.GetRule_type_name_or_bind3());
+ if (!typeNode) {
+ return {};
+ }
+ items.push_back(typeNode);
+ typeNode = TypeNodeOrBind(dictType.GetRule_type_name_or_bind5());
+ if (!typeNode) {
+ return {};
+ }
+ items.push_back(typeNode);
+ result = new TAstListNodeImpl(pos, items);
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt9: { // type_name_set -> (DictType <type> (VoidType))
+ auto& setType = block.GetAlt9().GetRule_type_name_set1();
+ auto typeNode = TypeNodeOrBind(setType.GetRule_type_name_or_bind3());
+ if (!typeNode) {
+ return {};
+ }
+ result = new TAstListNodeImpl(pos, { BuildAtom(pos, "DictType", flags), typeNode, makeVoid() });
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt10: { // type_name_enum -> (VariantType (StructType '(<tag> (VoidType))...))
+ auto& enumType = block.GetAlt10().GetRule_type_name_enum1();
+ TVector<TNodePtr> items;
+ items.push_back(BuildAtom(pos, "StructType", flags));
+ auto tag = TypeNameTag(enumType.GetRule_type_name_tag3());
+ if (!tag) {
+ return {};
+ }
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, makeVoid() })));
+ for (auto& arg : enumType.GetBlock4()) {
+ auto tag = TypeNameTag(arg.GetRule_type_name_tag2());
+ if (!tag) {
+ return {};
+ }
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { tag, makeVoid() })));
+ }
+ auto typeNode = new TAstListNodeImpl(pos, items);
+ result = new TAstListNodeImpl(pos, { BuildAtom(pos, "VariantType", flags), typeNode });
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt11: { // type_name_resource -> (ResourceType <tag>)
+ auto& resourceType = block.GetAlt11().GetRule_type_name_resource1();
+ auto tag = TypeNameTag(resourceType.GetRule_type_name_tag3());
+ if (!tag) {
+ return {};
+ }
+ result = new TAstListNodeImpl(pos, { BuildAtom(pos, "ResourceType", flags), tag });
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt12: { // type_name_tagged -> (TaggedType <type> <tag>)
+ auto& taggedType = block.GetAlt12().GetRule_type_name_tagged1();
+ auto typeNode = TypeNodeOrBind(taggedType.GetRule_type_name_or_bind3());
+ if (!typeNode) {
+ return {};
+ }
+ auto tag = TypeNameTag(taggedType.GetRule_type_name_tag5());
+ if (!tag) {
+ return {};
+ }
+ result = new TAstListNodeImpl(pos, { BuildAtom(pos, "TaggedType", flags), typeNode, tag });
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::kAlt13: { // type_name_callable -> (CallableType '(<optional count>?) '(<return type>) '<arg>...)
+ auto& callableType = block.GetAlt13().GetRule_type_name_callable1();
+ TMaybe<std::pair<TVector<TNodePtr>, bool>> requiredArgs, optionalArgs;
+ bool namedArgsStarted = false; // carried from the required list into the optional one
+ size_t optionalArgsCount = 0;
+ if (callableType.HasBlock4()) { // first argument list (stored as requiredArgs)
+ auto& argList = callableType.GetBlock4().GetRule_callable_arg_list1();
+ requiredArgs = CallableArgList(argList, namedArgsStarted);
+ if (!requiredArgs) {
+ return {};
+ }
+ namedArgsStarted = requiredArgs->second;
+ }
+ if (callableType.HasBlock6()) { // second argument list (stored as optionalArgs)
+ auto& argList = callableType.GetBlock6().GetRule_callable_arg_list2();
+ optionalArgs = CallableArgList(argList, namedArgsStarted);
+ if (!optionalArgs) {
+ return {};
+ }
+ optionalArgsCount = optionalArgs->first.size();
+ }
+ auto returnType = TypeNodeOrBind(callableType.GetRule_type_name_or_bind9());
+ if (!returnType) {
+ return {};
+ }
+ TVector<TNodePtr> items;
+ items.push_back(BuildAtom(pos, "CallableType", flags));
+ if (optionalArgsCount) { // first quoted list: the optional argument count, or empty when there are none
+ items.push_back(makeQuote(new TAstListNodeImpl(pos,
+ { BuildQuotedAtom(pos, ToString(optionalArgsCount), flags) })));
+ } else {
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, {})));
+ }
+ items.push_back(makeQuote(new TAstListNodeImpl(pos, { returnType })));
+ if (requiredArgs) { // required arguments come first, optional ones after them
+ for (auto& arg: requiredArgs->first) {
+ items.push_back(makeQuote(arg));
+ }
+ }
+ if (optionalArgs) {
+ for (auto& arg: optionalArgs->first) {
+ items.push_back(makeQuote(arg));
+ }
+ }
+ result = new TAstListNodeImpl(pos, items);
+ break;
+ }
+ case TRule_type_name_composite_TBlock1::ALT_NOT_SET:
+ Y_ABORT("You should change implementation according to grammar changes");
+ }
+
+ return AddOptionals(result, node.GetBlock2().size()); // a null result passes through AddOptionals unchanged
+}
+
+// Builds a literal node from the single token of a `value_constructor_literal` rule.
+TNodePtr TSqlTranslation::ValueConstructorLiteral(const TRule_value_constructor_literal& node) {
+    const auto literalText = Token(node.GetToken1());
+    return BuildLiteralSmartString(Ctx, literalText);
+}
+
+// Translates a `value_constructor` rule through TSqlCallExpr.
+// Returns an empty pointer when the call expression cannot be initialised from the rule.
+TNodePtr TSqlTranslation::ValueConstructor(const TRule_value_constructor& node) {
+    TSqlCallExpr callExpr(Ctx, Mode);
+    if (callExpr.Init(node)) {
+        return callExpr.BuildCall();
+    }
+    return {};
+}
+
+// Builds `(AsListMayWarn <item>...)` from a `list_literal` rule.
+// An absent item block produces a call without arguments. Returns null when an item expression
+// fails to translate.
+TNodePtr TSqlTranslation::ListLiteral(const TRule_list_literal& node) {
+    TVector<TNodePtr> callItems;
+    // The callable atom takes the position current before the items are translated.
+    callItems.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "AsListMayWarn", TNodeFlags::Default));
+
+    TSqlExpression sqlExpr(Ctx, Mode);
+    if (node.HasBlock2()) {
+        if (!ExprList(sqlExpr, callItems, node.GetBlock2().GetRule_expr_list1())) {
+            return nullptr;
+        }
+    }
+
+    // The enclosing list takes the position current after the items were translated.
+    return new TAstListNodeImpl(Ctx.Pos(), std::move(callItems));
+}
+
+// Builds a dict or set literal from a `dict_literal` rule.
+//   no entries            -> (AsDict)
+//   entries without values -> (AsSet <key>...)
+//   entries with values    -> (AsDict <tuple of key and value>...)
+// The first entry decides between the set and the dict form; mixing both forms is an error.
+// Returns null when an expression fails to translate or the forms are mixed.
+TNodePtr TSqlTranslation::DictLiteral(const TRule_dict_literal& node) {
+    TVector<TNodePtr> callItems;
+
+    if (!node.HasBlock2()) {
+        callItems.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "AsDict", TNodeFlags::Default));
+        return new TAstListNodeImpl(Ctx.Pos(), std::move(callItems));
+    }
+
+    const auto& entries = node.GetBlock2().GetRule_expr_dict_list1();
+    const bool isSet = !entries.HasBlock2();
+    callItems.push_back(new TAstAtomNodeImpl(Ctx.Pos(), isSet ? "AsSet" : "AsDict", TNodeFlags::Default));
+    TSqlExpression sqlExpr(Ctx, Mode);
+
+    // Appends one entry: the bare key for a set (null `payload`), a key/value tuple for a dict.
+    const auto appendEntry = [&](const TRule_expr& key, const TRule_expr* payload) -> bool {
+        if (!payload) {
+            if (!Expr(sqlExpr, callItems, key)) {
+                return false;
+            }
+            return true;
+        }
+
+        TVector<TNodePtr> keyAndValue;
+        if (!Expr(sqlExpr, keyAndValue, key)) {
+            return false;
+        }
+        if (!Expr(sqlExpr, keyAndValue, *payload)) {
+            return false;
+        }
+        callItems.push_back(new TTupleNode(Ctx.Pos(), std::move(keyAndValue)));
+        return true;
+    };
+
+    if (!appendEntry(entries.GetRule_expr1(), isSet ? nullptr : &entries.GetBlock2().GetRule_expr2())) {
+        return nullptr;
+    }
+
+    for (auto& tail : entries.GetBlock3()) {
+        sqlExpr.Token(tail.GetToken1());
+        const bool tailIsSet = !tail.HasBlock3();
+        if (tailIsSet != isSet) {
+            Error() << "Expected keys/values pair or keys, but got mix of them";
+            return nullptr;
+        }
+
+        if (!appendEntry(tail.GetRule_expr2(), isSet ? nullptr : &tail.GetBlock3().GetRule_expr2())) {
+            return nullptr;
+        }
+    }
+
+    return new TAstListNodeImpl(Ctx.Pos(), std::move(callItems));
+}
+
+// Translates one `label: value` item of a struct literal.
+// The label expression is appended to `labels` and then replaced in place by the atom built from
+// it; the value expression is appended to `values`. Returns false when either part fails.
+bool TSqlTranslation::StructLiteralItem(TVector<TNodePtr>& labels, const TRule_expr& label, TVector<TNodePtr>& values, const TRule_expr& value) {
+    // Label: translated inside its own scope so that the column-reference state
+    // (AsStringLiteral) ends before the value is translated.
+    {
+        TColumnRefScope labelScope(Ctx, EColumnRefState::AsStringLiteral, /* topLevel */ false);
+        TSqlExpression labelExpr(Ctx, Mode);
+        if (!Expr(labelExpr, labels, label)) {
+            return false;
+        }
+
+        TDeferredAtom labelAtom;
+        MakeTableFromExpression(Ctx.Pos(), Ctx, labels.back(), labelAtom);
+        labels.back() = labelAtom.Build();
+        if (!labels.back()) {
+            return false;
+        }
+    }
+
+    // Value: a plain expression with a fresh translator.
+    {
+        TSqlExpression valueExpr(Ctx, Mode);
+        if (!Expr(valueExpr, values, value)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Builds a structure node from a `struct_literal` rule, positioned at the literal's first token.
+// An absent item block yields a structure without members. Returns an empty pointer when an
+// item fails to translate.
+TNodePtr TSqlTranslation::StructLiteral(const TRule_struct_literal& node) {
+    TVector<TNodePtr> memberLabels;
+    TVector<TNodePtr> memberValues;
+    TPosition pos = Ctx.TokenPosition(node.GetToken1());
+
+    if (node.HasBlock2()) {
+        const auto& items = node.GetBlock2().GetRule_expr_struct_list1();
+        const auto addItem = [&](const TRule_expr& labelRule, const TRule_expr& valueRule) -> bool {
+            return StructLiteralItem(memberLabels, labelRule, memberValues, valueRule);
+        };
+
+        if (!addItem(items.GetRule_expr1(), items.GetRule_expr3())) {
+            return {};
+        }
+        for (auto& tail : items.GetBlock4()) {
+            if (!addItem(tail.GetRule_expr2(), tail.GetRule_expr4())) {
+                return {};
+            }
+        }
+    }
+
+    return BuildStructure(pos, memberValues, memberLabels);
+}
+
+// Translates a single table hint and stores it in `hints`.
+// `provider` and `keyFunc` only affect the positional SCHEMA form: the tuple of column
+// labels is stored next to the struct type unless the provider is YtProviderName and
+// the key function is not "object" (compared case-insensitively).
+// Returns false when the hint cannot be translated.
+bool TSqlTranslation::TableHintImpl(const TRule_table_hint& rule, TTableHints& hints, const TString& provider, const TString& keyFunc) {
+    // table_hint:
+    //      an_id_hint (EQUALS (type_name_tag | LPAREN type_name_tag (COMMA type_name_tag)* COMMA? RPAREN))?
+    //    | (SCHEMA | COLUMNS) EQUALS? type_name_or_bind
+    //    | SCHEMA EQUALS? LPAREN (struct_arg_positional (COMMA struct_arg_positional)*)? COMMA? RPAREN
+    switch (rule.Alt_case()) {
+    case TRule_table_hint::kAltTableHint1: {
+        const auto& namedHint = rule.GetAlt_table_hint1();
+        const TString id = Id(namedHint.GetRule_an_id_hint1(), *this);
+        const auto loweredId = to_lower(id);
+        const bool requiresType = loweredId == "schema" || loweredId == "columns";
+        if (requiresType) {
+            Error() << "Expected type after " << to_upper(id);
+            return false;
+        }
+
+        TVector<TNodePtr> hintValues;
+        if (namedHint.HasBlock2()) {
+            const auto& tags = namedHint.GetBlock2().GetBlock2();
+            switch (tags.Alt_case()) {
+            case TRule_table_hint_TAlt1_TBlock2_TBlock2::kAlt1:
+                hintValues.push_back(TypeNameTag(tags.GetAlt1().GetRule_type_name_tag1()));
+                break;
+            case TRule_table_hint_TAlt1_TBlock2_TBlock2::kAlt2: {
+                const auto& tagList = tags.GetAlt2();
+                hintValues.push_back(TypeNameTag(tagList.GetRule_type_name_tag2()));
+                for (const auto& extraTag : tagList.GetBlock3()) {
+                    hintValues.push_back(TypeNameTag(extraTag.GetRule_type_name_tag2()));
+                }
+                break;
+            }
+            case TRule_table_hint_TAlt1_TBlock2_TBlock2::ALT_NOT_SET:
+                Y_ABORT("You should change implementation according to grammar changes");
+            }
+        }
+        hints[id] = hintValues;
+        break;
+    }
+
+    case TRule_table_hint::kAltTableHint2: {
+        const auto& typedHint = rule.GetAlt_table_hint2();
+        auto typeNode = TypeNodeOrBind(typedHint.GetRule_type_name_or_bind3());
+        if (!typeNode) {
+            return false;
+        }
+
+        hints["user_" + to_lower(typedHint.GetToken1().GetValue())] = { typeNode };
+        break;
+    }
+
+    case TRule_table_hint::kAltTableHint3: {
+        const auto& schemaHint = rule.GetAlt_table_hint3();
+        TVector<TNodePtr> labels;
+        TVector<TNodePtr> structTypeItems;
+        if (schemaHint.HasBlock4()) {
+            // The deprecation warning is reported at most once per hint.
+            bool deprecationReported = false;
+            auto addColumn = [&](const TRule_struct_arg_positional& arg) {
+                // struct_arg_positional:
+                //     type_name_tag type_name_or_bind (NOT? NULL)?
+                //   | type_name_or_bind AS type_name_tag; //deprecated
+                const bool isCurrentSyntax = arg.Alt_case() == TRule_struct_arg_positional::kAltStructArgPositional1;
+                auto& typeRule = isCurrentSyntax ?
+                    arg.GetAlt_struct_arg_positional1().GetRule_type_name_or_bind2() :
+                    arg.GetAlt_struct_arg_positional2().GetRule_type_name_or_bind1();
+                auto columnType = TypeNodeOrBind(typeRule);
+                if (!columnType) {
+                    return false;
+                }
+
+                auto columnPos = Ctx.Pos();
+                if (!isCurrentSyntax && !deprecationReported) {
+                    Ctx.Warning(columnPos, TIssuesIds::YQL_DEPRECATED_POSITIONAL_SCHEMA)
+                        << "Deprecated syntax for positional schema: please use 'column type' instead of 'type AS column'";
+                    deprecationReported = true;
+                }
+
+                if (isCurrentSyntax) {
+                    // In the current syntax a column is wrapped into an optional type
+                    // unless the (NOT? NULL) suffix is present together with its NOT part.
+                    const auto& current = arg.GetAlt_struct_arg_positional1();
+                    const bool notNull = current.HasBlock3() && current.GetBlock3().HasBlock1();
+                    if (!notNull) {
+                        columnType = new TCallNodeImpl(columnPos, "AsOptionalType", { columnType });
+                    }
+                }
+
+                auto& nameRule = isCurrentSyntax ?
+                    arg.GetAlt_struct_arg_positional1().GetRule_type_name_tag1() :
+                    arg.GetAlt_struct_arg_positional2().GetRule_type_name_tag3();
+                auto columnName = TypeNameTag(nameRule);
+                if (!columnName) {
+                    return false;
+                }
+
+                labels.push_back(columnName);
+                structTypeItems.push_back(BuildTuple(columnPos, { columnName, columnType }));
+                return true;
+            };
+
+            const auto& columns = schemaHint.GetBlock4();
+            if (!addColumn(columns.GetRule_struct_arg_positional1())) {
+                return false;
+            }
+
+            for (const auto& next : columns.GetBlock2()) {
+                if (!addColumn(next.GetRule_struct_arg_positional2())) {
+                    return false;
+                }
+            }
+        }
+
+        TPosition pos = Ctx.TokenPosition(schemaHint.GetToken1());
+        TNodePtr structType = new TCallNodeImpl(pos, "StructType", structTypeItems);
+        const bool emitLabels = provider != YtProviderName || TCiString(keyFunc) == "object";
+        const TString hintName = "user_" + to_lower(schemaHint.GetToken1().GetValue());
+        if (emitLabels) {
+            auto labelsTuple = BuildTuple(pos, labels);
+            hints[hintName] = { structType, labelsTuple };
+        } else {
+            hints[hintName] = { structType };
+        }
+        break;
+    }
+
+    case TRule_table_hint::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    return true;
+}
+
+// Translates a table_hints rule (a single hint or a list of hints) into a TTableHints map.
+// Returns Nothing() if any hint fails to translate.
+TMaybe<TTableHints> TSqlTranslation::TableHintsImpl(const TRule_table_hints& node, const TString& provider, const TString& keyFunc) {
+    TTableHints hints;
+    const auto& hintsBlock = node.GetBlock2();
+    bool allParsed = true;
+    switch (hintsBlock.Alt_case()) {
+    case TRule_table_hints::TBlock2::kAlt1:
+        allParsed = TableHintImpl(hintsBlock.GetAlt1().GetRule_table_hint1(), hints, provider, keyFunc);
+        break;
+    case TRule_table_hints::TBlock2::kAlt2: {
+        const auto& hintList = hintsBlock.GetAlt2();
+        allParsed = TableHintImpl(hintList.GetRule_table_hint2(), hints, provider, keyFunc);
+        for (const auto& extra : hintList.GetBlock3()) {
+            // Short-circuit: once one hint has failed, the remaining hints are not translated.
+            allParsed = allParsed && TableHintImpl(extra.GetRule_table_hint2(), hints, provider, keyFunc);
+        }
+        break;
+    }
+    case TRule_table_hints::TBlock2::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    if (!allParsed) {
+        return Nothing();
+    }
+
+    return hints;
+}
+
+// Translates a simple table reference together with its optional table hints.
+// Hints start from GetContextHints(Context()) and are replaced as a whole by the
+// explicitly written hints when those are present; non-empty hints become result.Options.
+bool TSqlTranslation::SimpleTableRefImpl(const TRule_simple_table_ref& node, TTableRef& result) {
+    // simple_table_ref: simple_table_ref_core table_hints?;
+    const bool coreOk = SimpleTableRefCoreImpl(node.GetRule_simple_table_ref_core1(), result);
+    if (!coreOk) {
+        return false;
+    }
+
+    TTableHints hints = GetContextHints(Context());
+    if (node.HasBlock2()) {
+        const TString& service = Context().Scoped->CurrService;
+        auto explicitHints = TableHintsImpl(node.GetBlock2().GetRule_table_hints1(), service);
+        if (!explicitHints) {
+            Error() << "Failed to parse table hints";
+            return false;
+        }
+
+        hints = *explicitHints;
+    }
+
+    if (hints.empty()) {
+        return true;
+    }
+
+    result.Options = BuildInputOptions(Context().Pos(), hints);
+    return true;
+}
+
+// Translates the core of a simple table reference into `result`.
+// The service and cluster default to the current scoped ones; an explicit cluster
+// expression (first alternative only) overrides them and is rejected in LIMITED_VIEW mode.
+// Returns true only if table keys were built.
+bool TSqlTranslation::SimpleTableRefCoreImpl(const TRule_simple_table_ref_core& node, TTableRef& result) {
+    // simple_table_ref_core: ((cluster_expr DOT)? id_or_at) | AT? bind_parameter;
+    TString service = Context().Scoped->CurrService;
+    TDeferredAtom cluster = Context().Scoped->CurrCluster;
+    switch (node.Alt_case()) {
+    case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore1: {
+        const auto& objectRef = node.GetAlt_simple_table_ref_core1().GetRule_object_ref1();
+        if (objectRef.HasBlock1()) {
+            if (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) {
+                Error() << "Cluster should not be used in limited view";
+                return false;
+            }
+
+            if (!ClusterExpr(objectRef.GetBlock1().GetRule_cluster_expr1(), false, service, cluster)) {
+                return false;
+            }
+        }
+
+        if (cluster.Empty()) {
+            Error() << "No cluster name given and no default cluster is selected";
+            return false;
+        }
+
+        result = TTableRef(Context().MakeName("table"), service, cluster, nullptr);
+        auto tableName = Id(objectRef.GetRule_id_or_at2(), *this);
+        auto nameAndView = TableKeyImpl(tableName, {}, *this);
+        result.Keys = BuildTableKey(Context().Pos(), result.Service, result.Cluster,
+            TDeferredAtom(Context().Pos(), nameAndView.first), nameAndView.second);
+        break;
+    }
+    case TRule_simple_table_ref_core::AltCase::kAltSimpleTableRefCore2: {
+        if (cluster.Empty()) {
+            Error() << "No cluster name given and no default cluster is selected";
+            return false;
+        }
+
+        const auto& bindRef = node.GetAlt_simple_table_ref_core2();
+        auto hasAt = bindRef.HasBlock1();
+        TString bindName;
+        if (!NamedNodeImpl(bindRef.GetRule_bind_parameter2(), bindName, *this)) {
+            return false;
+        }
+        auto boundNode = GetNamedNode(bindName);
+        if (!boundNode) {
+            return false;
+        }
+
+        TDeferredAtom tableAtom;
+        MakeTableFromExpression(Context().Pos(), Context(), boundNode, tableAtom);
+        result = TTableRef(Context().MakeName("table"), service, cluster, nullptr);
+        result.Keys = BuildTableKey(Context().Pos(), result.Service, result.Cluster, tableAtom, {hasAt ? "@" : ""});
+        break;
+    }
+    case TRule_simple_table_ref_core::AltCase::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    return result.Keys != nullptr;
+}
+
+// Translates a topic reference into `result`.
+// The cluster defaults to the current scoped cluster and may be overridden by an explicit
+// cluster expression, which is rejected in LIMITED_VIEW mode.
+bool TSqlTranslation::TopicRefImpl(const TRule_topic_ref& node, TTopicRef& result) {
+    TString service = Context().Scoped->CurrService;
+    TDeferredAtom cluster = Context().Scoped->CurrCluster;
+
+    const bool hasExplicitCluster = node.HasBlock1();
+    if (hasExplicitCluster && Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW) {
+        Error() << "Cluster should not be used in limited view";
+        return false;
+    }
+
+    if (hasExplicitCluster && !ClusterExpr(node.GetBlock1().GetRule_cluster_expr1(), false, service, cluster)) {
+        return false;
+    }
+
+    if (cluster.Empty()) {
+        Error() << "No cluster name given and no default cluster is selected";
+        return false;
+    }
+
+    result = TTopicRef(Context().MakeName("topic"), cluster, nullptr);
+    auto topicName = Id(node.GetRule_an_id2(), *this);
+    result.Keys = BuildTopicKey(Context().Pos(), result.Cluster, TDeferredAtom(Context().Pos(), topicName));
+
+    return true;
+}
+
+// Translates the right-hand side of a named nodes statement and fills `names` with the
+// bound parameter names from its left-hand side.
+// Returns the expression node (first alternative) or a source node wrapping the
+// subselect (second alternative); returns an empty pointer on failure.
+TNodePtr TSqlTranslation::NamedNode(const TRule_named_nodes_stmt& rule, TVector<TSymbolNameWithPos>& names) {
+    // named_nodes_stmt: bind_parameter_list EQUALS (expr | subselect_stmt);
+    // subselect_stmt: (LPAREN select_stmt RPAREN | select_unparenthesized_stmt);
+    if (!BindList(rule.GetRule_bind_parameter_list1(), names)) {
+        return {};
+    }
+
+    switch (rule.GetBlock3().Alt_case()) {
+    case TRule_named_nodes_stmt::TBlock3::kAlt1: {
+        TSqlExpression expr(Ctx, Mode);
+        auto result = expr.Build(rule.GetBlock3().GetAlt1().GetRule_expr1());
+        return result;
+    }
+
+    case TRule_named_nodes_stmt::TBlock3::kAlt2: {
+        const auto& subselect_rule = rule.GetBlock3().GetAlt2().GetRule_subselect_stmt1();
+
+        TSqlSelect expr(Ctx, Mode);
+        TPosition pos;
+        TSourcePtr source = nullptr;
+        switch (subselect_rule.GetBlock1().Alt_case()) {
+        case TRule_subselect_stmt::TBlock1::kAlt1:
+            source = expr.Build(subselect_rule.GetBlock1().GetAlt1().GetRule_select_stmt2(), pos);
+            break;
+
+        case TRule_subselect_stmt::TBlock1::kAlt2:
+            source = expr.Build(subselect_rule.GetBlock1().GetAlt2().GetRule_select_unparenthesized_stmt1(), pos);
+            break;
+
+        case TRule_subselect_stmt::TBlock1::ALT_NOT_SET:
+            AltNotImplemented("subselect_stmt", subselect_rule.GetBlock1());
+            Ctx.IncrementMonCounter("sql_errors", "UnknownNamedNode");
+            return nullptr;
+        }
+
+        if (!source) {
+            return {};
+        }
+
+        return BuildSourceNode(pos, std::move(source));
+    }
+
+    case TRule_named_nodes_stmt::TBlock3::ALT_NOT_SET:
+        AltNotImplemented("named_node", rule.GetBlock3());
+        Ctx.IncrementMonCounter("sql_errors", "UnknownNamedNode");
+        return nullptr;
+    }
+}
+
+// Translates an import statement: registers the module path in the context and pushes
+// a named node for every imported symbol (under its alias when one is given).
+// When `namesPtr` is not null, the names returned by PushNamedNode are appended to it.
+bool TSqlTranslation::ImportStatement(const TRule_import_stmt& stmt, TVector<TString>* namesPtr) {
+    TVector<TString> modulePath;
+    if (!ModulePath(stmt.GetRule_module_path2(), modulePath)) {
+        return false;
+    }
+
+    TVector<TSymbolNameWithPos> names;
+    TVector<TSymbolNameWithPos> aliases;
+    if (!NamedBindList(stmt.GetRule_named_bind_parameter_list4(), names, aliases)) {
+        return false;
+    }
+    YQL_ENSURE(names.size() == aliases.size());
+
+    const TString moduleAlias = Ctx.AddImport(std::move(modulePath));
+    if (!moduleAlias) {
+        return false;
+    }
+
+    for (size_t idx = 0; idx < names.size(); ++idx) {
+        auto& exported = names[idx];
+        auto& renamed = aliases[idx];
+
+        // The symbol is bound under the alias if present, otherwise under its own name.
+        auto& bound = renamed.Name ? renamed : exported;
+        if (IsAnonymousName(bound.Name)) {
+            Ctx.Error(bound.Pos) << "Can not import anonymous name " << bound.Name;
+            return false;
+        }
+
+        auto makeBindNode = [&](const TString& realName) {
+            YQL_ENSURE(realName == bound.Name);
+            auto exportedAtom = BuildQuotedAtom(exported.Pos, exported.Name);
+            return exportedAtom->Y("bind", moduleAlias, exportedAtom);
+        };
+
+        bound.Name = PushNamedNode(bound.Pos, bound.Name, makeBindNode);
+        if (namesPtr) {
+            namesPtr->push_back(bound.Name);
+        }
+    }
+    return true;
+}
+
+// Translates one sort specification (expression plus optional ASC/DESC) and appends it
+// to `sortSpecs`. The direction is ascending unless DESC is given.
+bool TSqlTranslation::SortSpecification(const TRule_sort_specification& node, TVector<TSortSpecificationPtr>& sortSpecs) {
+    TSqlExpression keyExpr(Ctx, Mode);
+    TColumnRefScope columnScope(Ctx, EColumnRefState::Allow);
+    TNodePtr keyNode = keyExpr.Build(node.GetRule_expr1());
+    if (!keyNode) {
+        return false;
+    }
+
+    bool ascending = true;
+    if (!node.HasBlock2()) {
+        Ctx.IncrementMonCounter("sql_features", "OrderByDefault");
+    } else {
+        const auto& directionToken = node.GetBlock2().GetToken1();
+        Token(directionToken);
+        auto directionId = directionToken.GetId();
+        if (IS_TOKEN(directionId, ASC)) {
+            Ctx.IncrementMonCounter("sql_features", "OrderByAsc");
+        } else if (IS_TOKEN(directionId, DESC)) {
+            ascending = false;
+            Ctx.IncrementMonCounter("sql_features", "OrderByDesc");
+        } else {
+            Ctx.IncrementMonCounter("sql_errors", "UnknownOrderBy");
+            Error() << "Unsupported direction token: " << directionToken.GetId();
+            return false;
+        }
+    }
+
+    sortSpecs.emplace_back(MakeIntrusive<TSortSpecification>(keyNode, ascending));
+    return true;
+}
+
+// Translates a list of sort specifications, appending each one to `sortSpecs`.
+// Stops and returns false at the first specification that fails to translate.
+bool TSqlTranslation::SortSpecificationList(const TRule_sort_specification_list& node, TVector<TSortSpecificationPtr>& sortSpecs) {
+    if (!SortSpecification(node.GetRule_sort_specification1(), sortSpecs)) {
+        return false;
+    }
+    // Iterate by const reference: the elements are parser rule messages and
+    // taking them by value would copy each one on every iteration.
+    for (const auto& sortSpec : node.GetBlock2()) {
+        Token(sortSpec.GetToken1());
+        if (!SortSpecification(sortSpec.GetRule_sort_specification2(), sortSpecs)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Returns true if the optional set quantifier is present and is DISTINCT.
+bool TSqlTranslation::IsDistinctOptSet(const TRule_opt_set_quantifier& node) const {
+    return node.HasBlock1() && IS_TOKEN(node.GetBlock1().GetToken1().GetId(), DISTINCT);
+}
+
+// Returns true if the optional set quantifier is present and is DISTINCT; in that case
+// `distinctPos` receives the position of the DISTINCT token. Otherwise `distinctPos`
+// is left untouched.
+bool TSqlTranslation::IsDistinctOptSet(const TRule_opt_set_quantifier& node, TPosition& distinctPos) const {
+    if (!node.HasBlock1()) {
+        return false;
+    }
+
+    const auto& quantifier = node.GetBlock1().GetToken1();
+    if (!IS_TOKEN(quantifier.GetId(), DISTINCT)) {
+        return false;
+    }
+
+    distinctPos = Ctx.TokenPosition(quantifier);
+    return true;
+}
+
+// Translates a role name (identifier or bind parameter) into `result`.
+// Unless `allowSystemRoles` is set, a literal name equal (case-insensitively) to
+// current_role, current_user or session_user is rejected with an error.
+bool TSqlTranslation::RoleNameClause(const TRule_role_name& node, TDeferredAtom& result, bool allowSystemRoles) {
+    // role_name: an_id_or_type | bind_parameter;
+    switch (node.Alt_case()) {
+    case TRule_role_name::kAltRoleName1: {
+        TString roleName = Id(node.GetAlt_role_name1().GetRule_an_id_or_type1(), *this);
+        result = TDeferredAtom(Ctx.Pos(), roleName);
+        break;
+    }
+    case TRule_role_name::kAltRoleName2: {
+        if (!BindParameterClause(node.GetAlt_role_name2().GetRule_bind_parameter1(), result)) {
+            return false;
+        }
+        break;
+    }
+    case TRule_role_name::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    // Only literal names can be checked here.
+    auto literalName = result.GetLiteral();
+    if (!literalName || allowSystemRoles) {
+        return true;
+    }
+
+    static const THashSet<TStringBuf> systemRoles = { "current_role", "current_user", "session_user" };
+    if (systemRoles.contains(to_lower(*literalName))) {
+        Ctx.Error() << "System role " << to_upper(*literalName) << " can not be used here";
+        return false;
+    }
+
+    return true;
+}
+
+// Translates a create_user_option into `result`, which is reset first.
+// IsPasswordEncrypted reflects the presence of the ENCRYPTED keyword; Password is set
+// only when the password expression is not NULL.
+bool TSqlTranslation::RoleParameters(const TRule_create_user_option& node, TRoleParameters& result) {
+    // create_user_option: ENCRYPTED? PASSWORD expr;
+    result = TRoleParameters{};
+
+    TSqlExpression passwordExpr(Ctx, Mode);
+    TNodePtr passwordNode = passwordExpr.Build(node.GetRule_expr3());
+    if (!passwordNode) {
+        return false;
+    }
+
+    result.IsPasswordEncrypted = node.HasBlock1();
+    if (passwordNode->IsNull()) {
+        return true;
+    }
+
+    result.Password = MakeAtomFromExpression(Ctx.Pos(), Ctx, passwordNode);
+    return true;
+}
+
+// Translates a permission_id into `result`.
+// One-keyword permissions use the identifier name as is; two-keyword permissions are
+// joined with an underscore. Always returns true.
+bool TSqlTranslation::PermissionNameClause(const TRule_permission_id& node, TDeferredAtom& result) {
+    // permission_id:
+    //   CONNECT
+    // | LIST
+    // | INSERT
+    // | MANAGE
+    // | DROP
+    // | GRANT
+    // | MODIFY (TABLES | ATTRIBUTES)
+    // | (UPDATE | ERASE) ROW
+    // | (REMOVE | DESCRIBE | ALTER) SCHEMA
+    // | SELECT (TABLES | ATTRIBUTES | ROW)?
+    // | (USE | FULL) LEGACY?
+    // | CREATE (DIRECTORY | TABLE | QUEUE)?
+
+    // Single keyword.
+    auto setSingle = [&result, this] (const auto& keyword) {
+        result = TDeferredAtom(Ctx.Pos(), GetIdentifier(*this, keyword).Name);
+    };
+
+    // Exactly two keywords, taken from the rule's first and second tokens.
+    auto setPair = [&result, this] (const auto& keywords) {
+        const auto& first = keywords.GetToken1();
+        const auto& second = keywords.GetToken2();
+        TString joined = TIdentifier(TPosition(first.GetColumn(), first.GetLine()), Identifier(first)).Name +
+                         "_" +
+                         TIdentifier(TPosition(second.GetColumn(), second.GetLine()), Identifier(second)).Name;
+        result = TDeferredAtom(Ctx.Pos(), joined);
+    };
+
+    // One keyword with an optional second one.
+    auto setSingleOrPair = [&result, this] (const auto& keywords) {
+        TString joined = GetIdentifier(*this, keywords).Name;
+        if (keywords.HasBlock2()) {
+            joined += "_" + GetIdentifier(*this, keywords.GetBlock2()).Name;
+        }
+        result = TDeferredAtom(Ctx.Pos(), joined);
+    };
+
+    switch (node.GetAltCase()) {
+    case TRule_permission_id::kAltPermissionId1: // CONNECT
+        setSingle(node.GetAlt_permission_id1());
+        break;
+    case TRule_permission_id::kAltPermissionId2: // LIST
+        setSingle(node.GetAlt_permission_id2());
+        break;
+    case TRule_permission_id::kAltPermissionId3: // INSERT
+        setSingle(node.GetAlt_permission_id3());
+        break;
+    case TRule_permission_id::kAltPermissionId4: // MANAGE
+        setSingle(node.GetAlt_permission_id4());
+        break;
+    case TRule_permission_id::kAltPermissionId5: // DROP
+        setSingle(node.GetAlt_permission_id5());
+        break;
+    case TRule_permission_id::kAltPermissionId6: // GRANT
+        setSingle(node.GetAlt_permission_id6());
+        break;
+    case TRule_permission_id::kAltPermissionId7: // MODIFY (TABLES | ATTRIBUTES)
+        setPair(node.GetAlt_permission_id7());
+        break;
+    case TRule_permission_id::kAltPermissionId8: // (UPDATE | ERASE) ROW
+        setPair(node.GetAlt_permission_id8());
+        break;
+    case TRule_permission_id::kAltPermissionId9: // (REMOVE | DESCRIBE | ALTER) SCHEMA
+        setPair(node.GetAlt_permission_id9());
+        break;
+    case TRule_permission_id::kAltPermissionId10: // SELECT (TABLES | ATTRIBUTES | ROW)?
+        setSingleOrPair(node.GetAlt_permission_id10());
+        break;
+    case TRule_permission_id::kAltPermissionId11: // (USE | FULL) LEGACY?
+        setSingleOrPair(node.GetAlt_permission_id11());
+        break;
+    case TRule_permission_id::kAltPermissionId12: // CREATE (DIRECTORY | TABLE | QUEUE)?
+        setSingleOrPair(node.GetAlt_permission_id12());
+        break;
+    case TRule_permission_id::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+    return true;
+}
+
+// Translates a permission_name into `result`.
+// A keyword permission is delegated to the permission_id overload; a string value is
+// unescaped with StringContent and its content is used as the permission name.
+// Returns false if the string value cannot be unescaped.
+bool TSqlTranslation::PermissionNameClause(const TRule_permission_name& node, TDeferredAtom& result) {
+    // permission_name: permission_id | STRING_VALUE;
+    switch (node.Alt_case()) {
+    case TRule_permission_name::kAltPermissionName1:
+        return PermissionNameClause(node.GetAlt_permission_name1().GetRule_permission_id1(), result);
+    case TRule_permission_name::kAltPermissionName2: {
+        const TString stringValue(Ctx.Token(node.GetAlt_permission_name2().GetToken1()));
+        auto unescaped = StringContent(Ctx, Ctx.Pos(), stringValue);
+        if (!unescaped) {
+            return false;
+        }
+        result = TDeferredAtom(Ctx.Pos(), unescaped->Content);
+        break;
+    }
+    case TRule_permission_name::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+    return true;
+}
+
+// Translates a permission_name_target into a list of permission atoms appended to `result`.
+// ALL [PRIVILEGES] is represented by the single atom "all_privileges". When
+// `withGrantOption` is set, an extra "grant" atom is appended at the end.
+bool TSqlTranslation::PermissionNameClause(const TRule_permission_name_target& node, TVector<TDeferredAtom>& result, bool withGrantOption) {
+    // permission_name_target: permission_name (COMMA permission_name)* COMMA? | ALL PRIVILEGES?;
+    switch (node.Alt_case()) {
+    case TRule_permission_name_target::kAltPermissionNameTarget1: {
+        const auto& nameList = node.GetAlt_permission_name_target1();
+
+        // Each atom is created in place first and then filled by the permission_name overload.
+        result.emplace_back();
+        if (!PermissionNameClause(nameList.GetRule_permission_name1(), result.back())) {
+            return false;
+        }
+        for (const auto& next : nameList.GetBlock2()) {
+            result.emplace_back();
+            if (!PermissionNameClause(next.GetRule_permission_name2(), result.back())) {
+                return false;
+            }
+        }
+        break;
+    }
+    case TRule_permission_name_target::kAltPermissionNameTarget2:
+        result.emplace_back(Ctx.Pos(), "all_privileges");
+        break;
+    case TRule_permission_name_target::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+
+    if (withGrantOption) {
+        result.emplace_back(Ctx.Pos(), "grant");
+    }
+    return true;
+}
+
+// Stores one string-valued setting into `result` under the lower-cased identifier name.
+// Fails (with an error in the context) if the key is already present or if the value
+// is not the second alternative of table_setting_value, which this function reports
+// as "should be a string literal". `value` must not be null.
+bool TSqlTranslation::StoreStringSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) {
+    YQL_ENSURE(value);
+
+    const TString key = to_lower(id.Name);
+    if (result.find(key) != result.end()) {
+        Ctx.Error() << to_upper(key) << " duplicate keys";
+        return false;
+    }
+
+    if (value->Alt_case() != TRule_table_setting_value::kAltTableSettingValue2) {
+        Ctx.Error() << to_upper(key) << " value should be a string literal";
+        return false;
+    }
+
+    return StoreString(*value, result[key], Ctx, to_upper(key));
+}
+
+// Convenience overload: extracts the identifier and the value from an
+// alter_table_setting_entry and forwards them to the identifier/value overload.
+bool TSqlTranslation::StoreStringSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) {
+    const TIdentifier settingId = IdEx(entry.GetRule_an_id1(), *this);
+    const TRule_table_setting_value* settingValue = &entry.GetRule_table_setting_value3();
+    return StoreStringSettingsEntry(settingId, settingValue, result);
+}
+
+// Collects all entries of a backup_collection_settings rule into `result`.
+// Returns false at the first entry that cannot be stored.
+bool TSqlTranslation::ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, const TRule_backup_collection_settings& settings) {
+    auto storeEntry = [&](const auto& settingsEntry) {
+        return StoreStringSettingsEntry(IdEx(settingsEntry.GetRule_an_id1(), *this), &settingsEntry.GetRule_table_setting_value3(), result);
+    };
+
+    if (!storeEntry(settings.GetRule_backup_collection_settings_entry1())) {
+        return false;
+    }
+    for (const auto& next : settings.GetBlock2()) {
+        if (!storeEntry(next.GetRule_backup_collection_settings_entry2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+// Processes the actions of an ALTER for a backup collection: settings being set are stored
+// into `result`, lower-cased names of settings being reset are inserted into `toReset`.
+// Returns false at the first setting that cannot be stored.
+bool TSqlTranslation::ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_backup_collection_actions& actions) {
+    auto applyAction = [&](auto& action) {
+        switch (action.Alt_case()) {
+        case TRule_alter_backup_collection_action::kAltAlterBackupCollectionAction1: {
+            // Set one or more settings.
+            const auto& setClause = action.GetAlt_alter_backup_collection_action1().GetRule_alter_table_set_table_setting_compat1();
+            if (!StoreStringSettingsEntry(setClause.GetRule_alter_table_setting_entry3(), result)) {
+                return false;
+            }
+            for (const auto& next : setClause.GetBlock4()) {
+                if (!StoreStringSettingsEntry(next.GetRule_alter_table_setting_entry2(), result)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        case TRule_alter_backup_collection_action::kAltAlterBackupCollectionAction2: {
+            // Reset one or more settings.
+            const auto& resetClause = action.GetAlt_alter_backup_collection_action2().GetRule_alter_table_reset_table_setting1();
+            const TString firstKey = to_lower(IdEx(resetClause.GetRule_an_id3(), *this).Name);
+            toReset.insert(firstKey);
+            for (const auto& next : resetClause.GetBlock4()) {
+                toReset.insert(to_lower(IdEx(next.GetRule_an_id2(), *this).Name));
+            }
+            return true;
+        }
+        case TRule_alter_backup_collection_action::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+        }
+    };
+
+    if (!applyAction(actions.GetRule_alter_backup_collection_action1())) {
+        return false;
+    }
+
+    for (const auto& next : actions.GetBlock2()) {
+        if (!applyAction(next.GetRule_alter_backup_collection_action2())) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Appends an atom for every table identifier of a table_list to `result`.
+// Always returns true.
+bool TSqlTranslation::ParseBackupCollectionTables(TVector<TDeferredAtom>& result, const TRule_table_list& tables) {
+    auto appendTable = [&](const auto& tableId) {
+        result.push_back(TDeferredAtom(Ctx.Pos(), Id(tableId, *this)));
+    };
+
+    appendTable(tables.GetRule_an_id_table2());
+    for (const auto& next : tables.GetBlock3()) {
+        appendTable(next.GetRule_an_id_table3());
+    }
+    return true;
+}
+
+// Applies one alter_backup_collection_entry to the output parameters:
+// alternatives 1 and 2 set `addDatabase` / `removeDatabase`, alternatives 3 and 4 append
+// the table identifier to `addTables` / `removeTables`. Always returns true.
+bool TSqlTranslation::ParseBackupCollectionEntry(
+    bool& addDatabase,
+    bool& removeDatabase,
+    TVector<TDeferredAtom>& addTables,
+    TVector<TDeferredAtom>& removeTables,
+    const TRule_alter_backup_collection_entry& entry)
+{
+    switch (entry.Alt_case()) {
+    case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry1: {
+        addDatabase = true;
+        return true;
+    }
+    case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry2: {
+        removeDatabase = true;
+        return true;
+    }
+    case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry3: {
+        // Bind by const reference to avoid copying the rule message.
+        const auto& table = entry.GetAlt_alter_backup_collection_entry3().GetRule_an_id_table3();
+        addTables.push_back(TDeferredAtom(Ctx.Pos(), Id(table, *this)));
+        return true;
+    }
+    case TRule_alter_backup_collection_entry::kAltAlterBackupCollectionEntry4: {
+        // Bind by const reference to avoid copying the rule message.
+        const auto& table = entry.GetAlt_alter_backup_collection_entry4().GetRule_an_id_table3();
+        removeTables.push_back(TDeferredAtom(Ctx.Pos(), Id(table, *this)));
+        return true;
+    }
+    case TRule_alter_backup_collection_entry::ALT_NOT_SET:
+        Y_ABORT("You should change implementation according to grammar changes");
+    }
+    return true;
+}
+
+// Applies every entry of an alter_backup_collection_entries rule, in order, to the
+// output parameters. Returns false at the first entry that fails.
+bool TSqlTranslation::ParseBackupCollectionEntries(
+    bool& addDatabase,
+    bool& removeDatabase,
+    TVector<TDeferredAtom>& addTables,
+    TVector<TDeferredAtom>& removeTables,
+    const TRule_alter_backup_collection_entries& entries)
+{
+    auto applyEntry = [&](const TRule_alter_backup_collection_entry& single) {
+        return ParseBackupCollectionEntry(addDatabase, removeDatabase, addTables, removeTables, single);
+    };
+
+    if (!applyEntry(entries.GetRule_alter_backup_collection_entry1())) {
+        return false;
+    }
+    for (const auto& next : entries.GetBlock2()) {
+        if (!applyEntry(next.GetRule_alter_backup_collection_entry2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+TString TSqlTranslation::FrameSettingsToString(EFrameSettings settings, bool isUnbounded) {
+ TString result;
+ switch (settings) {
+ case FramePreceding:
+ result = "PRECEDING"; break;
+ case FrameCurrentRow:
+ YQL_ENSURE(!isUnbounded);
+ result = "CURRENT ROW"; break;
+ case FrameFollowing:
+ result = "FOLLOWING"; break;
+ default:
+ Y_ABORT("Unexpected frame settings");
+ }
+
+ return (isUnbounded ? "UNBOUNDED " : "") + result;
+}
+
+// Validates a frame bound given as a literal.
+// `boundValue` is cleared first and receives the parsed value only for a non-negative
+// Int32 literal. Bounds that are absent or not literals are accepted without a value.
+// Returns false (with an error in `ctx`) for a literal of another type or a negative value.
+bool CheckFrameBoundLiteral(TContext& ctx, const TFrameBound& bound, TMaybe<i32>& boundValue) {
+    boundValue = {};
+
+    auto boundNode = bound.Bound;
+    if (!boundNode || !boundNode->IsLiteral()) {
+        return true;
+    }
+
+    auto literalType = boundNode->GetLiteralType();
+    if (literalType != "Int32") {
+        ctx.Error(boundNode->GetPos()) << "Expecting Int32 as frame bound value, but got " << literalType << " literal";
+        return false;
+    }
+
+    i32 parsed = FromString<i32>(boundNode->GetLiteralValue());
+    if (parsed < 0) {
+        ctx.Error(boundNode->GetPos()) << "Expecting non-negative value for frame bound, but got " << parsed;
+        return false;
+    }
+
+    boundValue = parsed;
+    return true;
+}
+
+// Checks that a frame specification is consistent and reports the first problem found.
+// Both frame bounds must be set and have defined settings. `sortSpecSize` is the number
+// of ORDER BY expressions; it matters only for RANGE frames with an offset bound.
+// A frame whose literal bounds imply an empty window only produces a warning.
+bool TSqlTranslation::IsValidFrameSettings(TContext& ctx, const TFrameSpecification& frameSpec, size_t sortSpecSize) {
+    const TFrameBound& begin = *frameSpec.FrameBegin;
+    const TFrameBound& end = *frameSpec.FrameEnd;
+
+    YQL_ENSURE(begin.Settings != FrameUndefined);
+    YQL_ENSURE(end.Settings != FrameUndefined);
+
+    // A bound without an expression is unbounded unless it is CURRENT ROW.
+    const bool beginUnbounded = !begin.Bound && begin.Settings != FrameCurrentRow;
+    const bool endUnbounded = !end.Bound && end.Settings != FrameCurrentRow;
+
+    const bool startsFromUnboundedFollowing = beginUnbounded && begin.Settings == FrameFollowing;
+    if (startsFromUnboundedFollowing) {
+        ctx.Error(begin.Pos) << "Frame cannot start from " << FrameSettingsToString(begin.Settings, beginUnbounded);
+        return false;
+    }
+
+    const bool endsWithUnboundedPreceding = endUnbounded && end.Settings == FramePreceding;
+    if (endsWithUnboundedPreceding) {
+        ctx.Error(end.Pos) << "Frame cannot end with " << FrameSettingsToString(end.Settings, endUnbounded);
+        return false;
+    }
+
+    if (begin.Settings > end.Settings) {
+        ctx.Error(begin.Pos) << "Frame cannot start from " << FrameSettingsToString(begin.Settings, beginUnbounded)
+                             << " and end with " << FrameSettingsToString(end.Settings, endUnbounded);
+        return false;
+    }
+
+    if (frameSpec.FrameType == FrameByRange && sortSpecSize != 1) {
+        // The error is reported at the first bound that carries an offset expression;
+        // without any offset the frame is accepted.
+        TStringBuf msg = "RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause";
+        if (begin.Bound) {
+            ctx.Error(begin.Bound->GetPos()) << msg;
+            return false;
+        }
+        if (end.Bound) {
+            ctx.Error(end.Bound->GetPos()) << msg;
+            return false;
+        }
+    }
+
+    TMaybe<i32> beginOffset;
+    TMaybe<i32> endOffset;
+
+    // Literal bounds are validated for every frame type except RANGE.
+    if (frameSpec.FrameType != EFrameType::FrameByRange) {
+        if (!CheckFrameBoundLiteral(ctx, begin, beginOffset)) {
+            return false;
+        }
+        if (!CheckFrameBoundLiteral(ctx, end, endOffset)) {
+            return false;
+        }
+    }
+
+    if (!beginOffset.Defined() || !endOffset.Defined()) {
+        return true;
+    }
+
+    // PRECEDING offsets are negated so that both offsets can be compared directly.
+    if (begin.Settings == FramePreceding) {
+        beginOffset = 0 - *beginOffset;
+    }
+    if (end.Settings == FramePreceding) {
+        endOffset = 0 - *endOffset;
+    }
+
+    if (*beginOffset > *endOffset) {
+        YQL_ENSURE(begin.Bound);
+        ctx.Warning(begin.Bound->GetPos(), TIssuesIds::YQL_EMPTY_WINDOW_FRAME) << "Used frame specification implies empty window frame";
+    }
+
+    return true;
+}
+
+// Translates a window_frame_bound into a freshly allocated TFrameBound stored in `bound`.
+// For an expression bound, Bound holds the translated expression and Pos its position;
+// for UNBOUNDED and CURRENT ROW, Bound stays unset and Pos is the keyword position.
+// Returns false if the bound expression cannot be translated.
+bool TSqlTranslation::FrameBound(const TRule_window_frame_bound& rule, TFrameBoundPtr& bound) {
+    // window_frame_bound:
+    //    CURRENT ROW
+    //  | (expr | UNBOUNDED) (PRECEDING | FOLLOWING)
+    // ;
+    bound = new TFrameBound;
+    switch (rule.Alt_case()) {
+    case TRule_window_frame_bound::kAltWindowFrameBound1:
+        bound->Pos = GetPos(rule.GetAlt_window_frame_bound1().GetToken1());
+        bound->Settings = FrameCurrentRow;
+        break;
+    case TRule_window_frame_bound::kAltWindowFrameBound2: {
+        // Bind by const reference: copying the sub-message would duplicate the
+        // whole bound expression subtree.
+        const auto& block = rule.GetAlt_window_frame_bound2().GetBlock1();
+        switch (block.Alt_case()) {
+        case TRule_window_frame_bound_TAlt2_TBlock1::kAlt1: {
+            TSqlExpression boundExpr(Ctx, Mode);
+            bound->Bound = boundExpr.Build(block.GetAlt1().GetRule_expr1());
+            if (!bound->Bound) {
+                return false;
+            }
+            bound->Pos = bound->Bound->GetPos();
+            break;
+        }
+        case TRule_window_frame_bound_TAlt2_TBlock1::kAlt2:
+            bound->Pos = GetPos(block.GetAlt2().GetToken1());
+            break;
+        case TRule_window_frame_bound_TAlt2_TBlock1::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+        }
+
+        const TString settingToken = to_lower(Token(rule.GetAlt_window_frame_bound2().GetToken2()));
+        if (settingToken == "preceding") {
+            bound->Settings = FramePreceding;
+        } else if (settingToken == "following") {
+            bound->Settings = FrameFollowing;
+        } else {
+            Y_ABORT("You should change implementation according to grammar changes");
+        }
+        break;
+    }
+    case TRule_window_frame_bound::ALT_NOT_SET:
+        Y_ABORT("FrameClause: frame bound not corresond to grammar changes");
+    }
+    return true;
+}
+
+// Translates a window_frame_clause into a freshly allocated TFrameSpecification stored
+// in `frameSpec` and validates it with IsValidFrameSettings.
+// `sortSpecSize` is forwarded to that validation.
+// Returns false on translation or validation failure, or when the resulting frame
+// exclusion is anything other than FrameExclNone (not supported yet).
+bool TSqlTranslation::FrameClause(const TRule_window_frame_clause& rule, TFrameSpecificationPtr& frameSpec, size_t sortSpecSize) {
+    // window_frame_clause: window_frame_units window_frame_extent window_frame_exclusion?;
+    frameSpec = new TFrameSpecification;
+    const TString frameUnitStr = to_lower(Token(rule.GetRule_window_frame_units1().GetToken1()));
+    if (frameUnitStr == "rows") {
+        frameSpec->FrameType = EFrameType::FrameByRows;
+    } else if (frameUnitStr == "range") {
+        frameSpec->FrameType = EFrameType::FrameByRange;
+    } else {
+        YQL_ENSURE(frameUnitStr == "groups");
+        frameSpec->FrameType = EFrameType::FrameByGroups;
+    }
+
+    // The rule messages below are bound by const reference: copying them would
+    // duplicate the nested bound expressions for no benefit.
+    const auto& frameExtent = rule.GetRule_window_frame_extent2();
+    // window_frame_extent: window_frame_bound | window_frame_between;
+    switch (frameExtent.Alt_case()) {
+    case TRule_window_frame_extent::kAltWindowFrameExtent1: {
+        const auto& start = frameExtent.GetAlt_window_frame_extent1().GetRule_window_frame_bound1();
+        if (!FrameBound(start, frameSpec->FrameBegin)) {
+            return false;
+        }
+
+        // frame end is CURRENT ROW
+        frameSpec->FrameEnd = new TFrameBound;
+        frameSpec->FrameEnd->Pos = frameSpec->FrameBegin->Pos;
+        frameSpec->FrameEnd->Settings = FrameCurrentRow;
+        break;
+    }
+    case TRule_window_frame_extent::kAltWindowFrameExtent2: {
+        // window_frame_between: BETWEEN window_frame_bound AND window_frame_bound;
+        const auto& between = frameExtent.GetAlt_window_frame_extent2().GetRule_window_frame_between1();
+        if (!FrameBound(between.GetRule_window_frame_bound2(), frameSpec->FrameBegin)) {
+            return false;
+        }
+        if (!FrameBound(between.GetRule_window_frame_bound4(), frameSpec->FrameEnd)) {
+            return false;
+        }
+        break;
+    }
+    case TRule_window_frame_extent::ALT_NOT_SET:
+        Y_ABORT("FrameClause: frame extent not correspond to grammar changes");
+    }
+    YQL_ENSURE(frameSpec->FrameBegin);
+    YQL_ENSURE(frameSpec->FrameEnd);
+    if (!IsValidFrameSettings(Ctx, *frameSpec, sortSpecSize)) {
+        return false;
+    }
+
+    if (rule.HasBlock3()) {
+        // window_frame_exclusion: EXCLUDE CURRENT ROW | EXCLUDE GROUP | EXCLUDE TIES | EXCLUDE NO OTHERS;
+        switch (rule.GetBlock3().GetRule_window_frame_exclusion1().Alt_case()) {
+        case TRule_window_frame_exclusion::kAltWindowFrameExclusion1:
+            frameSpec->FrameExclusion = FrameExclCurRow;
+            break;
+        case TRule_window_frame_exclusion::kAltWindowFrameExclusion2:
+            frameSpec->FrameExclusion = FrameExclGroup;
+            break;
+        case TRule_window_frame_exclusion::kAltWindowFrameExclusion3:
+            frameSpec->FrameExclusion = FrameExclTies;
+            break;
+        case TRule_window_frame_exclusion::kAltWindowFrameExclusion4:
+            frameSpec->FrameExclusion = FrameExclNone;
+            break;
+        case TRule_window_frame_exclusion::ALT_NOT_SET:
+            Y_ABORT("FrameClause: frame exclusion not correspond to grammar changes");
+        }
+    }
+
+    // NOTE(review): when no exclusion clause is given this relies on the default value
+    // of TFrameSpecification::FrameExclusion, which is not visible here - confirm.
+    if (frameSpec->FrameExclusion != FrameExclNone) {
+        Ctx.Error() << "Frame exclusion is not supported yet";
+        return false;
+    }
+
+    return true;
+}
+
+// Builds a window specification (partitioning, ordering and frame) from a
+// window_specification_details node, then normalizes the frame.
+// Returns an empty pointer when any sub-clause fails or an existing window name is
+// referenced (reported as unsupported).
+TWindowSpecificationPtr TSqlTranslation::WindowSpecification(const TRule_window_specification_details& rule) {
+    /*
+    window_specification_details:
+        existing_window_name?
+        window_partition_clause?
+        window_order_clause?
+        window_frame_clause?
+    */
+    TWindowSpecificationPtr winSpecPtr = new TWindowSpecification;
+    if (rule.HasBlock1()) {
+        Ctx.Error() << "Existing window name is not supported in window specification yet!";
+        return {};
+    }
+    if (rule.HasBlock2()) {
+        /*
+        window_partition_clause: PARTITION COMPACT? BY named_expr_list;
+        */
+        auto& partitionClause = rule.GetBlock2().GetRule_window_partition_clause1();
+        winSpecPtr->IsCompact = partitionClause.HasBlock2();
+        if (!winSpecPtr->IsCompact) {
+            // No explicit COMPACT keyword: fall back to a "compact" SQL hint attached to the PARTITION token.
+            auto hints = Ctx.PullHintForToken(Ctx.TokenPosition(partitionClause.GetToken1()));
+            winSpecPtr->IsCompact = AnyOf(hints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
+        }
+        TColumnRefScope scope(Ctx, EColumnRefState::Allow);
+        if (!NamedExprList(partitionClause.GetRule_named_expr_list4(), winSpecPtr->Partitions)) {
+            return {};
+        }
+        // ignore empty unnamed tuples:
+        // "PARTITION BY (), foo(x) as y, (), (z)" is allowed and will work exactly the same as
+        // "PARTITION BY foo(x) as y, z"
+        auto removed = std::remove_if(winSpecPtr->Partitions.begin(), winSpecPtr->Partitions.end(),
+            [](const TNodePtr& partitionNode) {
+                return !partitionNode->GetLabel() && !partitionNode->GetColumnName() &&
+                    partitionNode->GetTupleNode() != nullptr &&
+                    partitionNode->GetTupleSize() == 0;
+            });
+        winSpecPtr->Partitions.erase(removed, winSpecPtr->Partitions.end());
+    }
+    if (rule.HasBlock3()) {
+        if (!OrderByClause(rule.GetBlock3().GetRule_window_order_clause1().GetRule_order_by_clause1(), winSpecPtr->OrderBy)) {
+            return {};
+        }
+    }
+    const bool ordered = !winSpecPtr->OrderBy.empty();
+    if (rule.HasBlock4()) {
+        if (!FrameClause(rule.GetBlock4().GetRule_window_frame_clause1(), winSpecPtr->Frame, winSpecPtr->OrderBy.size())) {
+            return {};
+        }
+    } else {
+        // No explicit frame clause: synthesize the default frame.
+        winSpecPtr->Frame = new TFrameSpecification;
+        winSpecPtr->Frame->FrameBegin = new TFrameBound;
+        winSpecPtr->Frame->FrameEnd = new TFrameBound;
+        winSpecPtr->Frame->FrameBegin->Pos = winSpecPtr->Frame->FrameEnd->Pos = Ctx.Pos();
+        winSpecPtr->Frame->FrameExclusion = EFrameExclusions::FrameExclNone;
+
+        winSpecPtr->Frame->FrameBegin->Settings = EFrameSettings::FramePreceding;
+        if (Ctx.AnsiCurrentRow) {
+            // RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+            winSpecPtr->Frame->FrameType = EFrameType::FrameByRange;
+            winSpecPtr->Frame->FrameEnd->Settings = EFrameSettings::FrameCurrentRow;
+        } else if (ordered) {
+            // legacy behavior
+            // ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+            winSpecPtr->Frame->FrameType = EFrameType::FrameByRows;
+            winSpecPtr->Frame->FrameEnd->Settings = EFrameSettings::FrameCurrentRow;
+        } else {
+            // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+            winSpecPtr->Frame->FrameType = EFrameType::FrameByRows;
+            winSpecPtr->Frame->FrameEnd->Settings = EFrameSettings::FrameFollowing;
+        }
+    }
+
+    // Normalize and simplify
+    // Rewrites a bound into PRECEDING/FOLLOWING with the given offset (null offset = UNBOUNDED).
+    auto replaceCurrentWith = [](TFrameBound& frame, bool preceding, TNodePtr value) {
+        frame.Settings = preceding ? EFrameSettings::FramePreceding : EFrameSettings::FrameFollowing;
+        frame.Bound = value;
+    };
+
+    const auto frameSpec = winSpecPtr->Frame;
+    if (!ordered && frameSpec->FrameType != EFrameType::FrameByRows) {
+        // CURRENT ROW -> UNBOUNDED
+        if (frameSpec->FrameBegin->Settings == EFrameSettings::FrameCurrentRow) {
+            replaceCurrentWith(*frameSpec->FrameBegin, true, nullptr);
+        }
+        if (frameSpec->FrameEnd->Settings == EFrameSettings::FrameCurrentRow) {
+            // The end bound itself must become UNBOUNDED FOLLOWING. Rewriting FrameBegin here
+            // (as the code previously did) left the end bound untouched and clobbered the begin bound.
+            replaceCurrentWith(*frameSpec->FrameEnd, false, nullptr);
+        }
+    }
+
+    // RANGE/GROUPS UNBOUNDED -> ROWS UNBOUNDED
+    if (frameSpec->FrameBegin->Settings == EFrameSettings::FramePreceding && !frameSpec->FrameBegin->Bound &&
+        frameSpec->FrameEnd->Settings == EFrameSettings::FrameFollowing && !frameSpec->FrameEnd->Bound)
+    {
+        frameSpec->FrameType = EFrameType::FrameByRows;
+    }
+
+    if (frameSpec->FrameType != EFrameType::FrameByRange) {
+        // replace FrameCurrentRow for ROWS/GROUPS with 0 preceding/following
+        // FrameCurrentRow has special meaning ( = first/last peer row)
+        if (frameSpec->FrameBegin->Settings == EFrameSettings::FrameCurrentRow) {
+            TNodePtr zero = new TLiteralNumberNode<i32>(winSpecPtr->Frame->FrameBegin->Pos, "Int32", "0");
+            replaceCurrentWith(*frameSpec->FrameBegin, true, zero);
+        }
+
+        if (frameSpec->FrameEnd->Settings == EFrameSettings::FrameCurrentRow) {
+            TNodePtr zero = new TLiteralNumberNode<i32>(winSpecPtr->Frame->FrameEnd->Pos, "Int32", "0");
+            replaceCurrentWith(*frameSpec->FrameEnd, false, zero);
+        }
+    }
+
+    return winSpecPtr;
+}
+
+// Translates a DO statement, which is either a call of a named/empty action or an inline block.
+// With makeLambda set, the result is wrapped into a lambda taking "world" plus the names in args.
+// Returns a null node on failure.
+TNodePtr TSqlTranslation::DoStatement(const TRule_do_stmt& stmt, bool makeLambda, const TVector<TString>& args) {
+    // Builds the lambda parameter list: "world" followed by one atom per entry of args.
+    const auto buildLambdaParams = [&]() -> TNodePtr {
+        TNodePtr lambdaParams = new TAstListNodeImpl(Ctx.Pos());
+        lambdaParams->Add("world");
+        for (const auto& argName : args) {
+            lambdaParams->Add(new TAstAtomNodeImpl(Ctx.Pos(), argName, TNodeFlags::ArbitraryContent));
+        }
+        return lambdaParams;
+    };
+
+    const auto& doBlock = stmt.GetBlock2();
+    switch (doBlock.Alt_case()) {
+        case TRule_do_stmt_TBlock2::kAlt1: {
+            const auto& callAction = doBlock.GetAlt1().GetRule_call_action1();
+            const auto& target = callAction.GetBlock1();
+
+            // Resolve what is being called: a bound named node or the empty action.
+            TNodePtr action;
+            switch (target.GetAltCase()) {
+                case TRule_call_action_TBlock1::kAlt1: {
+                    TString bindName;
+                    if (!NamedNodeImpl(target.GetAlt1().GetRule_bind_parameter1(), bindName, *this)) {
+                        return nullptr;
+                    }
+                    action = GetNamedNode(bindName);
+                    if (!action) {
+                        return nullptr;
+                    }
+                    break;
+                }
+                case TRule_call_action_TBlock1::kAlt2:
+                    action = BuildEmptyAction(Ctx.Pos());
+                    break;
+                case TRule_call_action_TBlock1::ALT_NOT_SET:
+                    Ctx.IncrementMonCounter("sql_errors", "UnknownDoStmt");
+                    AltNotImplemented("do_stmt", target);
+                    return nullptr;
+            }
+
+            // (Apply <action> world <explicit arguments...>)
+            TVector<TNodePtr> applyItems;
+            applyItems.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "Apply", TNodeFlags::Default));
+            applyItems.push_back(action);
+            applyItems.push_back(new TAstAtomNodeImpl(Ctx.Pos(), "world", TNodeFlags::Default));
+
+            TSqlExpression sqlExpr(Ctx, Mode);
+            if (callAction.HasBlock3()) {
+                if (!ExprList(sqlExpr, applyItems, callAction.GetBlock3().GetRule_expr_list1())) {
+                    return nullptr;
+                }
+            }
+
+            TNodePtr apply = new TAstListNodeImpl(Ctx.Pos(), std::move(applyItems));
+            if (makeLambda) {
+                TNodePtr lambdaParams = buildLambdaParams();
+                return BuildDoCall(Ctx.Pos(), BuildLambda(Ctx.Pos(), lambdaParams, apply));
+            }
+            return BuildDoCall(Ctx.Pos(), apply);
+        }
+        case TRule_do_stmt_TBlock2::kAlt2: {
+            const auto& inlineAction = doBlock.GetAlt2().GetRule_inline_action1();
+            const auto& body = inlineAction.GetRule_define_action_or_subquery_body2();
+
+            // Translate the inline body in a nested scope: a copy of the current scoped state
+            // with fresh local state.
+            auto outerScoped = Ctx.Scoped;
+            Ctx.Scoped = MakeIntrusive<TScopedState>();
+            Ctx.AllScopes.push_back(Ctx.Scoped);
+            *Ctx.Scoped = *outerScoped;
+            Ctx.Scoped->Local = TScopedState::TLocal{};
+            Ctx.ScopeLevel++;
+            TSqlQuery query(Ctx, Ctx.Settings.Mode, false);
+            TBlocks innerBlocks;
+
+            TNodePtr blockQuery;
+            if (DefineActionOrSubqueryBody(query, innerBlocks, body)) {
+                blockQuery = BuildQuery(Ctx.Pos(), innerBlocks, false, Ctx.Scoped);
+            }
+            WarnUnusedNodes();
+            Ctx.ScopeLevel--;
+            Ctx.Scoped = outerScoped;
+
+            if (!blockQuery) {
+                return {};
+            }
+
+            TNodePtr blockNode = new TAstListNodeImpl(Ctx.Pos());
+            blockNode->Add("block");
+            blockNode->Add(blockNode->Q(blockQuery));
+            if (makeLambda) {
+                TNodePtr lambdaParams = buildLambdaParams();
+                return BuildLambda(Ctx.Pos(), lambdaParams, blockNode);
+            }
+            return blockNode;
+        }
+        case TRule_do_stmt_TBlock2::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Translates every statement of an action/subquery body into blocks using the given query translator.
+// An empty body is valid. Returns false as soon as one statement fails to translate.
+bool TSqlTranslation::DefineActionOrSubqueryBody(TSqlQuery& query, TBlocks& blocks, const TRule_define_action_or_subquery_body& body) {
+    if (!body.HasBlock2()) {
+        return true;
+    }
+
+    // Register blocks as the current block list while the body is translated;
+    // it is popped again on every exit path.
+    Ctx.PushCurrentBlocks(&blocks);
+    Y_DEFER {
+        Ctx.PopCurrentBlocks();
+    };
+
+    const auto& statements = body.GetBlock2();
+    if (!query.Statement(blocks, statements.GetRule_sql_stmt_core1())) {
+        return false;
+    }
+
+    for (const auto& followingItem : statements.GetBlock2()) {
+        if (!query.Statement(blocks, followingItem.GetRule_sql_stmt_core2())) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Translates a DEFINE ACTION / DEFINE SUBQUERY statement.
+// On success nameAndPos receives the bound name and its position, and lambda receives a lambda
+// over "world" plus the declared arguments (wrapped into WithOptionalArgs when some arguments are optional).
+// The body is translated in a nested scope; the outer scope, scope level and SQL mode are
+// restored on every exit path taken after the nested scope has been entered.
+bool TSqlTranslation::DefineActionOrSubqueryStatement(const TRule_define_action_or_subquery_stmt& stmt, TSymbolNameWithPos& nameAndPos, TNodePtr& lambda) {
+    auto kind = Ctx.Token(stmt.GetToken2());
+    const bool isSubquery = to_lower(kind) == "subquery";
+    if (!isSubquery && Mode == NSQLTranslation::ESqlMode::SUBQUERY) {
+        Error() << "Definition of actions is not allowed in the subquery";
+        return false;
+    }
+
+    TString actionName;
+    if (!NamedNodeImpl(stmt.GetRule_bind_parameter3(), actionName, *this)) {
+        return false;
+    }
+    if (IsAnonymousName(actionName)) {
+        Error() << "Can not use anonymous name '" << actionName << "' as " << to_upper(kind) << " name";
+        return false;
+    }
+    TPosition actionNamePos = Ctx.Pos();
+
+    TVector<TSymbolNameWithPos> argNames;
+    ui32 optionalArgumentsCount = 0;
+    if (stmt.HasBlock5() && !ActionOrSubqueryArgs(stmt.GetBlock5().GetRule_action_or_subquery_args1(), argNames, optionalArgumentsCount)) {
+        return false;
+    }
+
+    // Enter a nested scope: a copy of the current scoped state with fresh local state.
+    auto saveScoped = Ctx.Scoped;
+    Ctx.Scoped = MakeIntrusive<TScopedState>();
+    Ctx.AllScopes.push_back(Ctx.Scoped);
+    *Ctx.Scoped = *saveScoped;
+    Ctx.Scoped->Local = TScopedState::TLocal{};
+    Ctx.ScopeLevel++;
+
+    for (auto& arg : argNames) {
+        arg.Name = PushNamedAtom(arg.Pos, arg.Name);
+    }
+
+    auto saveMode = Ctx.Settings.Mode;
+    if (isSubquery) {
+        Ctx.Settings.Mode = NSQLTranslation::ESqlMode::SUBQUERY;
+    }
+
+    // Undoes the scope/mode changes made above.
+    const auto restoreOuterState = [&]() {
+        Ctx.Scoped = saveScoped;
+        Ctx.ScopeLevel--;
+        Ctx.Settings.Mode = saveMode;
+    };
+
+    TSqlQuery query(Ctx, Ctx.Settings.Mode, false);
+    TBlocks innerBlocks;
+    const bool hasValidBody = DefineActionOrSubqueryBody(query, innerBlocks, stmt.GetRule_define_action_or_subquery_body8());
+
+    // Count result-producing statements (subquery aliases are skipped) and detect
+    // any other statement following the first of them.
+    ui32 topLevelSelects = 0;
+    bool hasTailOps = false;
+    for (auto& block : innerBlocks) {
+        if (block->SubqueryAlias()) {
+            continue;
+        }
+
+        if (block->HasSelectResult()) {
+            ++topLevelSelects;
+        } else if (topLevelSelects) {
+            hasTailOps = true;
+        }
+    }
+
+    if (isSubquery && (topLevelSelects != 1 || hasTailOps)) {
+        Error() << "Strictly one select/process/reduce statement is expected at the end of subquery";
+        // Do not leave the context stuck in the nested scope / SUBQUERY mode on this error path.
+        restoreOuterState();
+        return false;
+    }
+
+    auto ret = hasValidBody ? BuildQuery(Ctx.Pos(), innerBlocks, false, Ctx.Scoped) : nullptr;
+    WarnUnusedNodes();
+    restoreOuterState();
+
+    if (!ret) {
+        return false;
+    }
+
+    TNodePtr blockNode = new TAstListNodeImpl(Ctx.Pos());
+    blockNode->Add("block");
+    blockNode->Add(blockNode->Q(ret));
+
+    TNodePtr params = new TAstListNodeImpl(Ctx.Pos());
+    params->Add("world");
+    for (const auto& arg : argNames) {
+        params->Add(BuildAtom(arg.Pos, arg.Name));
+    }
+
+    lambda = BuildLambda(Ctx.Pos(), params, blockNode);
+    if (optionalArgumentsCount > 0) {
+        lambda = new TCallNodeImpl(Ctx.Pos(), "WithOptionalArgs", {
+            lambda,
+            BuildQuotedAtom(Ctx.Pos(), ToString(optionalArgumentsCount), TNodeFlags::Default)
+        });
+    }
+
+    nameAndPos.Name = actionName;
+    nameAndPos.Pos = actionNamePos;
+    return true;
+}
+
+// Translates an IF statement into a world-if node: condition, mandatory "then" DO statement
+// and optional "else" DO statement. Returns a null node if any part fails to translate.
+TNodePtr TSqlTranslation::IfStatement(const TRule_if_stmt& stmt) {
+    // Presence of the optional leading grammar block selects the "evaluate" flavour;
+    // it also makes the branches translate as lambdas.
+    const bool isEvaluate = stmt.HasBlock1();
+
+    TSqlExpression expr(Ctx, Mode);
+    TNodePtr condition = expr.Build(stmt.GetRule_expr3());
+    if (!condition) {
+        return {};
+    }
+
+    TNodePtr thenBranch = DoStatement(stmt.GetRule_do_stmt4(), isEvaluate);
+    if (!thenBranch) {
+        return {};
+    }
+
+    // The else branch stays null when the statement has none.
+    TNodePtr elseBranch;
+    if (stmt.HasBlock5()) {
+        elseBranch = DoStatement(stmt.GetBlock5().GetRule_do_stmt2(), isEvaluate);
+        if (!elseBranch) {
+            return {};
+        }
+    }
+
+    return BuildWorldIfNode(Ctx.Pos(), condition, thenBranch, elseBranch, isEvaluate);
+}
+
+// Translates a FOR statement into a world-for node.
+// The loop body is translated as a lambda taking the item variable; the optional else branch
+// is translated as a lambda without extra arguments. Returns a null node on failure.
+TNodePtr TSqlTranslation::ForStatement(const TRule_for_stmt& stmt) {
+    // Flags are taken from the presence of the first two optional grammar blocks.
+    const bool isEvaluate = stmt.HasBlock1();
+    const bool isParallel = stmt.HasBlock2();
+
+    TSqlExpression expr(Ctx, Mode);
+    TString itemArgName;
+    if (!NamedNodeImpl(stmt.GetRule_bind_parameter4(), itemArgName, *this)) {
+        return {};
+    }
+    // Capture the position right after the item name was parsed.
+    const TPosition itemArgNamePos = Ctx.Pos();
+
+    TNodePtr listNode = expr.Build(stmt.GetRule_expr6());
+    if (!listNode) {
+        return {};
+    }
+
+    // The item variable is visible only while the body is being translated.
+    itemArgName = PushNamedAtom(itemArgNamePos, itemArgName);
+    if (isParallel) {
+        ++Ctx.ParallelModeCount;
+    }
+
+    TNodePtr loopBody = DoStatement(stmt.GetRule_do_stmt7(), true, { itemArgName });
+
+    if (isParallel) {
+        --Ctx.ParallelModeCount;
+    }
+    PopNamedNode(itemArgName);
+
+    if (!loopBody) {
+        return {};
+    }
+
+    TNodePtr elseBranch;
+    if (stmt.HasBlock8()) {
+        elseBranch = DoStatement(stmt.GetBlock8().GetRule_do_stmt2(), true);
+        if (!elseBranch) {
+            return {};
+        }
+    }
+
+    return BuildWorldForNode(Ctx.Pos(), listNode, loopBody, elseBranch, isEvaluate, isParallel);
+}
+
+// Resolves a bind parameter to its named node and stores it in result as a deferred atom.
+// Returns false if the parameter name cannot be parsed or no named node is found for it.
+bool TSqlTranslation::BindParameterClause(const TRule_bind_parameter& node, TDeferredAtom& result) {
+    TString bindName;
+    if (!NamedNodeImpl(node, bindName, *this)) {
+        return false;
+    }
+
+    const auto boundNode = GetNamedNode(bindName);
+    if (boundNode) {
+        result = MakeAtomFromExpression(Ctx.Pos(), Ctx, boundNode);
+        return true;
+    }
+    return false;
+}
+
+// Converts an object feature value into a deferred atom stored in result.
+// Returns false when a bind parameter cannot be resolved or a string literal cannot be parsed.
+bool TSqlTranslation::ObjectFeatureValueClause(const TRule_object_feature_value& node, TDeferredAtom& result) {
+    // object_feature_value: id_or_type | bind_parameter | STRING_VALUE | bool_value;
+    switch (node.Alt_case()) {
+        case TRule_object_feature_value::kAltObjectFeatureValue1: {
+            // Plain identifier: its text becomes the atom content.
+            const TString identifier = Id(node.GetAlt_object_feature_value1().GetRule_id_or_type1(), *this);
+            result = TDeferredAtom(Ctx.Pos(), identifier);
+            return true;
+        }
+        case TRule_object_feature_value::kAltObjectFeatureValue2: {
+            return BindParameterClause(node.GetAlt_object_feature_value2().GetRule_bind_parameter1(), result);
+        }
+        case TRule_object_feature_value::kAltObjectFeatureValue3: {
+            const auto& literalAlt = node.GetAlt_object_feature_value3();
+            auto parsed = StringContent(Ctx, Ctx.Pos(), Ctx.Token(literalAlt.GetToken1()));
+            if (!parsed) {
+                Error() << "Cannot parse string correctly: " << Ctx.Token(literalAlt.GetToken1());
+                return false;
+            }
+            result = TDeferredAtom(Ctx.Pos(), parsed->Content);
+            return true;
+        }
+        case TRule_object_feature_value::kAltObjectFeatureValue4: {
+            // Boolean literal: the token text is parsed with FromString<bool>.
+            const TString boolText = Ctx.Token(node.GetAlt_object_feature_value4().GetRule_bool_value1().GetToken1());
+            result = TDeferredAtom(BuildLiteralBool(Ctx.Pos(), FromString<bool>(boolText)), Ctx);
+            return true;
+        }
+        case TRule_object_feature_value::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+    return true;
+}
+
+// Adds one object feature to result: either "key = value" or a bare flag,
+// which is stored with a default-constructed atom.
+// Returns false only when the value of a key/value feature cannot be translated.
+bool TSqlTranslation::AddObjectFeature(std::map<TString, TDeferredAtom>& result, const TRule_object_feature& feature) {
+    if (feature.has_alt_object_feature1()) {
+        const auto& keyValue = feature.GetAlt_object_feature1().GetRule_object_feature_kv1();
+        const TString featureName = Id(keyValue.GetRule_an_id_or_type1(), *this);
+
+        TDeferredAtom featureValue;
+        if (!ObjectFeatureValueClause(keyValue.GetRule_object_feature_value3(), featureValue)) {
+            return false;
+        }
+        result[featureName] = featureValue;
+        return true;
+    }
+
+    if (feature.has_alt_object_feature2()) {
+        const auto& flagRule = feature.GetAlt_object_feature2().GetRule_object_feature_flag1();
+        const TString flagName = Id(flagRule.GetRule_an_id_or_type1(), *this);
+        result[flagName] = TDeferredAtom();
+    }
+    return true;
+}
+
+// Collects all object features (single-feature or list alternative) into result.
+// Returns false if no alternative is set or any feature fails to translate.
+bool TSqlTranslation::ParseObjectFeatures(std::map<TString, TDeferredAtom>& result, const TRule_object_features& features) {
+    // Single feature.
+    if (features.has_alt_object_features1()) {
+        return AddObjectFeature(result, features.alt_object_features1().GetRule_object_feature1());
+    }
+
+    // Neither alternative is set.
+    if (!features.has_alt_object_features2()) {
+        return false;
+    }
+
+    // Feature list: a first feature followed by any number of further ones.
+    const auto& featureList = features.alt_object_features2();
+    if (!AddObjectFeature(result, featureList.GetRule_object_feature2())) {
+        return false;
+    }
+    for (const auto& listItem : featureList.GetBlock3()) {
+        if (!AddObjectFeature(result, listItem.GetRule_object_feature2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Stores one external data source setting under its lower-cased name.
+// Reports an error and returns false for a repeated key; otherwise the outcome of
+// StoreString for the value is returned.
+bool TSqlTranslation::StoreDataSourceSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) {
+    YQL_ENSURE(value);
+
+    const TString settingName = to_lower(id.Name);
+    if (result.count(settingName) != 0) {
+        Ctx.Error() << to_upper(settingName) << " duplicate keys";
+        return false;
+    }
+
+    return StoreString(*value, result[settingName], Ctx, to_upper(settingName));
+}
+
+// Convenience overload: extracts the identifier and value from an ALTER setting entry
+// and delegates to the (id, value) overload.
+bool TSqlTranslation::StoreDataSourceSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) {
+    const TIdentifier settingId = IdEx(entry.GetRule_an_id1(), *this);
+    const TRule_table_setting_value* settingValue = &entry.GetRule_table_setting_value3();
+    return StoreDataSourceSettingsEntry(settingId, settingValue, result);
+}
+
+// Parses the WITH (...) settings of an external data source into result.
+// SOURCE_TYPE must be present and the authentication settings must pass ValidateAuthMethod.
+bool TSqlTranslation::ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settingsNode) {
+    // Stores a single table_settings_entry; shared by the first entry and the following ones.
+    const auto storeEntry = [&](const auto& settingsEntry) {
+        return StoreDataSourceSettingsEntry(
+            IdEx(settingsEntry.GetRule_an_id1(), *this),
+            &settingsEntry.GetRule_table_setting_value3(),
+            result);
+    };
+
+    if (!storeEntry(settingsNode.GetRule_table_settings_entry3())) {
+        return false;
+    }
+    for (auto& followingBlock : settingsNode.GetBlock4()) {
+        if (!storeEntry(followingBlock.GetRule_table_settings_entry2())) {
+            return false;
+        }
+    }
+
+    if (result.count("source_type") == 0) {
+        Ctx.Error() << "SOURCE_TYPE requires key";
+        return false;
+    }
+    return ValidateAuthMethod(result);
+}
+
+// Applies one ALTER EXTERNAL DATA SOURCE action: SET actions add entries to result,
+// the RESET action adds lower-cased setting names to toReset.
+bool TSqlTranslation::ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_external_data_source_action& alterAction) {
+    switch (alterAction.Alt_case()) {
+        case TRule_alter_external_data_source_action::kAltAlterExternalDataSourceAction1: {
+            // Single-setting SET form.
+            const auto& setAction = alterAction.GetAlt_alter_external_data_source_action1().GetRule_alter_table_set_table_setting_uncompat1();
+            return StoreDataSourceSettingsEntry(IdEx(setAction.GetRule_an_id2(), *this), &setAction.GetRule_table_setting_value3(), result);
+        }
+        case TRule_alter_external_data_source_action::kAltAlterExternalDataSourceAction2: {
+            // SET form carrying a list of entries.
+            const auto& setAction = alterAction.GetAlt_alter_external_data_source_action2().GetRule_alter_table_set_table_setting_compat1();
+            if (!StoreDataSourceSettingsEntry(setAction.GetRule_alter_table_setting_entry3(), result)) {
+                return false;
+            }
+            for (const auto& followingEntry : setAction.GetBlock4()) {
+                if (!StoreDataSourceSettingsEntry(followingEntry.GetRule_alter_table_setting_entry2(), result)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        case TRule_alter_external_data_source_action::kAltAlterExternalDataSourceAction3: {
+            // RESET form: remember every listed setting name.
+            const auto& resetAction = alterAction.GetAlt_alter_external_data_source_action3().GetRule_alter_table_reset_table_setting1();
+            toReset.insert(to_lower(IdEx(resetAction.GetRule_an_id3(), *this).Name));
+            for (const auto& followingId : resetAction.GetBlock4()) {
+                toReset.insert(to_lower(IdEx(followingId.GetRule_an_id2(), *this).Name));
+            }
+            return true;
+        }
+        case TRule_alter_external_data_source_action::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Checks that the authentication-related settings in result match the declared AUTH_METHOD:
+// every field the method needs must be present and no field of another method may be given.
+// Reports the first violation and returns false.
+bool TSqlTranslation::ValidateAuthMethod(const std::map<TString, TDeferredAtom>& result) {
+    // Every setting name that belongs to some authentication method.
+    const static TSet<TStringBuf> knownAuthFields{
+        "service_account_id",
+        "service_account_secret_name",
+        "login",
+        "password_secret_name",
+        "aws_access_key_id_secret_name",
+        "aws_secret_access_key_secret_name",
+        "aws_region",
+        "token_secret_name"
+    };
+    // Fields each authentication method requires.
+    const static TMap<TStringBuf, TSet<TStringBuf>> fieldsByAuthMethod{
+        {"NONE", {}},
+        {"SERVICE_ACCOUNT", {"service_account_id", "service_account_secret_name"}},
+        {"BASIC", {"login", "password_secret_name"}},
+        {"AWS", {"aws_access_key_id_secret_name", "aws_secret_access_key_secret_name", "aws_region"}},
+        {"MDB_BASIC", {"service_account_id", "service_account_secret_name", "login", "password_secret_name"}},
+        {"TOKEN", {"token_secret_name"}}
+    };
+
+    // AUTH_METHOD must be present and must be a literal.
+    const auto authMethodEntry = result.find("auth_method");
+    if (authMethodEntry == result.end() || authMethodEntry->second.GetLiteral() == nullptr) {
+        Ctx.Error() << "AUTH_METHOD requires key";
+        return false;
+    }
+
+    const auto& authMethod = *authMethodEntry->second.GetLiteral();
+    const auto methodEntry = fieldsByAuthMethod.find(authMethod);
+    if (methodEntry == fieldsByAuthMethod.end()) {
+        Ctx.Error() << "Unknown AUTH_METHOD = " << authMethod;
+        return false;
+    }
+
+    const auto& requiredFields = methodEntry->second;
+    for (const auto& field : knownAuthFields) {
+        const bool isRequired = requiredFields.contains(field);
+        const bool isProvided = result.contains(TString{field});
+        if (isRequired && !isProvided) {
+            Ctx.Error() << to_upper(TString{field}) << " requires key";
+            return false;
+        }
+        if (!isRequired && isProvided) {
+            Ctx.Error() << to_upper(TString{field}) << " key is not supported for AUTH_METHOD = " << authMethod;
+            return false;
+        }
+    }
+    return true;
+}
+
+// Validates CREATE TABLE parameters for external tables; other table types always pass.
+// An external table needs DATA_SOURCE and LOCATION and must not declare a primary key.
+bool TSqlTranslation::ValidateExternalTable(const TCreateTableParameters& params) {
+    const bool isExternalTable = params.TableType == ETableType::ExternalTable;
+    if (!isExternalTable) {
+        return true;
+    }
+
+    const auto& settings = params.TableSettings;
+    if (!settings.DataSourcePath) {
+        Ctx.Error() << "DATA_SOURCE requires key";
+        return false;
+    }
+    if (!settings.Location) {
+        Ctx.Error() << "LOCATION requires key";
+        return false;
+    }
+    if (params.PkColumns) {
+        Ctx.Error() << "PRIMARY KEY is not supported for external table";
+        return false;
+    }
+
+    return true;
+}
+
+// Captures the SELECT of a view definition into features:
+//   "query_text" - the collected token text of the query, prefixed with the statements
+//                  needed to recreate the compilation context (table path prefix, USE);
+//   "query_ast"  - the translated select built by BuildViewSelect.
+// Returns false when the select cannot be built.
+bool TSqlTranslation::ParseViewQuery(
+    std::map<TString, TDeferredAtom>& features,
+    const TRule_select_stmt& query
+) {
+    TString queryText = CollectTokens(query);
+    TString contextRecreationQuery;
+    {
+        const auto& service = Ctx.Scoped->CurrService;
+        const auto& cluster = Ctx.Scoped->CurrCluster;
+        const auto effectivePathPrefix = Ctx.GetPrefixPath(service, cluster);
+
+        // TO DO: capture all runtime pragmas in a similar fashion.
+        if (effectivePathPrefix != Ctx.Settings.PathPrefix) {
+            contextRecreationQuery += TStringBuilder() << "PRAGMA TablePathPrefix = \"" << effectivePathPrefix << "\";\n";
+        }
+
+        // TO DO: capture other compilation-affecting statements except USE.
+        if (cluster.GetLiteral() && *cluster.GetLiteral() != Ctx.Settings.DefaultCluster) {
+            // Append rather than assign, so a path prefix pragma captured above is not lost
+            // when both the prefix and the cluster differ from the defaults.
+            contextRecreationQuery += TStringBuilder() << "USE " << *cluster.GetLiteral() << ";\n";
+        }
+    }
+    features["query_text"] = { Ctx.Pos(), contextRecreationQuery + queryText };
+
+    // AST is needed for ready-made validation of CREATE VIEW statement.
+    // Query is stored as plain text, not AST.
+    const auto viewSelect = BuildViewSelect(query, Ctx, contextRecreationQuery);
+    if (!viewSelect) {
+        return false;
+    }
+    features["query_ast"] = {viewSelect, Ctx};
+
+    return true;
+}
+
+// AST node describing a RETURNING clause: either "*" or an explicit list of column names.
+// The translated form is built in DoInit; Translate must only be called after a successful Init.
+class TReturningListColumns : public INode {
+public:
+    TReturningListColumns(TPosition pos)
+        : INode(pos)
+    {
+    }
+
+    // Switches the node to "RETURNING *"; previously added column names are dropped.
+    void SetStar() {
+        ColumnNames.clear();
+        Star = true;
+    }
+
+    // Appends the column name parsed from the given identifier rule.
+    void AddColumn(const NSQLv1Generated::TRule_an_id & rule, TTranslation& ctx) {
+        ColumnNames.push_back(NSQLTranslationV1::Id(rule, ctx));
+    }
+
+    // Builds the quoted ("returning", <items>) pair: a single ReturningStar item,
+    // or one ReturningListItem per column name.
+    bool DoInit(TContext& ctx, ISource* source) override {
+        Node = Y();
+        if (Star) {
+            Node->Add(Y("ReturningStar"));
+        } else {
+            for (auto&& column : ColumnNames) {
+                Node->Add(Y("ReturningListItem", Q(column)));
+            }
+        }
+        Node = Q(Y(Q("returning"), Q(Node)));
+        return Node->Init(ctx, source);
+    }
+
+    // The clone carries over the star flag and the column list, so that initializing it
+    // yields the same returning list as the original. The built Node is not copied:
+    // it is recreated by DoInit.
+    TNodePtr DoClone() const override {
+        auto clone = MakeHolder<TReturningListColumns>(GetPos());
+        clone->ColumnNames = ColumnNames;
+        clone->Star = Star;
+        return clone.Release();
+    }
+
+    TAstNode* Translate(TContext& ctx) const override {
+        return Node->Translate(ctx);
+    }
+
+private:
+    TNodePtr Node;
+    TVector<TString> ColumnNames;
+    bool Star = false;
+};
+
+// Builds the RETURNING node for a returning_columns_list rule: either "*" or the listed columns.
+TNodePtr TSqlTranslation::ReturningList(const ::NSQLv1Generated::TRule_returning_columns_list& columns) {
+    auto returning = MakeHolder<TReturningListColumns>(Ctx.Pos());
+
+    const auto& listBlock = columns.GetBlock2();
+    switch (listBlock.Alt_case()) {
+        case TRule_returning_columns_list_TBlock2::AltCase::kAlt1:
+            returning->SetStar();
+            break;
+        case TRule_returning_columns_list_TBlock2::AltCase::kAlt2: {
+            // First column, then every following one.
+            const auto& columnList = listBlock.alt2();
+            returning->AddColumn(columnList.GetRule_an_id1(), *this);
+            for (auto& followingColumn : columnList.GetBlock2()) {
+                returning->AddColumn(followingColumn.GetRule_an_id2(), *this);
+            }
+            break;
+        }
+        default:
+            // Alternative not set: the node stays empty.
+            break;
+    }
+
+    return returning.Release();
+}
+
+// Stores one resource pool setting under its lower-cased name.
+// Two value alternatives are accepted (stored via StoreString and StoreInt respectively);
+// a repeated key or any other value kind is reported as an error.
+bool TSqlTranslation::StoreResourcePoolSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) {
+    YQL_ENSURE(value);
+
+    const TString settingName = to_lower(id.Name);
+    if (result.count(settingName) != 0) {
+        Ctx.Error() << to_upper(settingName) << " duplicate keys";
+        return false;
+    }
+
+    const auto valueKind = value->Alt_case();
+    if (valueKind == TRule_table_setting_value::kAltTableSettingValue2) {
+        return StoreString(*value, result[settingName], Ctx, to_upper(settingName));
+    }
+    if (valueKind == TRule_table_setting_value::kAltTableSettingValue3) {
+        return StoreInt(*value, result[settingName], Ctx, to_upper(settingName));
+    }
+
+    Ctx.Error() << to_upper(settingName) << " value should be a string literal or integer";
+    return false;
+}
+
+// Convenience overload: extracts the identifier and value from an ALTER setting entry
+// and delegates to the (id, value) overload.
+bool TSqlTranslation::StoreResourcePoolSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) {
+    const TIdentifier settingId = IdEx(entry.GetRule_an_id1(), *this);
+    const TRule_table_setting_value* settingValue = &entry.GetRule_table_setting_value3();
+    return StoreResourcePoolSettingsEntry(settingId, settingValue, result);
+}
+
+// Parses the WITH (...) settings of a resource pool into result.
+// Returns false as soon as one entry is rejected.
+bool TSqlTranslation::ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settingsNode) {
+    // Stores a single table_settings_entry; shared by the first entry and the following ones.
+    const auto storeEntry = [&](const auto& settingsEntry) {
+        return StoreResourcePoolSettingsEntry(
+            IdEx(settingsEntry.GetRule_an_id1(), *this),
+            &settingsEntry.GetRule_table_setting_value3(),
+            result);
+    };
+
+    if (!storeEntry(settingsNode.GetRule_table_settings_entry3())) {
+        return false;
+    }
+    for (const auto& followingBlock : settingsNode.GetBlock4()) {
+        if (!storeEntry(followingBlock.GetRule_table_settings_entry2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Applies one ALTER RESOURCE POOL action: the SET action adds entries to result,
+// the RESET action adds lower-cased setting names to toReset.
+bool TSqlTranslation::ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_action& alterAction) {
+    switch (alterAction.Alt_case()) {
+        case TRule_alter_resource_pool_action::kAltAlterResourcePoolAction1: {
+            // SET form carrying a list of entries.
+            const auto& setAction = alterAction.GetAlt_alter_resource_pool_action1().GetRule_alter_table_set_table_setting_compat1();
+            if (!StoreResourcePoolSettingsEntry(setAction.GetRule_alter_table_setting_entry3(), result)) {
+                return false;
+            }
+            for (const auto& followingEntry : setAction.GetBlock4()) {
+                if (!StoreResourcePoolSettingsEntry(followingEntry.GetRule_alter_table_setting_entry2(), result)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        case TRule_alter_resource_pool_action::kAltAlterResourcePoolAction2: {
+            // RESET form: remember every listed setting name.
+            const auto& resetAction = alterAction.GetAlt_alter_resource_pool_action2().GetRule_alter_table_reset_table_setting1();
+            toReset.insert(to_lower(IdEx(resetAction.GetRule_an_id3(), *this).Name));
+            for (const auto& followingId : resetAction.GetBlock4()) {
+                toReset.insert(to_lower(IdEx(followingId.GetRule_an_id2(), *this).Name));
+            }
+            return true;
+        }
+        case TRule_alter_resource_pool_action::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+// Stores one resource pool classifier setting under its lower-cased name.
+// Two value alternatives are accepted (stored via StoreString and StoreInt respectively);
+// a repeated key or any other value kind is reported as an error.
+bool TSqlTranslation::StoreResourcePoolClassifierSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result) {
+    YQL_ENSURE(value);
+
+    const TString settingName = to_lower(id.Name);
+    if (result.count(settingName) != 0) {
+        Ctx.Error() << to_upper(settingName) << " duplicate keys";
+        return false;
+    }
+
+    const auto valueKind = value->Alt_case();
+    if (valueKind == TRule_table_setting_value::kAltTableSettingValue2) {
+        return StoreString(*value, result[settingName], Ctx, to_upper(settingName));
+    }
+    if (valueKind == TRule_table_setting_value::kAltTableSettingValue3) {
+        return StoreInt(*value, result[settingName], Ctx, to_upper(settingName));
+    }
+
+    Ctx.Error() << to_upper(settingName) << " value should be a string literal or integer";
+    return false;
+}
+
+// Convenience overload: extracts the identifier and value from an ALTER setting entry
+// and delegates to the (id, value) overload.
+bool TSqlTranslation::StoreResourcePoolClassifierSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result) {
+    const TIdentifier settingId = IdEx(entry.GetRule_an_id1(), *this);
+    const TRule_table_setting_value* settingValue = &entry.GetRule_table_setting_value3();
+    return StoreResourcePoolClassifierSettingsEntry(settingId, settingValue, result);
+}
+
+// Parses the WITH (...) settings of a resource pool classifier into result.
+// Returns false as soon as one entry is rejected.
+bool TSqlTranslation::ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settingsNode) {
+    // Stores a single table_settings_entry; shared by the first entry and the following ones.
+    const auto storeEntry = [&](const auto& settingsEntry) {
+        return StoreResourcePoolClassifierSettingsEntry(
+            IdEx(settingsEntry.GetRule_an_id1(), *this),
+            &settingsEntry.GetRule_table_setting_value3(),
+            result);
+    };
+
+    if (!storeEntry(settingsNode.GetRule_table_settings_entry3())) {
+        return false;
+    }
+    for (const auto& followingBlock : settingsNode.GetBlock4()) {
+        if (!storeEntry(followingBlock.GetRule_table_settings_entry2())) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Applies one ALTER RESOURCE POOL CLASSIFIER action: the SET action adds entries to result,
+// the RESET action adds lower-cased setting names to toReset.
+bool TSqlTranslation::ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_classifier_action& alterAction) {
+    switch (alterAction.Alt_case()) {
+        case TRule_alter_resource_pool_classifier_action::kAltAlterResourcePoolClassifierAction1: {
+            // SET form carrying a list of entries.
+            const auto& setAction = alterAction.GetAlt_alter_resource_pool_classifier_action1().GetRule_alter_table_set_table_setting_compat1();
+            if (!StoreResourcePoolClassifierSettingsEntry(setAction.GetRule_alter_table_setting_entry3(), result)) {
+                return false;
+            }
+            for (const auto& followingEntry : setAction.GetBlock4()) {
+                if (!StoreResourcePoolClassifierSettingsEntry(followingEntry.GetRule_alter_table_setting_entry2(), result)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        case TRule_alter_resource_pool_classifier_action::kAltAlterResourcePoolClassifierAction2: {
+            // RESET form: remember every listed setting name.
+            const auto& resetAction = alterAction.GetAlt_alter_resource_pool_classifier_action2().GetRule_alter_table_reset_table_setting1();
+            toReset.insert(to_lower(IdEx(resetAction.GetRule_an_id3(), *this).Name));
+            for (const auto& followingId : resetAction.GetBlock4()) {
+                toReset.insert(to_lower(IdEx(followingId.GetRule_an_id2(), *this).Name));
+            }
+            return true;
+        }
+        case TRule_alter_resource_pool_classifier_action::ALT_NOT_SET:
+            Y_ABORT("You should change implementation according to grammar changes");
+    }
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_translation.h b/yql/essentials/sql/v1/sql_translation.h
new file mode 100644
index 00000000000..683647f16bf
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_translation.h
@@ -0,0 +1,342 @@
+#pragma once
+#include "context.h"
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+#include <library/cpp/charset/ci_string.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NYql;
+using namespace NSQLv1Generated;
+
+inline TPosition GetPos(const TToken& token) {
+    // Builds a TPosition from the token's coordinates; the column is passed first, the line second.
+    const auto column = token.GetColumn();
+    const auto line = token.GetLine();
+    return TPosition(column, line);
+}
+
+// Builds a TIdentifier from the first token of a parser node.
+// The identifier position is the token's (column, line); the name is whatever
+// ctx.Identifier() returns for that token.
+// The template parameter is the node type (any type exposing GetToken1()); it is named
+// TNode so that it does not shadow the generated TToken message type.
+template <typename TNode>
+TIdentifier GetIdentifier(TTranslation& ctx, const TNode& node) {
+    // Bind by const reference: copying the whole token message here is unnecessary work.
+    const auto& token = node.GetToken1();
+    return TIdentifier(TPosition(token.GetColumn(), token.GetLine()), ctx.Identifier(token));
+}
+
+TIdentifier GetKeywordId(TTranslation& ctx, const TRule_keyword& node);
+
+inline TString GetKeyword(TTranslation& ctx, const TRule_keyword& node) {
+    // Text of a `keyword` rule: the Name part of the identifier produced by GetKeywordId().
+    TIdentifier keywordId = GetKeywordId(ctx, node);
+    return keywordId.Name;
+}
+
+template <typename TRule>
+inline TString GetKeyword(TTranslation& ctx, const TRule& node) {
+    // Generic overload for any rule handled by GetIdentifier(): returns just the identifier's Name.
+    TIdentifier ruleId = GetIdentifier(ctx, node);
+    return ruleId.Name;
+}
+
+inline TString Id(const TRule_identifier& node, TTranslation& ctx) {
+    // Grammar: identifier: ID_PLAIN | ID_QUOTED;
+    // Either way the rule holds a single token, which is handed to the translation context.
+    const auto& idToken = node.GetToken1();
+    return ctx.Identifier(idToken);
+}
+
+// Id() overloads, one per identifier-like grammar rule (id, an_id, id_expr, id_table, ...).
+// Each takes the rule node plus the translation context and returns the identifier text as TString,
+// except the id_or_at overload, which returns a pair (see below).
+// The definitions are not in this header.
+TString Id(const TRule_id& node, TTranslation& ctx);
+
+TString Id(const TRule_id_or_type& node, TTranslation& ctx);
+
+TString Id(const TRule_id_as_compat& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_as_compat& node, TTranslation& ctx);
+
+TString Id(const TRule_id_schema& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_or_type& node, TTranslation& ctx);
+
+// NOTE(review): the bool is presumably "an '@' prefix was present" (TableKeyImpl names this
+// pair `nameWithAt`) — confirm against the definition.
+std::pair<bool, TString> Id(const TRule_id_or_at& node, TTranslation& ctx);
+
+TString Id(const TRule_id_table& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_table& node, TTranslation& ctx);
+
+TString Id(const TRule_id_table_or_type& node, TTranslation& ctx);
+
+TString Id(const TRule_id_expr& node, TTranslation& ctx);
+
+bool IsQuotedId(const TRule_id_expr& node, TTranslation& ctx);
+
+TString Id(const TRule_id_expr_in& node, TTranslation& ctx);
+
+TString Id(const TRule_id_window& node, TTranslation& ctx);
+
+TString Id(const TRule_id_without& node, TTranslation& ctx);
+
+TString Id(const TRule_id_hint& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_schema& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_expr& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_window& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_without& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_hint& node, TTranslation& ctx);
+
+TString Id(const TRule_an_id_pure& node, TTranslation& ctx);
+
+template<typename TRule>
+inline TIdentifier IdEx(const TRule& node, TTranslation& ctx) {
+    // Like Id(), but also attaches the context's current position to the resulting identifier.
+    // The name is resolved BEFORE the position is read; keep this order.
+    // NOTE(review): presumably Id() moves the context position to the identifier's token — confirm.
+    const TString resolved = Id(node, ctx);
+    const TPosition where = ctx.Context().Pos();
+    return TIdentifier(where, resolved);
+}
+
+bool NamedNodeImpl(const TRule_bind_parameter& node, TString& name, TTranslation& ctx);
+
+TString OptIdPrefixAsStr(const TRule_opt_id_prefix& node, TTranslation& ctx, const TString& defaultStr = {});
+
+TString OptIdPrefixAsStr(const TRule_opt_id_prefix_or_type& node, TTranslation& ctx, const TString& defaultStr = {});
+
+void PureColumnListStr(const TRule_pure_column_list& node, TTranslation& ctx, TVector<TString>& outList);
+
+bool NamedNodeImpl(const TRule_opt_bind_parameter& node, TString& name, bool& isOptional, TTranslation& ctx);
+
+TDeferredAtom PureColumnOrNamed(const TRule_pure_column_or_named& node, TTranslation& ctx);
+
+bool PureColumnOrNamedListStr(const TRule_pure_column_or_named_list& node, TTranslation& ctx, TVector<TDeferredAtom>& outList);
+
+std::pair<TString, TViewDescription> TableKeyImpl(const std::pair<bool, TString>& nameWithAt, TViewDescription view, TTranslation& ctx);
+
+std::pair<TString, TViewDescription> TableKeyImpl(const TRule_table_key& node, TTranslation& ctx, bool hasAt);
+
+TMaybe<TColumnConstraints> ColumnConstraints(const TRule_column_schema& node, TTranslation& ctx);
+
+/// \return optional prefix
+TString ColumnNameAsStr(TTranslation& ctx, const TRule_column_name& node, TString& id);
+
+TString ColumnNameAsSingleStr(TTranslation& ctx, const TRule_column_name& node);
+
+// Forward declaration; TSqlQuery is only used by reference in this header.
+class TSqlQuery;
+
+// A symbol name together with a source position.
+// Used below for bind-parameter names and their aliases.
+struct TSymbolNameWithPos {
+ TString Name;
+ TPosition Pos;
+};
+
+// Base class for SQL v1 translators.
+// Extends TTranslation with helper methods that turn parser rule nodes (TRule_*) into
+// AST nodes, table/topic/index descriptions and settings maps.
+// It can only be constructed by derived classes (protected constructor).
+// Most bool-returning helpers take the rule node as input and fill an output parameter.
+class TSqlTranslation: public TTranslation {
+protected:
+ // Stores `mode` and verifies (YQL_ENSURE) that it matches ctx.Settings.Mode.
+ TSqlTranslation(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TTranslation(ctx)
+ , Mode(mode)
+ {
+ /// \todo remove NSQLTranslation::ESqlMode params
+ YQL_ENSURE(ctx.Settings.Mode == mode);
+ }
+
+protected:
+ // Expression context selector accepted by NamedExpr() / NamedExprList().
+ enum class EExpr {
+ Regular,
+ GroupBy,
+ SqlLambdaParams,
+ };
+ // --- named expressions, bind parameters, named nodes ---
+ TNodePtr NamedExpr(const TRule_named_expr& node, EExpr exprMode = EExpr::Regular);
+ bool NamedExprList(const TRule_named_expr_list& node, TVector<TNodePtr>& exprs, EExpr exprMode = EExpr::Regular);
+ bool BindList(const TRule_bind_parameter_list& node, TVector<TSymbolNameWithPos>& bindNames);
+ bool ActionOrSubqueryArgs(const TRule_action_or_subquery_args& node, TVector<TSymbolNameWithPos>& bindNames, ui32& optionalArgsCount);
+ bool ModulePath(const TRule_module_path& node, TVector<TString>& path);
+ bool NamedBindList(const TRule_named_bind_parameter_list& node, TVector<TSymbolNameWithPos>& names,
+ TVector<TSymbolNameWithPos>& aliases);
+ bool NamedBindParam(const TRule_named_bind_parameter& node, TSymbolNameWithPos& name, TSymbolNameWithPos& alias);
+ TNodePtr NamedNode(const TRule_named_nodes_stmt& rule, TVector<TSymbolNameWithPos>& names);
+
+ // --- statements and table references ---
+ bool ImportStatement(const TRule_import_stmt& stmt, TVector<TString>* namesPtr = nullptr);
+ TNodePtr DoStatement(const TRule_do_stmt& stmt, bool makeLambda, const TVector<TString>& args = {});
+ bool DefineActionOrSubqueryStatement(const TRule_define_action_or_subquery_stmt& stmt, TSymbolNameWithPos& nameAndPos, TNodePtr& lambda);
+ bool DefineActionOrSubqueryBody(TSqlQuery& query, TBlocks& blocks, const TRule_define_action_or_subquery_body& body);
+ TNodePtr IfStatement(const TRule_if_stmt& stmt);
+ TNodePtr ForStatement(const TRule_for_stmt& stmt);
+ TMaybe<TTableArg> TableArgImpl(const TRule_table_arg& node);
+ bool TableRefImpl(const TRule_table_ref& node, TTableRef& result, bool unorderedSubquery);
+ TMaybe<TSourcePtr> AsTableImpl(const TRule_table_ref& node);
+ bool ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, TString& service, TDeferredAtom& cluster);
+ bool ClusterExprOrBinding(const TRule_cluster_expr& node, TString& service, TDeferredAtom& cluster, bool& isBinding);
+ bool ApplyTableBinding(const TString& binding, TTableRef& tr, TTableHints& hints);
+
+ // --- CREATE TABLE pieces ---
+ TMaybe<TColumnSchema> ColumnSchemaImpl(const TRule_column_schema& node);
+ bool CreateTableEntry(const TRule_create_table_entry& node, TCreateTableParameters& params, const bool isCreateTableAs);
+
+ // --- settings storage (table, external table, data source, resource pool, classifier) ---
+ bool FillFamilySettingsEntry(const TRule_family_settings_entry& settingNode, TFamilyEntry& family);
+ bool FillFamilySettings(const TRule_family_settings& settingsNode, TFamilyEntry& family);
+ bool CreateTableSettings(const TRule_with_table_settings& settingsNode, TCreateTableParameters& params);
+ bool StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings,
+ ETableType tableType, bool alter, bool reset);
+ bool StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings,
+ bool alter, bool reset);
+ bool StoreExternalTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings,
+ bool alter, bool reset);
+ bool StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value& value, TTableSettings& settings, ETableType tableType, bool alter = false);
+ bool StoreDataSourceSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result);
+ bool StoreDataSourceSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result);
+ bool StoreResourcePoolSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result);
+ bool StoreResourcePoolSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result);
+ bool StoreResourcePoolClassifierSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result);
+ bool StoreResourcePoolClassifierSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result);
+ bool ResetTableSettingsEntry(const TIdentifier& id, TTableSettings& settings, ETableType tableType);
+
+ // --- indexes ---
+ bool CreateTableIndex(const TRule_table_index& node, TVector<TIndexDescription>& indexes);
+ bool CreateIndexSettings(const TRule_with_index_settings& settingsNode, TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings);
+ bool CreateIndexSettingEntry(const TIdentifier& id, const TRule_index_setting_value& value, TIndexDescription::EType indexType, TIndexDescription::TIndexSettings& indexSettings);
+ template<typename T>
+ std::tuple<bool, T, TString> GetIndexSettingValue(const TRule_index_setting_value& node);
+
+ // --- topics and topic consumers ---
+ TIdentifier GetTopicConsumerId(const TRule_topic_consumer_ref& node);
+ bool CreateConsumerSettings(const TRule_topic_consumer_settings& settingsNode, TTopicConsumerSettings& settings);
+ bool CreateTopicSettings(const TRule_topic_settings& node, TTopicSettings& params);
+ bool CreateTopicConsumer(const TRule_topic_create_consumer_entry& node,
+ TVector<TTopicConsumerDescription>& consumers);
+ bool CreateTopicEntry(const TRule_create_topic_entry& node, TCreateTopicParameters& params);
+
+ bool AlterTopicConsumer(const TRule_alter_topic_alter_consumer& node,
+ THashMap<TString, TTopicConsumerDescription>& alterConsumers);
+
+ bool AlterTopicConsumerEntry(const TRule_alter_topic_alter_consumer_entry& node,
+ TTopicConsumerDescription& alterConsumer);
+
+
+ bool AlterTopicAction(const TRule_alter_topic_action& node, TAlterTopicParameters& params);
+
+
+ // --- type names and literals ---
+ TNodePtr TypeSimple(const TRule_type_name_simple& node, bool onlyDataAllowed);
+ TNodePtr TypeDecimal(const TRule_type_name_decimal& node);
+ TNodePtr AddOptionals(const TNodePtr& node, size_t optionalCount);
+ TMaybe<std::pair<TVector<TNodePtr>, bool>> CallableArgList(const TRule_callable_arg_list& argList, bool namedArgsStarted);
+
+ TNodePtr IntegerOrBind(const TRule_integer_or_bind& node);
+ TNodePtr TypeNameTag(const TRule_type_name_tag& node);
+ TNodePtr TypeNodeOrBind(const TRule_type_name_or_bind& node);
+ TNodePtr SerialTypeNode(const TRule_type_name_or_bind& node);
+ TNodePtr TypeNode(const TRule_type_name& node);
+ TNodePtr TypeNode(const TRule_type_name_composite& node);
+ TNodePtr ValueConstructorLiteral(const TRule_value_constructor_literal& node);
+ TNodePtr ValueConstructor(const TRule_value_constructor& node);
+ TNodePtr ListLiteral(const TRule_list_literal& node);
+ TNodePtr DictLiteral(const TRule_dict_literal& node);
+ TNodePtr StructLiteral(const TRule_struct_literal& node);
+ // --- hints, simple refs, windows, ordering ---
+ TMaybe<TTableHints> TableHintsImpl(const TRule_table_hints& node, const TString& provider, const TString& keyFunc = "");
+ bool TableHintImpl(const TRule_table_hint& rule, TTableHints& hints, const TString& provider, const TString& keyFunc = "");
+ bool SimpleTableRefImpl(const TRule_simple_table_ref& node, TTableRef& result);
+ bool TopicRefImpl(const TRule_topic_ref& node, TTopicRef& result);
+ TWindowSpecificationPtr WindowSpecification(const TRule_window_specification_details& rule);
+ bool OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy);
+ bool SortSpecificationList(const TRule_sort_specification_list& node, TVector<TSortSpecificationPtr>& sortSpecs);
+
+ bool IsDistinctOptSet(const TRule_opt_set_quantifier& node) const;
+ bool IsDistinctOptSet(const TRule_opt_set_quantifier& node, TPosition& distinctPos) const;
+
+ // --- object features, external sources, views, resource pools, roles, permissions, backups ---
+ // The overloads taking `toReset` additionally collect names of settings to be reset.
+ bool AddObjectFeature(std::map<TString, TDeferredAtom>& result, const TRule_object_feature& feature);
+ bool BindParameterClause(const TRule_bind_parameter& node, TDeferredAtom& result);
+ bool ObjectFeatureValueClause(const TRule_object_feature_value& node, TDeferredAtom& result);
+ bool ParseObjectFeatures(std::map<TString, TDeferredAtom>& result, const TRule_object_features& features);
+ bool ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settings);
+ bool ParseExternalDataSourceSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_external_data_source_action& alterActions);
+ bool ParseViewOptions(std::map<TString, TDeferredAtom>& features, const TRule_with_table_settings& options);
+ bool ParseViewQuery(std::map<TString, TDeferredAtom>& features, const TRule_select_stmt& query);
+ bool ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settings);
+ bool ParseResourcePoolSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_action& alterAction);
+ bool ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, const TRule_with_table_settings& settings);
+ bool ParseResourcePoolClassifierSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_resource_pool_classifier_action& alterAction);
+ bool RoleNameClause(const TRule_role_name& node, TDeferredAtom& result, bool allowSystemRoles);
+ bool RoleParameters(const TRule_create_user_option& node, TRoleParameters& result);
+ bool PermissionNameClause(const TRule_permission_name_target& node, TVector<TDeferredAtom>& result, bool withGrantOption);
+ bool PermissionNameClause(const TRule_permission_name& node, TDeferredAtom& result);
+ bool PermissionNameClause(const TRule_permission_id& node, TDeferredAtom& result);
+ bool StoreStringSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, std::map<TString, TDeferredAtom>& result);
+ bool StoreStringSettingsEntry(const TRule_alter_table_setting_entry& entry, std::map<TString, TDeferredAtom>& result);
+ bool ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, const TRule_backup_collection_settings& settings);
+ bool ParseBackupCollectionSettings(std::map<TString, TDeferredAtom>& result, std::set<TString>& toReset, const TRule_alter_backup_collection_actions& actions);
+ bool ParseBackupCollectionTables(TVector<TDeferredAtom>& result, const TRule_table_list& tables);
+ bool ParseBackupCollectionEntry(
+ bool& addDatabase,
+ bool& removeDatabase,
+ TVector<TDeferredAtom>& addTables,
+ TVector<TDeferredAtom>& removeTables,
+ const TRule_alter_backup_collection_entry& entry);
+ bool ParseBackupCollectionEntries(
+ bool& addDatabase,
+ bool& removeDatabase,
+ TVector<TDeferredAtom>& addTables,
+ TVector<TDeferredAtom>& removeTables,
+ const TRule_alter_backup_collection_entries& entries);
+
+ bool ValidateAuthMethod(const std::map<TString, TDeferredAtom>& result);
+ bool ValidateExternalTable(const TCreateTableParameters& params);
+
+ TNodePtr ReturningList(const ::NSQLv1Generated::TRule_returning_columns_list& columns);
+private:
+ // Implementation details not available to derived classes.
+ bool SimpleTableRefCoreImpl(const TRule_simple_table_ref_core& node, TTableRef& result);
+ static bool IsValidFrameSettings(TContext& ctx, const TFrameSpecification& frameSpec, size_t sortSpecSize);
+ static TString FrameSettingsToString(EFrameSettings settings, bool isUnbounded);
+
+ bool FrameBound(const TRule_window_frame_bound& rule, TFrameBoundPtr& bound);
+ bool FrameClause(const TRule_window_frame_clause& node, TFrameSpecificationPtr& frameSpec, size_t sortSpecSize);
+ bool SortSpecification(const TRule_sort_specification& node, TVector<TSortSpecificationPtr>& sortSpecs);
+
+ bool ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, bool allowBinding, TString& service, TDeferredAtom& cluster, bool& isBinding);
+ bool StructLiteralItem(TVector<TNodePtr>& labels, const TRule_expr& label, TVector<TNodePtr>& values, const TRule_expr& value);
+ bool ValidateTableSettings(const TTableSettings& settings);
+
+protected:
+ // SQL mode given at construction; checked there to equal ctx.Settings.Mode.
+ NSQLTranslation::ESqlMode Mode;
+};
+
+TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node);
+
+// One piece of a pattern produced by SplitPattern() (a run between '%' separators).
+//  * Prefix   - plain characters collected before the first any-char marker;
+//  * Suffix   - plain characters collected after the most recent any-char marker
+//               (for a simple component this equals Prefix);
+//  * IsSimple - true until AppendAnyChar() is called, i.e. the piece has no any-char marker.
+template<typename TChar>
+struct TPatternComponent {
+    TBasicString<TChar> Prefix;
+    TBasicString<TChar> Suffix;
+    bool IsSimple = true;
+
+    // Adds a literal character: always to Suffix, and to Prefix only while the piece is still simple.
+    void AppendPlain(TChar c) {
+        Suffix.push_back(c);
+        if (!IsSimple) {
+            return;
+        }
+        Prefix.push_back(c);
+    }
+
+    // Records an any-char marker: the piece stops being simple and the suffix starts over.
+    void AppendAnyChar() {
+        Suffix.clear();
+        IsSimple = false;
+    }
+};
+
+// Splits `pattern` into components separated by '%'.
+//  * '%' closes the current component; a run of consecutive '%' closes it only once;
+//  * '_' is recorded in the current component as an any-char marker;
+//  * when `escape` is set, the character following an escape character is always taken literally;
+//  * any other character is appended literally.
+// The trailing component is always appended, so the result is never empty.
+// `inEscape` is an output: it is true on return when the pattern ends right after an escape character.
+// NOTE(review): '%' / '_' look like SQL LIKE wildcards — confirm with the callers.
+template<typename TChar>
+TVector<TPatternComponent<TChar>> SplitPattern(const TBasicString<TChar>& pattern, TMaybe<char> escape, bool& inEscape) {
+    TVector<TPatternComponent<TChar>> parts;
+    TPatternComponent<TChar> piece;
+    bool afterPercent = false;
+    inEscape = false;
+    for (const TChar ch : pattern) {
+        if (inEscape) {
+            // Escaped character: literal, whatever it is.
+            piece.AppendPlain(ch);
+            inEscape = false;
+            afterPercent = false;
+            continue;
+        }
+        if (escape && ch == static_cast<TChar>(*escape)) {
+            inEscape = true;
+            continue;
+        }
+        if (ch == '%') {
+            if (!afterPercent) {
+                parts.push_back(std::move(piece));
+            }
+            piece = {};
+            afterPercent = true;
+            continue;
+        }
+        afterPercent = false;
+        if (ch == '_') {
+            piece.AppendAnyChar();
+        } else {
+            piece.AppendPlain(ch);
+        }
+    }
+    parts.push_back(std::move(piece));
+    return parts;
+}
+
+bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix);
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp
new file mode 100644
index 00000000000..6663fe97657
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_ut.cpp
@@ -0,0 +1,7462 @@
+#include "sql_ut.h"
+#include "format/sql_format.h"
+#include "lexer/lexer.h"
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/sql/sql.h>
+#include <util/generic/map.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/split.h>
+
+#include <format>
+
+using namespace NSQLTranslation;
+
+namespace {
+
+// Test helper: runs the SQL v1 lexer (created with MakeLexer(true, false)) over `query`
+// and returns the collected token list.
+// Fails the current test, printing the collected issues, if tokenization reports failure.
+TParsedTokenList Tokenize(const TString& query) {
+ auto lexer = NSQLTranslationV1::MakeLexer(true, false);
+ TParsedTokenList tokens;
+ NYql::TIssues issues;
+ // The call below resolves to the lexer-level Tokenize overload, not to this helper.
+ UNIT_ASSERT_C(Tokenize(*lexer, query, "Query", tokens, issues, SQL_MAX_PARSER_ERRORS),
+ issues.ToString());
+
+ return tokens;
+}
+
+// Test helper: joins the contents of all tokens with single spaces,
+// leaving out tokens named "WS" and "EOF".
+TString ToString(const TParsedTokenList& tokens) {
+    TStringBuilder joined;
+    for (const auto& token : tokens) {
+        const bool skipped = token.Name == "WS" || token.Name == "EOF";
+        if (!skipped) {
+            // A separator goes before every piece except the very first one.
+            if (!joined.empty()) {
+                joined << ' ';
+            }
+            joined << token.Content;
+        }
+    }
+    return joined;
+}
+
+}
+
+// Checks related to ANSI pragmas.
+Y_UNIT_TEST_SUITE(AnsiMode) {
+ // "PRAGMA ANSI 2016;" alone must translate successfully.
+ Y_UNIT_TEST(PragmaAnsi) {
+ UNIT_ASSERT(SqlToYql("PRAGMA ANSI 2016;").IsOk());
+ }
+}
+
+Y_UNIT_TEST_SUITE(SqlParsingOnly) {
+ /// Helper for the BACKWARD COMPATIBILITY tests below, which pin the set of keywords that
+ /// can NOT be used as identifiers in a given position of a SQL request.
+ /// For every keyword from NSQLFormat::GetKeywords() the request built by \p makeRequest must
+ /// translate successfully if and only if the keyword is not listed in \p forbidden.
+ /// \return list of tokens that failed this check
+ TVector<TString> ValidateTokens(const THashSet<TString>& forbidden, const std::function<TString (const TString& )>& makeRequest) {
+     // keyword -> "is expected to be accepted"
+     THashMap<TString, bool> allTokens;
+     for (const auto& keyword : NSQLFormat::GetKeywords()) {
+         allTokens[keyword] = !forbidden.contains(keyword);
+     }
+     // Argument check: the forbidden list may contain known keywords only.
+     for (const auto& f : forbidden) {
+         UNIT_ASSERT(allTokens.contains(f)); //check that forbidden list contains tokens only(argument check)
+     }
+     TVector<TString> failed;
+     for (const auto& [candidate, expectedOk] : allTokens) {
+         const bool parsedOk = SqlToYql(makeRequest(candidate)).IsOk();
+         if (parsedOk != expectedOk) {
+             failed.push_back(candidate);
+         }
+     }
+     return failed;
+ }
+
+ Y_UNIT_TEST(TokensAsColumnName) { //id_expr
+     // Keywords that must be rejected as a bare column name in a SELECT list;
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST",
+         "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
+         "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL",
+         "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL",
+         "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP",
+         "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED",
+         "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT " << token << " FROM Plato.Input";
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsWithoutColumnName) { //id_without
+     // Keywords that must be rejected as a column name after "SELECT * WITHOUT";
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "ALL", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST",
+         "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
+         "DICT", "DISTINCT", "EMPTY_ACTION", "ENUM", "EXCEPT", "EXISTS", "FALSE", "FLOW", "FROM", "FULL", "GLOBAL",
+         "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL",
+         "NOT", "NULL", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP",
+         "SELECT", "SET", "STRUCT", "SYMMETRIC", "TAGGED", "TRUE", "TUPLE", "UNBOUNDED",
+         "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT * WITHOUT " << token << " FROM Plato.Input";
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsColumnNameInAddColumn) { //id_schema
+     // Keywords that must be rejected as the new column name in ALTER TABLE ... ADD COLUMN;
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "ANY", "AUTOMAP", "CALLABLE", "COLUMN", "DICT", "ENUM", "ERASE", "FALSE", "FLOW",
+         "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE",
+         "SET", "STREAM", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "ALTER TABLE Plato.Input ADD COLUMN " << token << " Bool";
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsColumnAlias) {
+     // Keywords that must be rejected as a column alias ("Col as <token>");
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "AUTOMAP", "FALSE",
+         "GLOBAL", "REPEATABLE", "TRUE"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT Col as " << token << " FROM Plato.Input";
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsTableName) { //id_table_or_type
+     // Keywords that must be rejected as a table name after "Plato.";
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "ANY", "AUTOMAP", "COLUMN", "ERASE", "FALSE",
+         "GLOBAL", "REPEATABLE", "STREAM", "TRUE"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT * FROM Plato." << token;
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsTableAlias) { //id_table
+     // Keywords that must be rejected as a table alias ("Plato.Input AS <token>");
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "AUTOMAP", "CALLABLE", "DICT", "ENUM", "FALSE", "FLOW",
+         "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE",
+         "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT * FROM Plato.Input AS " << token;
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsHints) { //id_hint
+     // Keywords that must be rejected as a table hint name ("Plato.Input WITH <token>");
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "AUTOMAP", "CALLABLE", "COLUMNS", "DICT", "ENUM", "FALSE", "FLOW",
+         "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE",
+         "SCHEMA", "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT * FROM Plato.Input WITH " << token;
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsWindow) { //id_window
+     // Keywords that must be rejected as a window name ("WINDOW <token> AS ()");
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "AUTOMAP", "CALLABLE", "DICT", "ENUM", "FALSE", "FLOW", "GLOBAL", "GROUPS", "LIST", "OPTIONAL",
+         "RANGE", "REPEATABLE", "RESOURCE", "ROWS", "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT * FROM Plato.Input WINDOW " << token << " AS ()";
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TokensAsIdExprIn) { //id_expr_in
+     // Keywords that must be rejected as the right-hand side of IN ("WHERE q IN <token>");
+     // every other keyword must be accepted there.
+     const THashSet<TString> reserved = {
+         "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST",
+         "CALLABLE", "CASE", "CAST", "COMPACT", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
+         "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL",
+         "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL",
+         "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP",
+         "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED",
+         "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT"
+     };
+     const auto makeQuery = [](const TString& token) -> TString {
+         return TStringBuilder() << "SELECT * FROM Plato.Input WHERE q IN " << token;
+     };
+     const auto failed = ValidateTokens(reserved, makeQuery);
+     UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ Y_UNIT_TEST(TableHints) {
+ // The INFER_SCHEMA hint must be accepted after WITH both bare and in parentheses.
+ UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input WITH INFER_SCHEMA").IsOk());
+ UNIT_ASSERT(SqlToYql("SELECT * FROM plato.Input WITH (INFER_SCHEMA)").IsOk());
+ }
+
+ Y_UNIT_TEST(InNoHints) {
+ // Plain IN over a literal tuple: checks the hint list for three values of
+ // VerifySqlInHints' third argument (empty, false, true).
+ // NOTE(review): the third argument presumably selects the ANSI IN mode — confirm in VerifySqlInHints.
+ TString query = "SELECT * FROM plato.Input WHERE key IN (1,2,3)";
+
+ VerifySqlInHints(query, { "'('('warnNoAnsi))" }, {});
+ VerifySqlInHints(query, { "'()" }, false);
+ VerifySqlInHints(query, { "'('('ansi))" }, true);
+ }
+
+ Y_UNIT_TEST(InHintCompact) {
+ // IN COMPACT (...) over a literal tuple is expected to yield the isCompact hint.
+ // should parse COMPACT as hint
+ TString query = "SELECT * FROM plato.Input WHERE key IN COMPACT(1, 2, 3)";
+
+ VerifySqlInHints(query, { "'('isCompact)" });
+ }
+
+ Y_UNIT_TEST(InHintSubquery) {
+ // IN over a named subquery is expected to yield the tableSource hint.
+ // should parse tableSource as hint
+ TString query = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN $subq";
+
+ VerifySqlInHints(query, { "'('tableSource)" });
+ }
+
+ Y_UNIT_TEST(InHintCompactSubquery) {
+ // IN COMPACT over a named subquery is expected to yield both the isCompact and tableSource hints.
+ TString query = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN COMPACT $subq";
+
+ VerifySqlInHints(query, { "'('isCompact)", "'('tableSource)" });
+ }
+
+ Y_UNIT_TEST(CompactKeywordNotReservedForNames) {
+ // COMPACT must stay usable as a column name (even next to the IN COMPACT hint) and as a table name.
+ UNIT_ASSERT(SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT(1, 2, 3)").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT * FROM COMPACT").IsOk());
+ }
+
+ Y_UNIT_TEST(FamilyKeywordNotReservedForNames) {
+ // Only FAMILY as a selected column name is checked at the moment; the checks that use
+ // FAMILY as a table name (CREATE TABLE / FROM) are disabled, see the FIXME below.
+ // FIXME: check if we can get old behaviour
+ //UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE FAMILY (FAMILY Uint32, PRIMARY KEY (FAMILY));").IsOk());
+ //UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM FAMILY").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM Input").IsOk());
+ }
+
+ Y_UNIT_TEST(ResetKeywordNotReservedForNames) {
+ // RESET must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE RESET (RESET Uint32, PRIMARY KEY (RESET));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT RESET FROM RESET").IsOk());
+ }
+
+ Y_UNIT_TEST(SyncKeywordNotReservedForNames) {
+ // SYNC must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SYNC (SYNC Uint32, PRIMARY KEY (SYNC));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT SYNC FROM SYNC").IsOk());
+ }
+
+ Y_UNIT_TEST(AsyncKeywordNotReservedForNames) {
+ // ASYNC must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE ASYNC (ASYNC Uint32, PRIMARY KEY (ASYNC));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT ASYNC FROM ASYNC").IsOk());
+ }
+
+ Y_UNIT_TEST(DisableKeywordNotReservedForNames) {
+ // DISABLE must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE DISABLE (DISABLE Uint32, PRIMARY KEY (DISABLE));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT DISABLE FROM DISABLE").IsOk());
+ }
+
+ Y_UNIT_TEST(ChangefeedKeywordNotReservedForNames) {
+ // CHANGEFEED must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE CHANGEFEED (CHANGEFEED Uint32, PRIMARY KEY (CHANGEFEED));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT CHANGEFEED FROM CHANGEFEED").IsOk());
+ }
+
+ Y_UNIT_TEST(ReplicationKeywordNotReservedForNames) {
+ // REPLICATION must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE REPLICATION (REPLICATION Uint32, PRIMARY KEY (REPLICATION));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT REPLICATION FROM REPLICATION").IsOk());
+ }
+
+ Y_UNIT_TEST(SecondsKeywordNotReservedForNames) {
+ // SECONDS must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SECONDS (SECONDS Uint32, PRIMARY KEY (SECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT SECONDS FROM SECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(MillisecondsKeywordNotReservedForNames) {
+ // MILLISECONDS must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MILLISECONDS (MILLISECONDS Uint32, PRIMARY KEY (MILLISECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT MILLISECONDS FROM MILLISECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(MicrosecondsKeywordNotReservedForNames) {
+ // MICROSECONDS must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MICROSECONDS (MICROSECONDS Uint32, PRIMARY KEY (MICROSECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT MICROSECONDS FROM MICROSECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(NanosecondsKeywordNotReservedForNames) {
+ // NANOSECONDS must stay usable as a table name and as a column name (CREATE TABLE and SELECT).
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE NANOSECONDS (NANOSECONDS Uint32, PRIMARY KEY (NANOSECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT NANOSECONDS FROM NANOSECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(Jubilee) {
+ // A simple INSERT INTO ... VALUES statement must produce a non-null AST root.
+ NYql::TAstParseResult res = SqlToYql("USE plato; INSERT INTO Arcadia (r2000000) VALUES (\"2M GET!!!\");");
+ UNIT_ASSERT(res.Root);
+ }
+
+ Y_UNIT_TEST(QualifiedAsteriskBefore) {
+     // "table.*" placed BEFORE a named expression in the SELECT list:
+     // the star item must be seen before the SqlProjectItem for megahelpful_len.
+     NYql::TAstParseResult res = SqlToYql(
+         "PRAGMA DisableSimpleColumns;"
+         "select interested_table.*, LENGTH(value) AS megahelpful_len from plato.Input as interested_table;"
+     );
+     UNIT_ASSERT(res.Root);
+
+     // Per-run flag captured by reference. It used to be a function-local static inside the
+     // lambda, which kept its value between executions of the test within one process.
+     bool seenStar = false;
+     TVerifyLineFunc verifyLine = [&seenStar](const TString& word, const TString& line) {
+         if (word == "FlattenMembers") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table."));
+         } else if (word == "SqlProjectItem") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len")));
+             UNIT_ASSERT_VALUES_EQUAL(seenStar, true);
+         } else if (word == "SqlProjectStarItem") {
+             seenStar = true;
+         }
+     };
+     TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]);
+ }
+
+ Y_UNIT_TEST(QualifiedAsteriskAfter) {
+     // "table.*" placed AFTER a named expression in the SELECT list:
+     // the SqlProjectItem for megahelpful_len must be seen before any star item.
+     NYql::TAstParseResult res = SqlToYql(
+         "PRAGMA DisableSimpleColumns;"
+         "select LENGTH(value) AS megahelpful_len, interested_table.* from plato.Input as interested_table;"
+     );
+     UNIT_ASSERT(res.Root);
+
+     // Per-run flag captured by reference. It used to be a function-local static inside the
+     // lambda; once set it stayed true, so a second execution of this test in the same
+     // process would fail the `seenStar == false` check below.
+     bool seenStar = false;
+     TVerifyLineFunc verifyLine = [&seenStar](const TString& word, const TString& line) {
+         if (word == "FlattenMembers") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table."));
+         } else if (word == "SqlProjectItem") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len")));
+             UNIT_ASSERT_VALUES_EQUAL(seenStar, false);
+         } else if (word == "SqlProjectStarItem") {
+             seenStar = true;
+         }
+     };
+     TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]);
+ }
+
+ Y_UNIT_TEST(QualifiedMembers) {
+ // Selecting alias-qualified columns: the program must contain two SqlProjectItem lines
+ // (each mentioning key or value, without the "interested_table." prefix), no star item,
+ // and one Write! line.
+ NYql::TAstParseResult res = SqlToYql("select interested_table.key, interested_table.value from plato.Input as interested_table;");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ const bool fieldKey = TString::npos != line.find(Quote("key"));
+ const bool fieldValue = TString::npos != line.find(Quote("value"));
+ const bool refOnTable = TString::npos != line.find("interested_table.");
+ if (word == "SqlProjectItem") {
+ UNIT_ASSERT(fieldKey || fieldValue);
+ UNIT_ASSERT(!refOnTable);
+ } else if (word == "Write!") {
+ // The Write! line must mention both columns and no table-qualified reference.
+ UNIT_ASSERT(fieldKey && fieldValue && !refOnTable);
+ }
+ };
+ TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("Write!"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+ }
+
+ // Two-table join selecting table_bb.* plus one labeled column of table_aa.
+ Y_UNIT_TEST(JoinParseCorrect) {
+     NYql::TAstParseResult ast = SqlToYql(
+         "PRAGMA DisableSimpleColumns;"
+         " SELECT table_bb.*, table_aa.key as megakey"
+         " FROM plato.Input AS table_aa"
+         " JOIN plato.Input AS table_bb"
+         " ON table_aa.value == table_bb.value;"
+     );
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "SelectMembers") {
+             // Only the starred table may appear as a member prefix.
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("table_aa."));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("table_bb."));
+         } else if (token == "SqlProjectItem") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("megakey")));
+         } else if (token == "SqlColumn") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("table_aa")));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("key")));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("SqlProjectItem"), 0},
+         {TString("SqlProjectStarItem"), 0},
+         {TString("SelectMembers"), 0},
+         {TString("SqlColumn"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SqlProjectStarItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SelectMembers"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SqlColumn"]);
+ }
+
+ // Three-table join: two qualified asterisks (table_bb, table_cc) plus one labeled
+ // column taken from table_aa.
+ Y_UNIT_TEST(Join3Table) {
+     NYql::TAstParseResult res = SqlToYql(
+         " PRAGMA DisableSimpleColumns;"
+         " SELECT table_bb.*, table_aa.key as gigakey, table_cc.* "
+         " FROM plato.Input AS table_aa"
+         " JOIN plato.Input AS table_bb ON table_aa.key == table_bb.key"
+         " JOIN plato.Input AS table_cc ON table_aa.subkey == table_cc.subkey;"
+     );
+     // NOTE(review): whatever Err2Str produces is discarded here. The call is kept
+     // because its definition is not visible from this chunk - confirm it can be dropped.
+     Err2Str(res);
+     // Attach the collected issues so a failed translation is diagnosable from the log.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "SelectMembers") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa."));
+             UNIT_ASSERT(line.find("table_bb.") != TString::npos || line.find("table_cc.") != TString::npos);
+         } else if (word == "SqlProjectItem") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("gigakey")));
+         } else if (word == "SqlColumn") {
+             const auto posTableAA = line.find(Quote("table_aa"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, posTableAA);
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key")));
+             // No further occurrence of the alias past the first match.
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa", posTableAA + 3));
+         }
+     };
+     TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectStarItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SelectMembers"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]);
+ }
+
+ // With DisableAnsiImplicitCrossJoin a comma-separated FROM list must be rejected
+ // with exactly one error.
+ Y_UNIT_TEST(DisabledJoinCartesianProduct) {
+     NYql::TAstParseResult ast = SqlToYql("pragma DisableAnsiImplicitCrossJoin; use plato; select * from A,B,C");
+     UNIT_ASSERT(!ast.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         ast.Issues.ToString(),
+         "<main>:1:67: Error: Cartesian product of tables is disabled. Please use explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin\n");
+ }
+
+ // With AnsiImplicitCrossJoin the list A,B,C yields a single EquiJoin line that
+ // mentions "Cross" at least twice.
+ Y_UNIT_TEST(JoinCartesianProduct) {
+     NYql::TAstParseResult ast = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from A,B,C");
+     UNIT_ASSERT(ast.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "EquiJoin") {
+             auto firstCross = text.find("Cross");
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, firstCross);
+             // Search again right after the first hit for the second join.
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("Cross", firstCross + 1));
+         }
+     };
+     TWordCountHive hits = {{TString("EquiJoin"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["EquiJoin"]);
+ }
+
+ // Join between a table view and a plain table, projecting one column from each side.
+ Y_UNIT_TEST(JoinWithoutConcreteColumns) {
+     NYql::TAstParseResult ast = SqlToYql(
+         " use plato;"
+         " SELECT a.v, b.value"
+         " FROM `Input1` VIEW `ksv` AS a"
+         " JOIN `Input2` AS b"
+         " ON a.k == b.key;"
+     );
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "SqlProjectItem") {
+             UNIT_ASSERT(text.find(Quote("a.v")) != TString::npos || text.find(Quote("b.value")) != TString::npos);
+         } else if (token == "SqlColumn") {
+             const auto aliasAPos = text.find(Quote("a"));
+             const auto aliasBPos = text.find(Quote("b"));
+             if (aliasAPos != TString::npos) {
+                 // Column of the left source.
+                 UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("v")));
+             } else {
+                 // Otherwise it has to be the right source with its "value" column.
+                 UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, aliasBPos);
+                 UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("value")));
+             }
+         }
+     };
+     TWordCountHive hits = {
+         {TString("SqlProjectStarItem"), 0},
+         {TString("SqlProjectItem"), 0},
+         {TString("SqlColumn"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(0, hits["SqlProjectStarItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, hits["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, hits["SqlColumn"]);
+ }
+
+ // Self-join projecting the same column name from both sides: the project items carry
+ // the "a."/"b." prefixed names and the write line must not duplicate a prefix.
+ Y_UNIT_TEST(JoinWithSameValues) {
+     NYql::TAstParseResult res = SqlToYql("SELECT a.value, b.value FROM plato.Input AS a JOIN plato.Input as b ON a.key == b.key;");
+     UNIT_ASSERT(res.Root);
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "SqlProjectItem") {
+             const bool isValueFromA = TString::npos != line.find(Quote("a.value"));
+             const bool isValueFromB = TString::npos != line.find(Quote("b.value"));
+             UNIT_ASSERT(isValueFromA || isValueFromB);
+         } else if (word == "Write!") {
+             // Prefixes must not be applied twice.
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("a.a."));
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("b.b."));
+         }
+     };
+     TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("Write!"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+     UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+ }
+
+ // The same column name selected from both join sides must still yield an AST root.
+ Y_UNIT_TEST(SameColumnsForDifferentTables) {
+     NYql::TAstParseResult ast = SqlToYql("SELECT a.key, b.key FROM plato.Input as a JOIN plato.Input as b on a.key==b.key;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // Same as above, but through FULL JOIN ... USING(key).
+ Y_UNIT_TEST(SameColumnsForDifferentTablesFullJoin) {
+     NYql::TAstParseResult ast = SqlToYql("SELECT a.key, b.key, a.value, b.value FROM plato.Input AS a FULL JOIN plato.Input AS b USING(key);");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // The StreamLookup join hint is accepted in both spellings exercised below.
+ Y_UNIT_TEST(JoinStreamLookupStrategyHint) {
+     {
+         NYql::TAstParseResult ast = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ plato.Input AS b USING(key);");
+         UNIT_ASSERT(ast.Root);
+     }
+     // Lower-case spelling of the hint name is accepted as well.
+     {
+         NYql::TAstParseResult ast = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ streamlookup() */ plato.Input AS b USING(key);");
+         UNIT_ASSERT(ast.Root);
+     }
+ }
+
+ // Two different strategy hints on one join are reported as a conflict.
+ Y_UNIT_TEST(JoinConflictingStrategyHint) {
+     {
+         NYql::TAstParseResult ast = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ Merge() */ plato.Input AS b USING(key);");
+         UNIT_ASSERT(!ast.Root);
+         UNIT_ASSERT_STRINGS_EQUAL(
+             ast.Issues.ToString(),
+             "<main>:1:91: Error: Conflicting join strategy hints\n");
+     }
+ }
+
+ // Repeating the same strategy hint on one join is reported as a duplicate.
+ Y_UNIT_TEST(JoinDuplicatingStrategyHint) {
+     {
+         NYql::TAstParseResult ast = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ StreamLookup() */ plato.Input AS b USING(key);");
+         UNIT_ASSERT(!ast.Root);
+         UNIT_ASSERT_STRINGS_EQUAL(
+             ast.Issues.ToString(),
+             "<main>:1:98: Error: Duplicate join strategy hint\n");
+     }
+ }
+
+ // A strategy hint on an explicit CROSS JOIN still produces a root, plus a warning.
+ Y_UNIT_TEST(WarnCrossJoinStrategyHint) {
+     NYql::TAstParseResult ast = SqlToYql("SELECT * FROM plato.Input AS a CROSS JOIN /*+ merge() */ plato.Input AS b;");
+     UNIT_ASSERT(ast.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         ast.Issues.ToString(),
+         "<main>:1:32: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n");
+ }
+
+ // Same warning when the cross join comes from a comma-separated FROM list.
+ Y_UNIT_TEST(WarnCartesianProductStrategyHint) {
+     NYql::TAstParseResult ast = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; SELECT * FROM A, /*+ merge() */ B;");
+     UNIT_ASSERT(ast.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         ast.Issues.ToString(),
+         "<main>:1:74: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n");
+ }
+
+ // An unrecognized hint name yields a warning while a root is still produced.
+ Y_UNIT_TEST(WarnUnknownJoinStrategyHint) {
+     NYql::TAstParseResult ast = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ xmerge() */ plato.Input AS b USING (key);");
+     UNIT_ASSERT(ast.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         ast.Issues.ToString(),
+         "<main>:1:41: Warning: Unsupported join hint: xmerge, code: 4534\n");
+ }
+
+ // Columns relabeled with each other's names (key <-> subkey) must yield an AST root.
+ Y_UNIT_TEST(ReverseLabels) {
+     NYql::TAstParseResult ast = SqlToYql("select in.key as subkey, subkey as key from plato.Input as in;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // Unlabeled expression followed by a column explicitly labeled "column1".
+ Y_UNIT_TEST(AutogenerationAliasWithoutCollisionConflict1) {
+     NYql::TAstParseResult ast = SqlToYql("select LENGTH(Value), key as column1 from plato.Input;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // Column explicitly labeled "column0" followed by an unlabeled expression.
+ Y_UNIT_TEST(AutogenerationAliasWithoutCollision2Conflict2) {
+     NYql::TAstParseResult ast = SqlToYql("select key as column0, LENGTH(Value) from plato.Input;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // Qualified asterisk over a table alias combined with a plain column reference.
+ Y_UNIT_TEST(InputAliasForQualifiedAsterisk) {
+     NYql::TAstParseResult ast = SqlToYql("use plato; select zyuzya.*, key from plato.Input as zyuzya;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // A trailing comma after the last result column is accepted.
+ Y_UNIT_TEST(SelectSupportsResultColumnsWithTrailingComma) {
+     NYql::TAstParseResult ast = SqlToYql("select a, b, c, from plato.Input;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // ORDER BY on a label: the Sort line refers to the label rather than the source
+ // column, and the label does not leak into the DataSource line.
+ Y_UNIT_TEST(SelectOrderByLabeledColumn) {
+     NYql::TAstParseResult ast = SqlToYql("pragma DisableOrderedColumns; select key as goal from plato.Input order by goal");
+     UNIT_ASSERT(ast.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "DataSource") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("plato"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("Input"));
+
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("goal"));
+         } else if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("goal"));
+
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("key"));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("DataSource"), 0},
+         {TString("Sort"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["DataSource"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+ }
+
+ // ORDER BY over a simple arithmetic expression of a selected column.
+ Y_UNIT_TEST(SelectOrderBySimpleExpr) {
+     NYql::TAstParseResult ast = SqlToYql("select a from plato.Input order by a + a");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // The same column listed twice in ORDER BY is accepted.
+ Y_UNIT_TEST(SelectOrderByDuplicateLabels) {
+     NYql::TAstParseResult ast = SqlToYql("select a from plato.Input order by a, a");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // ORDER BY a sum of casts over an aliased source: the Sort line uses unprefixed
+ // column names and carries (Bool 'true).
+ Y_UNIT_TEST(SelectOrderByExpression) {
+     NYql::TAstParseResult ast = SqlToYql("select * from plato.Input as i order by cast(key as uint32) + cast(subkey as uint32)");
+     UNIT_ASSERT(ast.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"+MayWarn\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("key"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("subkey"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("(Bool 'true)"));
+
+             // The source alias must not be prepended to the column names.
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("i.key"));
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("i.subkey"));
+         }
+     };
+     TWordCountHive hits = {{TString("Sort"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+ }
+
+ // Descending ORDER BY over a difference of casts, with simple columns disabled:
+ // the Sort line carries (Bool 'false) and the Write! line keeps the "i." prefix.
+ Y_UNIT_TEST(SelectOrderByExpressionDesc) {
+     NYql::TAstParseResult ast = SqlToYql("pragma disablesimplecolumns; select i.*, key, subkey from plato.Input as i order by cast(i.key as uint32) - cast(i.subkey as uint32) desc");
+     UNIT_ASSERT(ast.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"-MayWarn\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("(Bool 'false)"));
+         } else if (token == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'columns"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("prefix"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"i.\""));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("Sort"), 0},
+         {TString("Write!"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Ascending ORDER BY over a modulo of casts: the Sort line carries (Bool 'true)
+ // and the Write! line contains no "i." prefix.
+ Y_UNIT_TEST(SelectOrderByExpressionAsc) {
+     NYql::TAstParseResult ast = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) % cast(i.subkey as uint32) asc");
+     UNIT_ASSERT(ast.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"%MayWarn\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("(Bool 'true)"));
+         } else if (token == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'columns"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("i."));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("Sort"), 0},
+         {TString("Write!"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // An outer query may address a subselect's column through the subselect alias.
+ Y_UNIT_TEST(ReferenceToKeyInSubselect) {
+     NYql::TAstParseResult ast = SqlToYql("select b.key from (select a.key from plato.Input as a) as b;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // ORDER BY over a cast of an unqualified column while the select list is qualified.
+ Y_UNIT_TEST(OrderByCastValue) {
+     NYql::TAstParseResult ast = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) desc;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // GROUP BY over a cast expression.
+ Y_UNIT_TEST(GroupByCastValue) {
+     NYql::TAstParseResult ast = SqlToYql("select count(1) from plato.Input as i group by cast(key as uint8);");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // The words "in" and "check" are usable as column names.
+ Y_UNIT_TEST(KeywordInSelectColumns) {
+     NYql::TAstParseResult ast = SqlToYql("select in, s.check from (select 1 as in, \"test\" as check) as s;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // SELECT * combined with GROUP BY yields an AST root.
+ Y_UNIT_TEST(SelectAllGroupBy) {
+     NYql::TAstParseResult ast = SqlToYql("select * from plato.Input group by subkey;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // CREATE OBJECT ... WITH (k=v, ...): the Write line lists the features (K2 before
+ // Key1 in the expected text) and uses the createObject mode.
+ Y_UNIT_TEST(CreateObjectWithFeatures) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE OBJECT IF NOT EXISTS maps to the createObjectIfNotExists mode.
+ Y_UNIT_TEST(CreateObjectIfNotExists) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE OBJECT IF NOT EXISTS secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObjectIfNotExists"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE OBJECT with feature values given in several quoting styles (double quotes,
+ // single quotes, bare identifier, empty string, backticks, escaped quote).
+ Y_UNIT_TEST(CreateObjectWithFeaturesStrings) {
+     NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=\"Value1\", K2='V2', K3=V3, K4='', K5=`aaa`, K6='a\\'aa');");
+     UNIT_ASSERT(res.Root);
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"K3\" '\"V3\") '('\"K4\" '\"\") '('\"K5\" '\"aaa\") '('\"K6\" '\"a'aa\") '('\"Key1\" '\"Value1\")"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+         }
+     };
+
+     TWordCountHive elementStat = {{TString("Write"), 0}, {TString("SECRET"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     // Without these counts the test would pass even if no "Write" line was ever
+     // handed to verifyLine; the expectations mirror the sibling CREATE OBJECT tests.
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]);
+ }
+
+ // UPSERT OBJECT ... WITH (...) maps to the upsertObject mode with the features listed.
+ Y_UNIT_TEST(UpsertObjectWithFeatures) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; UPSERT OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("upsertObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // A bare flag (RECURSE) mixed with key=value features shows up as a one-element tuple.
+ Y_UNIT_TEST(CreateObjectWithFeaturesAndFlags) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2, RECURSE);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('\"Key1\" '\"Value1\") '('\"RECURSE\")"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // "type" is accepted directly after a literal in the select list.
+ Y_UNIT_TEST(Select1Type) {
+     NYql::TAstParseResult ast = SqlToYql("SELECT 1 type;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // "type" is accepted directly after a table name in FROM.
+ Y_UNIT_TEST(SelectTableType) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; SELECT * from T type;");
+     UNIT_ASSERT(ast.Root);
+ }
+
+ // CREATE OBJECT without a WITH clause: the Write line has no 'features entry.
+ Y_UNIT_TEST(CreateObjectNoFeatures) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // ALTER OBJECT ... SET (...): a declared parameter used as a feature value appears
+ // as an EvaluateAtom over "$path", next to a plain literal feature.
+ Y_UNIT_TEST(AlterObjectWithFeatures) {
+     NYql::TAstParseResult ast = SqlToYql(
+         "USE plato;\n"
+         "declare $path as String;\n"
+         "ALTER OBJECT secretId (TYPE SECRET) SET (Key1=$path, K2=V2);"
+     );
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'\"Key1\" (EvaluateAtom \"$path\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'\"K2\" '\"V2\""));
+
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("alterObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // ALTER OBJECT without a SET clause yields no AST root.
+ Y_UNIT_TEST(AlterObjectNoFeatures) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; ALTER OBJECT secretId (TYPE SECRET);");
+     UNIT_ASSERT(!ast.Root);
+ }
+
+ // DROP OBJECT without a WITH clause: dropObject mode and no 'features entry.
+ Y_UNIT_TEST(DropObjectNoFeatures) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // DROP OBJECT ... WITH (A, B, C): the Write line now carries a 'features entry.
+ Y_UNIT_TEST(DropObjectWithFeatures) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH (A, B, C);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // DROP OBJECT ... WITH OVERRIDE (single option, no parentheses).
+ Y_UNIT_TEST(DropObjectWithOneOption) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH OVERRIDE;");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'\"OVERRIDE\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObject"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // DROP OBJECT IF EXISTS maps to the dropObjectIfExists mode.
+ Y_UNIT_TEST(DropObjectIfExists) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; DROP OBJECT IF EXISTS secretId (TYPE SECRET);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObjectIfExists"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE TABLE with a composite PRIMARY KEY: both key columns appear on the Write
+ // line and exactly one "primarykey" entry is emitted.
+ Y_UNIT_TEST(PrimaryKeyParseCorrect) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, Subkey Int64, Value String, PRIMARY KEY (Key, Subkey));");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"Key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"Subkey\""));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("primarykey"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["primarykey"]);
+ }
+
+ // NOT NULL column of a YQL type: plain DataType plus a not_null column constraint.
+ Y_UNIT_TEST(CreateTableNonNullableYqlTypeAstCorrect) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a int32 not null);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '())))))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expected));
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Nullable column of a YQL type: wrapped in AsOptionalType, empty constraint list.
+ Y_UNIT_TEST(CreateTableNullableYqlTypeAstCorrect) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a int32);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expected));
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // NOT NULL column of a pg type: plain PgType plus a not_null column constraint.
+ Y_UNIT_TEST(CreateTableNonNullablePgTypeAstCorrect) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a pg_int4 not null);");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (PgType '_int4) '('columnConstrains '('('not_null))) '())))))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expected));
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Nullable column of a pg type: PgType wrapped in AsOptionalType, empty constraint list.
+ // The unconditional AST dump to Cout that used to sit here was debugging residue
+ // (no sibling test prints) and has been removed to keep the test output quiet.
+ Y_UNIT_TEST(CreateTableNullablePgTypeAstCorrect) {
+     NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a pg_int4);");
+     UNIT_ASSERT(res.Root);
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                 line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (PgType '_int4)) '('columnConstrains '()) '()))))))__"));
+         }
+     };
+
+     TWordCountHive elementStat = {{TString("Write!"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+ }
+
+ // A primary-key column declared without NOT NULL stays optional in the emitted schema.
+ Y_UNIT_TEST(CreateTableNullPkColumnsAreAllowed) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a));");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expected));
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // NOT NULL on a primary-key column: a single not_null constraint and a plain DataType.
+ Y_UNIT_TEST(CreateTableNotNullPkColumnsAreIdempotentAstCorrect) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a));");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '()))) '('primarykey '('"a"))))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expected));
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // CREATE TABLE IF NOT EXISTS switches the write mode to create_if_not_exists.
+ Y_UNIT_TEST(CreateTableWithIfNotExists) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE IF NOT EXISTS t (a int32, primary key(a));");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create_if_not_exists) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expected));
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // CREATE TEMP TABLE adds a 'temporary setting; the checked line is attached to the
+ // assertion message on failure.
+ Y_UNIT_TEST(CreateTempTable) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TEMP TABLE t (a int32, primary key(a));");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, text.find(expected), text);
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // CREATE TEMPORARY TABLE is checked against the same expected text as CREATE TEMP TABLE.
+ Y_UNIT_TEST(CreateTemporaryTable) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TEMPORARY TABLE t (a int32, primary key(a));");
+     UNIT_ASSERT(ast.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expected = R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, text.find(expected), text);
+         }
+     };
+
+     TWordCountHive hits = {{TString("Write!"), 0}};
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // A plain CREATE TABLE whose column has no type yields no AST root.
+ Y_UNIT_TEST(CreateTableWithoutTypes) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a));");
+     UNIT_ASSERT(!ast.Root);
+ }
+
+ // CREATE TABLE ... AS SELECT with typed columns yields no AST root.
+ Y_UNIT_TEST(CreateTableAsSelectWithTypes) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a)) AS SELECT * FROM ts;");
+     UNIT_ASSERT(!ast.Root);
+ }
+
+ // CREATE TABLE (untyped columns) AS SELECT: checks both the schema write and the
+ // read of the source table.
+ Y_UNIT_TEST(CreateTableAsSelect) {
+     NYql::TAstParseResult ast = SqlToYql("USE plato; CREATE TABLE t (a, b, primary key(a)) AS SELECT * FROM ts;");
+     UNIT_ASSERT_C(ast.Root, ast.Issues.ToString());
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Write!") {
+             const char* const expectedWrite = R"__((let world (Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a") '('"b"))) '('primarykey '('"a"))))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expectedWrite));
+         }
+         if (token == "Read!") {
+             const char* const expectedRead = R"__((Read! world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__";
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expectedRead));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+         {TString("Read!"), 0}
+     };
+     VerifyProgram(ast, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Read!"]);
+ }
+
+ // CREATE TABLE ... AS SELECT that declares only a primary key: the Write! line must carry
+ // an empty column list plus the key, and the source table must be read once.
+ Y_UNIT_TEST(CreateTableAsSelectOnlyPrimary) {
+ auto ast = SqlToYql("USE plato; CREATE TABLE t (primary key(a)) AS SELECT * FROM ts;");
+ UNIT_ASSERT_C(ast.Root, ast.Issues.ToString());
+
+ TWordCountHive counters = {{TString("Write!"), 0}, {TString("Read!"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token == "Write!") {
+ const auto expectedWrite = R"__((let world (Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '()) '('primarykey '('"a"))))))__";
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expectedWrite));
+ }
+ if (token == "Read!") {
+ const auto expectedRead = R"__((Read! world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__";
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(expectedRead));
+ }
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write!"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Read!"]);
+ }
+
+ // CREATE TABLE ... AS VALUES is expected to fail translation.
+ Y_UNIT_TEST(CreateTableAsValuesFail) {
+ const TString query = "USE plato; CREATE TABLE t (a, primary key(a)) AS VALUES (1), (2);";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(!ast.Root);
+ }
+
+ // Listing the same column twice in PRIMARY KEY is expected to fail translation.
+ Y_UNIT_TEST(CreateTableDuplicatedPkColumnsFail) {
+ const TString query = "USE plato; CREATE TABLE t (a int32 not null, primary key(a, a));";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(!ast.Root);
+ }
+
+ // DELETE ... WHERE translated with "kikimr": the word Write must occur once, on a line
+ // that carries the 'delete mode marker.
+ Y_UNIT_TEST(DeleteFromTableByKey) {
+ auto ast = SqlToYql("delete from plato.Input where key = 200;", 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // Unconditional DELETE translated with "kikimr": the word Write must occur once, on a
+ // line that carries the 'delete mode marker.
+ Y_UNIT_TEST(DeleteFromTable) {
+ auto ast = SqlToYql("delete from plato.Input;", 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // DELETE ... ON (cols) VALUES translated with "kikimr": the Write line must carry the
+ // 'delete_on mode marker and the word Write must occur once.
+ Y_UNIT_TEST(DeleteFromTableOnValues) {
+ const TString query = "delete from plato.Input on (key) values (1);";
+ auto ast = SqlToYql(query, 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete_on)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // DELETE ... ON SELECT translated with "kikimr": the Write line must carry the
+ // 'delete_on mode marker and the word Write must occur once.
+ Y_UNIT_TEST(DeleteFromTableOnSelect) {
+ const TString query = "delete from plato.Input on select key from plato.Input where value > 0;";
+ auto ast = SqlToYql(query, 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete_on)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // UPDATE ... SET col = literal: the Write line must be in 'update mode, and the AsStruct
+ // line must mention key or value together with the matching quoted literal.
+ Y_UNIT_TEST(UpdateByValues) {
+ auto ast = SqlToYql("update plato.Input set key = 777, value = 'cool' where key = 200;", 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}, {TString("AsStruct"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'update)"));
+ return;
+ }
+ if (token != "AsStruct") {
+ return;
+ }
+ const bool hasKey = text.find("key") != TString::npos;
+ const bool hasValue = text.find("value") != TString::npos;
+ UNIT_ASSERT(hasKey || hasValue);
+ if (hasKey) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("777")));
+ } else if (hasValue) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("cool")));
+ }
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["AsStruct"]);
+ }
+
+ // UPDATE ... SET (cols) = (literals): the Write line must be in 'update mode, and the
+ // AsStruct line must carry the quoted literal matching the column it mentions.
+ Y_UNIT_TEST(UpdateByMultiValues) {
+ auto ast = SqlToYql("update plato.Input set (key, value, subkey) = ('2','ddd',':') where key = 200;", 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}, {TString("AsStruct"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'update)"));
+ return;
+ }
+ if (token != "AsStruct") {
+ return;
+ }
+ // "key" is a substring of "subkey", hence the explicit !hasSubkey below.
+ const bool hasKey = text.find("key") != TString::npos;
+ const bool hasSubkey = text.find("subkey") != TString::npos;
+ const bool hasValue = text.find("value") != TString::npos;
+ UNIT_ASSERT(hasKey || hasSubkey || hasValue);
+ if (hasKey && !hasSubkey) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("2")));
+ } else if (hasSubkey) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote(":")));
+ } else if (hasValue) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("ddd")));
+ }
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["AsStruct"]);
+ }
+
+ Y_UNIT_TEST(UpdateBySelect) { // UPDATE ... SET (cols) = (SELECT ...): expects one "Write" and the ('mode 'update) marker on the Write line or on the callback invocation right after it.
+ NYql::TAstParseResult res = SqlToYql("update plato.Input set (key, value, subkey) = (select key, value, subkey from plato.Input where key = 911) where key = 200;", 10, "kikimr");
+ UNIT_ASSERT(res.Root);
+
+ int lineIndex = 0; // Incremented once per callback invocation (see ++lineIndex below), not per program line.
+ int writeLineIndex = -1; // Invocation index at which "Write" was seen; -1 until then.
+ bool found = false; // Becomes true once the update-mode marker has been located.
+
+ TVerifyLineFunc verifyLine = [&lineIndex, &writeLineIndex, &found](const TString& word, const TString& line) {
+ if (word == "Write") {
+ writeLineIndex = lineIndex;
+ found = line.find("('mode 'update)") != TString::npos; // Marker on the same line as Write.
+ } else if (word == "mode") {
+ found |= lineIndex == writeLineIndex + 1 && line.find("('mode 'update)") != TString::npos; // Also accept the marker on the invocation immediately following Write.
+ UNIT_ASSERT(found);
+ }
+
+ ++lineIndex;
+ };
+
+ TWordCountHive elementStat = {{TString("Write"), 0}, {TString("mode"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); // NOTE(review): 'found' is asserted only inside the "mode" branch; if that branch never runs, the marker check is skipped - confirm this is intended.
+ }
+
+ // UPDATE without WHERE that rewrites a column from its own value: the Write line must be
+ // in 'update mode and the AsStruct line must contain both quoted "subkey" and quoted "s".
+ Y_UNIT_TEST(UpdateSelfModifyAll) {
+ auto ast = SqlToYql("update plato.Input set subkey = subkey + 's';", 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}, {TString("AsStruct"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'update)"));
+ return;
+ }
+ if (token != "AsStruct") {
+ return;
+ }
+ const bool hasSubkey = text.find("subkey") != TString::npos;
+ UNIT_ASSERT(hasSubkey);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("subkey")));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("s")));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["AsStruct"]);
+ }
+
+ // UPDATE ... ON (cols) VALUES: the Write line must carry the 'update_on mode marker and
+ // the word Write must occur once.
+ Y_UNIT_TEST(UpdateOnValues) {
+ auto ast = SqlToYql("update plato.Input on (key, value) values (5, 'cool')", 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'update_on)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // UPDATE ... ON SELECT: the Write line must carry the 'update_on mode marker and the
+ // word Write must occur once.
+ Y_UNIT_TEST(UpdateOnSelect) {
+ const TString query = "update plato.Input on select key, value + 1 as value from plato.Input";
+ auto ast = SqlToYql(query, 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Write"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'update_on)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // A single UNION ALL of two selects must produce exactly one "UnionAll" occurrence.
+ Y_UNIT_TEST(UnionAllTest) {
+ const TString query = "SELECT key FROM plato.Input UNION ALL select subkey FROM plato.Input;";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("UnionAll"), 0}};
+ VerifyProgram(ast, counters, {});
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["UnionAll"]);
+ }
+
+ // A single UNION of two selects must produce exactly one "Union" occurrence.
+ Y_UNIT_TEST(UnionTest) {
+ const TString query = "SELECT key FROM plato.Input UNION select subkey FROM plato.Input;";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Union"), 0}};
+ VerifyProgram(ast, counters, {});
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Union"]);
+ }
+
+ // A chain mixing UNION ALL and UNION: the expected counts are 2 for "UnionAll" and 3 for
+ // "Union" (the counter is keyed by the words listed in the hive below).
+ Y_UNIT_TEST(UnionAggregationTest) {
+ const TString query = R"(
+ SELECT 1
+ UNION ALL
+ SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
+ UNION
+ SELECT 1 UNION SELECT 1 UNION SELECT 1 UNION SELECT 1
+ UNION ALL
+ SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1;
+ )";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Union"), 0}, {TString("UnionAll"), 0}};
+ VerifyProgram(ast, counters, {});
+ UNIT_ASSERT_VALUES_EQUAL(2, counters["UnionAll"]);
+ UNIT_ASSERT_VALUES_EQUAL(3, counters["Union"]);
+ }
+
+ // DECLARE of a Decimal(22,9) parameter followed by its use must translate.
+ Y_UNIT_TEST(DeclareDecimalParameter) {
+ const TString query = "declare $value as Decimal(22,9); select $value as cnt;";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // GROUP BY with an aliased key that is also used in ORDER BY must translate.
+ Y_UNIT_TEST(SimpleGroupBy) {
+ const TString query = "select count(1),z from plato.Input group by key as z order by z;";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // An empty column name (``) in an INSERT column list must not crash the translator.
+ Y_UNIT_TEST(EmptyColumnName0) {
+ // This now parses successfully; the error is reported later, at the validation step, e.g.
+ // "4:31:Empty struct member name is not allowed" in "4:31:Function: AddMember".
+ const TString query = "insert into plato.Output (``, list1) values (0, AsList(0, 1, 2));";
+ auto ast = SqlToYql(query);
+ // Only verify that translation succeeded without crashing.
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // ROLLBACK translated with "kikimr" must yield exactly one "rollback" occurrence.
+ Y_UNIT_TEST(KikimrRollback) {
+ const TString query = "use plato; select * from Input; rollback;";
+ auto ast = SqlToYql(query, 10, "kikimr");
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("rollback"), 0}};
+ VerifyProgram(ast, counters);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["rollback"]);
+ }
+
+ // PRAGMA file must produce the exact AddFileByUrl Configure! statement once.
+ Y_UNIT_TEST(PragmaFile) {
+ auto ast = SqlToYql(R"(pragma file("HW", "sbr:181041334");)");
+ UNIT_ASSERT(ast.Root);
+
+ // The whole expected statement is the counted "word"; it is the only entry in the hive.
+ const TString expectedStatement = R"((let world (Configure! world (DataSource '"config") '"AddFileByUrl" '"HW" '"sbr:181041334")))";
+ TWordCountHive counters = {{TString(expectedStatement), 0}};
+ VerifyProgram(ast, counters);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters.cbegin()->second);
+ }
+
+ // Passing a named lambda to FILTER() as the table-name predicate must translate.
+ Y_UNIT_TEST(DoNotCrashOnNamedInFilter) {
+ const TString query = "USE plato; $all = ($table_name) -> { return true; }; SELECT * FROM FILTER(Input, $all)";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // Checks that, in the pretty-printed program, "AddFileByUrl" (from PRAGMA file) appears
+ // before "ImportUdfs" (from PRAGMA udf).
+ Y_UNIT_TEST(PragmasFileAndUdfOrder) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ PRAGMA file("libvideoplayers_udf.so", "https://proxy.sandbox.yandex-team.ru/235185290");
+ PRAGMA udf("libvideoplayers_udf.so");
+ )");
+ UNIT_ASSERT(res.Root);
+
+ const auto programm = GetPrettyPrint(res);
+ const auto file = programm.find("AddFileByUrl");
+ const auto udfs = programm.find("ImportUdfs");
+ // find() returns npos (the maximum position value) on a miss, so a bare `file < udfs`
+ // would also pass when "ImportUdfs" is absent. Require both markers before comparing.
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, file);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, udfs);
+ UNIT_ASSERT(file < udfs);
+ }
+
+ // PROCESS ... USING Kikimr::PushData: the line with the call must mention both TupleType
+ // and TypeOf, and "Kikimr.PushData" must occur once.
+ Y_UNIT_TEST(ProcessUserType) {
+ const TString query = "process plato.Input using Kikimr::PushData(TableRows());";
+ auto ast = SqlToYql(query, 1, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("Kikimr.PushData"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Kikimr.PushData") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("TupleType"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("TypeOf"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Kikimr.PushData"]);
+ }
+
+ // PROCESS ... USING YDB::PushData with a SecureParam argument: the line with the call
+ // must mention TupleType, TypeOf and the "api:oauth" parameter name.
+ Y_UNIT_TEST(ProcessUserTypeAuth) {
+ const TString query = "process plato.Input using YDB::PushData(TableRows(), AsTuple('oauth', SecureParam('api:oauth')));";
+ auto ast = SqlToYql(query, 1, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("YDB.PushData"), 0}};
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "YDB.PushData") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("TupleType"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("TypeOf"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("api:oauth"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["YDB.PushData"]);
+ }
+
+ // With the RTMR provider name, INSERT ... SELECT must translate both with and without
+ // the STREAM keyword.
+ Y_UNIT_TEST(SelectStreamRtmr) {
+ const TString provider(NYql::RtmrProviderName);
+
+ auto withStream = SqlToYql("USE plato; INSERT INTO Output SELECT STREAM key FROM Input;", 10, provider);
+ UNIT_ASSERT(withStream.Root);
+
+ auto withoutStream = SqlToYql("USE plato; INSERT INTO Output SELECT key FROM Input;", 10, provider);
+ UNIT_ASSERT(withoutStream.Root);
+ }
+
+ // SELECT STREAM joined with a table from another cluster (hahn) must translate when the
+ // RTMR provider name is passed.
+ Y_UNIT_TEST(SelectStreamRtmrJoinWithYt) {
+ const TString query = "USE plato; INSERT INTO Output SELECT STREAM key FROM Input LEFT JOIN hahn.ttt as t ON Input.key = t.Name;";
+ auto ast = SqlToYql(query, 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(ast.Root);
+ }
+
+ Y_UNIT_TEST(SelectStreamNonRtmr) {
+ NYql::TAstParseResult res = SqlToYql(
+ "USE plato; INSERT INTO Output SELECT STREAM key FROM Input;",
+ 10);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: SELECT STREAM is unsupported for non-streaming sources\n");
+ }
+
+ // GROUP BY with a HOP(...) window must translate when the RTMR provider name is passed.
+ Y_UNIT_TEST(GroupByHopRtmr) {
+ const TString query = R"(
+ USE plato; INSERT INTO Output SELECT key, SUM(value) AS value FROM Input
+ GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S");
+ )";
+ auto ast = SqlToYql(query, 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // GROUP BY HOP(...) over an inline subquery must translate with the RTMR provider name.
+ Y_UNIT_TEST(GroupByHopRtmrSubquery) {
+ // 'use plato' is intentionally omitted: the table is addressed as plato.Input instead.
+ const TString query = R"(
+ SELECT COUNT(*) AS value FROM (SELECT * FROM plato.Input)
+ GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S")
+ )";
+ auto ast = SqlToYql(query, 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // GROUP BY HOP(...) over a named subquery ($q), wrapped in SELECT STREAM, must translate
+ // with the RTMR provider name.
+ Y_UNIT_TEST(GroupByHopRtmrSubqueryBinding) {
+ const TString query = R"(
+ USE plato;
+ $q = SELECT * FROM Input;
+ INSERT INTO Output SELECT STREAM * FROM (
+ SELECT COUNT(*) AS value FROM $q
+ GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S")
+ );
+ )";
+ auto ast = SqlToYql(query, 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(ast.Root);
+ }
+
+ Y_UNIT_TEST(GroupByNoHopRtmr) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato; INSERT INTO Output SELECT STREAM key, SUM(value) AS value FROM Input
+ GROUP BY key;
+ )", 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:22: Error: Streaming group by query must have a hopping window specification.\n");
+ }
+
+ // Plain INSERT and the INSERT OR ABORT / IGNORE / REVERT variants must all translate when
+ // the Kikimr provider name is passed.
+ Y_UNIT_TEST(KikimrInserts) {
+ const TString query = R"(
+ USE plato;
+ INSERT INTO Output SELECT key, value FROM Input;
+ INSERT OR ABORT INTO Output SELECT key, value FROM Input;
+ INSERT OR IGNORE INTO Output SELECT key, value FROM Input;
+ INSERT OR REVERT INTO Output SELECT key, value FROM Input;
+ )";
+ auto ast = SqlToYql(query, 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // "expr NOT NULL" without IS still translates but must emit warning 4507.
+ Y_UNIT_TEST(WarnMissingIsBeforeNotNull) {
+ const TString query = "select 1 NOT NULL";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(ast), "<main>:1:8: Warning: Missing IS keyword before NOT NULL, code: 4507\n");
+ }
+
+ // Named subqueries in several syntactic forms (parenthesised or bare SELECT, parenthesised
+ // or bare UNION ALL, UNION ALL of parenthesised selects) must translate and be selectable.
+ Y_UNIT_TEST(Subqueries) {
+ const TString query = R"(
+ USE plato;
+ $sq1 = (SELECT * FROM plato.Input);
+
+ $sq2 = SELECT * FROM plato.Input;
+
+ $squ1 = (
+ SELECT * FROM plato.Input
+ UNION ALL
+ SELECT * FROM plato.Input
+ );
+
+ $squ2 =
+ SELECT * FROM plato.Input
+ UNION ALL
+ SELECT * FROM plato.Input;
+
+ $squ3 = (
+ (SELECT * FROM plato.Input)
+ UNION ALL
+ (SELECT * FROM plato.Input)
+ );
+
+ SELECT * FROM $sq1;
+ SELECT * FROM $sq2;
+ SELECT * FROM $squ1;
+ SELECT * FROM $squ2;
+ SELECT * FROM $squ3;
+ )";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // Two named subqueries joined under aliases must translate.
+ Y_UNIT_TEST(SubqueriesJoin) {
+ const TString query = R"(
+ USE plato;
+
+ $left = SELECT * FROM plato.Input1 WHERE value != "BadValue";
+ $right = SELECT * FROM plato.Input2;
+
+ SELECT * FROM $left AS l
+ JOIN $right AS r
+ ON l.key == r.key;
+ )";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // A table literally named `any` (in backticks) must be usable in FROM.
+ Y_UNIT_TEST(AnyInBackticksAsTableName) {
+ const TString query = "use plato; select * from `any`;";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ }
+
+ // ANY on both sides of a LEFT JOIN between a table and a named subquery: "left" and
+ // "right" must each occur exactly once in the program.
+ Y_UNIT_TEST(AnyJoinForTableAndSubQuery) {
+ const TString query = R"(
+ USE plato;
+
+ $r = SELECT * FROM plato.Input2;
+
+ SELECT * FROM ANY plato.Input1 AS l
+ LEFT JOIN ANY $r AS r
+ USING (key);
+ )";
+ auto ast = SqlToYql(query);
+
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("left"), 0}, {TString("right"), 0}};
+
+ // NOTE(review): this callback reacts only to "EquiJoin", which is not among the counted
+ // words above - confirm VerifyProgram ever invokes it with that word, otherwise the
+ // two option checks below never run.
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "EquiJoin") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('left 'any)"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('right 'any)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["left"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["right"]);
+ }
+
+ // ANY on both sides of a LEFT JOIN between a table and AS_TABLE($r): "left" and "right"
+ // must each occur exactly once in the program.
+ Y_UNIT_TEST(AnyJoinForTableAndTableSource) {
+ const TString query = R"(
+ USE plato;
+
+ $r = AsList(
+ AsStruct("aaa" as key, "bbb" as subkey, "ccc" as value)
+ );
+
+ SELECT * FROM ANY plato.Input1 AS l
+ LEFT JOIN ANY AS_TABLE($r) AS r
+ USING (key);
+ )";
+ auto ast = SqlToYql(query);
+
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("left"), 0}, {TString("right"), 0}};
+
+ // NOTE(review): this callback reacts only to "EquiJoin", which is not among the counted
+ // words above - confirm VerifyProgram ever invokes it with that word, otherwise the
+ // two option checks below never run.
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "EquiJoin") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('left 'any)"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('right 'any)"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["left"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["right"]);
+ }
+
+ // A chain of joins with ANY on some inputs: the program must contain "left" once and
+ // "right" twice.
+ Y_UNIT_TEST(AnyJoinNested) {
+ const TString query = R"(
+ USE plato;
+
+ FROM ANY Input1 as a
+ JOIN Input2 as b ON a.key = b.key
+ LEFT JOIN ANY Input3 as c ON a.key = c.key
+ RIGHT JOIN ANY Input4 as d ON d.key = b.key
+ CROSS JOIN Input5
+ SELECT *;
+ )";
+ auto ast = SqlToYql(query);
+
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = {{TString("left"), 0}, {TString("right"), 0}};
+ VerifyProgram(ast, counters);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["left"]);
+ UNIT_ASSERT_VALUES_EQUAL(2, counters["right"]);
+ }
+
+ // An inline "do begin ... end do" action must translate without any issues reported.
+ Y_UNIT_TEST(InlineAction) {
+ const TString query =
+ "do begin\n"
+ " select 1\n"
+ "; end do\n";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(ast), "");
+ }
+
+ // FLATTEN BY may reference a column through the table alias, or use the alias itself.
+ Y_UNIT_TEST(FlattenByCorrelationName) {
+ const bool qualifiedOk = SqlToYql("select * from plato.Input as t flatten by t.x").IsOk();
+ UNIT_ASSERT(qualifiedOk);
+
+ const bool bareAliasOk = SqlToYql("select * from plato.Input as t flatten by t -- same as flatten by t.t").IsOk();
+ UNIT_ASSERT(bareAliasOk);
+ }
+
+ // INSERT ... SELECT, concat() and each() must all translate in DISCOVERY mode.
+ Y_UNIT_TEST(DiscoveryMode) {
+ const auto mode = NSQLTranslation::ESqlMode::DISCOVERY;
+
+ UNIT_ASSERT(SqlToYqlWithMode("insert into plato.Output select * from plato.Input", mode).IsOk());
+ UNIT_ASSERT(SqlToYqlWithMode("select * from plato.concat(Input1, Input2)", mode).IsOk());
+ UNIT_ASSERT(SqlToYqlWithMode("select * from plato.each(AsList(\"Input1\", \"Input2\"))", mode).IsOk());
+ }
+
+ // GROUP BY CUBE over a column that is itself named "group" must translate.
+ Y_UNIT_TEST(CubeWithAutoGeneratedLikeColumnName) {
+ const TString query = "select key,subkey,group from plato.Input group by cube(key,subkey,group)";
+ UNIT_ASSERT(SqlToYql(query).IsOk());
+ }
+
+ // GROUP BY CUBE with an expression aliased as "group" must translate.
+ Y_UNIT_TEST(CubeWithAutoGeneratedLikeAlias) {
+ const TString query = "select key,subkey,group from plato.Input group by cube(key,subkey,value as group)";
+ UNIT_ASSERT(SqlToYql(query).IsOk());
+ }
+
+ // "filter" must be accepted as a column name, as a result alias and as a named binding.
+ Y_UNIT_TEST(FilterCanBeUsedAsColumnIdOrBind) {
+ const bool asColumn = SqlToYql("select filter from plato.Input").IsOk();
+ UNIT_ASSERT(asColumn);
+
+ const bool asAlias = SqlToYql("select 1 as filter").IsOk();
+ UNIT_ASSERT(asAlias);
+
+ const bool asBinding = SqlToYql("$filter = 1; select $filter").IsOk();
+ UNIT_ASSERT(asBinding);
+ }
+
+ // Leading, repeated and comment-separated semicolons between top-level statements are accepted.
+ Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenTopLevelStatements) {
+ const TString query = ";;select 1; ; select 2;/*comment*/;select 3;;--comment\n;select 4;;";
+ UNIT_ASSERT(SqlToYql(query).IsOk());
+ }
+
+ // Inside DEFINE ACTION, repeated semicolons are accepted and the last statement may omit
+ // its semicolon (it ends with a trailing comma in the select list here).
+ Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenActionStatements) {
+ const TString query =
+ "define action $action($b,$c) as\n"
+ " ;;$d = $b + $c;\n"
+ " select $b;\n"
+ " select $c;;\n"
+ " select $d,\n"
+ "end define;\n"
+ "\n"
+ "do $action(1,2);";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // Inside inline "do begin ... end do" blocks, leading or repeated semicolons are accepted
+ // and the last statement may omit its semicolon.
+ Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenInlineActionStatements) {
+ const TString query =
+ "do begin\n"
+ " ;select 1,\n"
+ "end do;\n"
+ "evaluate for $i in AsList(1,2,3) do begin\n"
+ " select $i;;\n"
+ " select $i + $i;;\n"
+ "end do;";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // Inside lambda bodies (including a nested lambda), leading and repeated semicolons are accepted.
+ Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenLambdaStatements) {
+ const TString query =
+ "$x=1;\n"
+ "$foo = ($a, $b)->{\n"
+ " ;;$v = $a + $b;\n"
+ " $bar = ($c) -> {; return $c << $x};;\n"
+ " return $bar($v);;\n"
+ "};\n"
+ "select $foo(1,2);";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // An escaped backslash at the end of a single- or double-quoted literal must survive
+ // translation: each program contributes one occurrence of a\ to the shared counter.
+ Y_UNIT_TEST(StringLiteralWithEscapedBackslash) {
+ auto singleQuoted = SqlToYql(R"foo(SELECT 'a\\';)foo");
+ auto doubleQuoted = SqlToYql(R"foo(SELECT "a\\";)foo");
+ UNIT_ASSERT(singleQuoted.Root);
+ UNIT_ASSERT(doubleQuoted.Root);
+
+ // The same hive is reused for both programs, so the count accumulates from 1 to 2.
+ TWordCountHive counters = {{TString("a\\"), 0}};
+
+ VerifyProgram(singleQuoted, counters);
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["a\\"]);
+
+ VerifyProgram(doubleQuoted, counters);
+ UNIT_ASSERT_VALUES_EQUAL(2, counters["a\\"]);
+ }
+
+ // Two inputs built from runs of '@' (one with text between the runs, one of nine '@')
+ // must both translate.
+ Y_UNIT_TEST(StringMultiLineLiteralWithEscapes) {
+ const bool withText = SqlToYql("SELECT @@@foo@@@@bar@@@").IsOk();
+ UNIT_ASSERT(withText);
+
+ const bool onlyAts = SqlToYql("SELECT @@@@@@@@@").IsOk();
+ UNIT_ASSERT(onlyAts);
+ }
+
+ // Probes which runs of consecutive '@' after SELECT translate: 1-3 fail, 4-5 pass,
+ // 6-7 fail, 8-9 pass, 10 fails.
+ Y_UNIT_TEST(StringMultiLineLiteralConsequitiveAt) {
+ const auto accepted = [](const char* query) {
+ return SqlToYql(query).IsOk();
+ };
+
+ UNIT_ASSERT(!accepted("SELECT @"));
+ UNIT_ASSERT(!accepted("SELECT @@"));
+ UNIT_ASSERT(!accepted("SELECT @@@"));
+ UNIT_ASSERT(accepted("SELECT @@@@"));
+ UNIT_ASSERT(accepted("SELECT @@@@@"));
+
+ UNIT_ASSERT(!accepted("SELECT @@@@@@"));
+ UNIT_ASSERT(!accepted("SELECT @@@@@@@"));
+
+ UNIT_ASSERT(accepted("SELECT @@@@@@@@"));
+ UNIT_ASSERT(accepted("SELECT @@@@@@@@@"));
+ UNIT_ASSERT(!accepted("SELECT @@@@@@@@@@"));
+ }
+
+ // ListCreate / DictCreate / SetCreate passed alongside an aggregate to a lambda must translate.
+ Y_UNIT_TEST(ConstnessForListDictSetCreate) {
+ const TString query =
+ "$foo = ($x, $y) -> (\"aaaa\");\n"
+ "\n"
+ "select\n"
+ " $foo(sum(key), ListCreate(String)),\n"
+ " $foo(sum(key), DictCreate(String, String)),\n"
+ " $foo(sum(key), SetCreate(String)),\n"
+ "from (select 1 as key);";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // Empty tuples "()" are accepted among the PARTITION COMPACT BY items of a window.
+ Y_UNIT_TEST(CanUseEmptyTupleInWindowPartitionBy) {
+ const TString query =
+ "select sum(key) over w\n"
+ "from plato.Input\n"
+ "window w as (partition compact by (), (subkey), (), value || value as dvalue);";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // The DisableAnsiOrderByLimitInUnionAll pragma must be rejected with the exact
+ // deprecation error below.
+ Y_UNIT_TEST(DenyAnsiOrderByLimitLegacyMode) {
+ const TString query =
+ "pragma DisableAnsiOrderByLimitInUnionAll;\n"
+ "use plato;\n"
+ "\n"
+ "select * from Input order by key limit 10\n"
+ "union all\n"
+ "select * from Input order by key limit 1;";
+
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(!ast.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(ast), "<main>:1:8: Error: DisableAnsiOrderByLimitInUnionAll pragma is deprecated and no longer supported\n");
+ }
+
+ // REDUCE ... USING a pre-bound UDF ($func = XXX::YYY($arg)) must translate both in the
+ // plain "using" form and in the "using all" form.
+ Y_UNIT_TEST(ReduceUsingUdfWithShortcutsWorks) {
+ const TString plainUsing =
+ "use plato;\n"
+ "\n"
+ "$arg = 'foo';\n"
+ "$func = XXX::YYY($arg);\n"
+ "\n"
+ "REDUCE Input ON key using $func(subkey);\n"
+ "REDUCE Input ON key using $func(UUU::VVV(TableRow()));\n";
+ UNIT_ASSERT(SqlToYql(plainUsing).IsOk());
+
+ const TString usingAll =
+ "use plato;\n"
+ "\n"
+ "$arg = 'foo';\n"
+ "$func = XXX::YYY($arg);\n"
+ "\n"
+ "REDUCE Input ON key using all $func(subkey);\n"
+ "REDUCE Input ON key using all $func(UUU::VVV(TableRow()));";
+ UNIT_ASSERT(SqlToYql(usingAll).IsOk());
+ }
+
+ // pragma yson.DisableStrict is accepted both with an explicit value and without one.
+ Y_UNIT_TEST(YsonDisableStrict) {
+ const bool withValue = SqlToYql("pragma yson.DisableStrict = \"false\";").IsOk();
+ UNIT_ASSERT(withValue);
+
+ const bool withoutValue = SqlToYql("pragma yson.DisableStrict;").IsOk();
+ UNIT_ASSERT(withoutValue);
+ }
+
+ // pragma yson.Strict is accepted both with an explicit value and without one.
+ Y_UNIT_TEST(YsonStrict) {
+ const bool withValue = SqlToYql("pragma yson.Strict = \"false\";").IsOk();
+ UNIT_ASSERT(withValue);
+
+ const bool withoutValue = SqlToYql("pragma yson.Strict;").IsOk();
+ UNIT_ASSERT(withoutValue);
+ }
+
+ // A JOIN whose ON condition compares AsTuple(...) expressions must translate.
+ Y_UNIT_TEST(JoinByTuple) {
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select * from T1 as a\n"
+ "join T2 as b\n"
+ "on AsTuple(a.key, a.subkey) = AsTuple(b.key, b.subkey);";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // A JOIN whose ON condition compares AsStruct(...) expressions must translate.
+ Y_UNIT_TEST(JoinByStruct) {
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select * from T1 as a\n"
+ "join T2 as b\n"
+ "on AsStruct(a.key as k, a.subkey as sk) = AsStruct(b.key as k, b.subkey as sk);";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // A JOIN whose ON condition applies a UDF call to one side must translate.
+ Y_UNIT_TEST(JoinByUdf) {
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select a.align\n"
+ "from T1 as a\n"
+ "join T2 as b\n"
+ "on Yson::SerializeJsonEncodeUtf8(a.align)=b.align;";
+ const bool translated = SqlToYql(query).IsOk();
+ UNIT_ASSERT(translated);
+ }
+
+ // A backtick-escaped lambda argument ($`foo bar`) must appear in the printed lambda
+ // header as "$foo bar".
+ Y_UNIT_TEST(EscapedIdentifierAsLambdaArg) {
+ const TString expected = "(lambda '(\"$foo bar\" \"$x\")";
+ const TString query =
+ "$f = ($`foo bar`, $x) -> { return $`foo bar` + $x; };\n"
+ "\n"
+ "select $f(1, 2);";
+
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(<callable>) with no extra arguments: the printed program must contain the SqlCall
+ // below, ending with an empty user TupleType.
+ Y_UNIT_TEST(UdfSyntaxSugarOnlyCallable) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType)))";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString)('2022-01-01');");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(...) with user types and a TypeConfig but no RunConfig: the SqlCall must end with
+ // the quoted type config "foo".
+ Y_UNIT_TEST(UdfSyntaxSugarTypeNoRun) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\")";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig)('2022-01-01');");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(...) with a RunConfig but no TypeConfig: the SqlCall must carry an empty type
+ // config ('"") followed by (Void).
+ Y_UNIT_TEST(UdfSyntaxSugarRunNoType) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"\" (Void))";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, Void() as RunConfig)('2022-01-01');");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(...) with user types, TypeConfig and RunConfig: the SqlCall must end with the
+ // quoted type config "foo" followed by (Void).
+ Y_UNIT_TEST(UdfSyntaxSugarFullTest) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, Void() As RunConfig)('2022-01-01');");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(...) with a string literal as RunConfig: the SqlCall must end with (String '"55").
+ Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (String '\"55\"))";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, '55' As RunConfig)('2022-01-01');");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(...) with a composite RunConfig (a tuple holding an int, a string and a struct):
+ // the SqlCall must end with the corresponding tuple expression.
+ Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs2) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" '((Int32 '\"32\") (String '\"no\") (AsStruct '('\"SomeFloat\" (Double '\"1e-9\")))))";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, AsTuple(32, 'no', AsStruct(1e-9 As SomeFloat)) As RunConfig)('2022-01-01');");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // Udf(...) with optional user types (String?, Int32??): the user TupleType in the SqlCall
+ // must contain single and doubly nested OptionalType wrappers.
+ Y_UNIT_TEST(UdfSyntaxSugarOptional) {
+ const TString expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (OptionalType (DataType 'String)) (OptionalType (OptionalType (DataType 'Int32))) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))";
+
+ auto ast = SqlToYql("SELECT Udf(DateTime::FromString, String?, Int32??, Tuple<Int32, Float>, \"foo\" as TypeConfig, Void() As RunConfig)(\"2022-01-01\");");
+ UNIT_ASSERT(ast.Root);
+
+ const auto printed = GetPrettyPrint(ast);
+ UNIT_ASSERT(printed.find(expected) != TString::npos);
+ }
+
+ // CREATE TABLE ... WITH (COMPACTION_POLICY = ...): the Write line must mention
+ // compactionPolicy and the preset name, and "Write" must occur once.
+ Y_UNIT_TEST(CompactionPolicyParseCorrect) {
+ const TString query =
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( COMPACTION_POLICY = "SomeCompactionPreset" );)";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = { {TString("Write"), 0} };
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("compactionPolicy"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("SomeCompactionPreset"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (AUTO_PARTITIONING_BY_SIZE = ENABLED): the Write line must
+ // mention autoPartitioningBySize and ENABLED, and "Write" must occur once.
+ Y_UNIT_TEST(AutoPartitioningBySizeParseCorrect) {
+ const TString query =
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( AUTO_PARTITIONING_BY_SIZE = ENABLED );)";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = { {TString("Write"), 0} };
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("autoPartitioningBySize"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("ENABLED"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (UNIFORM_PARTITIONS = 16): the Write line must mention
+ // uniformPartitions and 16, and "Write" must occur once.
+ Y_UNIT_TEST(UniformPartitionsParseCorrect) {
+ const TString query =
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( UNIFORM_PARTITIONS = 16 );)";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = { {TString("Write"), 0} };
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("uniformPartitions"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("16"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (TTL = Interval("P1D") On <Timestamp column>): the Write line
+ // must mention setTtlSettings, expireAfter and 86400000.
+ Y_UNIT_TEST(DateTimeTtlParseCorrect) {
+ const TString query =
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt);)";
+ auto ast = SqlToYql(query);
+ UNIT_ASSERT(ast.Root);
+
+ TWordCountHive counters = { {TString("Write"), 0} };
+
+ TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+ if (token != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("setTtlSettings"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("expireAfter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("86400000"));
+ };
+
+ VerifyProgram(ast, counters, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, counters["Write"]);
+ }
+
+ Y_UNIT_TEST(IntTtlParseCorrect) {
+ // TTL on an integer column with "AS SECONDS": besides the expireAfter settings, the line
+ // counted for "Write" must also carry columnUnit and the "seconds" unit.
+ NYql::TAstParseResult res = SqlToYql(
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt AS SECONDS);)"
+ );
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(TieringParseCorrect) {
+ // CREATE TABLE ... WITH (TIERING = 'my_tiering'): the line counted for "Write"
+ // must mention the tiering setting and the configured name.
+ NYql::TAstParseResult res = SqlToYql(
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( TIERING = 'my_tiering' );)"
+ );
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiering"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("my_tiering"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(StoreExternalBlobsParseCorrect) {
+ // CREATE TABLE ... WITH (STORE_EXTERNAL_BLOBS = ENABLED): the line counted for "Write"
+ // must mention the storeExternalBlobs setting and its ENABLED value.
+ NYql::TAstParseResult res = SqlToYql(
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( STORE_EXTERNAL_BLOBS = ENABLED );)"
+ );
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storeExternalBlobs"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(DefaultValueColumn2) {
+ // DEFAULT clauses may be a builtin call or a named lambda; both expressions must show up
+ // in the pretty-printed program together with the columnConstrains setting.
+ auto res = SqlToYql(R"( use plato;
+ $lambda = () -> {
+ RETURN CAST(RandomUuid(2) as String)
+ };
+
+ CREATE TABLE tableName (
+ Key Uint32 DEFAULT RandomNumber(1),
+ Value String DEFAULT $lambda,
+ PRIMARY KEY (Key)
+ );
+ )");
+
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ const auto program = GetPrettyPrint(res);
+
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomNumber"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomUuid"));
+ // This check used to be repeated verbatim on two consecutive lines; one copy is enough.
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("columnConstrains"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("Write"));
+
+#if 0
+ Cerr << program << Endl;
+#endif
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(DefaultValueColumn3) {
+ // A DEFAULT expression that references another column of the table being created
+ // must be rejected with a "Column reference ... is not allowed" error.
+ auto parsed = SqlToYql(R"( use plato;
+
+ CREATE TABLE tableName (
+ database_id Utf8,
+ cloud_id Utf8,
+ global_id Utf8 DEFAULT database_id || "=====",
+ PRIMARY KEY (database_id)
+ );
+ )");
+
+ const TString expectedIssues = "<main>:6:40: Error: Column reference \"database_id\" is not allowed in current scope\n";
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(parsed), expectedIssues);
+ UNIT_ASSERT(!parsed.Root);
+ }
+
+ Y_UNIT_TEST(DefaultValueColumn) {
+ // Columns combining FAMILY and DEFAULT: the line counted for "Write" must mention the
+ // default value, the columnConstrains setting and the columnFamilies setting.
+ auto parsed = SqlToYql(R"( use plato;
+ CREATE TABLE tableName (
+ Key Uint32 FAMILY cold DEFAULT 5,
+ Value String FAMILY default DEFAULT "empty",
+ PRIMARY KEY (Key),
+ FAMILY default (
+ DATA = "test",
+ COMPRESSION = "lz4"
+ ),
+ FAMILY cold (
+ DATA = "test",
+ COMPRESSION = "off"
+ )
+ );
+ )");
+
+ UNIT_ASSERT_C(parsed.Root, Err2Str(parsed));
+
+#if 0
+ const auto program = GetPrettyPrint(parsed);
+ Cerr << program << Endl;
+#endif
+
+ TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("default"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnConstrains"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnFamilies"));
+ };
+
+ TWordCountHive wordCounts = { {TString("Write"), 0} };
+ VerifyProgram(parsed, wordCounts, checkWriteLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+ }
+
+ Y_UNIT_TEST(ChangefeedParseCorrect) {
+ // CREATE TABLE with an inline CHANGEFEED: every setting name listed in the WITH clause,
+ // and the literal values checked below, must appear on the line counted for "Write".
+ auto parsed = SqlToYql(R"( USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (
+ MODE = 'KEYS_ONLY',
+ FORMAT = 'json',
+ INITIAL_SCAN = TRUE,
+ VIRTUAL_TIMESTAMPS = FALSE,
+ RESOLVED_TIMESTAMPS = Interval("PT1S"),
+ RETENTION_PERIOD = Interval("P1D"),
+ TOPIC_MIN_ACTIVE_PARTITIONS = 10,
+ AWS_REGION = 'aws:region'
+ )
+ );
+ )");
+ UNIT_ASSERT_C(parsed.Root, Err2Str(parsed));
+
+ TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("changefeed"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("mode"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("KEYS_ONLY"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("format"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("json"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("initial_scan"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("true"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("virtual_timestamps"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("false"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("resolved_timestamps"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("retention_period"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("topic_min_active_partitions"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("aws_region"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("aws:region"));
+ };
+
+ TWordCountHive wordCounts = { {TString("Write"), 0} };
+ VerifyProgram(parsed, wordCounts, checkWriteLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+ }
+
+ Y_UNIT_TEST(CloneForAsTableWorksWithCube) {
+ // GROUP BY CUBE over an AS_TABLE source must translate successfully.
+ const auto parsed = SqlToYql("SELECT * FROM AS_TABLE([<|k1:1, k2:1|>]) GROUP BY CUBE(k1, k2);");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(WindowPartitionByColumnProperlyEscaped) {
+ // A back-quoted PARTITION BY column containing spaces must appear as a double-quoted
+ // atom on the line counted for "CalcOverWindow".
+ NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input WINDOW w AS (PARTITION BY `column with space`);");
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "CalcOverWindow") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"column with space\""));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("CalcOverWindow"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "CalcOverWindow" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]);
+ }
+
+ Y_UNIT_TEST(WindowPartitionByExpressionWithoutAliasesAreAllowed) {
+ // PARTITION BY over an un-aliased expression: the expected output adds a generated
+ // member (group_w_0) to the row and partitions CalcOverWindow by that member.
+ // Note the query partitions by "ii.subkey" while the source alias is "i"; the expected
+ // AST below accesses member "ii" of the row.
+ NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input as i WINDOW w AS (PARTITION BY ii.subkey);");
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "AddMember") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("AddMember row 'group_w_0 (SqlAccess 'struct (Member row '\"ii\")"));
+ }
+ if (word == "CalcOverWindow") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("CalcOverWindow core '('\"group_w_0\")"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("CalcOverWindow"), 0}, {TString("AddMember"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Each tracked word is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AddMember"]);
+ }
+
+ Y_UNIT_TEST(PqReadByAfterUse) {
+ // The pragma is rejected when it follows a USE of a different cluster...
+ const TString conflictingQuery = "use plato; pragma PqReadBy='plato2';";
+ const TString conflictError = "<main>:1:28: Error: Cluster in PqReadPqBy pragma differs from cluster specified in USE statement: plato2 != plato\n";
+ ExpectFailWithError(conflictingQuery, conflictError);
+
+ // ...while the remaining combinations below are expected to translate successfully.
+ const auto pragmaOnly = SqlToYql("pragma PqReadBy='plato2';");
+ UNIT_ASSERT(pragmaOnly.IsOk());
+
+ const auto pragmaBeforeUse = SqlToYql("pragma PqReadBy='plato2'; use plato;");
+ UNIT_ASSERT(pragmaBeforeUse.IsOk());
+
+ const auto useWithBoundCluster = SqlToYql("$x='plato'; use rtmr:$x; pragma PqReadBy='plato2';");
+ UNIT_ASSERT(useWithBoundCluster.IsOk());
+
+ const auto readByDq = SqlToYql("use plato; pragma PqReadBy='dq';");
+ UNIT_ASSERT(readByDq.IsOk());
+ }
+
+ Y_UNIT_TEST(MrObject) {
+ // plato.object(...) with positional and named arguments plus WITH SCHEMA: checks the exact
+ // MrObject call and the userschema setting in the generated program.
+ NYql::TAstParseResult res = SqlToYql(
+ "declare $path as String;\n"
+ "select * from plato.object($path, `format`, \"comp\" || \"ression\" as compression, 1 as bar) with schema (Int32 as y, String as x)"
+ );
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "MrObject") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((MrObject (EvaluateAtom "$path") '"format" '('('"compression" (Concat (String '"comp") (String '"ression"))) '('"bar" (Int32 '"1")))))__"));
+ } else if (word == "userschema") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__('('('"userschema" (StructType '('"y" (DataType 'Int32)) '('"x" (DataType 'String))) '('"y" '"x"))))__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("MrObject"), 0}, {TString("userschema"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Each tracked word is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrObject"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]);
+ }
+
+ Y_UNIT_TEST(TableBindings) {
+ // Reads through "bindings.<name>" under the binding modes exercised below:
+ // the settings returned by GetSettingsWithS3Binding, then DISABLED (error),
+ // DROP (prefix dropped silently) and DROP_WITH_WARNING (prefix dropped, warning issued).
+ NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo");
+ NYql::TAstParseResult res = SqlToYqlWithSettings(
+ "select * from bindings.foo",
+ settings
+ );
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ // The filter previously compared against "MrObject", a word that is not tracked in
+ // elementStat below, which left the substring assertion unreachable. The tracked word
+ // (and the one the expected text starts with) is "MrTableConcat".
+ if (word == "MrTableConcat") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((MrTableConcat (Key '('table (String '"path")))) (Void) '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("MrTableConcat"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrTableConcat"]);
+
+ // DISABLED: the bindings syntax is rejected outright.
+ settings.DefaultCluster = "plato";
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED;
+ res = SqlToYqlWithSettings(
+ "select * from bindings.foo",
+ settings
+ );
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n");
+ UNIT_ASSERT(!res.Root);
+
+ // DROP: the table is read as plain "foo" with empty settings.
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP;
+ res = SqlToYqlWithSettings(
+ "select * from bindings.foo",
+ settings
+ );
+
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) {
+ if (word == "MrTableConcat") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((MrTableConcat (Key '('table (String '"foo")))) (Void) '())))__"));
+ }
+ };
+
+ TWordCountHive elementStat2 = {{TString("MrTableConcat"), 0}};
+ VerifyProgram(res, elementStat2, verifyLine2);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["MrTableConcat"]);
+
+ // DROP_WITH_WARNING: same program as DROP, plus a deprecation warning.
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING;
+ res = SqlToYqlWithSettings(
+ "select * from bindings.foo",
+ settings
+ );
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Warning: Please remove 'bindings.' from your query, the support for this syntax will be dropped soon, code: 4538\n");
+ UNIT_ASSERT(res.Root);
+
+ TWordCountHive elementStat3 = {{TString("MrTableConcat"), 0}};
+ VerifyProgram(res, elementStat3, verifyLine2);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["MrTableConcat"]);
+ }
+
+ Y_UNIT_TEST(TableBindingsWithInsert) {
+ // INSERT through "bindings.<name>" under the binding modes exercised below:
+ // the settings returned by GetSettingsWithS3Binding, then DISABLED (error),
+ // DROP (prefix dropped silently) and DROP_WITH_WARNING (prefix dropped, warning issued).
+ NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo");
+ NYql::TAstParseResult res = SqlToYqlWithSettings(
+ "insert into bindings.foo with truncate (x, y) values (1, 2);",
+ settings
+ );
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write!") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((Write! world sink (Key '('table (String '"path"))) values '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("Write!"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+
+ // DISABLED: the bindings syntax is rejected outright.
+ settings.DefaultCluster = "plato";
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED;
+ res = SqlToYqlWithSettings(
+ "insert into bindings.foo with truncate (x, y) values (1, 2);",
+ settings
+ );
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:13: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n");
+ UNIT_ASSERT(!res.Root);
+
+ // DROP: no issues at all, and the write targets plain "foo" in renew mode.
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP;
+ res = SqlToYqlWithSettings(
+ "insert into bindings.foo with truncate (x, y) values (1, 2);",
+ settings
+ );
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "");
+ UNIT_ASSERT(res.Root);
+
+ // A commented-out debugging assertion was removed from this verifier.
+ TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) {
+ if (word == "Write!") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((Write! world sink (Key '('table (String '"foo"))) values '('('mode 'renew)))__"));
+ }
+ };
+
+ TWordCountHive elementStat2 = {{TString("Write!"), 0}};
+ VerifyProgram(res, elementStat2, verifyLine2);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["Write!"]);
+
+ // DROP_WITH_WARNING: same program as DROP, plus a deprecation warning.
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING;
+ res = SqlToYqlWithSettings(
+ "insert into bindings.foo with truncate (x, y) values (1, 2);",
+ settings
+ );
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:13: Warning: Please remove 'bindings.' from your query, the support for this syntax will be dropped soon, code: 4538\n");
+ UNIT_ASSERT(res.Root);
+
+ TWordCountHive elementStat3 = {{TString("Write!"), 0}};
+ VerifyProgram(res, elementStat3, verifyLine2);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["Write!"]);
+ }
+
+ Y_UNIT_TEST(TrailingCommaInWithout) {
+ // A trailing comma after the WITHOUT column list must be accepted in each form below.
+ const auto plainColumn = SqlToYql("SELECT * WITHOUT stream, FROM plato.Input");
+ UNIT_ASSERT(plainColumn.IsOk());
+
+ const auto qualifiedColumn = SqlToYql("SELECT a.* WITHOUT a.intersect, FROM plato.Input AS a");
+ UNIT_ASSERT(qualifiedColumn.IsOk());
+
+ const auto mixedColumns = SqlToYql("SELECT a.* WITHOUT col1, col2, a.col3, FROM plato.Input AS a");
+ UNIT_ASSERT(mixedColumns.IsOk());
+ }
+
+ Y_UNIT_TEST(NoStackOverflowOnBigCaseStatement) {
+ // Builds a CASE expression with 20000 WHEN branches and expects it to translate.
+ constexpr size_t branchCount = 20000;
+
+ TStringBuilder query;
+ query << "select case 1 + 123";
+ for (size_t branch = 0; branch < branchCount; ++branch) {
+ query << " when " << branch << " then " << branch + 1;
+ }
+ query << " else 100500 end;";
+
+ const auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(CollectPreaggregatedInListLiteral) {
+ // An aggregate over a DISTINCT expression placed inside a list literal must translate.
+ const auto parsed = SqlToYql("SELECT [COUNT(DISTINCT a+b)] FROM plato.Input");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(SmartParenInGroupByClause) {
+ // A parenthesized column list is accepted as the GROUP BY argument.
+ const auto parsed = SqlToYql("SELECT * FROM plato.Input GROUP BY (k, v)");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableRenameToIsCorrect) {
+ // ALTER TABLE ... RENAME TO must translate successfully.
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table RENAME TO moved");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableAddDropColumnIsCorrect) {
+ // Several ADD / DROP column actions, with and without the COLUMN keyword, in one statement.
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table ADD COLUMN addc uint64, DROP COLUMN dropc, ADD addagain uint64");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableSetTTLIsCorrect) {
+ // ALTER TABLE ... SET (TTL = ...) must translate both without and with a column unit.
+ const auto withoutUnit = SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column)");
+ UNIT_ASSERT(withoutUnit.IsOk());
+
+ const auto withSecondsUnit = SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column AS SECONDS)");
+ UNIT_ASSERT(withSecondsUnit.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableSetTieringIsCorrect) {
+ // ALTER TABLE ... SET (TIERING = ...) must translate successfully.
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table SET (TIERING = 'my_tiering')");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableAddChangefeedIsCorrect) {
+ // ALTER TABLE ... ADD CHANGEFEED with MODE and FORMAT settings must translate successfully.
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table ADD CHANGEFEED feed WITH (MODE = 'UPDATES', FORMAT = 'json')");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableAlterChangefeedIsCorrect) {
+ // Disabling a changefeed is accepted...
+ const auto disableFeed = SqlToYql("USE plato; ALTER TABLE table ALTER CHANGEFEED feed DISABLE");
+ UNIT_ASSERT(disableFeed.IsOk());
+
+ // ...whereas altering its FORMAT is reported as unsupported.
+ const TString alterFormatQuery = "USE plato; ALTER TABLE table ALTER CHANGEFEED feed SET (FORMAT = 'proto');";
+ const TString alterFormatError = "<main>:1:57: Error: FORMAT alter is not supported\n";
+ ExpectFailWithError(alterFormatQuery, alterFormatError);
+ }
+
+ Y_UNIT_TEST(AlterTableDropChangefeedIsCorrect) {
+ // ALTER TABLE ... DROP CHANGEFEED must translate successfully.
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table DROP CHANGEFEED feed");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableSetPartitioningIsCorrect) {
+ // ALTER TABLE ... SET (AUTO_PARTITIONING_BY_SIZE = DISABLED) must translate successfully.
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table SET (AUTO_PARTITIONING_BY_SIZE = DISABLED)");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AlterTableAddIndexWithIsNotSupported) {
+ // A WITH clause on a plain GLOBAL index is reported as not implemented; the expected
+ // text contains \d+ placeholders and is passed to the fuzzy-matching helper.
+ const TString query = "USE plato; ALTER TABLE table ADD INDEX idx GLOBAL ON (col) WITH (a=b)";
+ const TString errorPattern = "<main>:1:40: Error: with: alternative is not implemented yet: \\d+:\\d+: global_index\\n";
+ ExpectFailWithFuzzyError(query, errorPattern);
+ }
+
+ Y_UNIT_TEST(AlterTableAddIndexLocalIsNotSupported) {
+ // A LOCAL index is reported as not implemented; the expected text contains \d+
+ // placeholders and is passed to the fuzzy-matching helper.
+ const TString query = "USE plato; ALTER TABLE table ADD INDEX idx LOCAL ON (col)";
+ const TString errorPattern = "<main>:1:40: Error: local: alternative is not implemented yet: \\d+:\\d+: local_index\\n";
+ ExpectFailWithFuzzyError(query, errorPattern);
+ }
+
+ Y_UNIT_TEST(CreateTableAddIndexVector) {
+ // CREATE TABLE with an inline vector_kmeans_tree index; note the trailing comma inside
+ // the index WITH list, which is expected to be accepted.
+ const auto parsed = SqlToYql(R"(USE plato;
+ CREATE TABLE table (
+ pk INT32 NOT NULL,
+ col String,
+ INDEX idx GLOBAL USING vector_kmeans_tree
+ ON (col) COVER (col)
+ WITH (distance=cosine, vector_type=float, vector_dimension=1024,),
+ PRIMARY KEY (pk))
+ )");
+ UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(AlterTableAddIndexVector) {
+ // ALTER TABLE ... ADD INDEX with a vector_kmeans_tree index; here vector_type is given
+ // as a quoted string.
+ const auto parsed = SqlToYql(R"(USE plato;
+ ALTER TABLE table ADD INDEX idx
+ GLOBAL USING vector_kmeans_tree
+ ON (col) COVER (col)
+ WITH (distance=cosine, vector_type="float", vector_dimension=1024)
+ )");
+ UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(AlterTableAddIndexUnknownSubtype) {
+ // An index subtype that is not recognized must be rejected with a dedicated error.
+ const TString query = "USE plato; ALTER TABLE table ADD INDEX idx GLOBAL USING unknown ON (col)";
+ const TString expectedError = "<main>:1:57: Error: UNKNOWN index subtype is not supported\n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ Y_UNIT_TEST(AlterTableAddIndexMissedParameter) {
+ // Omitting vector_dimension from the vector index settings must be reported.
+ const TString query = R"(USE plato;
+ ALTER TABLE table ADD INDEX idx
+ GLOBAL USING vector_kmeans_tree
+ ON (col)
+ WITH (distance=cosine, vector_type=float)
+ )";
+ const TString expectedError = "<main>:5:52: Error: vector_dimension should be set\n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ Y_UNIT_TEST(AlterTableAlterIndexSetPartitioningIsCorrect) {
+ // ALTER INDEX ... SET <setting> <value> (single setting, no parentheses) must translate.
+ const TString query = "USE plato; ALTER TABLE table ALTER INDEX index SET AUTO_PARTITIONING_MIN_PARTITIONS_COUNT 10";
+ const auto parsed = SqlToYql(query);
+ UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(AlterTableAlterIndexSetMultiplePartitioningSettings) {
+ // ALTER INDEX ... SET (a = x, b = y) with several partitioning settings must translate.
+ const TString query =
+ "USE plato; ALTER TABLE table ALTER INDEX index SET "
+ "(AUTO_PARTITIONING_BY_LOAD = ENABLED, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10)";
+ const auto parsed = SqlToYql(query);
+ UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(AlterTableAlterIndexResetPartitioningIsNotSupported) {
+ // RESET of an index partitioning setting must be rejected as unsupported.
+ const TString query = "USE plato; ALTER TABLE table ALTER INDEX index RESET (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT)";
+ const TString expectedError = "<main>:1:55: Error: AUTO_PARTITIONING_MIN_PARTITIONS_COUNT reset is not supported\n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ Y_UNIT_TEST(AlterTableAlterColumnDropNotNullAstCorrect) {
+ // ALTER COLUMN ... DROP NOT NULL: the line containing "'mode 'alter" must be exactly the
+ // Write! with an alterColumns action carrying drop_not_null for column "val".
+ auto reqSetNull = SqlToYql(R"(
+ USE plato;
+ CREATE TABLE tableName (
+ id Uint32,
+ val Uint32 NOT NULL,
+ PRIMARY KEY (id)
+ );
+
+ COMMIT;
+ ALTER TABLE tableName ALTER COLUMN val DROP NOT NULL;
+ )");
+
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(reqSetNull.IsOk(), Err2Str(reqSetNull));
+ UNIT_ASSERT(reqSetNull.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ // Only one word is tracked below, so the check is applied without filtering on it.
+ Y_UNUSED(word);
+
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(
+ R"(let world (Write! world sink (Key '('tablescheme (String '"tableName"))) (Void) '('('mode 'alter) '('actions '('('alterColumns '('('"val" '('changeColumnConstraints '('('drop_not_null)))))))))))"
+ ));
+ };
+
+ TWordCountHive elementStat({TString("\'mode \'alter")});
+ VerifyProgram(reqSetNull, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["\'mode \'alter"]);
+ }
+
+ Y_UNIT_TEST(OptionalAliases) {
+ // Aliases written without the AS keyword: for a column, for joined tables and for a
+ // VALUES source with a column list.
+ const auto columnAlias = SqlToYql("USE plato; SELECT foo FROM (SELECT key foo FROM Input);");
+ UNIT_ASSERT(columnAlias.IsOk());
+
+ const auto tableAliases = SqlToYql("USE plato; SELECT a.x FROM Input1 a JOIN Input2 b ON a.key = b.key;");
+ UNIT_ASSERT(tableAliases.IsOk());
+
+ const auto valuesAlias = SqlToYql("USE plato; SELECT a.x FROM (VALUES (1,2), (3,4)) a(x,key) JOIN Input b ON a.key = b.key;");
+ UNIT_ASSERT(valuesAlias.IsOk());
+ }
+
+ Y_UNIT_TEST(TableNameConstness) {
+ // TableName() with an explicit path argument is accepted next to an aggregate...
+ const auto pathOnly = SqlToYql("USE plato; $path = 'foo'; SELECT TableName($path), count(*) FROM Input;");
+ UNIT_ASSERT(pathOnly.IsOk());
+
+ const auto pathAndSystem = SqlToYql("$path = 'foo'; SELECT TableName($path, 'yt'), count(*) FROM plato.Input;");
+ UNIT_ASSERT(pathAndSystem.IsOk());
+
+ // ...while the argument-less form is rejected in an aggregating query.
+ const TString noArgsQuery = "USE plato; SELECT TableName(), count(*) FROM plato.Input;";
+ const TString noArgsError = "<main>:1:19: Error: Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery\n";
+ ExpectFailWithError(noArgsQuery, noArgsError);
+ }
+
+ Y_UNIT_TEST(UseShouldWorkAsColumnName) {
+ // "use" must remain usable as an ordinary column name / alias.
+ const auto parsed = SqlToYql("select use from (select 1 as use);");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(TrueFalseWorkAfterDollar) {
+ // "true" / "False" must be usable as named-node identifiers after the dollar sign.
+ const auto spacedTrue = SqlToYql("$ true = false; SELECT $ true or false;");
+ UNIT_ASSERT(spacedTrue.IsOk());
+
+ const auto capitalizedFalse = SqlToYql("$False = 0; SELECT $False;");
+ UNIT_ASSERT(capitalizedFalse.IsOk());
+ }
+
+ Y_UNIT_TEST(WithSchemaEquals) {
+ // Both "with schema <type>" and "with columns = <type>" table hints must translate.
+ const auto schemaHint = SqlToYql("select * from plato.T with schema Struct<a:Int32, b:String>;");
+ UNIT_ASSERT(schemaHint.IsOk());
+
+ const auto columnsHint = SqlToYql("select * from plato.T with columns = Struct<a:Int32, b:String>;");
+ UNIT_ASSERT(columnsHint.IsOk());
+ }
+
+ Y_UNIT_TEST(WithNonStructSchemaS3) {
+ // A column-list schema mixing "name Type" and "Type as name" forms on a cluster
+ // mapped to the S3 provider must translate.
+ NSQLTranslation::TTranslationSettings translationSettings;
+ translationSettings.ClusterMapping["s3bucket"] = NYql::S3ProviderName;
+
+ const auto parsed = SqlToYql("select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);", translationSettings);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AllowNestedTuplesInGroupBy) {
+ // A tuple nested inside a GROUP BY expression is a single grouping expression: the
+ // Aggregate line must group by the one generated key "group0".
+ NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by 1 + (x, y, z);");
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // The verifier uses nothing from the enclosing scope, so it captures nothing.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"group0\")"));
+ };
+
+ TWordCountHive elementStat({"Aggregate"});
+ VerifyProgram(res, elementStat, verifyLine);
+ // VALUES_EQUAL prints the actual count on mismatch, matching the rest of the suite.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ Y_UNIT_TEST(AllowGroupByWithParens) {
+ // A parenthesized GROUP BY list is expanded into its members, honouring "as" aliases:
+ // the Aggregate line must group by x, alias1 and z.
+ NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by (x, y as alias1, z);");
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // The verifier uses nothing from the enclosing scope, so it captures nothing.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"x\" '\"alias1\" '\"z\")"));
+ };
+
+ TWordCountHive elementStat({"Aggregate"});
+ VerifyProgram(res, elementStat, verifyLine);
+ // VALUES_EQUAL prints the actual count on mismatch, matching the rest of the suite.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ Y_UNIT_TEST(CreateAsyncReplicationParseCorrect) {
+ // CREATE ASYNC REPLICATION: the replication name, the "create" mode, every source/target
+ // table and every connection setting must appear on the line counted for "Write".
+ auto req = R"(
+ USE plato;
+ CREATE ASYNC REPLICATION MyReplication
+ FOR table1 AS table2, table3 AS table4
+ WITH (
+ CONNECTION_STRING = "grpc://localhost:2135/?database=/MyDatabase",
+ ENDPOINT = "localhost:2135",
+ DATABASE = "/MyDatabase"
+ );
+ )";
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("create"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table1"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table2"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table3"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table4"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("connection_string"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("grpc://localhost:2135/?database=/MyDatabase"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("endpoint"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("localhost:2135"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("database"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("/MyDatabase"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateAsyncReplicationUnsupportedSettings) {
+ // Each setting listed below must be rejected by CREATE ASYNC REPLICATION with a
+ // "<name> is not supported in CREATE" error.
+ const auto queryTemplate = R"(
+ USE plato;
+ CREATE ASYNC REPLICATION MyReplication
+ FOR table1 AS table2, table3 AS table4
+ WITH (
+ %s = "%s"
+ )
+ )";
+
+ const THashMap<TString, TString> rejectedSettings{
+ {"STATE", "DONE"},
+ {"FAILOVER_MODE", "FORCE"},
+ };
+
+ for (const auto& [name, value] : rejectedSettings) {
+ auto parsed = SqlToYql(Sprintf(queryTemplate, name.c_str(), value.c_str()));
+ UNIT_ASSERT(!parsed.Root);
+
+ // The reported column is computed from the length of the setting name.
+ const auto expectedIssues = Sprintf("<main>:6:%zu: Error: %s is not supported in CREATE\n", 20 + name.size(), name.c_str());
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expectedIssues);
+ }
+ }
+
+ Y_UNIT_TEST(AlterAsyncReplicationParseCorrect) {
+ // ALTER ASYNC REPLICATION ... SET (STATE, FAILOVER_MODE): the replication name, the
+ // "alter" mode and both settings with their values must appear on the "Write" line.
+ auto req = R"(
+ USE plato;
+ ALTER ASYNC REPLICATION MyReplication
+ SET (
+ STATE = "DONE",
+ FAILOVER_MODE = "FORCE"
+ );
+ )";
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("state"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DONE"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("failover_mode"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("FORCE"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(AlterAsyncReplicationUnsupportedSettings) {
+ // For every connection/credential setting below, ALTER ASYNC REPLICATION ... SET must
+ // translate and carry the setting name and value on the line counted for "Write".
+ // NOTE(review): despite "Unsupported" in the test name, the assertions expect each
+ // query to translate successfully - confirm whether the name is stale.
+ auto reqTpl = R"(
+ USE plato;
+ ALTER ASYNC REPLICATION MyReplication
+ SET (
+ %s = "%s"
+ )
+ )";
+
+ auto settings = THashMap<TString, TString>{
+ {"connection_string", "grpc://localhost:2135/?database=/MyDatabase"},
+ {"endpoint", "localhost:2135"},
+ {"database", "/MyDatabase"},
+ {"token", "foo"},
+ {"token_secret_name", "foo_secret_name"},
+ {"user", "user"},
+ {"password", "bar"},
+ {"password_secret_name", "bar_secret_name"},
+ };
+
+ for (const auto& setting : settings) {
+ // Plain references (not structured bindings) so the lambda below can capture them.
+ auto& key = setting.first;
+ auto& value = setting.second;
+ auto req = Sprintf(reqTpl, key.c_str(), value.c_str());
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure so the offending setting can be identified.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [&key, &value](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(key));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(value));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+ }
+
+ Y_UNIT_TEST(AsyncReplicationInvalidSettings) {
+ // A setting name that is not a replication setting must be rejected by name.
+ const auto query = R"(
+ USE plato;
+ ALTER ASYNC REPLICATION MyReplication SET (FOO = "BAR");
+ )";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+
+ const TString expectedIssues = "<main>:3:62: Error: Unknown replication setting: FOO\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expectedIssues);
+ }
+
+ Y_UNIT_TEST(DropAsyncReplicationParseCorrect) {
+ // DROP ASYNC REPLICATION: the replication name and the "drop" mode must appear on the
+ // line counted for "Write".
+ auto req = R"(
+ USE plato;
+ DROP ASYNC REPLICATION MyReplication;
+ )";
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(DropAsyncReplicationCascade) {
+ // DROP ASYNC REPLICATION ... CASCADE: the line counted for "Write" must use the
+ // dropCascade mode.
+ auto req = R"(
+ USE plato;
+ DROP ASYNC REPLICATION MyReplication CASCADE;
+ )";
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropCascade"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Write" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(PragmaCompactGroupBy) {
+ // With PRAGMA CompactGroupBy the Aggregate line must carry the 'compact setting.
+ auto req = "PRAGMA CompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP BY key;";
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Aggregate") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('compact)"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Aggregate"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Aggregate" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ Y_UNIT_TEST(PragmaDisableCompactGroupBy) {
+ // With PRAGMA DisableCompactGroupBy the Aggregate line must NOT carry the 'compact
+ // setting, even though the query has an explicit compact() hint.
+ auto req = "PRAGMA DisableCompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP /*+ compact() */ BY key;";
+ auto res = SqlToYql(req);
+ // Print the translator's issues on failure, like the sibling tests of this suite do.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Aggregate") {
+ // VALUES_EQUAL against npos: the substring has to be absent.
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'('compact)"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Aggregate"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // "Aggregate" is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ Y_UNIT_TEST(AutoSampleWorksWithNamedSubquery) {
+ // SAMPLE applied to a named subquery must translate successfully.
+ const auto parsed = SqlToYql("$src = select * from plato.Input; select * from $src sample 0.2");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AutoSampleWorksWithSubquery) {
+ // SAMPLE applied to an inline subquery must translate successfully.
+ const auto parsed = SqlToYql("select * from (select * from plato.Input) sample 0.2");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(CreateTableTrailingComma) {
+ // A trailing comma in the CREATE TABLE element list is accepted after a constraint
+ // as well as after a column definition.
+ const auto afterPrimaryKey = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, PRIMARY KEY (Key),);");
+ UNIT_ASSERT(afterPrimaryKey.IsOk());
+
+ const auto afterColumn = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32,);");
+ UNIT_ASSERT(afterColumn.IsOk());
+ }
+
+ Y_UNIT_TEST(BetweenSymmetric) {
+ // BETWEEN accepts the SYMMETRIC / ASYMMETRIC modifiers...
+ const auto symmetric = SqlToYql("select 3 between symmetric 5 and 4;");
+ UNIT_ASSERT(symmetric.IsOk());
+
+ const auto asymmetric = SqlToYql("select 3 between asymmetric 5 and 4;");
+ UNIT_ASSERT(asymmetric.IsOk());
+
+ // ...and the queries using "and" as a column name in the bounds must still translate.
+ const auto symmetricWithAndColumn = SqlToYql("use plato; select key between symmetric and and and from Input;");
+ UNIT_ASSERT(symmetricWithAndColumn.IsOk());
+
+ const auto plainWithAndColumn = SqlToYql("use plato; select key between and and and from Input;");
+ UNIT_ASSERT(plainWithAndColumn.IsOk());
+ }
+}
+
+Y_UNIT_TEST_SUITE(ExternalFunction) {
+ // Accepted forms of PROCESS ... USING EXTERNAL FUNCTION.
+ Y_UNIT_TEST(ValidUseFunctions) {
+
+ // three arguments, with CONCURRENCY and a quoted OPTIMIZE_FOR value
+ const auto withStructArgument = SqlToYql(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|a: 123, b: a + 641|>)"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " CONCURRENCY=3, OPTIMIZE_FOR='CALLS'");
+ UNIT_ASSERT(withStructArgument.IsOk());
+
+ // use CALLS without quotes, as keyword
+ const auto withKeywordOptimizeFor = SqlToYql(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " OPTIMIZE_FOR=CALLS");
+ UNIT_ASSERT(withKeywordOptimizeFor.IsOk());
+
+ // TableRow() as the third argument
+ const auto withTableRow = SqlToYql(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', TableRow())"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " CONCURRENCY=3");
+ UNIT_ASSERT(withTableRow.IsOk());
+
+ // BATCH_SIZE, CONNECTION and INIT settings
+ const auto withExtraSettings = SqlToYql(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con',"
+ " INIT=[0, 900]");
+ UNIT_ASSERT(withExtraSettings.IsOk());
+
+ // settings named UNKNOWN_PARAM_* next to the type settings
+ const auto withUnknownSettings = SqlToYql(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'bar', TableRow())"
+ " WITH UNKNOWN_PARAM_1='837747712', UNKNOWN_PARAM_2=Tuple<Uint16, Utf8>,"
+ " INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>");
+ UNIT_ASSERT(withUnknownSettings.IsOk());
+ }
+
+
+ // Rejected forms, each with the exact error text expected from the translator.
+ Y_UNIT_TEST(InValidUseFunctions) {
+ // WITH block on a PROCESS that does not use EXTERNAL FUNCTION
+ const TString withBlockOnUdf = "PROCESS plato.Input USING some::udf(*) WITH INPUT_TYPE=Struct<a:Int32>";
+ ExpectFailWithError(withBlockOnUdf,
+ "<main>:1:33: Error: PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block\n");
+
+ // ASSUME block combined with EXTERNAL FUNCTION
+ const TString assumeWithExternal =
+ "PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'jhhjfh88134d')"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>"
+ " ASSUME ORDER BY key";
+ ExpectFailWithError(assumeWithExternal,
+ "<main>:1:129: Error: PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block\n");
+
+ // four arguments where two to three are required
+ const TString tooManyArguments = "PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', 'bar', 'baz')";
+ ExpectFailWithError(tooManyArguments,
+ "<main>:1:15: Error: EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: 4\n");
+
+ // CONCURRENCY and INPUT_TYPE given twice: one error per repeated clause
+ const TString repeatedClauses =
+ "PROCESS plato.Input\n"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|field_1: a1, field_b: b1|>)\n"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,\n"
+ " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con',\n"
+ " CONCURRENCY=5, INPUT_TYPE=Struct<b:Bool>,\n"
+ " INIT=[0, 900]\n";
+ ExpectFailWithError(repeatedClauses,
+ "<main>:5:2: Error: WITH \"CONCURRENCY\" clause should be specified only once\n"
+ "<main>:5:17: Error: WITH \"INPUT_TYPE\" clause should be specified only once\n");
+ }
+}
+
+Y_UNIT_TEST_SUITE(SqlToYQLErrors) {
+ Y_UNIT_TEST(UdfSyntaxSugarMissingCall) {
+ // Udf(...) that is not followed by a call cannot be used as an expression.
+ auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, \"foo\" as RunConfig);");
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:8: Error: Abstract Udf Node can't be used as a part of expression.\n");
+ }
+
+ Y_UNIT_TEST(UdfSyntaxSugarIsNotCallable) {
+ // The first argument of Udf() must be a callable, not a literal such as 123.
+ auto parsed = SqlToYql("SELECT Udf(123, \"foo\" as RunConfig);");
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:8: Error: Udf: first argument must be a callable, like Foo::Bar\n");
+ }
+
+ Y_UNIT_TEST(UdfSyntaxSugarNoArgs) {
+ // Udf() with an empty argument list is rejected.
+ auto parsed = SqlToYql("SELECT Udf()();");
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:8: Error: Udf: expected at least one argument\n");
+ }
+
+ // A non-ASCII letter inside an unquoted identifier must produce the lexer
+ // diagnostics listed below and no AST root.
+ Y_UNIT_TEST(StrayUTF8) {
+ /// The first letter of "сedar" below is the Cyrillic 'с' (code point 1089), not the Latin 'c'
+ NYql::TAstParseResult res = SqlToYql("select * from сedar.Input");
+ UNIT_ASSERT(!res.Root);
+
+ TString a1 = Err2Str(res);
+ TString a2(R"foo(<main>:1:14: Error: Unexpected character 'с' (Unicode character <1089>) : cannot match to any predicted input...
+
+<main>:1:15: Error: Unexpected character : cannot match to any predicted input...
+
+)foo");
+
+ UNIT_ASSERT_NO_DIFF(a1, a2);
+ }
+
+ Y_UNIT_TEST(IvalidStringLiteralWithEscapedBackslash) {
+ // A literal holding an escaped backslash, immediately followed by the quote
+ // character and more text, must be rejected for both quote styles.
+ auto singleQuoted = SqlToYql(R"foo($bar = 'a\\'b';)foo");
+ auto doubleQuoted = SqlToYql(R"foo($bar = "a\\"b";)foo");
+ UNIT_ASSERT(!singleQuoted.Root);
+ UNIT_ASSERT(!doubleQuoted.Root);
+
+ UNIT_ASSERT_NO_DIFF(Err2Str(singleQuoted), "<main>:1:15: Error: Unexpected character : syntax error...\n\n");
+ UNIT_ASSERT_NO_DIFF(Err2Str(doubleQuoted), "<main>:1:15: Error: Unexpected character : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(InvalidHexInStringLiteral) {
+ // A malformed \x escape inside a string literal is reported as a literal parse failure.
+ auto parsed = SqlToYql("select \"foo\\x1\\xfe\"");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:15: Error: Failed to parse string literal: Invalid hexadecimal value\n");
+ }
+
+ Y_UNIT_TEST(InvalidOctalInMultilineStringLiteral) {
+ // The bad octal escape sits on the third line of a multi-line literal;
+ // the reported position must point at that line.
+ auto parsed = SqlToYql("select \"foo\nbar\n\\01\"");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:3:4: Error: Failed to parse string literal: Invalid octal value\n");
+ }
+
+ Y_UNIT_TEST(InvalidDoubleAtString) {
+ // Six consecutive '@' characters do not form a valid @@-quoted string.
+ auto parsed = SqlToYql("select @@@@@@");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:13: Error: Unexpected character : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(InvalidDoubleAtStringWhichWasAcceptedEarlier) {
+ // Two @@-strings separated by a lone '@' must be reported as an unexpected token.
+ auto parsed = SqlToYql("SELECT @@foo@@ @ @@bar@@");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:7: Error: Unexpected token '@@foo@@' : cannot match to any predicted input...\n\n");
+ }
+
+ Y_UNIT_TEST(InvalidStringFromTable) {
+ // A double-quoted literal that is reopened and never closed fails at the end of input.
+ auto parsed = SqlToYql("select \"FOO\"\"BAR from plato.foo");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:31: Error: Unexpected character : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(InvalidDoubleAtStringFromTable) {
+ // Same malformed @@ sequence as above, but followed by a FROM clause.
+ auto parsed = SqlToYql("select @@@@@@ from plato.foo");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:28: Error: Unexpected character : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(SelectInvalidSyntax) {
+ // "form" instead of "from": the token after it is reported as unexpected.
+ auto parsed = SqlToYql("select 1 form Wat");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:14: Error: Unexpected token 'Wat' : cannot match to any predicted input...\n\n");
+ }
+
+ Y_UNIT_TEST(SelectNoCluster) {
+ // A table referenced without a cluster prefix and without a default cluster is an error.
+ auto parsed = SqlToYql("select foo from bar");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:1: Error: No cluster name given and no default cluster is selected\n");
+ }
+
+ Y_UNIT_TEST(SelectDuplicateColumns) {
+ // Projecting the same column twice produces a name collision.
+ auto parsed = SqlToYql("select a, a from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:11: Error: Unable to use duplicate column names. Collision in name: a\n");
+ }
+
+ Y_UNIT_TEST(SelectDuplicateLabels) {
+ // Two different columns given the same alias produce a name collision.
+ auto parsed = SqlToYql("select a as foo, b as foo from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:18: Error: Unable to use duplicate column names. Collision in name: foo\n");
+ }
+
+ Y_UNIT_TEST(SelectCaseWithoutThen) {
+ // CASE WHEN with neither THEN nor END: both missing tokens are reported.
+ auto parsed = SqlToYql("select case when true 1;");
+ UNIT_ASSERT(!parsed.Root);
+ const auto expected =
+ "<main>:1:22: Error: Unexpected token absence : Missing THEN \n\n"
+ "<main>:1:23: Error: Unexpected token absence : Missing END \n\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectComplexCaseWithoutThen) {
+ // CASE inside WHERE on the third line of the query is missing THEN.
+ NYql::TAstParseResult res = SqlToYql(
+ "SELECT *\n"
+ "FROM plato.Input AS a\n"
+ "WHERE CASE WHEN a.key = \"foo\" a.subkey ELSE a.value END\n"
+ );
+ // Like the sibling SelectCaseWithoutThen test, a syntax error must also leave no AST root.
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:30: Error: Unexpected token absence : Missing THEN \n\n");
+ }
+
+ Y_UNIT_TEST(SelectCaseWithoutEnd) {
+ // Despite the test name, the query does contain END; what it lacks is ELSE,
+ // and that is what the expected diagnostic reports.
+ auto parsed = SqlToYql("select case a when b then c end from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: ELSE is required\n");
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregationNoInput) {
+ // Without FROM, every column reference (a, b, c) yields a pair of diagnostics.
+ auto parsed = SqlToYql("select a, Min(b), c");
+ UNIT_ASSERT(!parsed.Root);
+ const auto expected =
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:8: Error: Column reference 'a'\n"
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:15: Error: Column reference 'b'\n"
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:19: Error: Column reference 'c'\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregation) {
+ // A plain column next to count(*) with no GROUP BY is rejected.
+ const auto query = "select count(*), 1 + key from plato.Input";
+ const auto expected =
+ "<main>:1:22: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregatedTerms) {
+ // `subkey` is neither a GROUP BY key nor aggregated.
+ const auto query = "select key, 2 * subkey from plato.Input group by key";
+ const auto expected =
+ "<main>:1:17: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SelectDistinctWithBadAggregation) {
+ // The same two checks as the non-DISTINCT tests, with SELECT DISTINCT.
+ const auto withoutGroupBy = "select distinct count(*), 1 + key from plato.Input";
+ const auto withGroupBy = "select distinct key, 2 * subkey from plato.Input group by key";
+ ExpectFailWithError(withoutGroupBy,
+ "<main>:1:31: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ ExpectFailWithError(withGroupBy,
+ "<main>:1:26: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregationInHaving) {
+ // HAVING may not reference a column that is neither grouped nor aggregated.
+ const auto query =
+ "select key from plato.Input group by key\n"
+ "having \"f\" || value == \"foo\"";
+ const auto expected =
+ "<main>:2:15: Error: Column `value` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(JoinWithNonAggregatedColumnInProjection) {
+ // Over a JOIN, a non-grouped column from the other side is rejected. With a
+ // member access (b.subkey.x) the message does not name the column.
+ const auto plainColumn =
+ "select a.key, 1 + b.subkey\n"
+ "from plato.Input1 as a join plato.Input2 as b using(key)\n"
+ "group by a.key;";
+ ExpectFailWithError(plainColumn,
+ "<main>:1:19: Error: Column `b.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ const auto memberAccess =
+ "select a.key, 1 + b.subkey.x\n"
+ "from plato.Input1 as a join plato.Input2 as b using(key)\n"
+ "group by a.key;";
+ ExpectFailWithError(memberAccess,
+ "<main>:1:19: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregatedTermsWithSources) {
+ // Single aliased source: a qualified non-grouped column is rejected. With a
+ // member access (a.subkey.x) the message does not name the column.
+ const auto plainColumn =
+ "select key, 1 + a.subkey\n"
+ "from plato.Input1 as a\n"
+ "group by a.key;";
+ ExpectFailWithError(plainColumn,
+ "<main>:1:17: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ const auto memberAccess =
+ "select key, 1 + a.subkey.x\n"
+ "from plato.Input1 as a\n"
+ "group by a.key;";
+ ExpectFailWithError(memberAccess,
+ "<main>:1:17: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(WarnForAggregationBySelectAlias) {
+ // GROUP BY on a name that is also a SELECT alias of a different expression
+ // succeeds but emits two warnings with code 4532.
+ auto parsed = SqlToYql(
+ "select c + 1 as c from plato.Input\n"
+ "group by c");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:2:11: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n");
+
+ // Same situation when the GROUP BY key is an aliased expression.
+ parsed = SqlToYql(
+ "select c + 1 as c from plato.Input\n"
+ "group by Math::Floor(c + 2) as c;");
+
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n");
+ }
+
+ Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenAggrFunctionsAreUsedInAlias) {
+ // The alias expression contains an aggregate function: no issues are expected.
+ auto parsed = SqlToYql(
+ "select\n"
+ " cast(avg(val) as int) as value,\n"
+ " value as key\n"
+ "from\n"
+ " plato.Input\n"
+ "group by value");
+
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ // Same with the aggregate used as a window function.
+ parsed = SqlToYql(
+ "select\n"
+ " cast(avg(val) over w as int) as value,\n"
+ " value as key\n"
+ "from\n"
+ " plato.Input\n"
+ "group by value\n"
+ "window w as ()");
+
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenQualifiedNameIsUsed) {
+ // GROUP BY uses the qualified name a.key, so the `key` alias raises no issues
+ // (first with a JOIN, then with a single source).
+ auto parsed = SqlToYql(
+ "select\n"
+ " Unwrap(a.key) as key\n"
+ "from plato.Input as a\n"
+ "join plato.Input2 as b using(k)\n"
+ "group by a.key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ parsed = SqlToYql(
+ "select Unwrap(a.key) as key\n"
+ "from plato.Input as a\n"
+ "group by a.key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenTrivialRenamingIsUsed) {
+ // The alias merely renames the grouped column to itself: no issues are expected.
+ auto parsed = SqlToYql(
+ "select a.key as key\n"
+ "from plato.Input as a\n"
+ "group by key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ parsed = SqlToYql(
+ "select key as key\n"
+ "from plato.Input\n"
+ "group by key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ Y_UNIT_TEST(ErrorByAggregatingByExpressionWithSameExpressionInSelect) {
+ // Repeating the GROUP BY expression verbatim in SELECT is still rejected.
+ const auto query = "select k * 2 from plato.Input group by k * 2";
+ const auto expected =
+ "<main>:1:8: Error: Column `k` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrorForAggregationBySelectAlias) {
+ // Both queries produce the two alias warnings (code 4532) followed by an error.
+ const auto groupByAlias =
+ "select key, Math::Floor(1.1 + a.subkey) as foo\n"
+ "from plato.Input as a\n"
+ "group by a.key, foo;";
+ ExpectFailWithError(groupByAlias,
+ "<main>:3:17: Warning: GROUP BY will aggregate by column `foo` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:19: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"
+ "<main>:1:31: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ const auto groupByExpression =
+ "select c + 1 as c from plato.Input\n"
+ "group by Math::Floor(c + 2);";
+ ExpectFailWithError(groupByExpression,
+ "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"
+ "<main>:1:8: Error: Column `c` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(SelectWithDuplicateGroupingColumns) {
+ // The same column listed twice in GROUP BY is rejected.
+ auto parsed = SqlToYql("select c from plato.Input group by c, c");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Duplicate grouping column: c\n");
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregationInGrouping) {
+ // GROUP BY without FROM: only the grouping column reference is reported.
+ auto parsed = SqlToYql("select a, Min(b), c group by c");
+ UNIT_ASSERT(!parsed.Root);
+ const auto expected =
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:30: Error: Column reference 'c'\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectWithOpOnBadAggregation) {
+ // A plain column mixed with an aggregate inside one expression is rejected.
+ const auto query = "select 1 + a + Min(b) from plato.Input";
+ const auto expected =
+ "<main>:1:12: Error: Column `a` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SelectOrderByConstantNum) {
+ // ORDER BY a numeric literal is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by 1");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:36: Error: Unable to ORDER BY constant expression\n");
+ }
+
+ Y_UNIT_TEST(SelectOrderByConstantExpr) {
+ // ORDER BY an expression built only from constants is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by 1 * 42");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:38: Error: Unable to ORDER BY constant expression\n");
+ }
+
+ Y_UNIT_TEST(SelectOrderByConstantString) {
+ // ORDER BY a string literal is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by \"nest\"");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:36: Error: Unable to ORDER BY constant expression\n");
+ }
+
+ Y_UNIT_TEST(SelectOrderByAggregated) {
+ // ORDER BY an aggregate in a non-aggregating SELECT is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by min(a)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:36: Error: Unable to ORDER BY aggregated values\n");
+ }
+
+ Y_UNIT_TEST(ErrorInOrderByExpresison) {
+ // An aliased expression in parentheses is not a valid ORDER BY item.
+ auto parsed = SqlToYql("select key, value from plato.Input order by (key as zey)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:45: Error: You should use in ORDER BY column name, qualified field, callable function or expression\n");
+ }
+
+ Y_UNIT_TEST(ErrorsInOrderByWhenColumnIsMissingInProjection) {
+ // ORDER BY a column that the source cannot supply fails in the subquery,
+ // unknown-alias, DISTINCT and aggregation cases below.
+ ExpectFailWithError(
+ "select subkey from (select 1 as subkey) order by key",
+ "<main>:1:50: Error: Column key is not in source column set\n");
+ ExpectFailWithError(
+ "select subkey from plato.Input as a order by x.key",
+ "<main>:1:46: Error: Unknown correlation name: x\n");
+ ExpectFailWithError(
+ "select distinct a, b from plato.Input order by c",
+ "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n");
+ ExpectFailWithError(
+ "select count(*) as a from plato.Input order by c",
+ "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n");
+ ExpectFailWithError(
+ "select count(*) as a, b, from plato.Input group by b order by c",
+ "<main>:1:63: Error: Column c is not in source column set. Did you mean a?\n");
+ // A plain projection over a table may still order by a non-projected column.
+ const auto plainProjection = "select a, b from plato.Input order by c";
+ UNIT_ASSERT(SqlToYql(plainProjection).IsOk());
+ }
+
+ Y_UNIT_TEST(SelectAggregatedWhere) {
+ // Aggregate functions are not allowed in WHERE.
+ auto parsed = SqlToYql("select * from plato.Input where count(key)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:33: Error: Can not use aggregated values in filtering\n");
+ }
+
+ Y_UNIT_TEST(DoubleFrom) {
+ // FROM both before and after SELECT is rejected.
+ auto parsed = SqlToYql("from plato.Input select * from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:27: Error: Only one FROM clause is allowed\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinMissingCorrName) {
+ // An unqualified column in JOIN ON is rejected.
+ auto parsed = SqlToYql("select * from plato.Input1 as a join plato.Input2 as b on a.key == key");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:65: Error: JOIN: column requires correlation name\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinMissingCorrName1) {
+ // Joining a named subquery without an alias via USING is rejected.
+ auto parsed = SqlToYql(
+ "use plato;\n"
+ "$foo = select * from Input1;\n"
+ "select * from Input2 join $foo USING(key);\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:27: Error: JOIN: missing correlation name for source\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinMissingCorrName2) {
+ // The same requirement applies to CROSS JOIN with a named subquery.
+ auto parsed = SqlToYql(
+ "use plato;\n"
+ "$foo = select * from Input1;\n"
+ "select * from Input2 cross join $foo;\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:33: Error: JOIN: missing correlation name for source\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinEmptyCorrNames) {
+ // Joining two named subqueries when neither side has an alias is rejected.
+ auto parsed = SqlToYql(
+ "$left = (SELECT * FROM plato.Input1 LIMIT 2);\n"
+ "$right = (SELECT * FROM plato.Input2 LIMIT 2);\n"
+ "SELECT * FROM $left FULL JOIN $right USING (key);\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:45: Error: At least one correlation name is required in join\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinSameCorrNames) {
+ // Both sides of an equality predicate use the same correlation name.
+ auto parsed = SqlToYql("SELECT Input.key FROM plato.Input JOIN plato.Input1 ON Input.key == Input.subkey\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:66: Error: JOIN: different correlation names are required for joined tables\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinConstPredicateArg) {
+ // An equality against a constant inside JOIN ON is rejected.
+ auto parsed = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey == \"wtf\"\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:87: Error: JOIN: each equality predicate argument must depend on exactly one JOIN input\n");
+ }
+
+ Y_UNIT_TEST(SelectJoinNonEqualityPredicate) {
+ // A non-equality comparison inside JOIN ON is rejected.
+ auto parsed = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey > B.subkey\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:87: Error: JOIN ON expression must be a conjunction of equality predicates\n");
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinCorrNameOutOfScope) {
+ // The first JOIN's ON clause refers to source C, which is joined only later.
+ auto parsed = SqlToYql(
+ "PRAGMA equijoin;\n"
+ "SELECT * FROM plato.A JOIN plato.B ON A.key == C.key JOIN plato.C ON A.subkey == C.subkey;\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:2:45: Error: JOIN: can not use source: C in equality predicate, it is out of current join scope\n");
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinNoRightSource) {
+ // The second JOIN's ON clause never mentions the source being joined (C).
+ auto parsed = SqlToYql(
+ "PRAGMA equijoin;\n"
+ "SELECT * FROM plato.A JOIN plato.B ON A.key == B.key JOIN plato.C ON A.subkey == B.subkey;\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:2:79: Error: JOIN ON equality predicate must have one of its arguments from the rightmost source\n");
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinOuterWithoutType) {
+ // OUTER JOIN without LEFT, RIGHT or FULL is rejected.
+ auto parsed = SqlToYql("SELECT * FROM plato.A Outer JOIN plato.B ON A.key == B.key;\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:23: Error: Invalid join type: OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n");
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinOuterWithWrongType) {
+ // OUTER may not follow LEFT SEMI.
+ auto parsed = SqlToYql("SELECT * FROM plato.A LEFT semi OUTER JOIN plato.B ON A.key == B.key;\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:33: Error: Invalid join type: LEFT SEMI OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n");
+ }
+
+ Y_UNIT_TEST(InsertNoCluster) {
+ // INSERT into a table with no cluster prefix and no default cluster is rejected.
+ auto parsed = SqlToYql("insert into Output (foo) values (1)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:1: Error: No cluster name given and no default cluster is selected\n");
+ }
+
+ Y_UNIT_TEST(InsertValuesNoLabels) {
+ // INSERT ... VALUES without a column list is rejected.
+ auto parsed = SqlToYql("insert into plato.Output values (1)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:19: Error: INSERT INTO ... VALUES requires specification of table columns\n");
+ }
+
+ Y_UNIT_TEST(UpsertValuesNoLabelsKikimr) {
+ // UPSERT ... VALUES without a column list, translated with the Kikimr provider.
+ auto parsed = SqlToYql("upsert into plato.Output values (1)", 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:19: Error: UPSERT INTO ... VALUES requires specification of table columns\n");
+ }
+
+ Y_UNIT_TEST(ReplaceValuesNoLabelsKikimr) {
+ // REPLACE ... VALUES without a column list, translated with the Kikimr provider.
+ auto parsed = SqlToYql("replace into plato.Output values (1)", 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:20: Error: REPLACE INTO ... VALUES requires specification of table columns\n");
+ }
+
+ Y_UNIT_TEST(InsertValuesInvalidLabels) {
+ // One target column but two values per row.
+ auto parsed = SqlToYql("insert into plato.Output (foo) values (1, 2)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:27: Error: VALUES have 2 columns, INSERT INTO expects: 1\n");
+ }
+
+ Y_UNIT_TEST(BuiltinFileOpNoArgs) {
+ // FilePath() called with no arguments is rejected.
+ auto parsed = SqlToYql("select FilePath()");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: FilePath() requires exactly 1 arguments, given: 0\n");
+ }
+
+ Y_UNIT_TEST(ProcessWithHaving) {
+ // HAVING after PROCESS ... USING is rejected.
+ auto parsed = SqlToYql("process plato.Input using some::udf(value) having value == 1");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:15: Error: PROCESS does not allow HAVING yet! You may request it on yql@ maillist.\n");
+ }
+
+ Y_UNIT_TEST(ReduceNoBy) {
+ // REDUCE without ON: both the missing ON and the resulting missing USING are reported.
+ auto parsed = SqlToYql("reduce plato.Input using some::udf(value)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:19: Error: Unexpected token absence : Missing ON \n\n"
+ "<main>:1:25: Error: Unexpected token absence : Missing USING \n\n");
+ }
+
+ Y_UNIT_TEST(ReduceDistinct) {
+ // DISTINCT inside the UDF argument list of REDUCE is rejected.
+ auto parsed = SqlToYql("reduce plato.Input on key using some::udf(distinct value)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:43: Error: DISTINCT can not be used in PROCESS/REDUCE\n");
+ }
+
+ Y_UNIT_TEST(CreateTableWithView) {
+ // A `table:view` name is a syntax error in CREATE TABLE.
+ auto parsed = SqlToYql("CREATE TABLE plato.foo:bar (key INT);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:22: Error: Unexpected token ':' : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(AsteriskWithSomethingAfter) {
+ // A plain '*' followed by another projection item is rejected.
+ auto parsed = SqlToYql("select *, LENGTH(value) from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:8: Error: Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n");
+ }
+
+ Y_UNIT_TEST(AsteriskWithSomethingBefore) {
+ // A plain '*' preceded by another projection item is rejected.
+ auto parsed = SqlToYql("select LENGTH(value), * from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:23: Error: Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n");
+ }
+
+ Y_UNIT_TEST(DuplicatedQualifiedAsterisk) {
+ // The same qualified asterisk used twice in one projection is rejected.
+ auto parsed = SqlToYql("select in.*, key, in.* from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:19: Error: Unable to use twice same quialified asterisk. Invalid source: in\n");
+ }
+
+ Y_UNIT_TEST(BrokenLabel) {
+ // A column alias containing '.' is rejected.
+ auto parsed = SqlToYql("select in.*, key as `funny.label` from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:14: Error: Unable to use '.' in column name. Invalid column name: funny.label\n");
+ }
+
+ Y_UNIT_TEST(KeyConflictDetect0) {
+ // An unqualified column collides with an explicit alias of the same name.
+ auto parsed = SqlToYql("select key, in.key as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:13: Error: Unable to use duplicate column names. Collision in name: key\n");
+ }
+
+ Y_UNIT_TEST(KeyConflictDetect1) {
+ // An aliased expression collides with a later plain column of the same name.
+ auto parsed = SqlToYql("select length(key) as key, key from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:28: Error: Unable to use duplicate column names. Collision in name: key\n");
+ }
+
+ Y_UNIT_TEST(KeyConflictDetect2) {
+ // Unqualified and qualified references to the same column, neither aliased.
+ auto parsed = SqlToYql("select key, in.key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Duplicate column: key\n");
+ }
+
+ Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict1) {
+ // An explicit alias `column0` next to an unnamed expression must still translate.
+ const auto query = "select LENGTH(Value), key as column0 from plato.Input;";
+ UNIT_ASSERT(SqlToYql(query).IsOk());
+ }
+
+ Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict2) {
+ // An explicit alias `column1` followed by an unnamed expression must still translate.
+ const auto query = "select key as column1, LENGTH(Value) from plato.Input;";
+ UNIT_ASSERT(SqlToYql(query).IsOk());
+ }
+
+ Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnSimpleSelect) {
+ // A qualified asterisk on a name that is not a source of the SELECT is rejected.
+ auto parsed = SqlToYql("use plato; select Intop.*, Input.key from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:19: Error: Unknown correlation name: Intop\n");
+ }
+
+ Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnJoin) {
+ // The same check over a JOIN; the message wording mentions the asterisk.
+ auto parsed = SqlToYql("use plato; select tmissed.*, t2.*, t1.key from plato.Input as t1 join plato.Input as t2 on t1.key==t2.key;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:19: Error: Unknown correlation name for asterisk: tmissed\n");
+ }
+
+ Y_UNIT_TEST(UnableToReferenceOnNotExistSubcolumn) {
+ // The subquery projects only `key`, so b.subkey is not available.
+ auto parsed = SqlToYql("select b.subkey from (select key from plato.Input as a) as b;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Column subkey is not in source column set\n");
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify0) {
+ // A qualified column plus the same column aliased to the same name.
+ auto parsed = SqlToYql("select in.key, in.key as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Duplicate column: key\n");
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify1) {
+ // A qualified column plus an expression aliased to that column's name.
+ auto parsed = SqlToYql("select in.key, length(key) as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Duplicate column: key\n");
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify2) {
+ // An unqualified column followed by the qualified form of the same column.
+ auto parsed = SqlToYql("select key, in.key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Duplicate column: key\n");
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify3) {
+ // A qualified column plus a different column aliased to its name.
+ auto parsed = SqlToYql("select in.key, subkey as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Duplicate column: key\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenBySameColumns) {
+ // The same column listed twice in FLATTEN BY, once with an alias.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key, key as kk)");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:46: Error: Duplicate column name found: key in FlattenBy section\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenBySameAliases) {
+ // Two FLATTEN BY columns given the same alias.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as kk);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:52: Error: Duplicate alias found: kk in FlattenBy section\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenByExprSameAliases) {
+ // A FLATTEN BY expression reuses the alias already given to a column.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as kk);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:52: Error: Collision between alias and column name: kk in FlattenBy section\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias0) {
+ // An alias equal to the name of another, unaliased FLATTEN BY column.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key, subkey as key);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:46: Error: Collision between alias and column name: key in FlattenBy section\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias1) {
+ // An alias equal to the name of another FLATTEN BY column that is itself aliased.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as key);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:52: Error: Collision between alias and column name: key in FlattenBy section\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenByExprConflictNameAndAlias1) {
+ // A FLATTEN BY expression aliased to the name of an already listed column.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as key);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:52: Error: Duplicate column name found: key in FlattenBy section\n");
+ }
+
+ Y_UNIT_TEST(SelectFlattenByUnnamedExpr) {
+ // An expression in FLATTEN BY must carry an alias.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key, ListSkip(key, 1))");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:46: Error: Unnamed expression after FLATTEN BY is not allowed\n");
+ }
+
+ Y_UNIT_TEST(UseInOnStrings) {
+ // IN with a string literal on the right-hand side is rejected.
+ auto parsed = SqlToYql("select * from plato.Input where \"foo\" in \"foovalue\";");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:42: Error: Unable to use IN predicate with string argument, it won't search substring - expecting tuple, list, dict or single column table source\n");
+ }
+
+ Y_UNIT_TEST(UseSubqueryInScalarContextInsideIn) {
+ // A parenthesised named subquery after IN translates, with warning 4501.
+ auto parsed = SqlToYql("$q = (select key from plato.Input); select * from plato.Input where subkey in ($q);");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+ "<main>:1:79: Warning: Using subrequest in scalar context after IN, perhaps you should remove parenthesis here, code: 4501\n");
+ }
+
+ Y_UNIT_TEST(InHintsWithKeywordClash) {
+ // COMPACT appears as a column, as the IN hint and as a back-quoted name.
+ auto parsed = SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT `COMPACT`(1,2,3)");
+ UNIT_ASSERT(!parsed.Root);
+ // The last (back-quoted) COMPACT should be parsed as a call expression.
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:58: Error: Unknown builtin: COMPACT\n");
+ }
+
+ Y_UNIT_TEST(ErrorColumnPosition) {
+ // The reported position must be the line and column of the offending
+ // column (line 3), not of the SELECT keyword.
+ auto parsed = SqlToYql(
+ "USE plato;\n"
+ "SELECT \n"
+ "value FROM (\n"
+ "select key from Input\n"
+ ");\n");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:1: Error: Column value is not in source column set\n");
+ }
+
+ Y_UNIT_TEST(PrimaryViewAbortMapReduce) {
+ // VIEW PRIMARY KEY on a plato (yt) table is expected to be rejected.
+ const TString query = "SELECT key FROM plato.Input VIEW PRIMARY KEY";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:17: Error: primary view is not supported for yt tables\n");
+ }
+
+ Y_UNIT_TEST(InsertAbortMapReduce) {
+ // INSERT OR ABORT INTO a plato (yt) table is expected to be rejected.
+ const TString query = "INSERT OR ABORT INTO plato.Output SELECT key FROM plato.Input";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:0: Error: INSERT OR ABORT INTO is not supported for yt tables\n");
+ }
+
+ Y_UNIT_TEST(ReplaceIntoMapReduce) {
+ // REPLACE INTO a plato (yt) table is expected to fail with a hint to use INSERT INTO ... WITH TRUNCATE.
+ const TString query = "REPLACE INTO plato.Output SELECT key FROM plato.Input";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:0: Error: Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... for yt\n");
+ }
+
+ Y_UNIT_TEST(UpsertIntoMapReduce) {
+ // UPSERT INTO a plato (yt) table is expected to be rejected.
+ const TString query = "UPSERT INTO plato.Output SELECT key FROM plato.Input";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:0: Error: UPSERT INTO is not supported for yt tables\n");
+ }
+
+ Y_UNIT_TEST(UpdateMapReduce) {
+ // UPDATE against a plato (yt) table is expected to be rejected.
+ const TString query = "UPDATE plato.Output SET value = value + 1 WHERE key < 1";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:0: Error: UPDATE is unsupported for yt\n");
+ }
+
+ Y_UNIT_TEST(DeleteMapReduce) {
+ // DELETE against a plato (yt) table is expected to be rejected.
+ const TString query = "DELETE FROM plato.Output WHERE key < 1";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:0: Error: DELETE is unsupported for yt\n");
+ }
+
+ Y_UNIT_TEST(ReplaceIntoWithTruncate) {
+ // Combining REPLACE INTO with the TRUNCATE hint is expected to be rejected.
+ const TString query = "REPLACE INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:32: Error: Unable REPLACE INTO with truncate mode\n");
+ }
+
+ Y_UNIT_TEST(UpsertIntoWithTruncate) {
+ // Combining UPSERT INTO with the TRUNCATE hint is expected to be rejected.
+ const TString query = "UPSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:31: Error: Unable UPSERT INTO with truncate mode\n");
+ }
+
+ Y_UNIT_TEST(InsertIntoWithTruncateKikimr) {
+ // With the kikimr provider, INSERT INTO ... WITH TRUNCATE is expected to be rejected.
+ const TString query = "INSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input";
+ auto parsed = SqlToYql(query, 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:0: Error: INSERT INTO WITH TRUNCATE is not supported for kikimr tables\n");
+ }
+
+ Y_UNIT_TEST(InsertIntoWithWrongArgumentCount) {
+ // Three target columns but four VALUES items: a column-count mismatch error is expected.
+ const TString query = "insert into plato.Output with truncate (key, value, subkey) values (5, '1', '2', '3');";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:53: Error: VALUES have 4 columns, INSERT INTO ... WITH TRUNCATE expects: 3\n");
+ }
+
+ Y_UNIT_TEST(UpsertWithWrongArgumentCount) {
+ // Three target columns but two VALUES items (kikimr provider): a column-count mismatch error is expected.
+ const TString query = "upsert into plato.Output (key, value, subkey) values (2, '3');";
+ auto parsed = SqlToYql(query, 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:39: Error: VALUES have 2 columns, UPSERT INTO expects: 3\n");
+ }
+
+ Y_UNIT_TEST(GroupingSetByExprWithoutAlias) {
+ // An unaliased expression inside GROUPING SETS is expected to be rejected.
+ const TString query = "SELECT key FROM plato.Input GROUP BY GROUPING SETS (cast(key as uint32), subkey);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:53: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n");
+ }
+
+ Y_UNIT_TEST(GroupingSetByExprWithoutAlias2) {
+ // Same check as above with the offending expression on the second line; the error is expected at 2:1.
+ const TString query =
+ "SELECT key FROM plato.Input GROUP BY subkey || subkey, GROUPING SETS (\n"
+ "cast(key as uint32), subkey);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:1: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n");
+ }
+
+ Y_UNIT_TEST(CubeByExprWithoutAlias) {
+ // An unaliased expression inside CUBE is expected to be rejected.
+ const TString query = "SELECT key FROM plato.Input GROUP BY CUBE (key, subkey / key);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:56: Error: Unnamed expressions are not supported in CUBE. Please use '<expr> AS <name>'.\n");
+ }
+
+ Y_UNIT_TEST(RollupByExprWithoutAlias) {
+ // An unaliased expression inside ROLLUP is expected to be rejected.
+ const TString query = "SELECT key FROM plato.Input GROUP BY ROLLUP (subkey / key);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:53: Error: Unnamed expressions are not supported in ROLLUP. Please use '<expr> AS <name>'.\n");
+ }
+
+ Y_UNIT_TEST(GroupByHugeCubeDeniedNoPragma) {
+ // Six CUBE columns without any pragma: the error message reports a limit of 5.
+ const TString query = "SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub, key + val as keyval);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:119: Error: GROUP BY CUBE is allowed only for 5 columns, but you use 6\n");
+ }
+
+ Y_UNIT_TEST(GroupByInvalidPragma) {
+ // A negative value for GroupByCubeLimit is expected to be rejected.
+ const TString query = "PRAGMA GroupByCubeLimit = '-4';";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:27: Error: Expected unsigned integer literal as a single argument for: GroupByCubeLimit\n");
+ }
+
+ Y_UNIT_TEST(GroupByHugeCubeDeniedPragme) {
+ // GroupByCubeLimit = 4 with five CUBE columns: the error message reports the pragma-provided limit.
+ const TString query = "PRAGMA GroupByCubeLimit = '4'; SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:132: Error: GROUP BY CUBE is allowed only for 4 columns, but you use 5\n");
+ }
+
+ Y_UNIT_TEST(GroupByFewBigCubes) {
+ // Two CUBE clauses combined are expected to exceed the 64-group limit named in the error.
+ const TString query = "SELECT key FROM plato.Input GROUP BY CUBE(key, subkey, key + subkey as sum), CUBE(value, value + key + subkey as total);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Unable to GROUP BY more than 64 groups, you try use 80 groups\n");
+ }
+
+ Y_UNIT_TEST(GroupByFewBigCubesWithPragmaLimit) {
+ // GroupByLimit = 16: GROUPING SETS combined with ROLLUP are expected to exceed the pragma-provided limit.
+ const TString query = "PRAGMA GroupByLimit = '16'; SELECT key FROM plato.Input GROUP BY GROUPING SETS(key, subkey, key + subkey as sum), ROLLUP(value, value + key + subkey as total);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:29: Error: Unable to GROUP BY more than 16 groups, you try use 18 groups\n");
+ }
+
+ Y_UNIT_TEST(NoGroupingColumn0) {
+ // grouping() references `nomind`, which is absent from the GROUPING SETS list; an error is expected.
+ const TString query =
+ "select count(1), key_first, val_first, grouping(key_first, val_first, nomind) as group\n"
+ "from plato.Input group by grouping sets (cast(key as uint32) /100 as key_first, Substring(value, 1, 1) as val_first);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:71: Error: Column 'nomind' is not a grouping column\n");
+ }
+
+ Y_UNIT_TEST(NoGroupingColumn1) {
+ // grouping() references `value`, which is absent from the CUBE list; an error is expected.
+ const TString query = "select count(1), grouping(key, value) as group_duo from plato.Input group by cube (key, subkey);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:32: Error: Column 'value' is not a grouping column\n");
+ }
+
+ Y_UNIT_TEST(EmptyAccess0) {
+ // An empty backtick identifier nested in AsList() inside VALUES is expected to be rejected as a column reference.
+ const TString query = "insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(``));";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:73: Error: Column reference \"\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(EmptyAccess1) {
+ // An empty backtick identifier used directly as a VALUES item is expected to be rejected as a column reference.
+ const TString query = "insert into plato.Output (list0, list1) values (AsList(0, 1, 2), ``);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:66: Error: Column reference \"\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(UseUnknownColumnInInsert) {
+ // A backtick identifier inside VALUES is expected to be rejected as a column reference.
+ const TString query = "insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(`test`));";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:73: Error: Column reference \"test\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(GroupByEmptyColumn) {
+ // GROUP BY with an empty backtick identifier is expected to be rejected.
+ const TString query = "select count(1) from plato.Input group by ``;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:43: Error: Column name can not be empty\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberOutOfBase) {
+ // An octal literal containing the digit 8 is expected to be rejected.
+ const TString query = "select 0o80l;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Failed to parse number from string: 0o80l, char: '8' is out of base: 8\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberOutOfRangeForInt64ButFitsInUint64) {
+ // A hex literal with the `l` suffix whose value exceeds the Int64 range is expected to be rejected.
+ const TString query = "select 0xc000000000000000l;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Failed to parse 13835058055282163712 as integer literal of Int64 type: value out of range for Int64\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberOutOfRangeUint64) {
+ // Case 1: an overlong hex literal is expected to overflow.
+ auto hexParsed = SqlToYql("select 0xc0000000000000000l;");
+ UNIT_ASSERT(!hexParsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(hexParsed), "<main>:1:8: Error: Failed to parse number from string: 0xc0000000000000000l, number limit overflow\n");
+
+ // Case 2: an overlong decimal literal is expected to overflow as well.
+ auto decParsed = SqlToYql("select 1234234543563435151456;\n");
+ UNIT_ASSERT(!decParsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(decParsed), "<main>:1:8: Error: Failed to parse number from string: 1234234543563435151456, number limit overflow\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberNegativeOutOfRange) {
+ // Only the second statement (line 2) is expected to be reported as overflowing.
+ const TString query =
+ "select -9223372036854775808;\n"
+ "select -9223372036854775809;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:8: Error: Failed to parse negative integer: -9223372036854775809, number limit overflow\n");
+ }
+
+ Y_UNIT_TEST(InvaildUsageReal0) {
+ // A real literal without a leading digit (`.0`) is expected to be a syntax error.
+ const TString query = "select .0;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:7: Error: Unexpected token '.' : cannot match to any predicted input...\n\n");
+ }
+
+ Y_UNIT_TEST(InvaildUsageReal1) {
+ // A suffixed real literal without a leading digit (`.0f`) is expected to be a syntax error.
+ const TString query = "select .0f;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:7: Error: Unexpected token '.' : cannot match to any predicted input...\n\n");
+ }
+
+ Y_UNIT_TEST(InvaildUsageWinFunctionWithoutWindow) {
+ // lead() used without an OVER/WINDOW specification is expected to be rejected.
+ const TString query = "select lead(key, 2) from plato.Input;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Failed to use window function Lead without window specification\n");
+ }
+
+ Y_UNIT_TEST(DropTableWithIfExists) {
+ // DROP TABLE IF EXISTS must translate; the "Write" counter filled by VerifyProgram must end up at 1.
+ auto parsed = SqlToYql("DROP TABLE IF EXISTS plato.foo;");
+ UNIT_ASSERT(parsed.Root);
+
+ TWordCountHive wordCounts = { {TString("Write"), 0}};
+ TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+ // Only lines reported for the "Write" word are inspected.
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists"));
+ };
+ VerifyProgram(parsed, wordCounts, checkLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+ }
+
+ Y_UNIT_TEST(TooManyErrors) {
+ // Fourteen unknown columns are selected while SqlToYql is called with 10 as its second argument:
+ // the expected output is nine column errors followed by a single "Too many issues" entry.
+ const char* q = R"(
+ USE plato;
+ select A, B, C, D, E, F, G, H, I, J, K, L, M, N from (select b from `abc`);
+)";
+
+ NYql::TAstParseResult res = SqlToYql(q, 10);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res),
+ R"(<main>:3:16: Error: Column A is not in source column set. Did you mean b?
+<main>:3:19: Error: Column B is not in source column set. Did you mean b?
+<main>:3:22: Error: Column C is not in source column set. Did you mean b?
+<main>:3:25: Error: Column D is not in source column set. Did you mean b?
+<main>:3:28: Error: Column E is not in source column set. Did you mean b?
+<main>:3:31: Error: Column F is not in source column set. Did you mean b?
+<main>:3:34: Error: Column G is not in source column set. Did you mean b?
+<main>:3:37: Error: Column H is not in source column set. Did you mean b?
+<main>:3:40: Error: Column I is not in source column set. Did you mean b?
+<main>: Error: Too many issues, code: 1
+)");
+ }
+
+ Y_UNIT_TEST(ShouldCloneBindingForNamedParameter) {
+ // $value_type is referenced twice inside the lambda body; translation is expected to succeed.
+ const TString query = R"($f = () -> {
+ $value_type = TypeOf(1);
+ $pair_type = StructType(
+ TypeOf("2") AS key,
+ $value_type AS value
+ );
+
+ RETURN TupleType(
+ ListType($value_type),
+ $pair_type);
+};
+
+select FormatType($f());
+)";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ }
+
+ Y_UNIT_TEST(BlockedInvalidFrameBounds) {
+ // Each frame clause is appended as line 2 of a fixed windowed SELECT and must fail with the given error.
+ const auto expectFrameError = [](const TString& frameClause, const TString& expected) {
+ const TString head = "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n";
+ auto parsed = SqlToYql(head + frameClause + ")");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ };
+
+ expectFrameError("ROWS UNBOUNDED FOLLOWING", "<main>:2:5: Error: Frame cannot start from UNBOUNDED FOLLOWING\n");
+ expectFrameError("ROWS BETWEEN 5 PRECEDING AND UNBOUNDED PRECEDING", "<main>:2:29: Error: Frame cannot end with UNBOUNDED PRECEDING\n");
+ expectFrameError("ROWS BETWEEN CURRENT ROW AND 5 PRECEDING", "<main>:2:13: Error: Frame cannot start from CURRENT ROW and end with PRECEDING\n");
+ expectFrameError("ROWS BETWEEN 5 FOLLOWING AND CURRENT ROW", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with CURRENT ROW\n");
+ expectFrameError("ROWS BETWEEN 5 FOLLOWING AND 5 PRECEDING", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with PRECEDING\n");
+ }
+
+ Y_UNIT_TEST(BlockedRangeValueWithoutSingleOrderBy) {
+ // RANGE frames bounded only by UNBOUNDED / CURRENT ROW are expected to translate.
+ UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM plato.Input").IsOk());
+ UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) FROM plato.Input").IsOk());
+
+ // RANGE with an offset and no ORDER BY is expected to fail.
+ auto noOrderBy = SqlToYql("SELECT COUNT(*) OVER (RANGE 5 PRECEDING) FROM plato.Input");
+ UNIT_ASSERT(!noOrderBy.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(noOrderBy), "<main>:1:29: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n");
+
+ // RANGE with an offset and two ORDER BY expressions is expected to fail too.
+ auto twoOrderKeys = SqlToYql("SELECT COUNT(*) OVER (ORDER BY key, value RANGE 5 PRECEDING) FROM plato.Input");
+ UNIT_ASSERT(!twoOrderKeys.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(twoOrderKeys), "<main>:1:49: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n");
+ }
+
+ Y_UNIT_TEST(NoColumnsInFrameBounds) {
+ // Frame bounds that reference the column `key` are expected to be rejected.
+ const TString query =
+ "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (ROWS BETWEEN\n"
+ " 1 + key PRECEDING AND 2 + key FOLLOWING);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:6: Error: Column reference \"key\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(WarnOnEmptyFrameBounds) {
+ // A frame from 10 FOLLOWING to 5 FOLLOWING still translates, but warning 4520 is expected.
+ const TString query =
+ "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n"
+ "ROWS BETWEEN 10 FOLLOWING AND 5 FOLLOWING)";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:14: Warning: Used frame specification implies empty window frame, code: 4520\n");
+ }
+
+ Y_UNIT_TEST(WarnOnRankWithUnorderedWindow) {
+ // RANK() over a window without ORDER BY translates, but warning 4521 is expected.
+ const TString query = "SELECT RANK() OVER w FROM plato.Input WINDOW w AS ()";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Warning: Rank() is used with unordered window - all rows will be considered equal to each other, code: 4521\n");
+ }
+
+ Y_UNIT_TEST(WarnOnRankExprWithUnorderedWindow) {
+ // RANK(<expr>) over a window without ORDER BY translates, but warning 4521 is expected.
+ const TString query = "SELECT RANK(key) OVER w FROM plato.Input WINDOW w AS ()";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Warning: Rank(<expression>) is used with unordered window - the result is likely to be undefined, code: 4521\n");
+ }
+
+ Y_UNIT_TEST(AnyAsTableName) {
+ // `any` alone in the FROM clause is expected to produce a syntax error at the following `;`.
+ const TString query = "use plato; select * from any;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:28: Error: Unexpected token ';' : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(IncorrectOrderOfLambdaOptionalArgs) {
+ // In a lambda, a required argument after an optional one is expected to be rejected.
+ const TString query = "$f = ($x?, $y)->($x + $y); select $f(1);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:12: Error: Non-optional argument can not follow optional one\n");
+ }
+
+ Y_UNIT_TEST(IncorrectOrderOfActionOptionalArgs) {
+ // In an action definition, a required argument after an optional one is expected to be rejected.
+ const TString query = "define action $f($x?, $y) as select $x,$y; end define; do $f(1);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:23: Error: Non-optional argument can not follow optional one\n");
+ }
+
+ Y_UNIT_TEST(NotAllowedQuestionOnNamedNode) {
+ // A trailing `?` after a named node in an expression is expected to be rejected.
+ const TString query = "$f = 1; select $f?;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:18: Error: Unexpected token '?' at the end of expression\n");
+ }
+
+ Y_UNIT_TEST(AnyAndCrossJoin) {
+ // ANY on the left side of CROSS JOIN is expected to be rejected.
+ auto anyOnLeft = SqlToYql("use plato; select * from any Input1 cross join Input2");
+ UNIT_ASSERT(!anyOnLeft.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(anyOnLeft), "<main>:1:26: Error: ANY should not be used with Cross JOIN\n");
+
+ // ANY on the right side of CROSS JOIN is expected to be rejected as well.
+ auto anyOnRight = SqlToYql("use plato; select * from Input1 cross join any Input2");
+ UNIT_ASSERT(!anyOnRight.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(anyOnRight), "<main>:1:44: Error: ANY should not be used with Cross JOIN\n");
+ }
+
+ Y_UNIT_TEST(AnyWithCartesianProduct) {
+ // With AnsiImplicitCrossJoin, ANY on the first comma-separated source is expected to be rejected.
+ auto anyOnFirst = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from any Input1, Input2");
+ UNIT_ASSERT(!anyOnFirst.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(anyOnFirst), "<main>:1:56: Error: ANY should not be used with Cross JOIN\n");
+
+ // The same is expected when ANY is on the second source.
+ auto anyOnSecond = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from Input1, any Input2");
+ UNIT_ASSERT(!anyOnSecond.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(anyOnSecond), "<main>:1:64: Error: ANY should not be used with Cross JOIN\n");
+ }
+
+ Y_UNIT_TEST(ErrorPlainEndAsInlineActionTerminator) {
+ // An inline action closed by a bare `end` is expected to fail with a "Missing DO" error.
+ const TString query =
+ "do begin\n"
+ " select 1\n"
+ "; end\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:0: Error: Unexpected token absence : Missing DO \n\n");
+ }
+
+ Y_UNIT_TEST(ErrorMultiWayJoinWithUsing) {
+ // A three-way JOIN whose first link uses USING is expected to be rejected at the USING clause.
+ const TString query =
+ "USE plato;\n"
+ "PRAGMA DisableSimpleColumns;\n"
+ "SELECT *\n"
+ "FROM Input1 AS a\n"
+ "JOIN Input2 AS b USING(key)\n"
+ "JOIN Input3 AS c ON a.key = c.key;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:24: Error: Multi-way JOINs should be connected with ON clause instead of USING clause\n");
+ }
+
+ Y_UNIT_TEST(RequireLabelInFlattenByWithDot) {
+ // A dotted name (`x.y`) in FLATTEN BY without an alias is expected to be rejected.
+ const TString query = "select * from plato.Input flatten by x.y";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:40: Error: Unnamed expression after FLATTEN BY is not allowed\n");
+ }
+
+ Y_UNIT_TEST(WarnUnnamedColumns) {
+ // With PRAGMA WarnUnnamedColumns, an unaliased expression in SELECT is expected to yield warning 4516.
+ const TString query =
+ "PRAGMA WarnUnnamedColumns;\n"
+ "\n"
+ "SELECT key, subkey, key || subkey FROM plato.Input ORDER BY subkey;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:28: Warning: Autogenerated column name column2 will be used for expression, code: 4516\n");
+ }
+
+ Y_UNIT_TEST(WarnSourceColumnMismatch) {
+ // SELECT column names that differ from the INSERT column list are expected to yield warning 4517.
+ const TString query =
+ "insert into plato.Output (key, subkey, new_value, one_more_value) select key as Key, subkey, value, \"x\" from plato.Input;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:51: Warning: Column names in SELECT don't match column specification in parenthesis. \"key\" doesn't match \"Key\". \"new_value\" doesn't match \"value\", code: 4517\n");
+ }
+
+ Y_UNIT_TEST(YtCaseInsensitive) {
+ // With the default provider, a differently-cased cluster name is accepted as a table prefix...
+ auto viaPrefix = SqlToYql("select * from PlatO.foo;");
+ UNIT_ASSERT(viaPrefix.Root);
+
+ // ...and in a USE statement.
+ auto viaUse = SqlToYql("use PlatO; select * from foo;");
+ UNIT_ASSERT(viaUse.Root);
+ }
+
+ Y_UNIT_TEST(KikimrCaseSensitive) {
+ // With the "kikimr" provider, a differently-cased cluster name is expected to be unknown as a table prefix...
+ auto viaPrefix = SqlToYql("select * from PlatO.foo;", 10, "kikimr");
+ UNIT_ASSERT(!viaPrefix.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(viaPrefix), "<main>:1:15: Error: Unknown cluster: PlatO\n");
+
+ // ...and in a USE statement.
+ auto viaUse = SqlToYql("use PlatO; select * from foo;", 10, "kikimr");
+ UNIT_ASSERT(!viaUse.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(viaUse), "<main>:1:5: Error: Unknown cluster: PlatO\n");
+ }
+
+ Y_UNIT_TEST(DiscoveryModeForbidden) {
+ // Each table function below is translated in DISCOVERY mode and must fail with error code 4600.
+ const auto expectDiscoveryError = [](const TString& query, const TString& expected) {
+ auto parsed = SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::DISCOVERY);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ };
+
+ expectDiscoveryError("insert into plato.Output select * from plato.range(\"\", Input1, Input4)",
+ "<main>:1:40: Error: range is not allowed in Discovery mode, code: 4600\n");
+
+ expectDiscoveryError("insert into plato.Output select * from plato.like(\"\", \"Input%\")",
+ "<main>:1:40: Error: like is not allowed in Discovery mode, code: 4600\n");
+
+ expectDiscoveryError("insert into plato.Output select * from plato.regexp(\"\", \"Input.\")",
+ "<main>:1:40: Error: regexp is not allowed in Discovery mode, code: 4600\n");
+
+ expectDiscoveryError("insert into plato.Output select * from plato.filter(\"\", ($name) -> { return find($name, \"Input\") is not null; })",
+ "<main>:1:40: Error: filter is not allowed in Discovery mode, code: 4600\n");
+
+ expectDiscoveryError("select Path from plato.folder(\"\") where Type == \"table\"",
+ "<main>:1:18: Error: folder is not allowed in Discovery mode, code: 4600\n");
+ }
+
+ Y_UNIT_TEST(YsonFuncWithoutArgs) {
+ // Yson::From() called without arguments is expected to translate without errors.
+ auto parsed = SqlToYql("SELECT Yson::SerializeText(Yson::From());");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(CanNotUseOrderByInNonLastSelectInUnionAllChain) {
+ // With AnsiOrderByLimitInUnionAll, ORDER BY on the first SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "pragma AnsiOrderByLimitInUnionAll;\n"
+ "use plato;\n"
+ "\n"
+ "select * from Input order by key\n"
+ "union all\n"
+ "select * from Input order by key limit 1;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(CanNotUseLimitInNonLastSelectInUnionAllChain) {
+ // With AnsiOrderByLimitInUnionAll, LIMIT on the first SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "pragma AnsiOrderByLimitInUnionAll;\n"
+ "use plato;\n"
+ "\n"
+ "select * from Input limit 1\n"
+ "union all\n"
+ "select * from Input order by key limit 1;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(CanNotUseDiscardInNonFirstSelectInUnionAllChain) {
+ // With AnsiOrderByLimitInUnionAll, DISCARD on the second SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "pragma AnsiOrderByLimitInUnionAll;\n"
+ "use plato;\n"
+ "\n"
+ "select * from Input\n"
+ "union all\n"
+ "discard select * from Input;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:6:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n");
+ }
+
+ Y_UNIT_TEST(CanNotUseIntoResultInNonLastSelectInUnionAllChain) {
+ // With AnsiOrderByLimitInUnionAll, INTO RESULT on the first (non-last) SELECT of a UNION ALL chain
+ // is expected to be rejected. Previously this test repeated the DISCARD scenario and never
+ // exercised INTO RESULT at all.
+ // NOTE(review): expected position 4:21 mirrors the non-pragma INTO RESULT test shifted by one line - confirm by running.
+ auto req = "use plato;\n"
+ "pragma AnsiOrderByLimitInUnionAll;\n"
+ "\n"
+ "select * from Input into result aaa\n"
+ "union all\n"
+ "select * from Input;";
+
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:21: Error: INTO RESULT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(YsonStrictInvalidPragma) {
+ // A value other than 'true'/'false' for yson.Strict is expected to be rejected.
+ NYql::TAstParseResult parsed = SqlToYql("pragma yson.Strict = \"wrong\";");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:22: Error: Expected 'true', 'false' or no parameter for: Strict\n");
+ }
+
+ Y_UNIT_TEST(WarnTableNameInSomeContexts) {
+ // Forms of TableName() that are expected to translate cleanly.
+ UNIT_ASSERT(SqlToYql("use plato; select TableName() from Input;").IsOk());
+ UNIT_ASSERT(SqlToYql("use plato; select TableName(\"aaaa\");").IsOk());
+ UNIT_ASSERT(SqlToYql("select TableName(\"aaaa\", \"yt\");").IsOk());
+
+ // No USE statement and no service argument: an error is expected.
+ NYql::TAstParseResult noCluster = SqlToYql("select TableName() from plato.Input;");
+ UNIT_ASSERT(!noCluster.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(noCluster), "<main>:1:8: Error: TableName requires either service name as second argument or current cluster name\n");
+
+ // Used together with JOIN: translation succeeds with warning 4525.
+ NYql::TAstParseResult withJoin = SqlToYql("use plato;\n"
+ "select TableName() from Input1 as a join Input2 as b using(key);");
+ UNIT_ASSERT(withJoin.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(withJoin), "<main>:2:8: Warning: TableName() may produce empty result when used in ambiguous context (with JOIN), code: 4525\n");
+
+ // Used inside an aggregate: translation succeeds with warning 4525.
+ NYql::TAstParseResult withAggregation = SqlToYql("use plato;\n"
+ "select SOME(TableName()), key from Input group by key;");
+ UNIT_ASSERT(withAggregation.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(withAggregation), "<main>:2:13: Warning: TableName() will produce empty result when used with aggregation.\n"
+ "Please consult documentation for possible workaround, code: 4525\n");
+ }
+
+ Y_UNIT_TEST(WarnOnDistincWithHavingWithoutAggregations) {
+ // SELECT DISTINCT with HAVING and no aggregation translates, but warning 4526 is expected.
+ NYql::TAstParseResult parsed = SqlToYql("select distinct key from plato.Input having key != '0';");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:49: Warning: The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. Please use WHERE instead., code: 4526\n");
+ }
+
+ Y_UNIT_TEST(FlattenByExprWithNestedNull) {
+ // An undefined named node nested in a FLATTEN BY expression is expected to be reported as an unknown name.
+ const TString query =
+ "USE plato;\n"
+ "\n"
+ "SELECT * FROM (SELECT 1 AS region_id)\n"
+ "FLATTEN BY (\n"
+ " CAST($unknown(region_id) AS List<String>) AS region\n"
+ ")";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:10: Error: Unknown name: $unknown\n");
+ }
+
+ Y_UNIT_TEST(EmptySymbolNameIsForbidden) {
+ // A named node with an empty backtick-quoted name is expected to be rejected.
+ NYql::TAstParseResult parsed = SqlToYql(" $`` = 1; select $``;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:5: Error: Empty symbol name is not allowed\n");
+ }
+
+ Y_UNIT_TEST(WarnOnBinaryOpWithNullArg) {
+ // Comparing with NULL via != translates, but warning 4529 is expected.
+ NYql::TAstParseResult comparison = SqlToYql("select * from plato.Input where cast(key as Int32) != NULL");
+ UNIT_ASSERT(comparison.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(comparison), "<main>:1:52: Warning: Binary operation != will return NULL here, code: 4529\n");
+
+ // `or` with a NULL operand is expected to produce no issues at all.
+ NYql::TAstParseResult logicalOr = SqlToYql("select 1 or null");
+ UNIT_ASSERT(logicalOr.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(logicalOr), "");
+ }
+
+ Y_UNIT_TEST(ErrorIfTableSampleArgUsesColumns) {
+ // A TABLESAMPLE argument that references the column `subkey` is expected to be rejected.
+ NYql::TAstParseResult parsed = SqlToYql("SELECT key FROM plato.Input TABLESAMPLE BERNOULLI(MIN_OF(100.0, CAST(subkey as Int32)));");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:70: Error: Column reference \"subkey\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(DerivedColumnListForSelectIsNotSupportedYet) {
+ // A derived column list on a plain table source is expected to be rejected.
+ NYql::TAstParseResult parsed = SqlToYql("SELECT a,b,c FROM plato.Input as t(x,y,z);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:35: Error: Derived column list is only supported for VALUES\n");
+ }
+
+ Y_UNIT_TEST(ErrorIfValuesHasDifferentCountOfColumns) {
+ // VALUES rows of different sizes (3 and 2) are expected to be rejected at the second row.
+ NYql::TAstParseResult parsed = SqlToYql("VALUES (1,2,3), (4,5);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:17: Error: All VALUES items should have same size: expecting 3, got 2\n");
+ }
+
+ Y_UNIT_TEST(ErrorIfDerivedColumnSizeExceedValuesColumnCount) {
+ // Three derived column names for two-column VALUES rows are expected to be rejected.
+ NYql::TAstParseResult parsed = SqlToYql("SELECT * FROM(VALUES (1,2), (3,4)) as t(x,y,z);");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:40: Error: Derived column list size exceeds column count in VALUES\n");
+ }
+
+ Y_UNIT_TEST(WarnoOnAutogeneratedNamesForValues) {
+ // Two derived names for four-column VALUES rows: translation succeeds with warning 4516 for the rest.
+ const TString query =
+ "PRAGMA WarnUnnamedColumns;\n"
+ "SELECT * FROM (VALUES (1,2,3,4), (5,6,7,8)) as t(x,y);";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:16: Warning: Autogenerated column names column2...column3 will be used here, code: 4516\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithOrderByWithoutExplicitLegacyMode) {
+ // Without any pragma, ORDER BY on the first SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select * from Input order by key\n"
+ "union all\n"
+ "select * from Input order by key;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithLimitWithoutExplicitLegacyMode) {
+ // Without any pragma, LIMIT on the first SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select * from Input limit 10\n"
+ "union all\n"
+ "select * from Input limit 1;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithIntoResultWithoutExplicitLegacyMode) {
+ // Without any pragma, INTO RESULT on the first SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select * from Input into result aaa\n"
+ "union all\n"
+ "select * from Input;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:21: Error: INTO RESULT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithDiscardWithoutExplicitLegacyMode) {
+ // Without any pragma, DISCARD on the second SELECT of a UNION ALL chain is expected to be rejected.
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "select * from Input\n"
+ "union all\n"
+ "discard select * from Input;";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllKeepsIgnoredOrderByWarning) {
+ // The output is expected to contain both the ignored-ORDER-BY warning and the unknown-correlation error.
+ const TString query =
+ "use plato;\n"
+ "\n"
+ "SELECT * FROM (\n"
+ " SELECT * FROM Input\n"
+ " UNION ALL\n"
+ " SELECT t.* FROM Input AS t ORDER BY t.key\n"
+ ");";
+ NYql::TAstParseResult parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:3: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n"
+ "<main>:6:39: Error: Unknown correlation name: t\n");
+ }
+
+ Y_UNIT_TEST(ErrOrderByIgnoredButCheckedForMissingColumns) {
+ // Inline subquery source: the ignored ORDER BY is still checked, so the missing column `x` is an error.
+ const TString overInlineSource = "$src = SELECT key FROM (SELECT 1 as key, 2 as subkey) ORDER BY x; SELECT * FROM $src;";
+ ExpectFailWithError(overInlineSource, "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n"
+ "<main>:1:64: Error: Column x is not in source column set\n");
+
+ // Table source (plato.Input): only the warning is expected and translation succeeds.
+ const TString overTable = "$src = SELECT key FROM plato.Input ORDER BY x; SELECT * FROM $src;";
+ NYql::TAstParseResult parsed = SqlToYql(overTable);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n");
+ }
+
+ Y_UNIT_TEST(InvalidTtlInterval) {
+ // A TTL given as the integer literal 1 is expected to produce the two TTL errors below.
+ const TString ddl = R"(
+ USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key))
+ WITH (TTL = 1 On CreatedAt);
+ )";
+ NYql::TAstParseResult parsed = SqlToYql(ddl);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:25: Error: Literal of Interval type is expected for TTL\n"
+ "<main>:4:25: Error: Invalid TTL settings\n");
+ }
+
+ Y_UNIT_TEST(InvalidTtlUnit) {
+ // PICOSECONDS as a TTL unit is expected to be an unexpected-token error (only the prefix is checked).
+ const TString ddl = R"(
+ USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt AS PICOSECONDS);
+ )";
+ NYql::TAstParseResult parsed = SqlToYql(ddl);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_STRING_CONTAINS(Err2Str(parsed), "<main>:4:56: Error: Unexpected token 'PICOSECONDS'");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedSink) {
+ // SINK_TYPE = "S3" is expected to be rejected as an unknown changefeed sink type.
+ const TString ddl = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (SINK_TYPE = "S3", MODE = "KEYS_ONLY", FORMAT = "json")
+ );
+ )";
+ NYql::TAstParseResult parsed = SqlToYql(ddl);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:55: Error: Unknown changefeed sink type: S3\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedSettings) {
+ // The setting name FOO is expected to be rejected as an unknown changefeed setting.
+ const TString ddl = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (SINK_TYPE = "local", FOO = "bar")
+ );
+ )";
+ NYql::TAstParseResult parsed = SqlToYql(ddl);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:64: Error: Unknown changefeed setting: FOO\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedInitialScan) {
+ // A string value for INITIAL_SCAN is expected to be rejected (Bool literal required).
+ const TString ddl = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", INITIAL_SCAN = "foo")
+ );
+ )";
+ NYql::TAstParseResult parsed = SqlToYql(ddl);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:95: Error: Literal of Bool type is expected for INITIAL_SCAN\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedVirtualTimestamps) { // VIRTUAL_TIMESTAMPS given a string instead of a Bool literal must be rejected
+ auto req = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", VIRTUAL_TIMESTAMPS = "foo")
+ );
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:101: Error: Literal of Bool type is expected for VIRTUAL_TIMESTAMPS\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedResolvedTimestamps) { // RESOLVED_TIMESTAMPS given a string instead of an Interval literal must be rejected
+ auto req = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RESOLVED_TIMESTAMPS = "foo")
+ );
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:102: Error: Literal of Interval type is expected for RESOLVED_TIMESTAMPS\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedRetentionPeriod) { // RETENTION_PERIOD given a string instead of an Interval literal must be rejected
+ auto req = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RETENTION_PERIOD = "foo")
+ );
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:99: Error: Literal of Interval type is expected for RETENTION_PERIOD\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedTopicPartitions) { // TOPIC_MIN_ACTIVE_PARTITIONS given a string instead of an integer literal must be rejected
+ auto req = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", TOPIC_MIN_ACTIVE_PARTITIONS = "foo")
+ );
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:110: Error: Literal of integer type is expected for TOPIC_MIN_ACTIVE_PARTITIONS\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedAwsRegion) { // AWS_REGION given a Bool instead of a String literal must be rejected
+ auto req = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", AWS_REGION = true)
+ );
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:93: Error: Literal of String type is expected for AWS_REGION\n");
+ }
+
+ Y_UNIT_TEST(ErrJoinWithGroupingSetsWithoutCorrelationName) { // Over a JOIN, a GROUPING SETS key without a correlation name (subkey) must be rejected
+ auto req = "USE plato;\n"
+ "\n"
+ "SELECT k1, k2, subkey\n"
+ "FROM T1 AS a JOIN T2 AS b USING (key)\n"
+ "GROUP BY GROUPING SETS(\n"
+ " (a.key as k1, b.subkey as k2),\n"
+ " (k1),\n"
+ " (subkey)\n"
+ ");";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:8:4: Error: Columns in grouping sets should have correlation name, error in key: subkey\n");
+ }
+
+ Y_UNIT_TEST(ErrJoinWithGroupByWithoutCorrelationName) { // Over a JOIN, a GROUP BY key without a correlation name (value) must be rejected
+ auto req = "USE plato;\n"
+ "\n"
+ "SELECT k1, k2,\n"
+ " value\n"
+ "FROM T1 AS a JOIN T2 AS b USING (key)\n"
+ "GROUP BY a.key as k1, b.subkey as k2,\n"
+ " value;";
+ ExpectFailWithError(req,
+ "<main>:7:5: Error: Columns in GROUP BY should have correlation name, error in key: value\n");
+ }
+
+ Y_UNIT_TEST(ErrWithMissingFrom) { // A SELECT without FROM must reject WHERE, aggregation, column references and GROUP BY keys
+ auto req = "select 1 as key where 1 > 1;"; // case 1: WHERE without FROM
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:25: Error: Filtering is not allowed without FROM\n");
+
+ req = "select 1 + count(*);"; // case 2: aggregation without FROM
+ res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Aggregation is not allowed without FROM\n");
+
+ req = "select 1 as key, subkey + value;"; // case 3: each column reference is reported separately
+ res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:18: Error: Column reference 'subkey'\n"
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:27: Error: Column reference 'value'\n");
+
+ req = "select count(1) group by key;"; // case 4: GROUP BY key is a column reference too
+ res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:26: Error: Column reference 'key'\n");
+ }
+
+ Y_UNIT_TEST(ErrWithMissingFromForWindow) { // Functions over a named window are rejected inside a lambda and in a SELECT without FROM
+ auto req = "$c = () -> (1 + count(1) over w);\n" // aggregation over window inside a lambda
+ "select $c();";
+ ExpectFailWithError(req,
+ "<main>:1:9: Error: Window and aggregation functions are not allowed in this context\n"
+ "<main>:1:17: Error: Failed to use aggregation function Count without window specification or in wrong place\n");
+
+ req = "$c = () -> (1 + lead(1) over w);\n" // pure window function inside a lambda
+ "select $c();";
+ ExpectFailWithError(req,
+ "<main>:1:17: Error: Window functions are not allowed in this context\n"
+ "<main>:1:17: Error: Failed to use window function Lead without window specification or in wrong place\n");
+
+ req = "select 1 + count(1) over w window w as ();"; // aggregation over window, no FROM
+ ExpectFailWithError(req,
+ "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use aggregation function Count without window specification or in wrong place\n");
+
+ req = "select 1 + lead(1) over w window w as ();"; // pure window function, no FROM
+ ExpectFailWithError(req,
+ "<main>:1:12: Error: Window functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n");
+ }
+
+ Y_UNIT_TEST(ErrWithMissingFromForInplaceWindow) { // Functions with an inline OVER (...) window are rejected inside a lambda and without FROM
+ auto req = "$c = () -> (1 + count(1) over ());\n" // aggregation with inline window inside a lambda
+ "select $c();";
+ ExpectFailWithError(req,
+ "<main>:1:26: Error: Window and aggregation functions are not allowed in this context\n");
+
+ req = "$c = () -> (1 + lead(1) over (rows between unbounded preceding and current row));\n" // window function with inline frame inside a lambda
+ "select $c();";
+ ExpectFailWithError(req,
+ "<main>:1:25: Error: Window and aggregation functions are not allowed in this context\n");
+
+ req = "select 1 + count(1) over ();"; // aggregation with inline window, no FROM
+ ExpectFailWithError(req,
+ "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use aggregation function Count without window specification or in wrong place\n");
+
+ req = "select 1 + lead(1) over (rows between current row and unbounded following);"; // window function with inline frame, no FROM
+ ExpectFailWithError(req,
+ "<main>:1:12: Error: Window functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n");
+ }
+
+ Y_UNIT_TEST(ErrDistinctInWrongPlace) { // DISTINCT in the arguments of a non-aggregate call must be rejected
+ auto req = "select Some::Udf(distinct key) from plato.Input;"; // UDF call
+ ExpectFailWithError(req,
+ "<main>:1:18: Error: DISTINCT can only be used in aggregation functions\n");
+ req = "select sum(key)(distinct foo) from plato.Input;"; // call applied to the result of sum(key)
+ ExpectFailWithError(req,
+ "<main>:1:17: Error: DISTINCT can only be used in aggregation functions\n");
+
+ req = "select len(distinct foo) from plato.Input;"; // non-aggregate builtin
+ ExpectFailWithError(req,
+ "<main>:1:8: Error: DISTINCT can only be used in aggregation functions\n");
+
+ req = "$foo = ($x) -> ($x); select $foo(distinct key) from plato.Input;"; // lambda call
+ ExpectFailWithError(req,
+ "<main>:1:34: Error: DISTINCT can only be used in aggregation functions\n");
+ }
+
+ Y_UNIT_TEST(ErrForNotSingleChildInInlineAST) { // Inline YQL:: text must yield an AST root with exactly one child
+ ExpectFailWithError("select YQL::\"\"", // empty text: 0 children
+ "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n");
+ ExpectFailWithError("select YQL::@@ \t@@", // whitespace-only text: 0 children
+ "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n");
+ auto req = "$lambda = YQL::@@(lambda '(x)(+ x x)) (lambda '(y)(+ y y))@@;\n" // two top-level lambdas: 2 children
+ "select ListMap([1, 2, 3], $lambda);";
+ ExpectFailWithError(req,
+ "<main>:1:11: Error: Failed to parse YQL: expecting AST root node with single child, but got 2\n");
+ }
+
+ Y_UNIT_TEST(ErrEmptyColumnName) { // Empty column name in WITHOUT is rejected; the message for "" depends on the lexer variant used
+ ExpectFailWithError("select * without \"\" from plato.Input", // here "" is reported as a string literal
+ "<main>:1:18: Error: String literal can not be used here\n");
+
+ ExpectFailWithError("select * without `` from plato.Input;",
+ "<main>:1:18: Error: Empty column name is not allowed\n");
+
+ ExpectFailWithErrorForAnsiLexer("select * without \"\" from plato.Input", // with the ANSI-lexer helper "" is reported as an empty column name
+ "<main>:1:18: Error: Empty column name is not allowed\n");
+
+ ExpectFailWithErrorForAnsiLexer("select * without `` from plato.Input;",
+ "<main>:1:18: Error: Empty column name is not allowed\n");
+ }
+
+ Y_UNIT_TEST(ErrOnNonZeroArgumentsForTableRows) { // TableRows() called with an argument must be rejected
+ ExpectFailWithError("$udf=\"\";process plato.Input using $udf(TableRows(k))",
+ "<main>:1:40: Error: TableRows requires exactly 0 arguments\n");
+ }
+
+ Y_UNIT_TEST(ErrGroupByWithAggregationFunctionAndDistinctExpr) { // GROUP BY over count(distinct <expr>) must be rejected
+ ExpectFailWithError("select * from plato.Input group by count(distinct key|key)",
+ "<main>:1:36: Error: Unable to GROUP BY aggregated values\n");
+ }
+
+ // FIXME: check if we can get old behaviour; until then the test below is compiled out via #if 0
+#if 0
+ Y_UNIT_TEST(ErrWithSchemaWithColumnsWithoutType) { // COLUMNS / SCHEMA table hints without a type (keywords are written in mixed case)
+ ExpectFailWithError("select * from plato.Input with COLUMNs",
+ "<main>:1:32: Error: Expected type after COLUMNS\n"
+ "<main>:1:32: Error: Failed to parse table hints\n");
+
+ ExpectFailWithError("select * from plato.Input with scheMa",
+ "<main>:1:32: Error: Expected type after SCHEMA\n"
+ "<main>:1:32: Error: Failed to parse table hints\n");
+ }
+#endif
+
+ Y_UNIT_TEST(ErrCollectPreaggregatedInListLiteralWithoutFrom) { // VARIANCE(DISTINCT ...) in a list literal without FROM is reported as a column-reference error
+ ExpectFailWithError("SELECT([VARIANCE(DISTINCT[])])",
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:9: Error: Column reference '_yql_preagg_Variance0'\n"); // the reported name is a generated one, not user-written
+ }
+
+ Y_UNIT_TEST(ErrGroupBySmartParenAsTuple) { // A trailing comma inside a parenthesized GROUP BY list must be rejected
+ ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (k, v,)",
+ "<main>:1:41: Error: Unexpected trailing comma in grouping elements list\n");
+ }
+
+ Y_UNIT_TEST(HandleNestedSmartParensInGroupBy) { // GROUP BY (+() as k) must be reported as grouping by a constant expression
+ ExpectFailWithError("SELECT * FROM plato.Input GROUP BY (+() as k)",
+ "<main>:1:37: Error: Unable to GROUP BY constant expression\n");
+ }
+
+ Y_UNIT_TEST(ErrRenameWithAddColumn) { // RENAME TO followed by ADD COLUMN in one ALTER TABLE must be rejected
+ ExpectFailWithError("USE plato; ALTER TABLE table RENAME TO moved, ADD COLUMN addc uint64",
+ "<main>:1:40: Error: RENAME TO can not be used together with another table action\n");
+ }
+
+ Y_UNIT_TEST(ErrAddColumnAndRename) { // ADD COLUMN followed by RENAME TO in one ALTER TABLE must be rejected
+ // FIXME: fix positions in ALTER TABLE
+ ExpectFailWithError("USE plato; ALTER TABLE table ADD COLUMN addc uint64, RENAME TO moved",
+ "<main>:1:46: Error: RENAME TO can not be used together with another table action\n");
+ }
+
+ Y_UNIT_TEST(InvalidUuidValue) { // Malformed Uuid string literals must be rejected with an "Invalid value" error
+ ExpectFailWithError("SELECT Uuid('123e4567ae89ba12d3aa456a426614174ab0')",
+ "<main>:1:8: Error: Invalid value \"123e4567ae89ba12d3aa456a426614174ab0\" for type Uuid\n");
+ ExpectFailWithError("SELECT Uuid('123e4567ae89b-12d3-a456-426614174000')",
+ "<main>:1:8: Error: Invalid value \"123e4567ae89b-12d3-a456-426614174000\" for type Uuid\n");
+ }
+
+ Y_UNIT_TEST(WindowFunctionWithoutOver) { // LAST_VALUE without OVER must be rejected, both with and without GROUP BY
+ ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input",
+ "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n");
+ ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input GROUP BY key",
+ "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n");
+ }
+
+ Y_UNIT_TEST(CreateAlterUserWithoutCluster) { // CREATE USER / ALTER USER without a preceding USE must report the missing default cluster
+ ExpectFailWithError("\n CREATE USER user ENCRYPTED PASSWORD 'foobar';", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n");
+ ExpectFailWithError("ALTER USER CURRENT_USER RENAME TO $foo;", "<main>:1:1: Error: USE statement is missing - no default cluster is selected\n");
+ }
+
+ Y_UNIT_TEST(ModifyPermissionsWithoutCluster) { // GRANT / REVOKE without a preceding USE must report the missing default cluster
+ ExpectFailWithError("\n GRANT CONNECT ON `/Root` TO user;", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n");
+ ExpectFailWithError("\n REVOKE MANAGE ON `/Root` FROM user;", "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n");
+ }
+
+ Y_UNIT_TEST(ReservedRoleNames) { // CURRENT_USER / CURRENT_ROLE (any letter case) are rejected as names in CREATE USER and ALTER USER ... RENAME TO
+ ExpectFailWithError("USE plato; CREATE USER current_User;", "<main>:1:24: Error: System role CURRENT_USER can not be used here\n");
+ ExpectFailWithError("USE plato; ALTER USER current_User RENAME TO Current_role", "<main>:1:46: Error: System role CURRENT_ROLE can not be used here\n");
+ UNIT_ASSERT(SqlToYql("USE plato; DROP GROUP IF EXISTS a, b, c, current_User;").IsOk()); // but accepted in a DROP GROUP list
+ }
+
+ Y_UNIT_TEST(DisableClassicDivisionWithError) { // With ClassicDivision set to 'false', an unknown name used as a '/' operand still yields "Unknown name"
+ ExpectFailWithError("pragma ClassicDivision = 'false'; select $foo / 30;", "<main>:1:42: Error: Unknown name: $foo\n");
+ }
+
+ Y_UNIT_TEST(AggregationOfAgrregatedDistinctExpr) { // sum(sum(distinct ...)) must be rejected. NOTE(review): test name misspells "Aggregated"
+ ExpectFailWithError("select sum(sum(distinct x + 1)) from plato.Input", "<main>:1:12: Error: Aggregation of aggregated values is forbidden\n");
+ }
+
+ Y_UNIT_TEST(WarnForUnusedSqlHint) { // An unused hint (foo) must produce warning 4534; the merge() hint on the join must produce none
+ NYql::TAstParseResult res = SqlToYql("select * from plato.Input1 as a join /*+ merge() */ plato.Input2 as b using(key);\n"
+ "select --+ foo(bar)\n"
+ " 1;");
+ UNIT_ASSERT(res.Root); // translation succeeds; only a warning is expected
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:2:23: Warning: Hint foo will not be used, code: 4534\n");
+ }
+
+ Y_UNIT_TEST(WarnForDeprecatedSchema) { // 'type AS column' inside WITH SCHEMA must produce deprecation warning 4535
+ NSQLTranslation::TTranslationSettings settings;
+ settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName; // map the cluster name used in the query to the S3 provider
+ NYql::TAstParseResult res = SqlToYql("select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);", settings);
+ UNIT_ASSERT(res.Root);
+ UNIT_ASSERT_STRING_CONTAINS(res.Issues.ToString(), "Warning: Deprecated syntax for positional schema: please use 'column type' instead of 'type AS column', code: 4535\n"); // substring check: the position is not pinned
+ }
+
+ Y_UNIT_TEST(ErrorOnColumnNameInMaxByLimit) { // A column reference (subkey) passed as an AggregationFactory("MAX_BY", ...) argument must be rejected
+ ExpectFailWithError(
+ "SELECT AGGREGATE_BY(AsTuple(value, key), AggregationFactory(\"MAX_BY\", subkey)) FROM plato.Input;",
+ "<main>:1:42: Error: Source does not allow column references\n"
+ "<main>:1:71: Error: Column reference 'subkey'\n");
+ }
+
+ Y_UNIT_TEST(ErrorInLibraryWithTopLevelNamedSubquery) { // LIBRARY mode: a top-level named SELECT is tolerated when unused, but is an error once referenced
+ TString withUnusedSubq = "$unused = select max(key) from plato.Input;\n" // $unused is never referenced; $count lives inside the subquery body
+ "\n"
+ "define subquery $foo() as\n"
+ " $count = select count(*) from plato.Input;\n"
+ " select * from plato.Input limit $count / 2;\n"
+ "end define;\n"
+ "export $foo;\n";
+ UNIT_ASSERT(SqlToYqlWithMode(withUnusedSubq, NSQLTranslation::ESqlMode::LIBRARY).IsOk());
+
+ TString withTopLevelSubq = "$count = select count(*) from plato.Input;\n" // $count is defined at top level and used inside $foo
+ "\n"
+ "define subquery $foo() as\n"
+ " select * from plato.Input limit $count / 2;\n"
+ "end define;\n"
+ "export $foo;\n";
+ auto res = SqlToYqlWithMode(withTopLevelSubq, NSQLTranslation::ESqlMode::LIBRARY);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Named subquery can not be used as a top level statement in libraries\n");
+ }
+
+ Y_UNIT_TEST(SessionStartAndSessionStateShouldSurviveSessionWindowArgsError){ // A bad SessionWindow lambda (min() with 2 args) must surface as the Min arity error while SessionStart/SessionState are in use
+ TString query = R"(
+ $init = ($_row) -> (min(1, 2)); -- error: aggregation func min() can not be used here
+ $calculate = ($_row, $_state) -> (1);
+ $update = ($_row, $_state) -> (2);
+ SELECT
+ SessionStart() over w as session_start,
+ SessionState() over w as session_state,
+ FROM plato.Input as t
+ WINDOW w AS (
+ PARTITION BY user, SessionWindow(ts + 1, $init, $update, $calculate)
+ )
+ )";
+ ExpectFailWithError(query, "<main>:2:33: Error: Aggregation function Min requires exactly 1 argument(s), given: 2\n");
+ }
+}
+
+// Translates `req`, requires that an AST root is produced, and requires the
+// text returned by Err2Str to be exactly the "Symbol <symbol> is not used"
+// warning (code 4527) positioned at <main>:row:col.
+void CheckUnused(const TString& req, const TString& symbol, unsigned row, unsigned col) {
+    TStringBuilder expectedWarning;
+    expectedWarning << "<main>:" << row << ":" << col << ": Warning: Symbol " << symbol << " is not used, code: 4527\n";
+
+    auto parsed = SqlToYql(req);
+    UNIT_ASSERT(parsed.Root);
+    UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expectedWarning);
+}
+
+Y_UNIT_TEST_SUITE(WarnUnused) { // Cases for the "Symbol ... is not used" warning (code 4527); positions passed to CheckUnused are (row, col)
+ Y_UNIT_TEST(ActionOrSubquery) { // A defined but never invoked action / subquery
+ TString req = " $a()\n"
+ "as select 1;\n"
+ "end define;\n"
+ "\n"
+ "select 1;";
+ CheckUnused("define action\n" + req, "$a", 2, 3);
+ CheckUnused("define subquery\n" + req, "$a", 2, 3);
+ }
+
+ Y_UNIT_TEST(Import) { // An imported symbol that is never used; with an alias the warning names the alias
+ TString req = "import lib1 symbols\n"
+ " $sqr;\n"
+ "select 1;";
+ CheckUnused(req, "$sqr", 2, 3);
+
+ req = "import lib1 symbols\n"
+ " $sqr as\n"
+ " $sq;\n"
+ "select 1;";
+ CheckUnused(req, "$sq", 3, 5);
+ }
+
+ Y_UNIT_TEST(NamedNodeStatement) { // Named nodes that are never read, including ones redefined before being read
+ TString req = " $a, $a = AsTuple(1, 2);\n" // the first $a is redefined by the second one
+ "select $a;";
+ CheckUnused(req, "$a", 1, 2);
+ req = "$a, $b = AsTuple(1, 2);\n" // $b is never read
+ "select $a;";
+ CheckUnused(req, "$b", 1, 6);
+ CheckUnused(" $a = 1; $a = 2; select $a;", "$a", 1, 2); // the first assignment is overwritten before use
+ }
+
+ Y_UNIT_TEST(Declare) { // A DECLAREd parameter that is never referenced
+ CheckUnused("declare $a as String;select 1;", "$a", 1, 9);
+ }
+
+ Y_UNIT_TEST(ActionParams) { // An action parameter ($y) that the body never reads
+ TString req = "define action $a($x, $y) as\n"
+ " select $x;\n"
+ "end define;\n"
+ "\n"
+ "do $a(1,2);";
+ CheckUnused(req, "$y", 1, 22);
+ }
+
+ Y_UNIT_TEST(SubqueryParams) { // A subquery parameter ($x) that the body never reads
+ TString req = "use plato;\n"
+ "define subquery $q($name, $x) as\n"
+ " select * from $name;\n"
+ "end define;\n"
+ "\n"
+ "select * from $q(\"Input\", 1);";
+ CheckUnused(req, "$x", 2, 27);
+ }
+
+ Y_UNIT_TEST(For) { // A FOR loop variable that the loop body never reads
+ TString req = "define action $a() as\n"
+ " select 1;\n"
+ "end define;\n"
+ "\n"
+ "for $i in ListFromRange(1, 10)\n"
+ "do $a();";
+ CheckUnused(req, "$i", 5, 5);
+ }
+
+ Y_UNIT_TEST(LambdaParams) { // A lambda parameter ($y) that the body never reads
+ TString req = "$lambda = ($x, $y) -> ($x);\n"
+ "select $lambda(1, 2);";
+ CheckUnused(req, "$y", 1, 16);
+ }
+
+ Y_UNIT_TEST(InsideLambdaBody) { // Locals inside a lambda body: never read, or overwritten before being read
+ TString req = "$lambda = () -> {\n"
+ " $x = 1; return 1;\n"
+ "};\n"
+ "select $lambda();";
+ CheckUnused(req, "$x", 2, 3);
+ req = "$lambda = () -> {\n"
+ " $x = 1; $x = 2; return $x;\n"
+ "};\n"
+ "select $lambda();";
+ CheckUnused(req, "$x", 2, 3);
+ }
+
+ Y_UNIT_TEST(InsideAction) { // Locals inside an action body: never read, or overwritten before being read
+ TString req = "define action $a() as\n"
+ " $x = 1; select 1;\n"
+ "end define;\n"
+ "\n"
+ "do $a();";
+ CheckUnused(req, "$x", 2, 3);
+ req = "define action $a() as\n"
+ " $x = 1; $x = 2; select $x;\n"
+ "end define;\n"
+ "\n"
+ "do $a();";
+ CheckUnused(req, "$x", 2, 3);
+ }
+
+ Y_UNIT_TEST(NoWarnOnNestedActions) { // A parameter read only inside a nested action counts as used
+ auto req = "pragma warning(\"error\", \"4527\");\n" // promote 4527 to an error so any such warning fails IsOk()
+ "define action $action($b) as\n"
+ " define action $aaa() as\n"
+ " select $b;\n"
+ " end define;\n"
+ " do $aaa();\n"
+ "end define;\n"
+ "\n"
+ "do $action(1);";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+
+ Y_UNIT_TEST(NoWarnForUsageAfterSubquery) { // A name defined before a subquery definition and read only after it counts as used
+ auto req = "use plato;\n"
+ "pragma warning(\"error\", \"4527\");\n" // promote 4527 to an error so any such warning fails IsOk()
+ "\n"
+ "$a = 1;\n"
+ "\n"
+ "define subquery $q($table) as\n"
+ " select * from $table;\n"
+ "end define;\n"
+ "\n"
+ "select * from $q(\"Input\");\n"
+ "select $a;";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+}
+
+Y_UNIT_TEST_SUITE(AnonymousNames) { // Rules for the anonymous name $_: where it is forbidden and where it is accepted
+ Y_UNIT_TEST(ReferenceAnonymousVariableIsForbidden) { // $_ may be assigned but not referenced; the quoted spelling $`_` is treated the same
+ auto req = "$_ = 1; select $_;";
+
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Unable to reference anonymous name $_\n");
+
+ req = "$`_` = 1; select $`_`;";
+ res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:18: Error: Unable to reference anonymous name $_\n");
+ }
+
+ Y_UNIT_TEST(Declare) { // $_ cannot be DECLAREd
+ auto req = "declare $_ as String;";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:9: Error: Can not use anonymous name '$_' in DECLARE statement\n");
+ }
+
+ Y_UNIT_TEST(ActionSubquery) { // $_ cannot name an action or a subquery
+ auto req = "define action $_() as select 1; end define;";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:15: Error: Can not use anonymous name '$_' as ACTION name\n");
+
+ req = "define subquery $_() as select 1; end define;";
+ res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Can not use anonymous name '$_' as SUBQUERY name\n");
+ }
+
+ Y_UNIT_TEST(Import) { // $_ cannot be used as an import alias
+ auto req = "import lib symbols $sqr as $_;";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:28: Error: Can not import anonymous name $_\n");
+ }
+
+ Y_UNIT_TEST(Export) { // $_ cannot be exported (checked in LIBRARY mode)
+ auto req = "export $_;";
+ auto res = SqlToYqlWithMode(req, NSQLTranslation::ESqlMode::LIBRARY);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: Can not export anonymous name $_\n");
+ }
+
+ Y_UNIT_TEST(AnonymousInActionArgs) { // Repeated $_ action parameters are accepted, even with warning 4527 promoted to an error
+ auto req = "pragma warning(\"error\", \"4527\");\n"
+ "define action $a($_, $y, $_) as\n"
+ " select $y;\n"
+ "end define;\n"
+ "\n"
+ "do $a(1,2,3);";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+
+ Y_UNIT_TEST(AnonymousInSubqueryArgs) { // Repeated $_ subquery parameters are accepted, even with warning 4527 promoted to an error
+ auto req = "use plato;\n"
+ "pragma warning(\"error\", \"4527\");\n"
+ "define subquery $q($_, $y, $_) as\n"
+ " select * from $y;\n"
+ "end define;\n"
+ "\n"
+ "select * from $q(1,\"Input\",3);";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+
+ Y_UNIT_TEST(AnonymousInLambdaArgs) { // Repeated $_ lambda parameters are accepted, even with warning 4527 promoted to an error
+ auto req = "pragma warning(\"error\", \"4527\");\n"
+ "$lambda = ($_, $x, $_) -> ($x);\n"
+ "select $lambda(1,2,3);";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+
+ Y_UNIT_TEST(AnonymousInFor) { // $_ as an EVALUATE FOR loop variable is accepted, even with warning 4527 promoted to an error
+ auto req = "pragma warning(\"error\", \"4527\");\n"
+ "evaluate for $_ in ListFromRange(1, 10) do begin select 1; end do;";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+
+ Y_UNIT_TEST(Assignment) { // $_ as an assignment target (plain and in tuple unpacking) is accepted, even with 4527 as an error
+ auto req = "pragma warning(\"error\", \"4527\");\n"
+ "$_ = 1;\n"
+ "$_, $x, $_ = AsTuple(1,2,3);\n"
+ "select $x;";
+ UNIT_ASSERT(SqlToYql(req).IsOk());
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonValue) { // Translation of JSON_VALUE: argument validation, RETURNING, and ON EMPTY / ON ERROR handlers
+ Y_UNIT_TEST(JsonValueArgumentCount) { // JSON_VALUE without a JsonPath argument is a syntax error
+ NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json));");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: Unexpected token ')' : syntax error...\n\n");
+ }
+
+ Y_UNIT_TEST(JsonValueJsonPathMustBeLiteralString) { // A named expression in place of the JsonPath string literal is a syntax error
+ NYql::TAstParseResult res = SqlToYql("$jsonPath = \"strict $.key\"; select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), $jsonPath);");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Error: Unexpected token absence : Missing STRING_VALUE \n\n");
+ }
+
+ Y_UNIT_TEST(JsonValueTranslation) { // A basic call must produce exactly one JsonValue element carrying the path and the Json cast
+ NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\");");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { // callback handed to VerifyProgram; checks the text of `line`
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\""));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json"));
+ };
+
+ TWordCountHive elementStat({"JsonValue"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["JsonValue"]);
+ }
+
+ Y_UNIT_TEST(JsonValueReturningSection) { // RETURNING <type> must show up as a DataType node for each listed type
+ for (const auto& typeName : {"Bool", "Int64", "Double", "String"}) {
+ NYql::TAstParseResult res = SqlToYql(
+ TStringBuilder() << "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" RETURNING " << typeName << ");"
+ );
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\""));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(TStringBuilder() << "DataType '" << typeName));
+ };
+
+ TWordCountHive elementStat({typeName}); // here the counted word is the type name itself, not "JsonValue"
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat[typeName] > 0);
+ }
+ }
+
+ Y_UNIT_TEST(JsonValueInvalidReturningType) { // An unknown type name after RETURNING must be rejected
+ NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{'key': 1238}@@ as Json), 'strict $.key' RETURNING invalid);");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Unknown simple type 'invalid'\n");
+ }
+
+ Y_UNIT_TEST(JsonValueAndReturningInExpressions) { // json_value / returning must stay usable as named-node names and column aliases
+ NYql::TAstParseResult res = SqlToYql(
+ "USE plato\n;"
+ "$json_value = \"some string\";\n"
+ "SELECT $json_value;\n"
+ "SELECT 1 as json_value;\n"
+ "SELECT $json_value as json_value;\n"
+ "$returning = \"another string\";\n"
+ "SELECT $returning;\n"
+ "SELECT 1 as returning;\n"
+ "SELECT $returning as returning;\n"
+ );
+
+ UNIT_ASSERT(res.Root);
+ }
+
+ Y_UNIT_TEST(JsonValueValidCaseHandlers) { // Every ON EMPTY x ON ERROR combination must translate to the expected pair of handler nodes
+ const TVector<std::pair<TString, TString>> testCases = { // {handler text in SQL, expected fragment in the generated program}
+ {"", "'DefaultValue (Null)"}, // omitted handler yields the same fragment as NULL
+ {"NULL", "'DefaultValue (Null)"},
+ {"ERROR", "'Error (Null)"},
+ {"DEFAULT 123", "'DefaultValue (Int32 '\"123\")"},
+ };
+
+ for (const auto& onEmpty : testCases) {
+ for (const auto& onError : testCases) {
+ TStringBuilder query;
+ query << "$json = CAST(@@{\"key\": 1238}@@ as Json);\n"
+ << "SELECT JSON_VALUE($json, \"strict $.key\"";
+ if (!onEmpty.first.empty()) { // an empty handler text means the clause is omitted entirely
+ query << " " << onEmpty.first << " ON EMPTY";
+ }
+ if (!onError.first.empty()) {
+ query << " " << onError.first << " ON ERROR";
+ }
+ query << ");\n";
+
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(onEmpty.second + " " + onError.second)); // ON EMPTY fragment precedes ON ERROR fragment
+ };
+
+ TWordCountHive elementStat({"JsonValue"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonValue"] > 0);
+ }
+ }
+ }
+
+ Y_UNIT_TEST(JsonValueTooManyCaseHandlers) { // Three handler clauses in total must be rejected
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON ERROR NULL ON EMPTY);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: Only 1 ON EMPTY and/or 1 ON ERROR clause is expected\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueTooManyOnEmpty) { // Two ON EMPTY clauses must be rejected
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON EMPTY);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: Only 1 ON EMPTY clause is expected\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueTooManyOnError) { // Two ON ERROR clauses must be rejected
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON ERROR);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: Only 1 ON ERROR clause is expected\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueOnEmptyAfterOnError) { // ON EMPTY written after ON ERROR must be rejected
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON EMPTY);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: ON EMPTY clause must be before ON ERROR clause\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueNullInput) { // A NULL json argument must be translated to Nothing of Optional<Json>
+ NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_VALUE(NULL, "strict $.key");)");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))"));
+ };
+
+ TWordCountHive elementStat({"JsonValue"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonValue"] > 0);
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonExists) { // Translation of JSON_EXISTS: ON ERROR handlers and NULL input
+ Y_UNIT_TEST(JsonExistsValidHandlers) { // Each supported ON ERROR handler must translate to the expected program fragment
+ const TVector<std::pair<TString, TString>> testCases = { // {handler clause in SQL, expected fragment in the generated program}
+ {"", "(Just (Bool '\"false\"))"}, // omitted handler yields the same fragment as FALSE ON ERROR
+ {"TRUE ON ERROR", "(Just (Bool '\"true\"))"},
+ {"FALSE ON ERROR", "(Just (Bool '\"false\"))"},
+ {"UNKNOWN ON ERROR", "(Nothing (OptionalType (DataType 'Bool)))"},
+ // NOTE: in this case we expect arguments of JsonExists callable to end immediately
+ // after variables. This parenthesis at the end of the expression is left on purpose
+ {"ERROR ON ERROR", "(Utf8 '\"strict $.key\") (JsonVariables))"},
+ };
+
+ for (const auto& item : testCases) {
+ NYql::TAstParseResult res = SqlToYql(
+ TStringBuilder() << R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT JSON_EXISTS($json, "strict $.key" )" << item.first << ");\n"
+ );
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(item.second));
+ };
+
+ TWordCountHive elementStat({"JsonExists"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonExists"] > 0);
+ }
+ }
+
+ Y_UNIT_TEST(JsonExistsInvalidHandler) { // A named expression in place of the handler keyword is a syntax error
+ NYql::TAstParseResult res = SqlToYql(R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ $default = false;
+ SELECT JSON_EXISTS($json, "strict $.key" $default ON ERROR);
+ )");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:53: Error: Unexpected token absence : Missing RPAREN \n\n");
+ }
+
+ Y_UNIT_TEST(JsonExistsNullInput) { // A NULL json argument must be translated to Nothing of Optional<Json>
+ NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_EXISTS(NULL, "strict $.key");)");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))"));
+ };
+
+ TWordCountHive elementStat({"JsonExists"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonExists"] > 0);
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonQuery) { // Translation of JSON_QUERY: wrapper modes, ON EMPTY / ON ERROR handlers and NULL input
+ Y_UNIT_TEST(JsonQueryValidHandlers) { // Every accepted wrapper x ON EMPTY x ON ERROR combination must translate to the expected atoms
+ using TTestSuite = const TVector<std::pair<TString, TString>>; // {clause text in SQL, expected atom in the generated program}
+ TTestSuite wrapCases = {
+ {"", "'NoWrap"},
+ {"WITHOUT WRAPPER", "'NoWrap"},
+ {"WITHOUT ARRAY WRAPPER", "'NoWrap"},
+ {"WITH WRAPPER", "'Wrap"},
+ {"WITH ARRAY WRAPPER", "'Wrap"},
+ {"WITH UNCONDITIONAL WRAPPER", "'Wrap"},
+ {"WITH UNCONDITIONAL ARRAY WRAPPER", "'Wrap"},
+ {"WITH CONDITIONAL WRAPPER", "'ConditionalWrap"},
+ {"WITH CONDITIONAL ARRAY WRAPPER", "'ConditionalWrap"},
+ };
+ TTestSuite handlerCases = {
+ {"", "'Null"}, // omitted handler yields the same atom as NULL
+ {"ERROR", "'Error"},
+ {"NULL", "'Null"},
+ {"EMPTY ARRAY", "'EmptyArray"},
+ {"EMPTY OBJECT", "'EmptyObject"},
+ };
+
+ for (const auto& wrap : wrapCases) {
+ for (const auto& onError : handlerCases) {
+ for (const auto& onEmpty : handlerCases) {
+ TStringBuilder query;
+ query << R"($json = CAST(@@{"key": [123]}@@ as Json);
+ SELECT JSON_QUERY($json, "strict $.key" )" << wrap.first;
+ if (!onEmpty.first.empty()) {
+ if (wrap.first.StartsWith("WITH ")) { // an explicit ON EMPTY together with a WITH ... WRAPPER clause is an error, so skip the combination
+ continue;
+ }
+ query << " " << onEmpty.first << " ON EMPTY";
+ }
+ if (!onError.first.empty()) {
+ query << " " << onError.first << " ON ERROR";
+ }
+ query << ");\n";
+
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ const TString args = TStringBuilder() << wrap.second << " " << onEmpty.second << " " << onError.second; // expected order: wrap, ON EMPTY, ON ERROR
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(args));
+ };
+
+ Cout << wrap.first << " " << onEmpty.first << " " << onError.first << Endl; // NOTE(review): prints every combination to stdout; looks like leftover debug output - confirm
+
+ TWordCountHive elementStat({"JsonQuery"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonQuery"] > 0);
+ }
+ }
+ }
+ }
+
+ Y_UNIT_TEST(JsonQueryOnEmptyWithWrapper) { // ON EMPTY combined with a WITH ... WRAPPER clause must be rejected
+ NYql::TAstParseResult res = SqlToYql(R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT JSON_QUERY($json, "strict $" WITH ARRAY WRAPPER EMPTY ARRAY ON EMPTY);
+ )");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:38: Error: ON EMPTY is prohibited because WRAPPER clause is specified\n");
+ }
+
+ Y_UNIT_TEST(JsonQueryNullInput) { // A NULL json argument must be translated to Nothing of Optional<Json>
+ NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_QUERY(NULL, "strict $.key");)");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))"));
+ };
+
+ TWordCountHive elementStat({"JsonQuery"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonQuery"] > 0);
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonPassing) {
+ Y_UNIT_TEST(SupportedVariableTypes) { // PASSING variables of String, Double, Int64, Bool and Json kinds must appear in JsonVariables for all three functions
+ const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"};
+
+ for (const auto& function : functions) {
+ const auto query = Sprintf(R"(
+ pragma CompactNamedExprs;
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT %s(
+ $json,
+ "strict $.key"
+ PASSING
+ "string" as var1,
+ 1.234 as var2,
+ CAST(1 as Int64) as var3,
+ true as var4,
+ $json as var5
+ ))",
+ function.data() // substituted for %s in the query template
+ );
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) { // each variable must be emitted as a (name value) pair
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"string")))"), "Cannot find `var1`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var2" (Double '"1.234")))"), "Cannot find `var2`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (SafeCast (Int32 '"1") (DataType 'Int64))))"), "Cannot find `var3`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var4" (Bool '"true")))"), "Cannot find `var4`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var5" namedexprnode0))"), "Cannot find `var5`"); // $json is expected as a named-expression reference, not inlined
+ };
+
+ TWordCountHive elementStat({"JsonVariables"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonVariables"] > 0);
+ }
+ }
+
+ Y_UNIT_TEST(ValidVariableNames) {
+ const TVector<TString> functions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"};
+
+ for (const auto& function : functions) {
+ const auto query = Sprintf(R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT %s(
+ $json,
+ "strict $.key"
+ PASSING
+ "one" as var1,
+ "two" as "VaR2",
+ "three" as `var3`,
+ "four" as VaR4
+ ))",
+ function.data()
+ );
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"one")))"), "Cannot find `var1`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR2" (String '"two")))"), "Cannot find `VaR2`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (String '"three")))"), "Cannot find `var3`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR4" (String '"four")))"), "Cannot find `VaR4`");
+ };
+
+ TWordCountHive elementStat({"JsonVariables"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonVariables"] > 0);
+ }
+ }
+}
+
+// Using the legacy Json UDF must still translate, but with a deprecation warning.
+Y_UNIT_TEST_SUITE(MigrationToJsonApi) {
+ Y_UNIT_TEST(WarningOnDeprecatedJsonUdf) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ $json = CAST(@@{"key": 1234}@@ as Json);
+ SELECT Json::Parse($json);
+ )");
+
+ // Report the issues if translation unexpectedly fails outright.
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+ // The warning must be the only reported issue.
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:26: Warning: Json UDF is deprecated. Please use JSON API instead, code: 4506\n");
+ }
+}
+
+// Tests around the ANSI lexer mode: how it is switched on and how it changes the
+// treatment of double-quoted strings and multiline comments.
+Y_UNIT_TEST_SUITE(AnsiIdentsNegative) {
+ // A leading `--!ansi_lexer` special comment is honoured: the query below relies on
+ // '''' being accepted as a string literal.
+ Y_UNIT_TEST(EnableAnsiLexerFromRequestSpecialComments) {
+ auto req = "\n"
+ "\t --!ansi_lexer \n"
+ "-- Some comment\n"
+ "-- another comment\n"
+ "pragma SimpleColumns;\n"
+ "\n"
+ "select 1, '''' as empty;";
+
+ auto res = SqlToYql(req);
+ // Include the issue list so an unexpected error or warning is visible in the log.
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ UNIT_ASSERT_C(res.Issues.Size() == 0, res.Issues.ToString());
+ }
+
+ // The same marker inside a string literal must not switch the lexer: "" is still
+ // accepted as a string here.
+ Y_UNIT_TEST(AnsiLexerShouldNotBeEnabledHere) {
+ auto req = "$str = '\n"
+ "--!ansi_lexer\n"
+ "--!syntax_v1\n"
+ "';\n"
+ "\n"
+ "select 1, $str, \"\" as empty;";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ UNIT_ASSERT_C(res.Issues.Size() == 0, res.Issues.ToString());
+ }
+
+ // With the ANSI lexer a double-quoted token is reported as a column reference, which is
+ // not allowed inside dict / tuple / list literals at this scope.
+ Y_UNIT_TEST(DoubleQuotesInDictsTuplesOrLists) {
+ auto req = "$d = { 'a': 1, \"b\": 2, 'c': 3,};";
+
+ auto res = SqlToYqlWithAnsiLexer(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Column reference \"b\" is not allowed in current scope\n");
+
+ req = "$t = (1, 2, \"a\");";
+
+ res = SqlToYqlWithAnsiLexer(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:13: Error: Column reference \"a\" is not allowed in current scope\n");
+
+ req = "$l = ['a', 'b', \"c\"];";
+
+ res = SqlToYqlWithAnsiLexer(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:17: Error: Column reference \"c\" is not allowed in current scope\n");
+ }
+
+ // Each input is run through both lexers; the expectations below show that the two
+ // modes disagree on where a comment containing a nested `/*` ends.
+ Y_UNIT_TEST(MultilineComments) {
+ auto req = "/*/**/ select 1;";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+ res = SqlToYqlWithAnsiLexer(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:16: Error: Unexpected character : syntax error...\n\n");
+
+ req = "/*\n"
+ "--/*\n"
+ "*/ select 1;";
+ res = SqlToYql(req);
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+ res = SqlToYqlWithAnsiLexer(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:12: Error: Unexpected character : syntax error...\n\n");
+
+ req = "/*\n"
+ "/*\n"
+ "--*/\n"
+ "*/ select 1;";
+ res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:0: Error: Unexpected token '*' : cannot match to any predicted input...\n\n");
+ res = SqlToYqlWithAnsiLexer(req);
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+ }
+}
+
+// PRAGMA AnsiOptionalAs allows column aliases without the AS keyword.
+Y_UNIT_TEST_SUITE(AnsiOptionalAs) {
+ Y_UNIT_TEST(OptionalAsInProjection) {
+ // Keep the result in a local so the issue list can be reported on failure.
+ auto res = SqlToYql("PRAGMA AnsiOptionalAs; SELECT a b, c FROM plato.Input;");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+
+ // With the pragma explicitly disabled, the alias without AS is an error.
+ ExpectFailWithError("PRAGMA DisableAnsiOptionalAs;\n"
+ "SELECT a b, c FROM plato.Input;",
+ "<main>:2:10: Error: Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility\n");
+ }
+
+ // Words such as `type`, `data` and `source` are accepted as AS-less aliases.
+ Y_UNIT_TEST(OptionalAsWithKeywords) {
+ auto res = SqlToYql("PRAGMA AnsiOptionalAs; SELECT a type, b data, c source FROM plato.Input;");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+}
+
+// Negative tests for SessionWindow / SessionStart / SessionState: every query must be
+// rejected with exactly the diagnostics listed next to it.
+Y_UNIT_TEST_SUITE(SessionWindowNegative) {
+ Y_UNIT_TEST(SessionWindowWithoutSource) {
+ const TString query = "SELECT 1 + SessionWindow(ts, 32);";
+ const TString expected = "<main>:1:12: Error: SessionWindow requires data source\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SessionWindowInProjection) {
+ const TString query = "SELECT 1 + SessionWindow(ts, 32) from plato.Input;";
+ const TString expected = "<main>:1:12: Error: SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SessionWindowWithNonConstSecondArg) {
+ const TString query =
+ "SELECT key, session_start FROM plato.Input\n"
+ "GROUP BY SessionWindow(ts, 32 + subkey) as session_start, key;";
+ const TString expected =
+ "<main>:2:10: Error: Source does not allow column references\n"
+ "<main>:2:33: Error: Column reference 'subkey'\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SessionWindowWithWrongNumberOfArgs) {
+ // Both arities (zero and three arguments) yield the same message.
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT * FROM plato.Input GROUP BY SessionWindow()",
+ "<main>:1:36: Error: SessionWindow requires either two or four arguments\n"},
+ {"SELECT * FROM plato.Input GROUP BY SessionWindow(key, subkey, 100)",
+ "<main>:1:36: Error: SessionWindow requires either two or four arguments\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(DuplicateSessionWindow) {
+ // First case: duplicate in GROUP BY; second case: duplicate in PARTITION BY.
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT\n"
+ " *\n"
+ "FROM plato.Input\n"
+ "GROUP BY\n"
+ " SessionWindow(ts, 10),\n"
+ " user,\n"
+ " SessionWindow(ts, 20)\n"
+ ";",
+ "<main>:7:5: Error: Duplicate session window specification:\n"
+ "<main>:5:5: Error: Previous session window is declared here\n"},
+ {"SELECT\n"
+ " MIN(key) over w\n"
+ "FROM plato.Input\n"
+ "WINDOW w AS (\n"
+ " PARTITION BY SessionWindow(ts, 10), user,\n"
+ " SessionWindow(ts, 20)\n"
+ ");",
+ "<main>:6:5: Error: Duplicate session window specification:\n"
+ "<main>:5:18: Error: Previous session window is declared here\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithoutSource) {
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT 1 + SessionStart();",
+ "<main>:1:12: Error: SessionStart requires data source\n"},
+ {"SELECT 1 + SessionState();",
+ "<main>:1:12: Error: SessionState requires data source\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithoutGroupByOrWindow) {
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT 1 + SessionStart() from plato.Input;",
+ "<main>:1:12: Error: SessionStart can not be used without aggregation by SessionWindow\n"},
+ {"SELECT 1 + SessionState() from plato.Input;",
+ "<main>:1:12: Error: SessionState can not be used without aggregation by SessionWindow\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithGroupByWithoutSession) {
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT 1 + SessionStart() from plato.Input group by user;",
+ "<main>:1:12: Error: SessionStart can not be used here: SessionWindow specification is missing in GROUP BY\n"},
+ {"SELECT 1 + SessionState() from plato.Input group by user;",
+ "<main>:1:12: Error: SessionState can not be used here: SessionWindow specification is missing in GROUP BY\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithoutOverWithWindowWithoutSession) {
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT 1 + SessionStart(), MIN(key) over w from plato.Input window w as ()",
+ "<main>:1:12: Error: SessionStart can not be used without aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n"},
+ {"SELECT 1 + SessionState(), MIN(key) over w from plato.Input window w as ()",
+ "<main>:1:12: Error: SessionState can not be used without aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithWindowWithoutSession) {
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT 1 + SessionStart() over w, MIN(key) over w from plato.Input window w as ()",
+ "<main>:1:12: Error: SessionStart can not be used with window w: SessionWindow specification is missing in PARTITION BY\n"},
+ {"SELECT 1 + SessionState() over w, MIN(key) over w from plato.Input window w as ()",
+ "<main>:1:12: Error: SessionState can not be used with window w: SessionWindow specification is missing in PARTITION BY\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithSessionedWindow) {
+ const std::pair<TString, TString> cases[] = {
+ {"SELECT 1 + SessionStart(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ",
+ "<main>:1:12: Error: SessionStart can not be used here: SessionWindow specification is missing in GROUP BY. Maybe you forgot to add OVER `window_name`?\n"},
+ {"SELECT 1 + SessionState(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ",
+ "<main>:1:12: Error: SessionState can not be used here: SessionWindow specification is missing in GROUP BY. Maybe you forgot to add OVER `window_name`?\n"},
+ };
+ for (const auto& testCase : cases) {
+ ExpectFailWithError(testCase.first, testCase.second);
+ }
+ }
+
+ Y_UNIT_TEST(AggregationBySessionStateIsNotSupportedYet) {
+ const TString query = "SELECT SOME(1 + SessionState()), key from plato.Input group by key, SessionWindow(ts, 1);";
+ const TString expected = "<main>:1:17: Error: SessionState with GROUP BY is not supported yet\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ // Both queries are translated with the RTMR provider name passed to SqlToYql.
+ Y_UNIT_TEST(SessionWindowInRtmr) {
+ const TString plainQuery = "SELECT * FROM plato.Input GROUP BY SessionWindow(ts, 10);";
+ NYql::TAstParseResult parsed = SqlToYql(plainQuery, 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:54: Error: Streaming group by query must have a hopping window specification.\n");
+
+ const TString hopQuery = R"(
+ SELECT key, SUM(value) AS value FROM plato.Input
+ GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S"), SessionWindow(ts, 10);
+ )";
+ parsed = SqlToYql(hopQuery, 10, TString(NYql::RtmrProviderName));
+
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:13: Error: SessionWindow is unsupported for streaming sources\n");
+ }
+}
+
+// Tests for MakeLibraPreprocessor settings validation. Each test supplies only the
+// `$settings` definition; the rest of the query is shared.
+Y_UNIT_TEST_SUITE(LibraSqlSugar) {
+ // Builds a query that starts with the given settings text, passes $settings to
+ // MakeLibraPreprocessor from two different modules, and translates it.
+ auto makeResult = [](TStringBuf settings) {
+ return SqlToYql(
+ TStringBuilder()
+ << settings
+ << "\n$udf1 = MyLibra::MakeLibraPreprocessor($settings);"
+ << "\n$udf2 = CustomLibra::MakeLibraPreprocessor($settings);"
+ << "\nPROCESS plato.Input USING $udf1(TableRow())"
+ << "\nUNION ALL"
+ << "\nPROCESS plato.Input USING $udf2(TableRow());"
+ );
+ };
+
+ Y_UNIT_TEST(EmptySettings) {
+ auto res = makeResult(R"(
+ $settings = AsStruct();
+ )");
+ // Report the issue list on failure instead of a bare "assertion failed".
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(OnlyEntities) {
+ auto res = makeResult(R"(
+ $settings = AsStruct(
+ AsList("A", "B", "C") AS Entities
+ );
+ )");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(EntitiesWithStrategy) {
+ auto res = makeResult(R"(
+ $settings = AsStruct(
+ AsList("A", "B", "C") AS Entities,
+ "blacklist" AS EntitiesStrategy
+ );
+ )");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(AllSettings) {
+ auto res = makeResult(R"(
+ $settings = AsStruct(
+ AsList("A", "B", "C") AS Entities,
+ "whitelist" AS EntitiesStrategy,
+ "path" AS BlockstatDict,
+ false AS ParseWithFat,
+ "map" AS Mode
+ );
+ )");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ // An unknown strategy name is reported with the list of accepted values.
+ Y_UNIT_TEST(BadStrategy) {
+ auto res = makeResult(R"(
+ $settings = AsStruct("bad" AS EntitiesStrategy);
+ )");
+ UNIT_ASSERT_STRING_CONTAINS(
+ Err2Str(res),
+ "Error: MakeLibraPreprocessor got invalid entities strategy: expected 'whitelist' or 'blacklist'"
+ );
+ }
+
+ // A non-string element in the Entities list is reported.
+ Y_UNIT_TEST(BadEntities) {
+ auto res = makeResult(R"(
+ $settings = AsStruct(AsList("A", 1) AS Entities);
+ )");
+ UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "Error: MakeLibraPreprocessor entity must be string literal");
+ }
+}
+
+Y_UNIT_TEST_SUITE(TrailingQuestionsNegative) {
+ Y_UNIT_TEST(Basic) {
+ ExpectFailWithError("SELECT 1?;", "<main>:1:9: Error: Unexpected token '?' at the end of expression\n");
+ ExpectFailWithError("SELECT 1? + 1;", "<main>:1:10: Error: Unexpected token '+' : cannot match to any predicted input...\n\n");
+ ExpectFailWithError("SELECT 1 + 1??? < 2", "<main>:1:13: Error: Unexpected token '?' at the end of expression\n");
+ ExpectFailWithError("SELECT 1? > 2? > 3?",
+ "<main>:1:11: Error: Unexpected token '?' at the end of expression\n"
+ "<main>:1:16: Error: Unexpected token '?' at the end of expression\n"
+ "<main>:1:21: Error: Unexpected token '?' at the end of expression\n");
+ }
+
+ Y_UNIT_TEST(SmartParen) {
+ ExpectFailWithError("$x = 1; SELECT (Int32?, $x?)", "<main>:1:27: Error: Unexpected token '?' at the end of expression\n");
+ ExpectFailWithError("SELECT (Int32, foo?)", "<main>:1:19: Error: Unexpected token '?' at the end of expression\n");
+ }
+
+ Y_UNIT_TEST(LambdaOptArgs) {
+ ExpectFailWithError("$l = ($x, $y?, $z??, $t?) -> ($x);", "<main>:1:18: Error: Expecting at most one '?' token here (for optional lambda parameters), but got 2\n");
+ }
+}
+
+// Under PRAGMA FlexibleTypes, type names such as int32 / text / uuid / String can
+// appear where an expression is expected. Each query below must translate successfully.
+// Results are kept in locals so the issue list can be printed on failure.
+Y_UNIT_TEST_SUITE(FlexibleTypes) {
+ Y_UNIT_TEST(AssumeOrderByType) {
+ auto res = SqlToYql("PRAGMA FlexibleTypes; SELECT 1 AS int32 ASSUME ORDER BY int32");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(GroupingSets) {
+ auto res = SqlToYql("PRAGMA FlexibleTypes; SELECT COUNT(*) AS cnt, text, uuid FROM plato.Input GROUP BY GROUPING SETS((uuid), (uuid, text));");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(WeakField) {
+ auto res = SqlToYql("PRAGMA FlexibleTypes; SELECT WeakField(text, string) as text FROM plato.Input");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(Aggregation1) {
+ TString q =
+ "PRAGMA FlexibleTypes;\n"
+ "$foo = ($x, $const, $type) -> ($x || $const || FormatType($type));\n"
+ "SELECT $foo(SOME(x), 'aaa', String) FROM plato.Input GROUP BY y;";
+ auto res = SqlToYql(q);
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ Y_UNIT_TEST(Aggregation2) {
+ TString q =
+ "PRAGMA FlexibleTypes;\n"
+ "SELECT 1 + String + MAX(key) FROM plato.Input;";
+ auto res = SqlToYql(q);
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+}
+
+// Tests for named expressions declared from outside the query through
+// TTranslationSettings::DeclaredNamedExprs.
+Y_UNIT_TEST_SUITE(ExternalDeclares) {
+ // An externally declared $foo is usable in the query and emits one `declare` node.
+ Y_UNIT_TEST(BasicUsage) {
+ NSQLTranslation::TTranslationSettings settings;
+ settings.DeclaredNamedExprs["foo"] = "String";
+ auto res = SqlToYqlWithSettings("select $foo;", settings);
+ // Print the issues on failure; the second check also guards against warnings.
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ UNIT_ASSERT_C(res.Issues.Size() == 0, res.Issues.ToString());
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "declare") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'String)))__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("declare"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]);
+ }
+
+ // A DECLARE inside the query wins over the external declaration (Int32, not String).
+ Y_UNIT_TEST(DeclareOverrides) {
+ NSQLTranslation::TTranslationSettings settings;
+ settings.DeclaredNamedExprs["foo"] = "String";
+ auto res = SqlToYqlWithSettings("declare $foo as Int32; select $foo;", settings);
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ UNIT_ASSERT_C(res.Issues.Size() == 0, res.Issues.ToString());
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "declare") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'Int32)))__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("declare"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]);
+ }
+
+ // An external declaration that the query never references produces no issues, and the
+ // `declare` node is still expected in the output.
+ Y_UNIT_TEST(UnusedDeclareDoesNotProduceWarning) {
+ NSQLTranslation::TTranslationSettings settings;
+ settings.DeclaredNamedExprs["foo"] = "String";
+ auto res = SqlToYqlWithSettings("select 1;", settings);
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ UNIT_ASSERT_C(res.Issues.Size() == 0, res.Issues.ToString());
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "declare") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"__((declare "$foo" (DataType 'String)))__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("declare"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["declare"]);
+ }
+
+ // An unparsable external type fails translation even though the query itself is valid.
+ Y_UNIT_TEST(DeclaresWithInvalidTypesFails) {
+ NSQLTranslation::TTranslationSettings settings;
+ settings.DeclaredNamedExprs["foo"] = "List<BadType>";
+ auto res = SqlToYqlWithSettings("select 1;", settings);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res),
+ "<main>:0:5: Error: Unknown type: 'BadType'\n"
+ "<main>: Error: Failed to parse type for externally declared name 'foo'\n");
+ }
+}
+
+Y_UNIT_TEST_SUITE(ExternalDataSource) {
+ Y_UNIT_TEST(CreateExternalDataSourceWithAuthNone) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithAuthServiceAccount) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"SERVICE_ACCOUNT") '('"location" '"my-bucket") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithBasic) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="BASIC",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"source_type" '"PostgreSQL"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithMdbBasic) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"MDB_BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"PostgreSQL"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithAws) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name",
+ AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name",
+ AWS_REGION="ru-central-1"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"AWS") '('"aws_access_key_id_secret_name" '"secred_id_name") '('"aws_region" '"ru-central-1") '('"aws_secret_access_key_secret_name" '"secret_key_name") '('"location" '"protocol://host:port/") '('"source_type" '"PostgreSQL"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithToken) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="YT",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="TOKEN",
+ TOKEN_SECRET_NAME="token_name"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"TOKEN") '('"location" '"protocol://host:port/") '('"source_type" '"YT") '('"token_secret_name" '"token_name"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithTablePrefix) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceIfNotExists) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE IF NOT EXISTS MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectIfNotExists"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(AlterExternalDataSource) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ ALTER EXTERNAL DATA SOURCE MyDataSource
+ SET (SOURCE_TYPE = "ObjectStorage", Login = "Admin"),
+ SET Location "bucket",
+ RESET (Auth_Method, Service_Account_Id, Service_Account_Secret_Name);
+ )sql");
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"location" '"bucket") '('"login" '"Admin") '('"source_type" '"ObjectStorage"))))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"auth_method" '"service_account_id" '"service_account_secret_name")))#");
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceOrReplace) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato;
+ CREATE OR REPLACE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )");
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObjectOrReplace"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateOrReplaceForUnsupportedTableTypesShouldFail) {
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE OR REPLACE TABLE t (a int32 not null, primary key(a, a));
+ )sql" , "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE OR REPLACE TABLE t (
+ Key Uint64,
+ Value1 String,
+ PRIMARY KEY (Key)
+ )
+ WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10
+ );
+ )sql" , "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n");
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithBadArguments) {
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource;
+ )sql" , "<main>:3:56: Error: Unexpected token ';' : syntax error...\n\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql" , "<main>:5:33: Error: SOURCE_TYPE requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket"
+ );
+ )sql" , "<main>:5:30: Error: AUTH_METHOD requires key\n");
+
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE1"
+ );
+ )sql" , "<main>:6:33: Error: Unknown AUTH_METHOD = NONE1\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT"
+ );
+ )sql" , "<main>:6:33: Error: SERVICE_ACCOUNT_ID requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT",
+ SERVICE_ACCOUNT_ID="s1"
+ );
+ )sql" , "<main>:7:40: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT",
+ SERVICE_ACCOUNT_SECRET_NAME="s1"
+ );
+ )sql" , "<main>:7:49: Error: SERVICE_ACCOUNT_ID requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="BASIC",
+ LOGIN="admin"
+ );
+ )sql" , "<main>:7:27: Error: PASSWORD_SECRET_NAME requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="BASIC",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:7:42: Error: LOGIN requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_ID requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:9:42: Error: LOGIN requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ LOGIN="admin"
+ );
+ )sql" , "<main>:9:27: Error: PASSWORD_SECRET_NAME requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name",
+ AWS_REGION="ru-central-1"
+ );
+ )sql" , "<main>:8:32: Error: AWS_ACCESS_KEY_ID_SECRET_NAME requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name",
+ AWS_REGION="ru-central-1"
+ );
+ )sql" , "<main>:8:32: Error: AWS_SECRET_ACCESS_KEY_SECRET_NAME requires key\n");
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name",
+ AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name"
+ );
+ )sql" , "<main>:8:51: Error: AWS_REGION requires key\n");
+ }
+
+ // Translates DROP EXTERNAL DATA SOURCE. When the callback is invoked for the word
+ // "Write", the line must mention dropObject and must not contain 'features.
+ // Exactly one "Write" hit is expected after VerifyProgram.
+ // NOTE(review): despite the name, this query sets no TablePathPrefix pragma, whereas
+ // DropExternalDataSource does - the two test names look swapped; confirm before renaming.
+ Y_UNIT_TEST(DropExternalDataSourceWithTablePrefix) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL DATA SOURCE MyDataSource;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // Translates DROP EXTERNAL DATA SOURCE with TablePathPrefix='/aba'. When the callback is
+ // invoked for the word "Write", the line must contain the prefixed path
+ // "/aba/MyDataSource" and dropObject, and must not contain 'features.
+ // NOTE(review): this is the variant that actually sets TablePathPrefix, while
+ // DropExternalDataSourceWithTablePrefix does not - the names look swapped; confirm.
+ Y_UNIT_TEST(DropExternalDataSource) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ DROP EXTERNAL DATA SOURCE MyDataSource;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource");
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // Translates DROP EXTERNAL DATA SOURCE IF EXISTS. When the callback is invoked for the
+ // word "Write", the line must contain the object name and dropObjectIfExists.
+ // Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(DropExternalDataSourceIfExists) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL DATA SOURCE IF EXISTS MyDataSource;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_STRING_CONTAINS(line, "MyDataSource");
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObjectIfExists"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(ExternalTable) {
+ // CREATE EXTERNAL TABLE with DATA_SOURCE and LOCATION. For the word "Write" the line must
+ // carry both settings plus the externalTable table type, and must mention tablescheme.
+ // Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(CreateExternalTable) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tablescheme"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // CREATE EXTERNAL TABLE under TablePathPrefix='/aba'. For the word "Write" the line must
+ // contain the prefixed data source path, the prefixed table path and tablescheme.
+ Y_UNIT_TEST(CreateExternalTableWithTablePrefix) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mydatasource");
+         UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mytable");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tablescheme"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalTableObjectStorage) { // CREATE EXTERNAL TABLE with FORMAT, projection.* and storage.location.template settings must translate without errors.
+ auto res = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int,
+ year Int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*",
+ FORMAT="json_as_string",
+ `projection.enabled`="true",
+ `projection.year.type`="integer",
+ `projection.year.min`="2010",
+ `projection.year.max`="2022",
+ `projection.year.interval`="1",
+ `projection.month.type`="integer",
+ `projection.month.min`="1",
+ `projection.month.max`="12",
+ `projection.month.interval`="1",
+ `projection.month.digits`="2",
+ `storage.location.template`="${year}/${month}",
+ PARTITONED_BY = "[year, month]"
+ );
+ )sql");
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); // Only success is checked; the generated program is not inspected. NOTE(review): the last key in the query is spelled PARTITONED_BY - confirm whether that is intentional or a typo for PARTITIONED_BY.
+ }
+
+ // CREATE EXTERNAL TABLE IF NOT EXISTS. For the word "Write" the line must carry the
+ // data source / location settings with the externalTable type and mention
+ // create_if_not_exists. Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(CreateExternalTableIfNotExists) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE IF NOT EXISTS mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, "create_if_not_exists");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // CREATE OR REPLACE EXTERNAL TABLE. For the word "Write" the line must carry the
+ // data source / location settings with the externalTable type and mention
+ // create_or_replace. Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(CreateExternalTableOrReplace) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+ CREATE OR REPLACE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, "create_or_replace");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // ALTER EXTERNAL TABLE ... ADD COLUMN plus RESET (LOCATION). For the word "Write" the line
+ // must contain the addColumns action, the setTableSettings entry for location, the
+ // externalTable table type and the alter mode.
+ Y_UNIT_TEST(AlterExternalTableAddColumn) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER EXTERNAL TABLE mytable
+ ADD COLUMN my_column int32,
+ RESET (LOCATION);
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('addColumns '('('"my_column" (AsOptionalType (DataType 'Int32))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location)))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // ALTER EXTERNAL TABLE ... DROP COLUMN with two SET forms. For the word "Write" the line
+ // must contain the dropColumns action, the merged setTableSettings list, the
+ // externalTable table type and the alter mode.
+ Y_UNIT_TEST(AlterExternalTableDropColumn) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER EXTERNAL TABLE mytable
+ DROP COLUMN my_column,
+ SET (Location = "abc", Other_Prop = "42"),
+ SET x 'y';
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('dropColumns '('"my_column")#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('Other_Prop (String '"42")) '('x (String '"y")))))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateExternalTableWithBadArguments) { // Malformed CREATE EXTERNAL TABLE statements must fail with exactly the error text (including position) given.
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable;
+ )sql" , "<main>:3:45: Error: Unexpected token ';' : syntax error...\n\n"); // No column list and no WITH clause: syntax error at the ';'.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ );
+ )sql" , "<main>:4:23: Error: DATA_SOURCE requires key\n"); // Column list but no WITH clause: DATA_SOURCE is reported as missing.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource"
+ );
+ )sql" , "<main>:6:33: Error: LOCATION requires key\n"); // WITH gives DATA_SOURCE only: LOCATION is reported as missing.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ LOCATION="/folder1/*"
+ );
+ )sql" , "<main>:6:30: Error: DATA_SOURCE requires key\n"); // WITH gives LOCATION only: DATA_SOURCE is reported as missing.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int,
+ PRIMARY KEY(a)
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql" , "<main>:8:30: Error: PRIMARY KEY is not supported for external table\n"); // PRIMARY KEY in the column list is rejected for external tables.
+ }
+
+ // Translates DROP EXTERNAL TABLE. When the callback is invoked for the word "Write", the
+ // line must not contain tablescheme. Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(DropExternalTable) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL TABLE MyExternalTable;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("tablescheme"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // Translates DROP EXTERNAL TABLE with TablePathPrefix='/aba'. When the callback is invoked
+ // for the word "Write", the line must contain the prefixed path "/aba/MyExternalTable"
+ // and must not contain 'tablescheme.
+ Y_UNIT_TEST(DropExternalTableWithTablePrefix) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ DROP EXTERNAL TABLE MyExternalTable;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyExternalTable");
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'tablescheme"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // Translates DROP EXTERNAL TABLE IF EXISTS. When the callback is invoked for the word
+ // "Write", the line must mention drop_if_exists and must not contain tablescheme.
+ Y_UNIT_TEST(DropExternalTableIfExists) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL TABLE IF EXISTS MyExternalTable;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("tablescheme"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(TopicsDDL) {
+ // Prepends "use plato;" to `query`, translates the result with
+ // SqlToYql(finalQuery, 10, "kikimr") and asserts the outcome:
+ //   expectOk == true  -> translation must succeed (issues are printed on failure);
+ //   expectOk == false -> translation must fail.
+ void TestQuery(const TString& query, bool expectOk = true) {
+     TStringBuilder finalQuery;
+
+     finalQuery << "use plato;" << Endl << query;
+     auto res = SqlToYql(finalQuery, 10, "kikimr");
+     if (expectOk) {
+         UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+     } else {
+         // Name the offending statement: callers pass many queries through this helper,
+         // and a bare assertion would not say which one was unexpectedly accepted.
+         UNIT_ASSERT_C(!res.IsOk(), "query was expected to fail but was accepted: " << query);
+     }
+ }
+
+ // Every CREATE TOPIC statement listed here is passed to TestQuery with the default expectOk.
+ Y_UNIT_TEST(CreateTopicSimple) {
+     const TVector<TString> statements = {
+         R"(
+ CREATE TOPIC topic1;
+ )",
+         R"(
+ CREATE TOPIC `cluster1.topic1`;
+ )",
+         R"(
+ CREATE TOPIC topic1 WITH (metering_mode = "str_value", partition_count_limit = 123, retention_period = Interval('PT1H'));
+ )",
+     };
+     for (const auto& statement : statements) {
+         TestQuery(statement);
+     }
+ }
+
+ // CREATE TOPIC statements with consumer lists; each is passed to TestQuery with the default expectOk.
+ Y_UNIT_TEST(CreateTopicConsumer) {
+     const TVector<TString> statements = {
+         R"(
+ CREATE TOPIC topic1 (CONSUMER cons1);
+ )",
+         R"(
+ CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false));
+ )",
+         R"(
+ CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)) WITH (supported_codecs = "1,2,3");
+ )",
+     };
+     for (const auto& statement : statements) {
+         TestQuery(statement);
+     }
+ }
+
+ // ALTER TOPIC statements using SET / RESET; each is passed to TestQuery with the default expectOk.
+ Y_UNIT_TEST(AlterTopicSimple) {
+     const TVector<TString> statements = {
+         R"(
+ ALTER TOPIC topic1 SET (retention_period = Interval('PT1H'));
+ )",
+         R"(
+ ALTER TOPIC topic1 SET (retention_storage_mb = 3, partition_count_limit = 50);
+ )",
+         R"(
+ ALTER TOPIC topic1 RESET (supported_codecs, retention_period);
+ )",
+         R"(
+ ALTER TOPIC topic1 RESET (partition_write_speed_bytes_per_second),
+ SET (partition_write_burst_bytes = 11111, min_active_partitions = 1);
+ )",
+     };
+     for (const auto& statement : statements) {
+         TestQuery(statement);
+     }
+ }
+ // One ALTER TOPIC statement combining ADD / ALTER / DROP CONSUMER with topic-level
+ // SET and RESET; passed to TestQuery with the default expectOk.
+ Y_UNIT_TEST(AlterTopicConsumer) {
+     const TString statement = R"(
+ ALTER TOPIC topic1 ADD CONSUMER consumer1,
+ ADD CONSUMER consumer2 WITH (important = false, supported_codecs = "RAW"),
+ ALTER CONSUMER consumer3 SET (important = false, read_from = 1),
+ ALTER CONSUMER consumer3 RESET (supported_codecs),
+ DROP CONSUMER consumer4,
+ SET (partition_count_limit = 11, retention_period = Interval('PT1H')),
+ RESET(metering_mode)
+ )";
+     TestQuery(statement);
+ }
+ // DROP TOPIC is passed to TestQuery with the default expectOk.
+ Y_UNIT_TEST(DropTopic) {
+     const TString statement = R"(
+ DROP TOPIC topic1;
+ )";
+     TestQuery(statement);
+ }
+
+ // Every statement listed here is passed to TestQuery with expectOk = false,
+ // i.e. translation of each one must fail.
+ Y_UNIT_TEST(TopicBadRequests) {
+     const TVector<TString> rejected = {
+         R"(
+ CREATE TOPIC topic1();
+ )",
+         R"(
+ CREATE TOPIC topic1 SET setting1 = value1;
+ )",
+         R"(
+ ALTER TOPIC topic1 SET setting1 value1;
+ )",
+         R"(
+ ALTER TOPIC topic1 RESET setting1;
+ )",
+         R"(
+ ALTER TOPIC topic1 DROP CONSUMER consumer4 WITH (k1 = v1);
+ )",
+         R"(
+ CREATE TOPIC topic1 WITH (retention_period = 123);
+ )",
+         R"(
+ CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons1 WITH (important = false));
+ )",
+         R"(
+ CREATE TOPIC topic1 (CONSUMER cons1 WITH (bad_option = false));
+ )",
+         R"(
+ ALTER TOPIC topic1 ADD CONSUMER cons1, ALTER CONSUMER cons1 RESET (important);
+ )",
+         R"(
+ ALTER TOPIC topic1 ADD CONSUMER consumer1,
+ ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1),
+ ALTER CONSUMER consumer3 RESET (supported_codecs);
+ )",
+         R"(
+ ALTER TOPIC topic1 ADD CONSUMER consumer1,
+ ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1),
+ ALTER CONSUMER consumer3 SET (read_from = 2);
+ )",
+     };
+     for (const auto& statement : rejected) {
+         TestQuery(statement, false);
+     }
+ }
+
+ // ALTER TOPIC under TablePathPrefix: after VerifyProgram, both the word "topic" and the
+ // fully prefixed topic path must each have been counted exactly once.
+ Y_UNIT_TEST(TopicWithPrefix) {
+     NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato;
+ PRAGMA TablePathPrefix = '/database/path/to/tables';
+ ALTER TOPIC `my_table/my_feed` ADD CONSUMER `my_consumer`;
+ )");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TWordCountHive elementStat = {{TString("/database/path/to/tables/my_table/my_feed"), 0}, {"topic", 0}};
+     VerifyProgram(res, elementStat);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["topic"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["/database/path/to/tables/my_table/my_feed"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(BlockEnginePragma) {
+ // For each accepted BlockEngine pragma value ("auto", "force", "disable"): the query must
+ // translate, every line reported for the word "BlockEngine" must be the matching
+ // Configure! call, and the hit count must be 0 for "disable" and 1 otherwise.
+ Y_UNIT_TEST(Basic) {
+     const TVector<TString> values = {"auto", "force", "disable"};
+     for (const auto& value : values) {
+         const auto query = TStringBuilder() << "pragma Blockengine='" << value << "'; select 1;";
+         NYql::TAstParseResult res = SqlToYql(query);
+         // Report translator issues on failure instead of a bare "res.Root" message.
+         UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+         TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+             Y_UNUSED(word);
+             UNIT_ASSERT_STRING_CONTAINS(line, TStringBuilder() << R"(Configure! world (DataSource '"config") '"BlockEngine" '")" << value << "\"");
+         };
+
+         TWordCountHive elementStat({"BlockEngine"});
+         VerifyProgram(res, elementStat, verifyLine);
+         // Value-reporting assertion: shows expected/actual counts and which pragma value failed.
+         UNIT_ASSERT_VALUES_EQUAL_C(((value == "disable") ? 0 : 1), elementStat["BlockEngine"], "pragma value: " << value);
+     }
+ }
+
+ // A BlockEngine pragma value outside disable|auto|force must fail with the exact message below.
+ Y_UNIT_TEST(UnknownSetting) {
+     const TString query = "use plato; pragma BlockEngine='foo';";
+     const TString expectedError = "<main>:1:31: Error: Expected `disable|auto|force' argument for: BlockEngine\n";
+     ExpectFailWithError(query, expectedError);
+ }
+}
+
+Y_UNIT_TEST_SUITE(TViewSyntaxTest) {
+ // A minimal CREATE VIEW ... AS SELECT 1 must translate (res.Root is set).
+ Y_UNIT_TEST(CreateViewSimple) {
+     const TString sql = R"(
+ USE plato;
+ CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+ }
+
+ // CREATE VIEW at an explicit path over a table SELECT. For the word "Write!" the line must
+ // contain the view path and createObject. Exactly one "Write!" hit is expected.
+ Y_UNIT_TEST(CreateViewFromTable) {
+     constexpr const char* viewPath = "/PathPrefix/TheView";
+     constexpr const char* selectText = R"(
+ SELECT * FROM SomeTable
+ )";
+
+     const auto sql = std::format(R"(
+ USE plato;
+ CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
+ )",
+         viewPath,
+         selectText);
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {"Write!"} };
+     TVerifyLineFunc checkWriteLine = [&](const TString& word, const TString& line) {
+         if (word != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, viewPath);
+         UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(writeHits["Write!"], 1);
+ }
+
+ Y_UNIT_TEST(CheckReconstructedQuery) { // CREATE VIEW over a JOIN query; the callback is meant to compare a "query_text" line with the re-tokenized source query.
+ constexpr const char* path = "/PathPrefix/TheView";
+ constexpr const char* query = R"(
+ SELECT * FROM FirstTable JOIN SecondTable ON FirstTable.key == SecondTable.key
+ )";
+
+ NYql::TAstParseResult res = SqlToYql(std::format(R"(
+ USE plato;
+ CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
+ )",
+ path,
+ query
+ )
+ );
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ TString reconstructedQuery = ToString(Tokenize(query)); // Text the "query_text" check below expects to find in the line.
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ if (word == "query_text") { // NOTE(review): elementStat below registers only "Write!", so this branch presumably never runs - confirm how VerifyProgram selects words and register "query_text" if the check is meant to be live.
+ UNIT_ASSERT_STRING_CONTAINS(line, reconstructedQuery);
+ }
+ };
+ TWordCountHive elementStat = { {"Write!"} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1); // The only assertion after translation that is certain to execute.
+ }
+
+ // DROP VIEW at an explicit path. For the word "Write!" the line must contain the view path
+ // and dropObject. Exactly one "Write!" hit is expected.
+ Y_UNIT_TEST(DropView) {
+     constexpr const char* viewPath = "/PathPrefix/TheView";
+     const auto sql = std::format(R"(
+ USE plato;
+ DROP VIEW `{}`;
+ )",
+         viewPath);
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {"Write!"} };
+     TVerifyLineFunc checkWriteLine = [&](const TString& word, const TString& line) {
+         if (word != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, viewPath);
+         UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(writeHits["Write!"], 1);
+ }
+
+ // CREATE VIEW under TablePathPrefix='/PathPrefix'. For the word "Write!" the line must
+ // contain the prefixed view path and createObject. Exactly one "Write!" hit is expected.
+ Y_UNIT_TEST(CreateViewWithTablePrefix) {
+     const TString sql = R"(
+ USE plato;
+ PRAGMA TablePathPrefix='/PathPrefix';
+ CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {"Write!"} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
+         UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(writeHits["Write!"], 1);
+ }
+
+ // DROP VIEW under TablePathPrefix='/PathPrefix'. For the word "Write!" the line must
+ // contain the prefixed view path and dropObject. Exactly one "Write!" hit is expected.
+ Y_UNIT_TEST(DropViewWithTablePrefix) {
+     NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato;
+ PRAGMA TablePathPrefix='/PathPrefix';
+ DROP VIEW TheView;
+ )"
+     );
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         // The filter must use the same word that is registered in elementStat below
+         // ("Write!"); the previous "Write" filter never matched it, so the two
+         // assertions were skipped. The sibling DropView test filters on "Write!" as well.
+         if (word == "Write!") {
+             UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
+             UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+         }
+     };
+
+     TWordCountHive elementStat = { {"Write!"} };
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
+ }
+
+ // SELECT ... WITH schema(...): for the word "userschema" the line must contain the expected
+ // StructType (optional Int32 `y`, non-optional String `x`). Exactly one hit is expected.
+ Y_UNIT_TEST(YtAlternativeSchemaSyntax) {
+     auto parsed = SqlToYql(R"(
+ SELECT * FROM plato.Input WITH schema(y Int32, x String not null);
+ )");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive schemaHits = {{TString("userschema"), 0}};
+     TVerifyLineFunc checkSchemaLine = [](const TString& word, const TString& line) {
+         if (word != "userschema") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             line.find(R"__('('('"userschema" (StructType '('"y" (AsOptionalType (DataType 'Int32))) '('"x" (DataType 'String))))))__"));
+     };
+     VerifyProgram(parsed, schemaHits, checkSchemaLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, schemaHits["userschema"]);
+ }
+
+ // SELECT Input.x FROM Input VIEW ...: after VerifyProgram the program must have produced
+ // no "SqlAccess" hits, one "SqlProjectItem" hit and one "Read!" hit.
+ Y_UNIT_TEST(UseViewAndFullColumnId) {
+     NYql::TAstParseResult res = SqlToYql("USE plato; SELECT Input.x FROM Input VIEW uitzicht;");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToOneLineString());
+
+     TWordCountHive elementStat = {{TString("SqlAccess"), 0}, {"SqlProjectItem", 0}, {"Read!", 0}};
+     VerifyProgram(res, elementStat);
+     UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlAccess"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(CompactNamedExprs) {
+ // Substitutes each callable name into the %s placeholder of the query template and
+ // expects translation to fail with the paired error message. Entries are kept in the
+ // order the callables are tried.
+ Y_UNIT_TEST(SourceCallablesInWrongContext) {
+     const TString queryTemplate = R"(
+ pragma CompactNamedExprs;
+ $foo = %s();
+ select $foo from plato.Input;
+ )";
+
+     const TVector<std::pair<TString, TString>> expectations = {
+         {"TableRow", "<main>:3:20: Error: TableRow requires data source\n"},
+         {"JoinTableRow", "<main>:3:20: Error: JoinTableRow requires data source\n"},
+         {"TableRecordIndex", "<main>:3:20: Error: Unable to use function: TableRecord without source\n"},
+         {"TablePath", "<main>:3:20: Error: Unable to use function: TablePath without source\n"},
+         {"SystemMetadata", "<main>:3:20: Error: Unable to use function: SystemMetadata without source\n"},
+     };
+
+     for (const auto& [callable, expectedError] : expectations) {
+         const auto request = Sprintf(queryTemplate.c_str(), callable.c_str());
+         ExpectFailWithError(request, expectedError);
+     }
+ }
+
+ // With pragma ValidateUnusedExprs, errors inside an unused named expression and inside an
+ // unused subquery definition must both be reported with the exact messages below.
+ Y_UNIT_TEST(ValidateUnusedExprs) {
+     const TString unusedNamedExpr = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma ValidateUnusedExprs;
+
+ $foo = count(1);
+ select 1;
+ )";
+     ExpectFailWithError(unusedNamedExpr, "<main>:6:20: Error: Aggregation is not allowed in this context\n");
+
+     const TString unusedSubquery = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma ValidateUnusedExprs;
+
+ define subquery $x() as
+ select count(1, 2);
+ end define;
+ select 1;
+ )";
+     ExpectFailWithError(unusedSubquery, "<main>:7:24: Error: Aggregation function Count requires exactly 1 argument(s), given: 2\n");
+ }
+
+ // Counterpart of ValidateUnusedExprs: with pragma DisableValidateUnusedExprs the same two
+ // queries (an unused named expression and an unused subquery definition) are translated
+ // and must be accepted.
+ // The result of IsOk() used to be discarded, so the test could not fail on a rejected
+ // query; it is now asserted, with the issues printed on failure.
+ Y_UNIT_TEST(DisableValidateUnusedExprs) {
+     TString query = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma DisableValidateUnusedExprs;
+
+ $foo = count(1);
+ select 1;
+ )";
+     auto res = SqlToYql(query);
+     UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+     query = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma DisableValidateUnusedExprs;
+
+ define subquery $x() as
+ select count(1, 2);
+ end define;
+ select 1;
+ )";
+     res = SqlToYql(query);
+     UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+}
+
+Y_UNIT_TEST_SUITE(ResourcePool) {
+ // CREATE RESOURCE POOL with three settings. For the word "Write" the line must contain the
+ // lower-cased settings list and createObject. Exactly one "Write" hit is expected.
+ Y_UNIT_TEST(CreateResourcePool) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL MyResourcePool WITH (
+ CONCURRENT_QUERY_LIMIT=20,
+ QUERY_CANCEL_AFTER_SECONDS=86400,
+ QUEUE_TYPE="FIFO"
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"concurrent_query_limit" (Int32 '"20")) '('"query_cancel_after_seconds" (Int32 '"86400")) '('"queue_type" '"FIFO"))#");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateResourcePoolWithBadArguments) { // Malformed CREATE RESOURCE POOL statements must fail with exactly the error text (including position) given.
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL MyResourcePool;
+ )sql" , "<main>:3:51: Error: Unexpected token ';' : syntax error...\n\n"); // No WITH clause: syntax error at the ';'.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL MyResourcePool WITH (
+ DUPLICATE_SETTING="first_value",
+ DUPLICATE_SETTING="second_value"
+ );
+ )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); // Same key given twice in WITH: reported at the second occurrence (line 5).
+ }
+
+ // ALTER RESOURCE POOL with SET and RESET. For the word "Write" the line must contain the
+ // alterObject mode, the features list and the resetFeatures list shown below.
+ Y_UNIT_TEST(AlterResourcePool) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER RESOURCE POOL MyResourcePool
+ SET (CONCURRENT_QUERY_LIMIT = 30, Weight = 5, QUEUE_TYPE = "UNORDERED"),
+ RESET (Query_Cancel_After_Seconds, Query_Count_Limit);
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"concurrent_query_limit" (Int32 '"30")) '('"queue_type" '"UNORDERED") '('"weight" (Int32 '"5")))))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"query_cancel_after_seconds" '"query_count_limit")))#");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // Translates DROP RESOURCE POOL. When the callback is invoked for the word "Write", the
+ // line must mention dropObject and must not contain 'features.
+ // Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(DropResourcePool) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP RESOURCE POOL MyResourcePool;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(BackupCollection) {
+ // CREATE BACKUP COLLECTION with two settings. For the word "Write" the line must contain
+ // the collection name, the lower-cased settings list and 'create.
+ Y_UNIT_TEST(CreateBackupCollection) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection WITH (
+ STORAGE="local",
+ TAG="test" -- for testing purposes, not a real thing
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // CREATE BACKUP COLLECTION ... DATABASE. In addition to the name, settings and 'create,
+ // the "Write" line must contain the database entry type.
+ Y_UNIT_TEST(CreateBackupCollectionWithDatabase) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection DATABASE WITH (
+ STORAGE="local",
+ TAG="test" -- for testing purposes, not a real thing
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('type 'database)"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // CREATE BACKUP COLLECTION with an explicit table list. In addition to the name, settings
+ // and 'create, the "Write" line must contain one table entry per listed table.
+ Y_UNIT_TEST(CreateBackupCollectionWithTables) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection (
+ TABLE someTable,
+ TABLE `prefix/anotherTable`
+ ) WITH (
+ STORAGE="local",
+ TAG="test" -- for testing purposes, not a real thing
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'create"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('('('type 'table) '('path '"someTable")))#"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('('('type 'table) '('path '"prefix/anotherTable")))#"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateBackupCollectionWithBadArguments) { // Malformed CREATE BACKUP COLLECTION statements must fail with exactly the error text (including position) given.
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection;
+ )sql" , "<main>:3:55: Error: Unexpected token ';' : syntax error...\n\n"); // No WITH clause: syntax error at the ';'.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TABLE TestCollection;
+ )sql" , "<main>:3:47: Error: Unexpected token 'TestCollection' : syntax error...\n\n"); // TABLE keyword placed before the collection name: syntax error at the name.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION DATABASE `test` TestCollection;
+ )sql" , "<main>:3:50: Error: Unexpected token '`test`' : syntax error...\n\n"); // DATABASE followed by a quoted name before the collection name: syntax error at `test`.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection WITH (
+ DUPLICATE_SETTING="first_value",
+ DUPLICATE_SETTING="second_value"
+ );
+ )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); // Same key given twice in WITH: reported at the second occurrence (line 5).
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection WITH (
+ INT_SETTING=1
+ );
+ )sql" , "<main>:4:21: Error: INT_SETTING value should be a string literal\n"); // Non-string setting value is rejected.
+ }
+
+ // ALTER BACKUP COLLECTION with two SET clauses and a RESET. For the word "Write" the line
+ // must contain the collection name, the alter mode, the merged settings list and the
+ // resetSettings list.
+ Y_UNIT_TEST(AlterBackupCollection) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER BACKUP COLLECTION TestCollection
+ SET (STORAGE="remote"), -- also just for test
+ SET (TAG1 = "123"),
+ RESET (TAG2, TAG3);
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('settings '('('"storage" '"remote") '('"tag1" '"123"))))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetSettings '('"tag2" '"tag3")))#");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // ALTER BACKUP COLLECTION with DROP TABLE and ADD DATABASE. For the word "Write" the line
+ // must contain the collection name, the alter mode, 'alterEntries and one entry per action.
+ Y_UNIT_TEST(AlterBackupCollectionEntries) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER BACKUP COLLECTION TestCollection
+ DROP TABLE `test`,
+ ADD DATABASE;
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('alterEntries)#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'table) '('path '"test") '('action 'drop)))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'database) '('action 'add)))#");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // Translates DROP BACKUP COLLECTION. When the callback is invoked for the word "Write",
+ // the line must contain the collection name and "drop".
+ // Exactly one "Write" hit is expected after VerifyProgram.
+ Y_UNIT_TEST(DropBackupCollection) {
+     NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP BACKUP COLLECTION TestCollection;
+ )sql");
+     // Report translator issues on failure instead of a bare "res.Root" message.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop"));
+         }
+     };
+
+     TWordCountHive elementStat = { {TString("Write"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(ResourcePoolClassifier) {
+ // CREATE RESOURCE POOL CLASSIFIER with three settings. For the word "Write" the line must
+ // contain the lower-cased settings list and createObject. Exactly one "Write" hit is expected.
+ Y_UNIT_TEST(CreateResourcePoolClassifier) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH (
+ RANK=20,
+ RESOURCE_POOL='wgUserQueries',
+ MEMBER_NAME='yandex_query@abc'
+ );
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"member_name" '"yandex_query@abc") '('"rank" (Int32 '"20")) '('"resource_pool" '"wgUserQueries"))#");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ Y_UNIT_TEST(CreateResourcePoolClassifierWithBadArguments) { // Malformed CREATE RESOURCE POOL CLASSIFIER statements must fail with exactly the error text (including position) given.
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;
+ )sql" , "<main>:3:72: Error: Unexpected token ';' : syntax error...\n\n"); // No WITH clause: syntax error at the ';'.
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH (
+ DUPLICATE_SETTING="first_value",
+ DUPLICATE_SETTING="second_value"
+ );
+ )sql" , "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"); // Same key given twice in WITH: reported at the second occurrence (line 5).
+ }
+
+ // ALTER RESOURCE POOL CLASSIFIER with SET and RESET. For the word "Write" the line must
+ // contain the alterObject mode, the features list and the resetFeatures list shown below.
+ Y_UNIT_TEST(AlterResourcePoolClassifier) {
+     auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier
+ SET (RANK = 30, Weight = 5, MEMBER_NAME = "test@user"),
+ RESET (Resource_Pool);
+ )sql");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TWordCountHive writeHits = { {TString("Write"), 0} };
+     TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"member_name" '"test@user") '('"rank" (Int32 '"30")) '('"weight" (Int32 '"5")))))#");
+         UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"resource_pool")))#");
+     };
+     VerifyProgram(parsed, writeHits, checkWriteLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, writeHits["Write"]);
+ }
+
+ // Checks that DROP RESOURCE POOL CLASSIFIER translates into a single dropObject "Write"
+ // that carries no 'features list.
+ Y_UNIT_TEST(DropResourcePoolClassifier) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;
+ )sql");
+ // Report the collected issues on failure, as the Create/Alter tests of this suite do.
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ // Called for every line that contains "Write".
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropObject"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one occurrence of "Write" is expected in the whole printed program.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // Backtick-quoted identifiers (escaped, doubled and \x60-encoded backticks, plus a backtick
+ // inside a trailing comment) must translate without any issue with both lexers.
+ Y_UNIT_TEST(BacktickMatching) {
+ // A translation is clean when it has a root, reports ok and collected no issues.
+ const auto assertClean = [](const NYql::TAstParseResult& parsed) {
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ };
+
+ const char* const queries[] = {
+ "select\n"
+ " 1 as `Schema has \\`RealCost\\``\n"
+ " -- foo`bar",
+ "select 1 as `a``b`, 2 as ````, 3 as `\\x60a\\x60`, 4 as ```b```, 5 as `\\`c\\``",
+ };
+
+ for (const char* query : queries) {
+ // Default lexer first, then the ANSI lexer, for each query in turn.
+ assertClean(SqlToYql(query));
+ assertClean(SqlToYqlWithAnsiLexer(query));
+ }
+ }
+}
+
+// Tests for the PARTITION_COUNT table setting.
+Y_UNIT_TEST_SUITE(OlapPartitionCount) {
+ // PARTITION_COUNT together with STORE = COLUMN must translate successfully.
+ Y_UNIT_TEST(CorrectUsage) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE TABLE `mytable` (id Uint32, PRIMARY KEY (id))
+ PARTITION BY HASH(id)
+ WITH (STORE = COLUMN, PARTITION_COUNT = 8);
+ )sql");
+
+ UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+ }
+
+ // PARTITION_COUNT without STORE = COLUMN must fail with exactly one issue naming the restriction.
+ Y_UNIT_TEST(UseWithoutColumnStore) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ CREATE TABLE `mytable` (id Uint32, PRIMARY KEY (id))
+ WITH (PARTITION_COUNT = 8);
+ )sql");
+
+ UNIT_ASSERT(!res.IsOk());
+ UNIT_ASSERT(res.Issues.Size() == 1);
+ UNIT_ASSERT_STRING_CONTAINS(res.Issues.ToString(), "PARTITION_COUNT can be used only with STORE=COLUMN");
+ }
+}
+
+// Tests for the BACKUP statement.
+Y_UNIT_TEST_SUITE(Backup) {
+ // Plain BACKUP: a single "Write" line naming the collection, with 'backup and without 'incremental.
+ Y_UNIT_TEST(Simple) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ BACKUP TestCollection;
+ )sql");
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ // Called for every line that contains "Write".
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'incremental"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'backup"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // BACKUP ... INCREMENTAL: same as above, but the "Write" line must also contain 'incremental.
+ Y_UNIT_TEST(Incremental) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ BACKUP TestCollection INCREMENTAL;
+ )sql");
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ // Called for every line that contains "Write".
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'incremental"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'backup"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+}
+
+// Tests for the RESTORE statement.
+Y_UNIT_TEST_SUITE(Restore) {
+ // Plain RESTORE: a single "Write" line naming the collection and containing 'restore.
+ Y_UNIT_TEST(Simple) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ RESTORE TestCollection;
+ )sql");
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ // Called for every line that contains "Write".
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'restore"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // RESTORE ... AT '<point>': the "Write" line must additionally carry the 'at value verbatim.
+ Y_UNIT_TEST(AtPoint) {
+ NYql::TAstParseResult res = SqlToYql(R"sql(
+ USE plato;
+ RESTORE TestCollection AT '2024-06-16_20-14-02';
+ )sql");
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+ // Called for every line that contains "Write".
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(R"#('"TestCollection")#"));
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('at '"2024-06-16_20-14-02")#");
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'restore"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+}
+
+// Tests for column FAMILY settings in CREATE TABLE.
+Y_UNIT_TEST_SUITE(ColumnFamily) {
+ // Two families, each with its own COMPRESSION_LEVEL, must translate without issues and
+ // produce two "compression_level" occurrences in the printed program.
+ Y_UNIT_TEST(CompressionLevel) {
+ NYql::TAstParseResult res = SqlToYql(R"( use plato;
+ CREATE TABLE tableName (
+ Key Uint32 FAMILY default,
+ Value String FAMILY family1,
+ PRIMARY KEY (Key),
+ FAMILY default (
+ DATA = "test",
+ COMPRESSION = "lz4",
+ COMPRESSION_LEVEL = 5
+ ),
+ FAMILY family1 (
+ DATA = "test",
+ COMPRESSION = "lz4",
+ COMPRESSION_LEVEL = 3
+ )
+ );
+ )");
+ UNIT_ASSERT(res.IsOk());
+ UNIT_ASSERT(res.Issues.Size() == 0);
+ // Called for every line that contains "Write" or "compression_level".
+ // NOTE(review): the "5" and "3" checks are plain substring searches, so any 5 or 3 on the
+ // "Write" line satisfies them, not only the compression levels.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("compression_level"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("5"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("3"));
+ }
+ };
+
+ TWordCountHive elementStat = { { TString("Write"), 0 }, { TString("compression_level"), 0 } };
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ UNIT_ASSERT_VALUES_EQUAL(2, elementStat["compression_level"]);
+ }
+}
diff --git a/yql/essentials/sql/v1/sql_ut.h b/yql/essentials/sql/v1/sql_ut.h
new file mode 100644
index 00000000000..7e9c3df8e8e
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_ut.h
@@ -0,0 +1,235 @@
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/sql/sql.h>
+#include <util/generic/map.h>
+
+#include <library/cpp/regex/pcre/pcre.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/split.h>
+#include <deque>
+#include <unordered_set>
+using namespace NSQLTranslation;
+
+// Selects whether the helpers in this header additionally print collected issues to Cerr.
+enum class EDebugOutput {
+ None, // do not print anything
+ ToCerr, // print the issues text to Cerr
+};
+
+// AST print flags combining PerLine, ShortQuote and AdaptArbitraryContent.
+// NOTE(review): GetPrettyPrint() in this header spells out PerLine | ShortQuote itself and does
+// not use this constant.
+const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
+ NYql::TAstPrintFlags::AdaptArbitraryContent;
+
+// Returns the issues collected in `res` as text; with EDebugOutput::ToCerr the same text is
+// also written to Cerr.
+inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) {
+ TStringStream out;
+ res.Issues.PrintTo(out);
+ const TString text = out.Str();
+
+ const bool echo = (debug == EDebugOutput::ToCerr);
+ if (echo) {
+ Cerr << text << Endl;
+ }
+ return text;
+}
+
+// Translates `query` with the test cluster mapping ("plato" -> `provider`, or the YT provider when
+// `provider` is empty; "hahn" -> YT; "mon" -> Solomon), syntax version 1 and the ANTLR4 parser
+// switched off. With EDebugOutput::ToCerr the collected issues are printed to Cerr.
+// NOTE(review): settings.Arena points at an arena local to this function, which is destroyed on return.
+inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {},
+ EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {})
+{
+ google::protobuf::Arena arena;
+
+ // Cluster name -> provider mapping used by the tests.
+ const TString platoCluster = "plato";
+ const TString platoService = provider.empty() ? TString(NYql::YtProviderName) : provider;
+ settings.ClusterMapping[platoCluster] = platoService;
+ settings.ClusterMapping["hahn"] = NYql::YtProviderName;
+ settings.ClusterMapping["mon"] = NYql::SolomonProviderName;
+
+ // Translation knobs taken from the arguments.
+ settings.Mode = mode;
+ settings.MaxErrors = maxErrors;
+ settings.AnsiLexer = ansiLexer;
+
+ // Fixed knobs for this header.
+ settings.Arena = &arena;
+ settings.Antlr4Parser = false;
+ settings.SyntaxVersion = 1;
+
+ auto translated = SqlToYql(query, settings);
+ if (debug == EDebugOutput::ToCerr) {
+ Err2Str(translated, debug);
+ }
+ return translated;
+}
+
+// Shortcut for SqlToYqlWithMode() in QUERY mode with the default (non-ANSI) lexer.
+inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
+ const auto queryMode = NSQLTranslation::ESqlMode::QUERY;
+ return SqlToYqlWithMode(query, queryMode, maxErrors, provider, debug);
+}
+
+// Translates `query` in QUERY mode starting from caller-supplied settings; the remaining
+// arguments of SqlToYqlWithMode() are passed with the same values as its defaults.
+inline NYql::TAstParseResult
+SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
+ const size_t maxErrors = 10;
+ const bool ansiLexer = false;
+ return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, {}, EDebugOutput::None, ansiLexer, settings);
+}
+
+inline void ExpectFailWithError(const TString& query, const TString& error) {
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
+}
+
+// Asserts that `query` fails to translate (no root) and that the issues text matches the
+// PCRE pattern `errorRegex`.
+inline void ExpectFailWithFuzzyError(const TString& query, const TString& errorRegex) {
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+
+ NPcre::TPcre<char> pattern(errorRegex.c_str());
+ UNIT_ASSERT(pattern.Matches(Err2Str(parsed)));
+}
+
+// Same as SqlToYql(), but with the ANSI lexer enabled.
+inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
+ return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug, /* ansiLexer = */ true);
+}
+
+// ANSI-lexer counterpart of ExpectFailWithError(): the query must fail to translate and the
+// issues text must equal `error`.
+inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) {
+ auto parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(!parsed.Root);
+
+ const TString actual = Err2Str(parsed);
+ UNIT_ASSERT_NO_DIFF(actual, error);
+}
+
+// Prints the translated program to a string using the PerLine and ShortQuote print flags.
+// A result without a root fails the current test with the collected issues instead of
+// dereferencing a null root.
+inline TString GetPrettyPrint(const NYql::TAstParseResult& res) {
+ UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+ TStringStream yqlProgram;
+ res.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
+ return yqlProgram.Str();
+}
+
+// Wraps `str` as '"str", i.e. an apostrophe followed by the text in double quotes.
+inline TString Quote(const char* str) {
+ TStringBuilder quoted;
+ quoted << "'\"";
+ quoted << str;
+ quoted << "\"";
+ return quoted;
+}
+
+// Map from a search word to the number of times it was found; filled in by VerifyProgram().
+class TWordCountHive: public TMap<TString, unsigned> {
+public:
+ // Registers every given word with a zero counter.
+ TWordCountHive(std::initializer_list<TString> strings) {
+ for (const TString& key : strings) {
+ this->emplace(key, 0U);
+ }
+ }
+
+ // Registers the given words with explicit initial counters.
+ TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list)
+ : TMap(list)
+ {}
+};
+
+// Callback invoked by VerifyProgram() with a found word and the program line that contains it.
+using TVerifyLineFunc = std::function<void (const TString& word, const TString& line)>;
+
+// Pretty-prints `res`, splits the text on "\n" and, for every line, counts the non-overlapping
+// occurrences of each key of `wordCounter`, incrementing the mapped counter per occurrence.
+// When `verifyLine` is set it is called with (word, line) for every occurrence.
+// Returns the pretty-printed program text.
+inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) {
+ // An empty word is found at every position and never advances the search below, which
+ // would loop forever; fail the test up front instead.
+ for (const auto& counterIter: wordCounter) {
+ UNIT_ASSERT_C(!counterIter.first.empty(), "VerifyProgram: empty search word");
+ }
+
+ const auto programm = GetPrettyPrint(res);
+ TVector<TString> yqlProgram;
+ Split(programm, "\n", yqlProgram);
+ for (const auto& line: yqlProgram) {
+ for (auto& counterIter: wordCounter) {
+ const auto& word = counterIter.first;
+ // Resume after the end of the previous match, so matches never overlap.
+ for (auto pos = line.find(word); pos != TString::npos; pos = line.find(word, pos + word.length())) {
+ ++counterIter.second;
+ if (verifyLine) {
+ verifyLine(word, line);
+ }
+ }
+ }
+ }
+ return programm;
+}
+
+// Translates `query` (prefixed with the ANSI IN pragma selected by `ansi`, if any) and checks
+// every program line containing "SqlIn":
+//  - `ansi` undefined: the line must contain '('warnNoAnsi);
+//  - `ansi` true: the line must contain '('ansi);
+//  - in all cases the line must contain every string from `expectedHints`.
+inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) {
+ const bool modeGiven = ansi.Defined();
+ const bool ansiOn = modeGiven && *ansi;
+
+ TString prefix;
+ if (modeGiven) {
+ prefix = ansiOn ? "PRAGMA AnsiInForEmptyOrNullableItemsCollections;" :
+ "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;";
+ }
+
+ NYql::TAstParseResult parsed = SqlToYql(prefix + query);
+ UNIT_ASSERT(parsed.Root);
+
+ TVerifyLineFunc checkLine = [&](const TString& /* word */, const TString& line) {
+ if (!modeGiven) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)"));
+ } else if (ansiOn) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)"));
+ }
+ for (const auto& expected : expectedHints) {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(expected));
+ }
+ };
+ TWordCountHive sqlInCounter = {{TString("SqlIn"), 0}};
+ VerifyProgram(parsed, sqlInCounter, checkLine);
+}
+
+// Runs the three-argument check twice: first with ANSI IN disabled, then with it enabled.
+inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) {
+ for (const bool ansi : {false, true}) {
+ VerifySqlInHints(query, expectedHints, ansi);
+ }
+}
+
+// Builds translation settings containing a single table binding called `name` with cluster
+// type "s3" and a fixed set of placeholder settings.
+inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) {
+ NSQLTranslation::TTranslationSettings settings;
+ NSQLTranslation::TTableBindingSettings bindSettings;
+ bindSettings.ClusterType = "s3";
+ bindSettings.Settings["cluster"] = "cluster";
+ bindSettings.Settings["path"] = "path";
+ bindSettings.Settings["format"] = "format";
+ // NOTE(review): "ccompression" looks like a typo of "compression"; confirm before changing,
+ // tests may compare against this exact value.
+ bindSettings.Settings["compression"] = "ccompression";
+ bindSettings.Settings["bar"] = "1";
+ // schema is not validated in this test but should be valid YSON text
+ // NOTE(review): the literal below starts with a double quote before '[' that has no closing
+ // counterpart after the final ']]' - confirm whether that is intended.
+ bindSettings.Settings["schema"] = R"__("[
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]])__";
+ bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]";
+ settings.Bindings[name] = bindSettings;
+ return settings;
+}
+
+// Breadth-first traversal of the AST starting at `root`. `visitor` is called once per reachable
+// node; returning false from it stops the traversal immediately.
+inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) {
+ std::deque<NYql::TAstNode const*> wishList{ root };
+ // A node is marked when it is queued rather than when it is dequeued: otherwise a node that
+ // is a child of several not-yet-processed nodes would be queued, and visited, several times.
+ std::unordered_set<NYql::TAstNode const*> visited{ root };
+ while (!wishList.empty()) {
+ auto v = wishList.front();
+ wishList.pop_front();
+ if (!visitor(v))
+ return;
+ if (v->IsList()) {
+ for (ui32 i = 0; i != v->GetChildrenCount(); ++i) {
+ auto child = v->GetChild(i);
+ // insert() reports whether the node was seen for the first time.
+ if (visited.insert(child).second) {
+ wishList.push_back(child);
+ }
+ }
+ }
+ }
+}
+
+// Returns the first node, in AstBfs() order, that is a list whose child number `childIndex`
+// is an atom with content equal to `name`; nullptr when no such node exists.
+inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){
+ const auto matches = [childIndex, name](const NYql::TAstNode* node) {
+ if (!node->IsList() || node->GetChildrenCount() <= childIndex) {
+ return false;
+ }
+ const auto* child = node->GetChild(childIndex);
+ return child->IsAtom() && child->GetContent() == name;
+ };
+
+ const NYql::TAstNode* found = nullptr;
+ AstBfs(root, [&found, &matches](const NYql::TAstNode* node) {
+ if (!matches(node)) {
+ return true; // keep searching
+ }
+ found = node;
+ return false; // stop at the first match
+ });
+ return found;
+}
diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp
new file mode 100644
index 00000000000..7f11822ccaf
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp
@@ -0,0 +1,7434 @@
+#include "sql_ut_antlr4.h"
+#include "format/sql_format.h"
+#include "lexer/lexer.h"
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/sql/sql.h>
+#include <util/generic/map.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/split.h>
+
+#include <format>
+
+using namespace NSQLTranslation;
+
+namespace {
+
+// Splits `query` into tokens with a lexer created by MakeLexer(true, true); fails the current
+// test with the collected issues when tokenization reports an error.
+TParsedTokenList Tokenize(const TString& query) {
+ NYql::TIssues lexIssues;
+ TParsedTokenList parsed;
+ auto sqlLexer = NSQLTranslationV1::MakeLexer(true, true);
+ const bool succeeded = Tokenize(*sqlLexer, query, "Query", parsed, lexIssues, SQL_MAX_PARSER_ERRORS);
+ UNIT_ASSERT_C(succeeded, lexIssues.ToString());
+
+ return parsed;
+}
+
+// Joins the contents of all tokens except "WS" and "EOF" with single spaces.
+TString ToString(const TParsedTokenList& tokens) {
+ TStringBuilder joined;
+ for (const auto& tok : tokens) {
+ const bool skipped = (tok.Name == "WS") || (tok.Name == "EOF");
+ if (!skipped) {
+ // Separator goes before every token except the first emitted one.
+ if (!joined.empty()) {
+ joined << ' ';
+ }
+ joined << tok.Content;
+ }
+ }
+ return joined;
+}
+
+}
+
+Y_UNIT_TEST_SUITE(AnsiMode) {
+ // "PRAGMA ANSI 2016;" must be accepted by the translator.
+ Y_UNIT_TEST(PragmaAnsi) {
+ const auto parsed = SqlToYql("PRAGMA ANSI 2016;");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+}
+
+Y_UNIT_TEST_SUITE(SqlParsingOnly) {
+ ///Helper for the BACKWARD COMPATIBILITY tests below, which pin the set of keywords that
+ ///can NOT be used as identifiers in a particular place of a SQL request.
+ ///\param forbidden keywords expected to be rejected; every other keyword must be accepted
+ ///\param makeRequest builds the SQL request that places the given keyword into the tested position
+ ///\return list of tokens that failed this check
+ TVector<TString> ValidateTokens(const THashSet<TString>& forbidden, const std::function<TString (const TString& )>& makeRequest) {
+ // keyword -> whether a request built with it is expected to translate successfully
+ THashMap<TString, bool> expectation;
+ for (const auto& keyword: NSQLFormat::GetKeywords()) {
+ expectation[keyword] = !forbidden.contains(keyword);
+ }
+ // argument check: the forbidden list may name known keywords only
+ for (const auto& name: forbidden) {
+ UNIT_ASSERT(expectation.contains(name));
+ }
+ TVector<TString> mismatched;
+ for (const auto& [keyword, shouldTranslate]: expectation) {
+ const bool translated = SqlToYql(makeRequest(keyword)).IsOk();
+ if (translated != shouldTranslate) {
+ mismatched.push_back(keyword);
+ }
+ }
+ return mismatched;
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a column
+ // name in "SELECT <token> FROM Plato.Input"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsColumnName) { //id_expr
+ auto failed = ValidateTokens({
+ "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST",
+ "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
+ "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL",
+ "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL",
+ "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP",
+ "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED",
+ "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT " << token << " FROM Plato.Input";
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a column
+ // name in "SELECT * WITHOUT <token> FROM Plato.Input"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsWithoutColumnName) { //id_without
+ auto failed = ValidateTokens({
+ "ALL", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST",
+ "CALLABLE", "CASE", "CAST", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
+ "DICT", "DISTINCT", "EMPTY_ACTION", "ENUM", "EXCEPT", "EXISTS", "FALSE", "FLOW", "FROM", "FULL", "GLOBAL",
+ "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL",
+ "NOT", "NULL", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP",
+ "SELECT", "SET", "STRUCT", "SYMMETRIC", "TAGGED", "TRUE", "TUPLE", "UNBOUNDED",
+ "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT * WITHOUT " << token << " FROM Plato.Input";
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a column
+ // name in "ALTER TABLE Plato.Input ADD COLUMN <token> Bool"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsColumnNameInAddColumn) { //id_schema
+ auto failed = ValidateTokens({
+ "ANY", "AUTOMAP", "CALLABLE", "COLUMN", "DICT", "ENUM", "ERASE", "FALSE", "FLOW",
+ "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE",
+ "SET", "STREAM", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "ALTER TABLE Plato.Input ADD COLUMN " << token << " Bool";
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a column
+ // alias in "SELECT Col as <token> FROM Plato.Input"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsColumnAlias) {
+ auto failed = ValidateTokens({
+ "AUTOMAP", "FALSE",
+ "GLOBAL", "REPEATABLE", "TRUE"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT Col as " << token << " FROM Plato.Input";
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a table
+ // name in "SELECT * FROM Plato.<token>"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsTableName) { //id_table_or_type
+ auto failed = ValidateTokens({
+ "ANY", "AUTOMAP", "COLUMN", "ERASE", "FALSE",
+ "GLOBAL", "REPEATABLE", "STREAM", "TRUE"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT * FROM Plato." << token;
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a table
+ // alias in "SELECT * FROM Plato.Input AS <token>"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsTableAlias) { //id_table
+ auto failed = ValidateTokens({
+ "AUTOMAP", "CALLABLE", "DICT", "ENUM","FALSE", "FLOW",
+ "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE",
+ "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT * FROM Plato.Input AS " << token;
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a table
+ // hint in "SELECT * FROM Plato.Input WITH <token>"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsHints) { //id_hint
+ auto failed = ValidateTokens({
+ "AUTOMAP", "CALLABLE", "COLUMNS", "DICT", "ENUM", "FALSE", "FLOW",
+ "GLOBAL", "LIST", "OPTIONAL", "REPEATABLE", "RESOURCE",
+ "SCHEMA", "SET", "STRUCT", "TAGGED", "TRUE", "TUPLE", "VARIANT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT * FROM Plato.Input WITH " << token;
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as a window
+ // name in "SELECT * FROM Plato.Input WINDOW <token> AS ()"; every other keyword must be accepted.
+ Y_UNIT_TEST(TokensAsWindow) { //id_window
+ auto failed = ValidateTokens({
+ "AUTOMAP", "CALLABLE", "DICT", "ENUM", "FALSE", "FLOW", "GLOBAL", "GROUPS", "LIST", "OPTIONAL",
+ "RANGE", "REPEATABLE", "RESOURCE", "ROWS", "SET", "STRUCT", "TAGGED" ,"TRUE", "TUPLE", "VARIANT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT * FROM Plato.Input WINDOW " << token << " AS ()";
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // Backward compatibility: exactly the keywords listed below must be rejected as the
+ // right-hand side of IN in "SELECT * FROM Plato.Input WHERE q IN <token>"; every other
+ // keyword must be accepted.
+ Y_UNIT_TEST(TokensAsIdExprIn) { //id_expr_in
+ auto failed = ValidateTokens({
+ "ALL", "ANY", "AS", "ASSUME", "ASYMMETRIC", "AUTOMAP", "BETWEEN", "BITCAST",
+ "CALLABLE", "CASE", "CAST", "COMPACT", "CUBE", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
+ "DICT", "DISTINCT", "ENUM", "ERASE", "EXCEPT", "EXISTS", "FLOW", "FROM", "FULL", "GLOBAL",
+ "HAVING", "HOP", "INTERSECT", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE", "LIMIT", "LIST", "LOCAL",
+ "NOT", "OPTIONAL", "PROCESS", "REDUCE", "REPEATABLE", "RESOURCE", "RETURN", "RETURNING", "ROLLUP",
+ "SELECT", "SET", "STREAM", "STRUCT", "SYMMETRIC", "TAGGED", "TUPLE", "UNBOUNDED",
+ "UNION", "VARIANT", "WHEN", "WHERE", "WINDOW", "WITHOUT"
+ },
+ [](const TString& token){
+ TStringBuilder req;
+ req << "SELECT * FROM Plato.Input WHERE q IN " << token;
+ return req;
+ }
+ );
+ // An empty list means every keyword behaved as expected.
+ UNIT_ASSERT_VALUES_EQUAL(failed, TVector<TString>{});
+ }
+
+ // A table hint must be accepted both bare and in parentheses.
+ Y_UNIT_TEST(TableHints) {
+ const auto bareHint = SqlToYql("SELECT * FROM plato.Input WITH INFER_SCHEMA");
+ UNIT_ASSERT(bareHint.IsOk());
+ const auto parenthesizedHint = SqlToYql("SELECT * FROM plato.Input WITH (INFER_SCHEMA)");
+ UNIT_ASSERT(parenthesizedHint.IsOk());
+ }
+
+ // IN over a literal tuple: the hints on the SqlIn line depend only on the ANSI IN pragma.
+ Y_UNIT_TEST(InNoHints) {
+ const TString sql = "SELECT * FROM plato.Input WHERE key IN (1,2,3)";
+
+ // no pragma at all
+ VerifySqlInHints(sql, { "'('('warnNoAnsi))" }, {});
+ // ANSI IN explicitly disabled
+ VerifySqlInHints(sql, { "'()" }, false);
+ // ANSI IN explicitly enabled
+ VerifySqlInHints(sql, { "'('('ansi))" }, true);
+ }
+
+ Y_UNIT_TEST(InHintCompact) {
+ // should parse COMPACT as hint
+ const TString sql = "SELECT * FROM plato.Input WHERE key IN COMPACT(1, 2, 3)";
+
+ VerifySqlInHints(sql, { "'('isCompact)" });
+ }
+
+ Y_UNIT_TEST(InHintSubquery) {
+ // should parse tableSource as hint
+ const TString sql = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN $subq";
+
+ VerifySqlInHints(sql, { "'('tableSource)" });
+ }
+
+ // IN COMPACT over a named subquery must carry both the isCompact and the tableSource hints.
+ Y_UNIT_TEST(InHintCompactSubquery) {
+ const TString sql = "$subq = (SELECT key FROM plato.Input); SELECT * FROM plato.Input WHERE key IN COMPACT $subq";
+
+ VerifySqlInHints(sql, { "'('isCompact)", "'('tableSource)" });
+ }
+
+ // COMPACT must stay usable as a column name (next to IN COMPACT) and as a table name.
+ Y_UNIT_TEST(CompactKeywordNotReservedForNames) {
+ const auto asColumn = SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT(1, 2, 3)");
+ UNIT_ASSERT(asColumn.IsOk());
+ const auto asTable = SqlToYql("USE plato; SELECT * FROM COMPACT");
+ UNIT_ASSERT(asTable.IsOk());
+ }
+
+ // FAMILY must stay usable as a column name; the table-name cases are currently disabled.
+ Y_UNIT_TEST(FamilyKeywordNotReservedForNames) {
+ // FIXME: check if we can get old behaviour
+ //UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE FAMILY (FAMILY Uint32, PRIMARY KEY (FAMILY));").IsOk());
+ //UNIT_ASSERT(SqlToYql("USE plato; SELECT FAMILY FROM FAMILY").IsOk());
+ const auto asColumn = SqlToYql("USE plato; SELECT FAMILY FROM Input");
+ UNIT_ASSERT(asColumn.IsOk());
+ }
+
+ // RESET must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(ResetKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE RESET (RESET Uint32, PRIMARY KEY (RESET));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT RESET FROM RESET");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // SYNC must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(SyncKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE SYNC (SYNC Uint32, PRIMARY KEY (SYNC));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT SYNC FROM SYNC");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // ASYNC must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(AsyncKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE ASYNC (ASYNC Uint32, PRIMARY KEY (ASYNC));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT ASYNC FROM ASYNC");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // DISABLE must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(DisableKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE DISABLE (DISABLE Uint32, PRIMARY KEY (DISABLE));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT DISABLE FROM DISABLE");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // CHANGEFEED must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(ChangefeedKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE CHANGEFEED (CHANGEFEED Uint32, PRIMARY KEY (CHANGEFEED));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT CHANGEFEED FROM CHANGEFEED");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // REPLICATION must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(ReplicationKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE REPLICATION (REPLICATION Uint32, PRIMARY KEY (REPLICATION));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT REPLICATION FROM REPLICATION");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // SECONDS must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(SecondsKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE SECONDS (SECONDS Uint32, PRIMARY KEY (SECONDS));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT SECONDS FROM SECONDS");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // MILLISECONDS must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(MillisecondsKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE MILLISECONDS (MILLISECONDS Uint32, PRIMARY KEY (MILLISECONDS));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT MILLISECONDS FROM MILLISECONDS");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // MICROSECONDS must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(MicrosecondsKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE MICROSECONDS (MICROSECONDS Uint32, PRIMARY KEY (MICROSECONDS));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT MICROSECONDS FROM MICROSECONDS");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // NANOSECONDS must stay usable as a table name and as a column name.
+ Y_UNIT_TEST(NanosecondsKeywordNotReservedForNames) {
+ const auto created = SqlToYql("USE plato; CREATE TABLE NANOSECONDS (NANOSECONDS Uint32, PRIMARY KEY (NANOSECONDS));");
+ UNIT_ASSERT(created.IsOk());
+ const auto selected = SqlToYql("USE plato; SELECT NANOSECONDS FROM NANOSECONDS");
+ UNIT_ASSERT(selected.IsOk());
+ }
+
+ // A single-column INSERT ... VALUES with a string literal must translate.
+ Y_UNIT_TEST(Jubilee) {
+ const auto parsed = SqlToYql("USE plato; INSERT INTO Arcadia (r2000000) VALUES (\"2M GET!!!\");");
+ UNIT_ASSERT(parsed.Root);
+ }
+
+ // "table.*" placed BEFORE a named expression: the SqlProjectStarItem line must appear in the
+ // printed program before the SqlProjectItem line, each exactly once.
+ Y_UNIT_TEST(QualifiedAsteriskBefore) {
+ NYql::TAstParseResult res = SqlToYql(
+ "PRAGMA DisableSimpleColumns;"
+ "select interested_table.*, LENGTH(value) AS megahelpful_len from plato.Input as interested_table;"
+ );
+ UNIT_ASSERT(res.Root);
+
+ // A local captured by reference rather than a static inside the lambda: a static would keep
+ // its value when the test body is executed again in the same process.
+ bool seenStar = false;
+ TVerifyLineFunc verifyLine = [&seenStar](const TString& word, const TString& line) {
+ if (word == "FlattenMembers") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table."));
+ } else if (word == "SqlProjectItem") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len")));
+ // the star item must already have been seen
+ UNIT_ASSERT_VALUES_EQUAL(seenStar, true);
+ } else if (word == "SqlProjectStarItem") {
+ seenStar = true;
+ }
+ };
+ TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]);
+ }
+
+ // "table.*" placed AFTER a named expression: the SqlProjectItem line must appear in the
+ // printed program before the SqlProjectStarItem line, each exactly once.
+ Y_UNIT_TEST(QualifiedAsteriskAfter) {
+ NYql::TAstParseResult res = SqlToYql(
+ "PRAGMA DisableSimpleColumns;"
+ "select LENGTH(value) AS megahelpful_len, interested_table.* from plato.Input as interested_table;"
+ );
+ UNIT_ASSERT(res.Root);
+
+ // A local captured by reference rather than a static inside the lambda: a static would still
+ // be true when the test body is executed again in the same process and fail the check below.
+ bool seenStar = false;
+ TVerifyLineFunc verifyLine = [&seenStar](const TString& word, const TString& line) {
+ if (word == "FlattenMembers") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("interested_table."));
+ } else if (word == "SqlProjectItem") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megahelpful_len")));
+ // the star item must not have been seen yet
+ UNIT_ASSERT_VALUES_EQUAL(seenStar, false);
+ } else if (word == "SqlProjectStarItem") {
+ seenStar = true;
+ }
+ };
+ TWordCountHive elementStat = {{TString("FlattenMembers"), 0}, {TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["FlattenMembers"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]);
+ }
+
+ // Columns selected through a table alias: two SqlProjectItem occurrences, no star item, and
+ // no "interested_table." prefix left on the checked lines.
+ Y_UNIT_TEST(QualifiedMembers) {
+ NYql::TAstParseResult res = SqlToYql("select interested_table.key, interested_table.value from plato.Input as interested_table;");
+ UNIT_ASSERT(res.Root);
+
+ // Called for every line that contains one of the counted words.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ const bool fieldKey = TString::npos != line.find(Quote("key"));
+ const bool fieldValue = TString::npos != line.find(Quote("value"));
+ const bool refOnTable = TString::npos != line.find("interested_table.");
+ if (word == "SqlProjectItem") {
+ // each projection line names one of the two columns, without the alias prefix
+ UNIT_ASSERT(fieldKey || fieldValue);
+ UNIT_ASSERT(!refOnTable);
+ } else if (word == "Write!") {
+ // the Write! line mentions both columns, again without the alias prefix
+ UNIT_ASSERT(fieldKey && fieldValue && !refOnTable);
+ }
+ };
+ TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("Write!"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+ }
+
+ // Two-table JOIN selecting "table_bb.*" plus one aliased column of table_aa: expects one
+ // occurrence each of SqlProjectItem, SqlProjectStarItem, SelectMembers and SqlColumn.
+ Y_UNIT_TEST(JoinParseCorrect) {
+ NYql::TAstParseResult res = SqlToYql(
+ "PRAGMA DisableSimpleColumns;"
+ " SELECT table_bb.*, table_aa.key as megakey"
+ " FROM plato.Input AS table_aa"
+ " JOIN plato.Input AS table_bb"
+ " ON table_aa.value == table_bb.value;"
+ );
+ UNIT_ASSERT(res.Root);
+
+ // Called for every line that contains one of the counted words.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "SelectMembers") {
+ // the star projection refers to table_bb only
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa."));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table_bb."));
+ } else if (word == "SqlProjectItem") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("megakey")));
+ } else if (word == "SqlColumn") {
+ // the aliased column is table_aa.key
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("table_aa")));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key")));
+ }
+ };
+ TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectStarItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SelectMembers"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]);
+ }
+
+ // Three-table JOIN selecting "table_bb.*", "table_cc.*" and one aliased column of table_aa:
+ // expects two star items, two SelectMembers, one SqlProjectItem and one SqlColumn.
+ Y_UNIT_TEST(Join3Table) {
+ NYql::TAstParseResult res = SqlToYql(
+ " PRAGMA DisableSimpleColumns;"
+ " SELECT table_bb.*, table_aa.key as gigakey, table_cc.* "
+ " FROM plato.Input AS table_aa"
+ " JOIN plato.Input AS table_bb ON table_aa.key == table_bb.key"
+ " JOIN plato.Input AS table_cc ON table_aa.subkey == table_cc.subkey;"
+ );
+ // Attach the issues text to the failure message instead of computing and discarding it.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // Called for every line that contains one of the counted words.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "SelectMembers") {
+ // star projections refer to table_bb or table_cc, never to table_aa
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa."));
+ UNIT_ASSERT(line.find("table_bb.") != TString::npos || line.find("table_cc.") != TString::npos);
+ } else if (word == "SqlProjectItem") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("gigakey")));
+ } else if (word == "SqlColumn") {
+ const auto posTableAA = line.find(Quote("table_aa"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, posTableAA);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("key")));
+ // no further "table_aa" after the quoted occurrence found above
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("table_aa", posTableAA + 3));
+ }
+ };
+ TWordCountHive elementStat = {{TString("SqlProjectItem"), 0}, {TString("SqlProjectStarItem"), 0}, {TString("SelectMembers"), 0}, {TString("SqlColumn"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectStarItem"]);
+ UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SelectMembers"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlColumn"]);
+ }
+
+ // With DisableAnsiImplicitCrossJoin a comma-separated FROM list must be
+ // rejected with exactly the issue text below.
+ Y_UNIT_TEST(DisabledJoinCartesianProduct) {
+     NYql::TAstParseResult parsed = SqlToYql("pragma DisableAnsiImplicitCrossJoin; use plato; select * from A,B,C");
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         parsed.Issues.ToString(),
+         "<main>:1:67: Error: Cartesian product of tables is disabled. Please use explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin\n");
+ }
+
+ // With AnsiImplicitCrossJoin the comma-separated FROM list is accepted and
+ // the single EquiJoin line must mention "Cross" at least twice.
+ Y_UNIT_TEST(JoinCartesianProduct) {
+     NYql::TAstParseResult parsed = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from A,B,C");
+     UNIT_ASSERT(parsed.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "EquiJoin") {
+             return;
+         }
+         auto firstCross = text.find("Cross");
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, firstCross);
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("Cross", firstCross + 1));
+     };
+     TWordCountHive hits = {
+         {TString("EquiJoin"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["EquiJoin"]);
+ }
+
+ // Join of a table view with a plain table, selecting one column from each side.
+ Y_UNIT_TEST(JoinWithoutConcreteColumns) {
+     NYql::TAstParseResult parsed = SqlToYql(
+         " use plato;"
+         " SELECT a.v, b.value"
+         " FROM `Input1` VIEW `ksv` AS a"
+         " JOIN `Input2` AS b"
+         " ON a.k == b.key;"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "SqlProjectItem") {
+             UNIT_ASSERT(text.find(Quote("a.v")) != TString::npos || text.find(Quote("b.value")) != TString::npos);
+             return;
+         }
+         if (token != "SqlColumn") {
+             return;
+         }
+         const auto aliasAPos = text.find(Quote("a"));
+         const auto aliasBPos = text.find(Quote("b"));
+         if (aliasAPos == TString::npos) {
+             // Not a column of "a", so it has to be b.value.
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, aliasBPos);
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("value")));
+         } else {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find(Quote("v")));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("SqlProjectStarItem"), 0},
+         {TString("SqlProjectItem"), 0},
+         {TString("SqlColumn"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(0, hits["SqlProjectStarItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, hits["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, hits["SqlColumn"]);
+ }
+
+ // Self-join selecting the same column name from both sides: each projected
+ // item must carry its alias-qualified name, and the Write! line must not
+ // contain a doubled alias prefix ("a.a." / "b.b.").
+ Y_UNIT_TEST(JoinWithSameValues) {
+     NYql::TAstParseResult res = SqlToYql("SELECT a.value, b.value FROM plato.Input AS a JOIN plato.Input as b ON a.key == b.key;");
+     // Attach the collected issues so that a parse failure is diagnosable.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "SqlProjectItem") {
+             const bool isValueFromA = TString::npos != line.find(Quote("a.value"));
+             const bool isValueFromB = TString::npos != line.find(Quote("b.value"));
+             UNIT_ASSERT(isValueFromA || isValueFromB);
+         } else if (word == "Write!") {
+             // Was a detached `if` directly after the closing brace; the two
+             // cases are mutually exclusive, so chain them explicitly.
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("a.a."));
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("b.b."));
+         }
+     };
+     TWordCountHive elementStat = {{TString("SqlProjectStarItem"), 0}, {TString("SqlProjectItem"), 0}, {TString("Write!"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+     UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlProjectStarItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, elementStat["SqlProjectItem"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+ }
+
+ // Selecting the identically named key column from both join sides must translate.
+ Y_UNIT_TEST(SameColumnsForDifferentTables) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT a.key, b.key FROM plato.Input as a JOIN plato.Input as b on a.key==b.key;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // Same-named columns from both sides of a FULL JOIN ... USING(key) must translate.
+ Y_UNIT_TEST(SameColumnsForDifferentTablesFullJoin) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT a.key, b.key, a.value, b.value FROM plato.Input AS a FULL JOIN plato.Input AS b USING(key);");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // The StreamLookup join hint must be accepted in both spellings.
+ Y_UNIT_TEST(JoinStreamLookupStrategyHint) {
+     {
+         NYql::TAstParseResult canonical = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ plato.Input AS b USING(key);");
+         UNIT_ASSERT(canonical.Root);
+     }
+     // case insensitive
+     {
+         NYql::TAstParseResult lowercase = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ streamlookup() */ plato.Input AS b USING(key);");
+         UNIT_ASSERT(lowercase.Root);
+     }
+ }
+
+ // Two different strategy hints on one join must fail with the exact issue below.
+ Y_UNIT_TEST(JoinConflictingStrategyHint) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ Merge() */ plato.Input AS b USING(key);");
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         parsed.Issues.ToString(),
+         "<main>:1:91: Error: Conflicting join strategy hints\n");
+ }
+
+ // Repeating the same strategy hint on one join must fail with the exact issue below.
+ Y_UNIT_TEST(JoinDuplicatingStrategyHint) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ StreamLookup() */ /*+ StreamLookup() */ plato.Input AS b USING(key);");
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         parsed.Issues.ToString(),
+         "<main>:1:98: Error: Duplicate join strategy hint\n");
+ }
+
+ // A strategy hint on an explicit CROSS JOIN translates but yields a warning.
+ Y_UNIT_TEST(WarnCrossJoinStrategyHint) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT * FROM plato.Input AS a CROSS JOIN /*+ merge() */ plato.Input AS b;");
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         parsed.Issues.ToString(),
+         "<main>:1:32: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n");
+ }
+
+ // A strategy hint inside an implicit (comma) cross join translates but yields a warning.
+ Y_UNIT_TEST(WarnCartesianProductStrategyHint) {
+     NYql::TAstParseResult parsed = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; SELECT * FROM A, /*+ merge() */ B;");
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         parsed.Issues.ToString(),
+         "<main>:1:74: Warning: Non-default join strategy will not be used for CROSS JOIN, code: 4534\n");
+ }
+
+ // An unrecognised join hint name translates but yields a warning naming the hint.
+ Y_UNIT_TEST(WarnUnknownJoinStrategyHint) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT * FROM plato.Input AS a JOIN /*+ xmerge() */ plato.Input AS b USING (key);");
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_STRINGS_EQUAL(
+         parsed.Issues.ToString(),
+         "<main>:1:41: Warning: Unsupported join hint: xmerge, code: 4534\n");
+ }
+
+ // Swapping column names through aliases (key as subkey, subkey as key) must translate.
+ Y_UNIT_TEST(ReverseLabels) {
+     NYql::TAstParseResult parsed = SqlToYql("select in.key as subkey, subkey as key from plato.Input as in;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // An unnamed expression followed by a column explicitly labelled "column1" must translate.
+ Y_UNIT_TEST(AutogenerationAliasWithoutCollisionConflict1) {
+     NYql::TAstParseResult parsed = SqlToYql("select LENGTH(Value), key as column1 from plato.Input;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // A column explicitly labelled "column0" followed by an unnamed expression must translate.
+ Y_UNIT_TEST(AutogenerationAliasWithoutCollision2Conflict2) {
+     NYql::TAstParseResult parsed = SqlToYql("select key as column0, LENGTH(Value) from plato.Input;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // A qualified asterisk via the table alias alongside a plain column must translate.
+ Y_UNIT_TEST(InputAliasForQualifiedAsterisk) {
+     NYql::TAstParseResult parsed = SqlToYql("use plato; select zyuzya.*, key from plato.Input as zyuzya;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // A trailing comma after the last result column must be accepted.
+ Y_UNIT_TEST(SelectSupportsResultColumnsWithTrailingComma) {
+     NYql::TAstParseResult parsed = SqlToYql("select a, b, c, from plato.Input;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // ORDER BY on a column label: the Sort line refers to the label ("goal"),
+ // not to the underlying column name, and the DataSource line does not
+ // mention the label at all.
+ Y_UNIT_TEST(SelectOrderByLabeledColumn) {
+     NYql::TAstParseResult parsed = SqlToYql("pragma DisableOrderedColumns; select key as goal from plato.Input order by goal");
+     UNIT_ASSERT(parsed.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "DataSource") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("plato"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("Input"));
+
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("goal"));
+             return;
+         }
+         if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("goal"));
+
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("key"));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("DataSource"), 0},
+         {TString("Sort"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["DataSource"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+ }
+
+ // ORDER BY over a simple arithmetic expression must translate.
+ Y_UNIT_TEST(SelectOrderBySimpleExpr) {
+     NYql::TAstParseResult parsed = SqlToYql("select a from plato.Input order by a + a");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // Listing the same column twice in ORDER BY must translate.
+ Y_UNIT_TEST(SelectOrderByDuplicateLabels) {
+     NYql::TAstParseResult parsed = SqlToYql("select a from plato.Input order by a, a");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // ORDER BY over a sum of casts with the default direction: the Sort line
+ // carries the "+MayWarn" operator, both columns unqualified, and
+ // (Bool 'true).
+ Y_UNIT_TEST(SelectOrderByExpression) {
+     NYql::TAstParseResult parsed = SqlToYql("select * from plato.Input as i order by cast(key as uint32) + cast(subkey as uint32)");
+     UNIT_ASSERT(parsed.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Sort") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"+MayWarn\""));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("key"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("subkey"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("(Bool 'true)"));
+
+         UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("i.key"));
+         UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("i.subkey"));
+     };
+     TWordCountHive hits = {
+         {TString("Sort"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+ }
+
+ // Descending ORDER BY over a difference of casts with simple columns
+ // disabled: Sort carries "-MayWarn" and (Bool 'false); Write! keeps the
+ // "i." prefix for the star projection.
+ Y_UNIT_TEST(SelectOrderByExpressionDesc) {
+     NYql::TAstParseResult parsed = SqlToYql("pragma disablesimplecolumns; select i.*, key, subkey from plato.Input as i order by cast(i.key as uint32) - cast(i.subkey as uint32) desc");
+     UNIT_ASSERT(parsed.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"-MayWarn\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("(Bool 'false)"));
+             return;
+         }
+         if (token == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'columns"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("prefix"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"i.\""));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("Sort"), 0},
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Ascending ORDER BY over a modulo of casts: Sort carries "%MayWarn" and
+ // (Bool 'true); the Write! line must not contain the "i." alias prefix.
+ Y_UNIT_TEST(SelectOrderByExpressionAsc) {
+     NYql::TAstParseResult parsed = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) % cast(i.subkey as uint32) asc");
+     UNIT_ASSERT(parsed.Root);
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token == "Sort") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"%MayWarn\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("(Bool 'true)"));
+             return;
+         }
+         if (token == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'columns"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"key\""));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"subkey\""));
+             UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("i."));
+         }
+     };
+     TWordCountHive hits = {
+         {TString("Sort"), 0},
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Sort"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Referring to a subselect's column through the outer alias must translate.
+ Y_UNIT_TEST(ReferenceToKeyInSubselect) {
+     NYql::TAstParseResult parsed = SqlToYql("select b.key from (select a.key from plato.Input as a) as b;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // ORDER BY over a CAST of an unqualified column must translate.
+ Y_UNIT_TEST(OrderByCastValue) {
+     NYql::TAstParseResult parsed = SqlToYql("select i.key, i.subkey from plato.Input as i order by cast(key as uint32) desc;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // GROUP BY over a CAST expression must translate.
+ Y_UNIT_TEST(GroupByCastValue) {
+     NYql::TAstParseResult parsed = SqlToYql("select count(1) from plato.Input as i group by cast(key as uint8);");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // Column names "in" and "check" must be usable as identifiers in a select list.
+ Y_UNIT_TEST(KeywordInSelectColumns) {
+     NYql::TAstParseResult parsed = SqlToYql("select in, s.check from (select 1 as in, \"test\" as check) as s;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // SELECT * combined with GROUP BY must translate.
+ Y_UNIT_TEST(SelectAllGroupBy) {
+     NYql::TAstParseResult parsed = SqlToYql("select * from plato.Input group by subkey;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // CREATE OBJECT ... WITH (k=v, ...): the Write line lists the features in
+ // the order asserted below and uses the createObject mode.
+ Y_UNIT_TEST(CreateObjectWithFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE OBJECT IF NOT EXISTS must use the createObjectIfNotExists mode.
+ Y_UNIT_TEST(CreateObjectIfNotExists) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE OBJECT IF NOT EXISTS secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObjectIfNotExists"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE OBJECT with feature values written as double-quoted, single-quoted,
+ // bare, empty, backticked and escaped-quote literals: all of them must
+ // end up as plain atoms on the Write line.
+ Y_UNIT_TEST(CreateObjectWithFeaturesStrings) {
+     NYql::TAstParseResult res = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=\"Value1\", K2='V2', K3=V3, K4='', K5=`aaa`, K6='a\\'aa');");
+     // Attach the collected issues so that a parse failure is diagnosable.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('\"K2\" '\"V2\") '('\"K3\" '\"V3\") '('\"K4\" '\"\") '('\"K5\" '\"aaa\") '('\"K6\" '\"a'aa\") '('\"Key1\" '\"Value1\")"));
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("createObject"));
+         }
+     };
+
+     TWordCountHive elementStat = {{TString("Write"), 0}, {TString("SECRET"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     // Without these checks the test would pass even if no Write line was
+     // produced, because verifyLine would then never run. The expected counts
+     // match the sibling CREATE OBJECT tests.
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SECRET"]);
+ }
+
+ // UPSERT OBJECT ... WITH (k=v, ...): same feature list as CREATE, with the
+ // upsertObject mode on the Write line.
+ Y_UNIT_TEST(UpsertObjectWithFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; UPSERT OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('\"K2\" '\"V2\") '('\"Key1\" '\"Value1\")"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("upsertObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE OBJECT with a value-less flag (RECURSE) next to key=value features:
+ // the flag appears as a single-element tuple on the Write line.
+ Y_UNIT_TEST(CreateObjectWithFeaturesAndFlags) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET) WITH (Key1=Value1, K2=V2, RECURSE);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'('\"Key1\" '\"Value1\") '('\"RECURSE\")"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // The word "type" must be usable as an implicit column alias.
+ Y_UNIT_TEST(Select1Type) {
+     NYql::TAstParseResult parsed = SqlToYql("SELECT 1 type;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // The word "type" must be usable as an implicit table alias.
+ Y_UNIT_TEST(SelectTableType) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; SELECT * from T type;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // CREATE OBJECT without a WITH clause: the Write line has no 'features entry.
+ Y_UNIT_TEST(CreateObjectNoFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE OBJECT secretId (TYPE SECRET);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("'features"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("createObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // ALTER OBJECT ... SET with one feature bound to a declared parameter: the
+ // parameter is wrapped in EvaluateAtom, the literal stays a plain atom.
+ Y_UNIT_TEST(AlterObjectWithFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql(
+         "USE plato;\n"
+         "declare $path as String;\n"
+         "ALTER OBJECT secretId (TYPE SECRET) SET (Key1=$path, K2=V2);"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'features"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'\"Key1\" (EvaluateAtom \"$path\""));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'\"K2\" '\"V2\""));
+
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("alterObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // ALTER OBJECT without a SET clause must be rejected.
+ Y_UNIT_TEST(AlterObjectNoFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; ALTER OBJECT secretId (TYPE SECRET);");
+     UNIT_ASSERT(!parsed.Root);
+ }
+
+ // DROP OBJECT without a WITH clause: the Write line has no 'features entry.
+ Y_UNIT_TEST(DropObjectNoFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_EQUAL(TString::npos, text.find("'features"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // DROP OBJECT ... WITH (A, B, C): the Write line carries a 'features entry.
+ Y_UNIT_TEST(DropObjectWithFeatures) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH (A, B, C);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'features"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // DROP OBJECT ... WITH OVERRIDE (single unparenthesised option): the option
+ // shows up as an atom inside 'features on the Write line.
+ Y_UNIT_TEST(DropObjectWithOneOption) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; DROP OBJECT secretId (TYPE SECRET) WITH OVERRIDE;");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'features"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("'\"OVERRIDE\""));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObject"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // DROP OBJECT IF EXISTS must use the dropObjectIfExists mode.
+ Y_UNIT_TEST(DropObjectIfExists) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; DROP OBJECT IF EXISTS secretId (TYPE SECRET);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("dropObjectIfExists"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("SECRET"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["SECRET"]);
+ }
+
+ // CREATE TABLE with a two-column PRIMARY KEY: both key columns appear on
+ // the Write line and a single line mentions "primarykey".
+ Y_UNIT_TEST(PrimaryKeyParseCorrect) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, Subkey Int64, Value String, PRIMARY KEY (Key, Subkey));");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"Key\""));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("\"Subkey\""));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+         {TString("primarykey"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["primarykey"]);
+ }
+
+ // NOT NULL YQL-typed column: plain DataType with a not_null constraint.
+ Y_UNIT_TEST(CreateTableNonNullableYqlTypeAstCorrect) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a int32 not null);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '())))))))__"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Nullable YQL-typed column: DataType wrapped in AsOptionalType, no constraints.
+ Y_UNIT_TEST(CreateTableNullableYqlTypeAstCorrect) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a int32);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))))))__"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // NOT NULL pg-typed column: plain PgType with a not_null constraint.
+ Y_UNIT_TEST(CreateTableNonNullablePgTypeAstCorrect) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a pg_int4 not null);");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (PgType '_int4) '('columnConstrains '('('not_null))) '())))))))__"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // Nullable pg-typed column: PgType wrapped in AsOptionalType, no constraints.
+ Y_UNIT_TEST(CreateTableNullablePgTypeAstCorrect) {
+     NYql::TAstParseResult res = SqlToYql("USE plato; CREATE TABLE t (a pg_int4);");
+     // Attach the collected issues so that a parse failure is diagnosable.
+     UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+     // The unconditional PrettyPrintTo(Cout, ...) debug dump that used to sit
+     // here was removed: it wrote the whole AST to stdout on every run and
+     // no sibling test does so.
+
+     TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+         if (word == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                 line.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (PgType '_int4)) '('columnConstrains '()) '()))))))__"));
+         }
+     };
+
+     TWordCountHive elementStat = {{TString("Write!"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write!"]);
+ }
+
+ // A nullable column may be used as the primary key; it stays optional in the AST.
+ Y_UNIT_TEST(CreateTableNullPkColumnsAreAllowed) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a));");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // NOT NULL primary-key column: non-optional DataType with a not_null
+ // constraint plus the primarykey entry.
+ Y_UNIT_TEST(CreateTableNotNullPkColumnsAreIdempotentAstCorrect) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a));");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (DataType 'Int32) '('columnConstrains '('('not_null))) '()))) '('primarykey '('"a"))))))__"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // CREATE TABLE IF NOT EXISTS must use the create_if_not_exists mode.
+ Y_UNIT_TEST(CreateTableWithIfNotExists) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE IF NOT EXISTS t (a int32, primary key(a));");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create_if_not_exists) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")))))__"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // CREATE TEMP TABLE adds a 'temporary setting to the Write! node.
+ Y_UNIT_TEST(CreateTempTable) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TEMP TABLE t (a int32, primary key(a));");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         // The offending line is passed as the failure message.
+         UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__"), text);
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // CREATE TEMPORARY TABLE yields the same Write! node as CREATE TEMP TABLE.
+ Y_UNIT_TEST(CreateTemporaryTable) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TEMPORARY TABLE t (a int32, primary key(a));");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write!") {
+             return;
+         }
+         // The offending line is passed as the failure message.
+         UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos,
+             text.find(R"__((Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a" (AsOptionalType (DataType 'Int32)) '('columnConstrains '()) '()))) '('primarykey '('"a")) '('temporary))))__"), text);
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+ }
+
+ // A plain CREATE TABLE whose column has no type must be rejected.
+ Y_UNIT_TEST(CreateTableWithoutTypes) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a));");
+     UNIT_ASSERT(!parsed.Root);
+ }
+
+ // CREATE TABLE ... AS SELECT with typed columns must be rejected.
+ Y_UNIT_TEST(CreateTableAsSelectWithTypes) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a int32, primary key(a)) AS SELECT * FROM ts;");
+     UNIT_ASSERT(!parsed.Root);
+ }
+
+ // CREATE TABLE ... AS SELECT with untyped columns: the Write! node lists the
+ // bare column names and the Read! node reads the source table.
+ Y_UNIT_TEST(CreateTableAsSelect) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a, b, primary key(a)) AS SELECT * FROM ts;");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         // Two independent checks, not an else-chain, as in the original.
+         if (token == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                 text.find(R"__((let world (Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '('('"a") '('"b"))) '('primarykey '('"a"))))))__"));
+         }
+         if (token == "Read!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                 text.find(R"__((Read! world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+         {TString("Read!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Read!"]);
+ }
+
+ // CREATE TABLE ... AS SELECT with only a PRIMARY KEY clause: the column
+ // list on the Write! node is empty.
+ Y_UNIT_TEST(CreateTableAsSelectOnlyPrimary) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (primary key(a)) AS SELECT * FROM ts;");
+     UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         // Two independent checks, not an else-chain, as in the original.
+         if (token == "Write!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                 text.find(R"__((let world (Write! world sink (Key '('tablescheme (String '"t"))) values '('('mode 'create) '('columns '()) '('primarykey '('"a"))))))__"));
+         }
+         if (token == "Read!") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                 text.find(R"__((Read! world (DataSource '"yt" '"plato") (MrTableConcat (Key '('table (String '"ts")))))__"));
+         }
+     };
+
+     TWordCountHive hits = {
+         {TString("Write!"), 0},
+         {TString("Read!"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write!"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Read!"]);
+ }
+
+ // CREATE TABLE ... AS VALUES must be rejected.
+ Y_UNIT_TEST(CreateTableAsValuesFail) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a, primary key(a)) AS VALUES (1), (2);");
+     UNIT_ASSERT(!parsed.Root);
+ }
+
+ // Listing the same column twice in PRIMARY KEY must be rejected.
+ Y_UNIT_TEST(CreateTableDuplicatedPkColumnsFail) {
+     NYql::TAstParseResult parsed = SqlToYql("USE plato; CREATE TABLE t (a int32 not null, primary key(a, a));");
+     UNIT_ASSERT(!parsed.Root);
+ }
+
+ // DELETE ... WHERE against the "kikimr" provider writes with mode 'delete.
+ Y_UNIT_TEST(DeleteFromTableByKey) {
+     NYql::TAstParseResult parsed = SqlToYql("delete from plato.Input where key = 200;", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete)"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+ }
+
+ // Unconditional DELETE against the "kikimr" provider writes with mode 'delete.
+ Y_UNIT_TEST(DeleteFromTable) {
+     NYql::TAstParseResult parsed = SqlToYql("delete from plato.Input;", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete)"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+ }
+
+ // DELETE ... ON (cols) VALUES (...) writes with mode 'delete_on.
+ Y_UNIT_TEST(DeleteFromTableOnValues) {
+     NYql::TAstParseResult parsed = SqlToYql(
+         "delete from plato.Input on (key) values (1);",
+         10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete_on)"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+ }
+
+ // DELETE ... ON SELECT ... writes with mode 'delete_on.
+ Y_UNIT_TEST(DeleteFromTableOnSelect) {
+     NYql::TAstParseResult parsed = SqlToYql(
+         "delete from plato.Input on select key from plato.Input where value > 0;",
+         10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& token, const TString& text) {
+         if (token != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, text.find("('mode 'delete_on)"));
+     };
+
+     TWordCountHive hits = {
+         {TString("Write"), 0},
+     };
+     VerifyProgram(parsed, hits, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, hits["Write"]);
+ }
+
+ // `update ... set a = x, b = y where ...`: expects one Write occurrence in ('mode 'update) and one AsStruct
+ // occurrence whose line mentions a column name together with its quoted new value.
+ Y_UNIT_TEST(UpdateByValues) {
+     auto parsed = SqlToYql("update plato.Input set key = 777, value = 'cool' where key = 200;", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)"));
+             return;
+         }
+         if (word != "AsStruct") {
+             return;
+         }
+         const bool hasKey = TString::npos != line.find("key");
+         const bool hasValue = TString::npos != line.find("value");
+         UNIT_ASSERT(hasKey || hasValue);
+         // The key check wins: the value literal is inspected only when "key" is absent from the line.
+         if (hasKey) {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("777")));
+         } else if (hasValue) {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("cool")));
+         }
+     };
+
+     TWordCountHive wordStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["AsStruct"]);
+ }
+
+ // Tuple-form `update ... set (a, b, c) = (x, y, z)`: expects one Write occurrence in ('mode 'update) and one
+ // AsStruct occurrence whose line pairs a column name with its quoted literal.
+ Y_UNIT_TEST(UpdateByMultiValues) {
+     auto parsed = SqlToYql("update plato.Input set (key, value, subkey) = ('2','ddd',':') where key = 200;", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)"));
+             return;
+         }
+         if (word != "AsStruct") {
+             return;
+         }
+         const bool hasKey = TString::npos != line.find("key");
+         const bool hasSubkey = TString::npos != line.find("subkey");
+         const bool hasValue = TString::npos != line.find("value");
+         UNIT_ASSERT(hasKey || hasSubkey || hasValue);
+         // "subkey" contains "key", so the subkey case is tested first and the plain key case second.
+         if (hasSubkey) {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote(":")));
+         } else if (hasKey) {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("2")));
+         } else if (hasValue) {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("ddd")));
+         }
+     };
+
+     TWordCountHive wordStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["AsStruct"]);
+ }
+
+ // `update ... set (...) = (select ...)` must be written in update mode: the ('mode 'update) marker has to be
+ // on the line reported for Write, or on the line reported by the callback that immediately follows it.
+ Y_UNIT_TEST(UpdateBySelect) {
+     NYql::TAstParseResult res = SqlToYql("update plato.Input set (key, value, subkey) = (select key, value, subkey from plato.Input where key = 911) where key = 200;", 10, "kikimr");
+     UNIT_ASSERT(res.Root);
+
+     // lineIndex advances once per callback invocation; writeLineIndex remembers the invocation that saw Write.
+     int lineIndex = 0;
+     int writeLineIndex = -1;
+     bool found = false;
+
+     TVerifyLineFunc verifyLine = [&lineIndex, &writeLineIndex, &found](const TString& word, const TString& line) {
+         if (word == "Write") {
+             writeLineIndex = lineIndex;
+             found = line.find("('mode 'update)") != TString::npos;
+         } else if (word == "mode") {
+             found |= lineIndex == writeLineIndex + 1 && line.find("('mode 'update)") != TString::npos;
+             UNIT_ASSERT(found);
+         }
+
+         ++lineIndex;
+     };
+
+     TWordCountHive elementStat = {{TString("Write"), 0}, {TString("mode"), 0}};
+     VerifyProgram(res, elementStat, verifyLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+     // The in-callback assertion only runs for "mode"; check the final state too so the test cannot
+     // pass without the update mode ever having been observed.
+     UNIT_ASSERT(found);
+ }
+
+ // Self-referencing `update ... set subkey = subkey + 's'` without WHERE: expects one Write occurrence in
+ // ('mode 'update) and one AsStruct occurrence whose line quotes both the column name and the literal.
+ Y_UNIT_TEST(UpdateSelfModifyAll) {
+     auto parsed = SqlToYql("update plato.Input set subkey = subkey + 's';", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word == "Write") {
+             UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update)"));
+             return;
+         }
+         if (word != "AsStruct") {
+             return;
+         }
+         const bool hasSubkey = TString::npos != line.find("subkey");
+         UNIT_ASSERT(hasSubkey);
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("subkey")));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(Quote("s")));
+     };
+
+     TWordCountHive wordStat = {{TString("Write"), 0}, {TString("AsStruct"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["AsStruct"]);
+ }
+
+ // `update ... on (...) values (...)` must be reported as one Write occurrence in ('mode 'update_on).
+ Y_UNIT_TEST(UpdateOnValues) {
+     auto parsed = SqlToYql("update plato.Input on (key, value) values (5, 'cool')", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update_on)"));
+     };
+
+     TWordCountHive wordStat = {{TString("Write"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // `update ... on select ...` must be reported as one Write occurrence in ('mode 'update_on).
+ Y_UNIT_TEST(UpdateOnSelect) {
+     const TString query = "update plato.Input on select key, value + 1 as value from plato.Input";
+     auto parsed = SqlToYql(query, 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("('mode 'update_on)"));
+     };
+
+     TWordCountHive wordStat = {{TString("Write"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // One UNION ALL between two selects: the UnionAll counter must end up at 1.
+ Y_UNIT_TEST(UnionAllTest) {
+     auto parsed = SqlToYql("SELECT key FROM plato.Input UNION ALL select subkey FROM plato.Input;");
+     UNIT_ASSERT(parsed.Root);
+
+     TWordCountHive wordStat = {{TString("UnionAll"), 0}};
+     VerifyProgram(parsed, wordStat, {});
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["UnionAll"]);
+ }
+
+ // One UNION between two selects: the Union counter must end up at 1.
+ Y_UNIT_TEST(UnionTest) {
+     auto parsed = SqlToYql("SELECT key FROM plato.Input UNION select subkey FROM plato.Input;");
+     UNIT_ASSERT(parsed.Root);
+
+     TWordCountHive wordStat = {{TString("Union"), 0}};
+     VerifyProgram(parsed, wordStat, {});
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Union"]);
+ }
+
+ // A chain mixing UNION ALL and UNION: the counters are expected to report 2 for UnionAll and 3 for Union.
+ Y_UNIT_TEST(UnionAggregationTest) {
+     auto parsed = SqlToYql(R"(
+ SELECT 1
+ UNION ALL
+ SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
+ UNION
+ SELECT 1 UNION SELECT 1 UNION SELECT 1 UNION SELECT 1
+ UNION ALL
+ SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1;
+ )");
+     UNIT_ASSERT(parsed.Root);
+
+     TWordCountHive wordStat = {{TString("Union"), 0}, {TString("UnionAll"), 0}};
+     VerifyProgram(parsed, wordStat, {});
+     UNIT_ASSERT_VALUES_EQUAL(2, wordStat["UnionAll"]);
+     UNIT_ASSERT_VALUES_EQUAL(3, wordStat["Union"]);
+ }
+
+ // Declaring a Decimal(22,9) parameter and selecting it must translate successfully.
+ Y_UNIT_TEST(DeclareDecimalParameter) {
+     auto parsed = SqlToYql("declare $value as Decimal(22,9); select $value as cnt;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // GROUP BY with an aliased key that is reused in ORDER BY must translate successfully.
+ Y_UNIT_TEST(SimpleGroupBy) {
+     auto parsed = SqlToYql("select count(1),z from plato.Input group by key as z order by z;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ Y_UNIT_TEST(EmptyColumnName0) {
+     /// An empty column name is accepted by the translator; the error is raised later, at the validate step,
+     /// e.g. "4:31:Empty struct member name is not allowed" in "4:31:Function: AddMember".
+     auto parsed = SqlToYql("insert into plato.Output (``, list1) values (0, AsList(0, 1, 2));");
+     /// Only checks that translation finishes with a root and without a crash.
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // A trailing `rollback;` must be counted exactly once in the translated program.
+ Y_UNIT_TEST(KikimrRollback) {
+     auto parsed = SqlToYql("use plato; select * from Input; rollback;", 10, "kikimr");
+     UNIT_ASSERT(parsed.Root);
+
+     TWordCountHive wordStat = {{TString("rollback"), 0}};
+     VerifyProgram(parsed, wordStat);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["rollback"]);
+ }
+
+ // `pragma file(name, url)` must yield exactly one occurrence of the full AddFileByUrl Configure! statement.
+ Y_UNIT_TEST(PragmaFile) {
+     auto parsed = SqlToYql(R"(pragma file("HW", "sbr:181041334");)");
+     UNIT_ASSERT(parsed.Root);
+
+     const TString expectedStatement(R"((let world (Configure! world (DataSource '"config") '"AddFileByUrl" '"HW" '"sbr:181041334")))");
+     TWordCountHive wordStat = {{expectedStatement, 0}};
+     VerifyProgram(parsed, wordStat);
+     // The hive holds a single entry, so its first element is the counter of interest.
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat.cbegin()->second);
+ }
+
+ // Passing a named lambda to FILTER() must translate successfully.
+ Y_UNIT_TEST(DoNotCrashOnNamedInFilter) {
+     auto parsed = SqlToYql("USE plato; $all = ($table_name) -> { return true; }; SELECT * FROM FILTER(Input, $all)");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // The file pragma's AddFileByUrl must appear in the printed program before the udf pragma's ImportUdfs.
+ Y_UNIT_TEST(PragmasFileAndUdfOrder) {
+     NYql::TAstParseResult res = SqlToYql(R"(
+ PRAGMA file("libvideoplayers_udf.so", "https://proxy.sandbox.yandex-team.ru/235185290");
+ PRAGMA udf("libvideoplayers_udf.so");
+ )");
+     UNIT_ASSERT(res.Root);
+
+     const auto program = GetPrettyPrint(res);
+     const auto filePos = program.find("AddFileByUrl");
+     const auto udfsPos = program.find("ImportUdfs");
+     // npos is the largest possible position, so `filePos < udfsPos` alone would also hold when
+     // ImportUdfs is missing from the program; require both markers to be present first.
+     UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, filePos);
+     UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, udfsPos);
+     UNIT_ASSERT(filePos < udfsPos);
+ }
+
+ // PROCESS ... USING Kikimr::PushData: the single Kikimr.PushData line must mention TupleType and TypeOf.
+ Y_UNIT_TEST(ProcessUserType) {
+     auto parsed = SqlToYql("process plato.Input using Kikimr::PushData(TableRows());", 1, TString(NYql::KikimrProviderName));
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Kikimr.PushData") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TupleType"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TypeOf"));
+     };
+
+     TWordCountHive wordStat = {{TString("Kikimr.PushData"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Kikimr.PushData"]);
+ }
+
+ // PROCESS ... USING YDB::PushData with a SecureParam argument: the single YDB.PushData line must mention
+ // TupleType, TypeOf and the secure parameter name.
+ Y_UNIT_TEST(ProcessUserTypeAuth) {
+     auto parsed = SqlToYql("process plato.Input using YDB::PushData(TableRows(), AsTuple('oauth', SecureParam('api:oauth')));", 1, TString(NYql::KikimrProviderName));
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "YDB.PushData") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TupleType"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("TypeOf"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("api:oauth"));
+     };
+
+     TWordCountHive wordStat = {{TString("YDB.PushData"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["YDB.PushData"]);
+ }
+
+ // With the RTMR provider, both SELECT STREAM and plain SELECT inside INSERT must translate successfully.
+ Y_UNIT_TEST(SelectStreamRtmr) {
+     const TString provider(NYql::RtmrProviderName);
+
+     auto parsed = SqlToYql("USE plato; INSERT INTO Output SELECT STREAM key FROM Input;", 10, provider);
+     UNIT_ASSERT(parsed.Root);
+
+     parsed = SqlToYql("USE plato; INSERT INTO Output SELECT key FROM Input;", 10, provider);
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // SELECT STREAM joined with a table from another cluster must translate successfully under the RTMR provider.
+ Y_UNIT_TEST(SelectStreamRtmrJoinWithYt) {
+     const TString query = "USE plato; INSERT INTO Output SELECT STREAM key FROM Input LEFT JOIN hahn.ttt as t ON Input.key = t.Name;";
+     auto parsed = SqlToYql(query, 10, TString(NYql::RtmrProviderName));
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ Y_UNIT_TEST(SelectStreamNonRtmr) {
+ NYql::TAstParseResult res = SqlToYql(
+ "USE plato; INSERT INTO Output SELECT STREAM key FROM Input;",
+ 10);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:31: Error: SELECT STREAM is unsupported for non-streaming sources\n");
+ }
+
+ // GROUP BY with a HOP window must translate successfully under the RTMR provider.
+ Y_UNIT_TEST(GroupByHopRtmr) {
+     auto parsed = SqlToYql(R"(
+ USE plato; INSERT INTO Output SELECT key, SUM(value) AS value FROM Input
+ GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S");
+ )", 10, TString(NYql::RtmrProviderName));
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // GROUP BY HOP over an inline subquery must translate successfully under the RTMR provider.
+ Y_UNIT_TEST(GroupByHopRtmrSubquery) {
+     // 'use plato' intentionally avoided
+     auto parsed = SqlToYql(R"(
+ SELECT COUNT(*) AS value FROM (SELECT * FROM plato.Input)
+ GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S")
+ )", 10, TString(NYql::RtmrProviderName));
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // GROUP BY HOP over a subquery referenced through a named binding must translate successfully.
+ Y_UNIT_TEST(GroupByHopRtmrSubqueryBinding) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+ $q = SELECT * FROM Input;
+ INSERT INTO Output SELECT STREAM * FROM (
+ SELECT COUNT(*) AS value FROM $q
+ GROUP BY HOP(Data, "PT10S", "PT30S", "PT20S")
+ );
+ )", 10, TString(NYql::RtmrProviderName));
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ Y_UNIT_TEST(GroupByNoHopRtmr) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato; INSERT INTO Output SELECT STREAM key, SUM(value) AS value FROM Input
+ GROUP BY key;
+ )", 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:22: Error: Streaming group by query must have a hopping window specification.\n");
+ }
+
+ // Plain INSERT and its OR ABORT / OR IGNORE / OR REVERT variants must all translate under the Kikimr provider.
+ Y_UNIT_TEST(KikimrInserts) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+ INSERT INTO Output SELECT key, value FROM Input;
+ INSERT OR ABORT INTO Output SELECT key, value FROM Input;
+ INSERT OR IGNORE INTO Output SELECT key, value FROM Input;
+ INSERT OR REVERT INTO Output SELECT key, value FROM Input;
+ )", 10, TString(NYql::KikimrProviderName));
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // `<expr> NOT NULL` without IS still translates, but must produce warning 4507.
+ Y_UNIT_TEST(WarnMissingIsBeforeNotNull) {
+     auto parsed = SqlToYql("select 1 NOT NULL");
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Warning: Missing IS keyword before NOT NULL, code: 4507\n");
+ }
+
+ // Named subqueries in several spellings (parenthesized or not, with or without UNION ALL) must translate
+ // and be usable as FROM sources.
+ Y_UNIT_TEST(Subqueries) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+ $sq1 = (SELECT * FROM plato.Input);
+
+ $sq2 = SELECT * FROM plato.Input;
+
+ $squ1 = (
+ SELECT * FROM plato.Input
+ UNION ALL
+ SELECT * FROM plato.Input
+ );
+
+ $squ2 =
+ SELECT * FROM plato.Input
+ UNION ALL
+ SELECT * FROM plato.Input;
+
+ $squ3 = (
+ (SELECT * FROM plato.Input)
+ UNION ALL
+ (SELECT * FROM plato.Input)
+ );
+
+ SELECT * FROM $sq1;
+ SELECT * FROM $sq2;
+ SELECT * FROM $squ1;
+ SELECT * FROM $squ2;
+ SELECT * FROM $squ3;
+ )");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // Two named subqueries joined on a key must translate successfully.
+ Y_UNIT_TEST(SubqueriesJoin) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+
+ $left = SELECT * FROM plato.Input1 WHERE value != "BadValue";
+ $right = SELECT * FROM plato.Input2;
+
+ SELECT * FROM $left AS l
+ JOIN $right AS r
+ ON l.key == r.key;
+ )");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // A table named `any` is accepted when the name is written in backticks.
+ Y_UNIT_TEST(AnyInBackticksAsTableName) {
+     auto parsed = SqlToYql("use plato; select * from `any`;");
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ // ANY on both sides of a join between a table and a named subquery: "left" and "right" must each be counted once.
+ Y_UNIT_TEST(AnyJoinForTableAndSubQuery) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+
+ $r = SELECT * FROM plato.Input2;
+
+ SELECT * FROM ANY plato.Input1 AS l
+ LEFT JOIN ANY $r AS r
+ USING (key);
+ )");
+
+     UNIT_ASSERT(parsed.Root);
+
+     // NOTE(review): this callback reacts to "EquiJoin", which is not among the counted words below;
+     // if VerifyProgram only reports counted words, these assertions never run - confirm.
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "EquiJoin") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('left 'any)"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('right 'any)"));
+     };
+
+     TWordCountHive wordStat = {{TString("left"), 0}, {TString("right"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["left"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["right"]);
+ }
+
+ // ANY on both sides of a join between a table and AS_TABLE(list): "left" and "right" must each be counted once.
+ Y_UNIT_TEST(AnyJoinForTableAndTableSource) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+
+ $r = AsList(
+ AsStruct("aaa" as key, "bbb" as subkey, "ccc" as value)
+ );
+
+ SELECT * FROM ANY plato.Input1 AS l
+ LEFT JOIN ANY AS_TABLE($r) AS r
+ USING (key);
+ )");
+
+     UNIT_ASSERT(parsed.Root);
+
+     // NOTE(review): this callback reacts to "EquiJoin", which is not among the counted words below;
+     // if VerifyProgram only reports counted words, these assertions never run - confirm.
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "EquiJoin") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('left 'any)"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('right 'any)"));
+     };
+
+     TWordCountHive wordStat = {{TString("left"), 0}, {TString("right"), 0}};
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["left"]);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["right"]);
+ }
+
+ // A chain of joins with ANY on some sides: the counters are expected to report 1 for "left" and 2 for "right".
+ Y_UNIT_TEST(AnyJoinNested) {
+     auto parsed = SqlToYql(R"(
+ USE plato;
+
+ FROM ANY Input1 as a
+ JOIN Input2 as b ON a.key = b.key
+ LEFT JOIN ANY Input3 as c ON a.key = c.key
+ RIGHT JOIN ANY Input4 as d ON d.key = b.key
+ CROSS JOIN Input5
+ SELECT *;
+ )");
+
+     UNIT_ASSERT(parsed.Root);
+
+     TWordCountHive wordStat = {{TString("left"), 0}, {TString("right"), 0}};
+     VerifyProgram(parsed, wordStat);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["left"]);
+     UNIT_ASSERT_VALUES_EQUAL(2, wordStat["right"]);
+ }
+
+ // An inline `do begin ... end do` action must translate with no errors or warnings reported.
+ Y_UNIT_TEST(InlineAction) {
+     const TString query =
+         "do begin\n"
+         " select 1\n"
+         "; end do\n";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "");
+ }
+
+ // FLATTEN BY accepts both a qualified column (t.x) and a bare name that matches the correlation name.
+ Y_UNIT_TEST(FlattenByCorrelationName) {
+     const auto qualified = SqlToYql("select * from plato.Input as t flatten by t.x");
+     UNIT_ASSERT(qualified.IsOk());
+     const auto bare = SqlToYql("select * from plato.Input as t flatten by t -- same as flatten by t.t");
+     UNIT_ASSERT(bare.IsOk());
+ }
+
+ // In DISCOVERY mode an insert-select, a concat() source and an each() source must all translate.
+ Y_UNIT_TEST(DiscoveryMode) {
+     constexpr auto mode = NSQLTranslation::ESqlMode::DISCOVERY;
+     UNIT_ASSERT(SqlToYqlWithMode("insert into plato.Output select * from plato.Input", mode).IsOk());
+     UNIT_ASSERT(SqlToYqlWithMode("select * from plato.concat(Input1, Input2)", mode).IsOk());
+     UNIT_ASSERT(SqlToYqlWithMode("select * from plato.each(AsList(\"Input1\", \"Input2\"))", mode).IsOk());
+ }
+
+ // CUBE over a column literally named `group` must translate successfully.
+ Y_UNIT_TEST(CubeWithAutoGeneratedLikeColumnName) {
+     const auto parsed = SqlToYql("select key,subkey,group from plato.Input group by cube(key,subkey,group)");
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // CUBE over an expression aliased as `group` must translate successfully.
+ Y_UNIT_TEST(CubeWithAutoGeneratedLikeAlias) {
+     const auto parsed = SqlToYql("select key,subkey,group from plato.Input group by cube(key,subkey,value as group)");
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // The word `filter` stays usable as a column name, a column alias and a named binding.
+ Y_UNIT_TEST(FilterCanBeUsedAsColumnIdOrBind) {
+     for (const char* query : {"select filter from plato.Input", "select 1 as filter", "$filter = 1; select $filter"}) {
+         UNIT_ASSERT(SqlToYql(query).IsOk());
+     }
+ }
+
+ // Repeated semicolons, also when mixed with comments, are tolerated between top-level statements.
+ Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenTopLevelStatements) {
+     const auto parsed = SqlToYql(";;select 1; ; select 2;/*comment*/;select 3;;--comment\n;select 4;;");
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // Inside DEFINE ACTION, repeated semicolons and a last statement without a semicolon must be accepted.
+ Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenActionStatements) {
+     const TString query =
+         "define action $action($b,$c) as\n"
+         " ;;$d = $b + $c;\n"
+         " select $b;\n"
+         " select $c;;\n"
+         " select $d,\n"
+         "end define;\n"
+         "\n"
+         "do $action(1,2);";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // Inside inline `do begin ... end do` blocks, repeated and missing trailing semicolons must be accepted.
+ Y_UNIT_TEST(DuplicateAndMissingTrailingSemicolonsAreAllowedBetweenInlineActionStatements) {
+     const TString query =
+         "do begin\n"
+         " ;select 1,\n"
+         "end do;\n"
+         "evaluate for $i in AsList(1,2,3) do begin\n"
+         " select $i;;\n"
+         " select $i + $i;;\n"
+         "end do;";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // Inside lambda bodies, including a nested lambda, repeated semicolons must be accepted.
+ Y_UNIT_TEST(DuplicateSemicolonsAreAllowedBetweenLambdaStatements) {
+     const TString query =
+         "$x=1;\n"
+         "$foo = ($a, $b)->{\n"
+         " ;;$v = $a + $b;\n"
+         " $bar = ($c) -> {; return $c << $x};;\n"
+         " return $bar($v);;\n"
+         "};\n"
+         "select $foo(1,2);";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // An escaped backslash at the end of a single-quoted and of a double-quoted literal: each translated
+ // program must add one occurrence of `a\` to the shared counter.
+ Y_UNIT_TEST(StringLiteralWithEscapedBackslash) {
+     auto singleQuoted = SqlToYql(R"foo(SELECT 'a\\';)foo");
+     auto doubleQuoted = SqlToYql(R"foo(SELECT "a\\";)foo");
+     UNIT_ASSERT(singleQuoted.Root);
+     UNIT_ASSERT(doubleQuoted.Root);
+
+     TWordCountHive wordStat = {{TString("a\\"), 0}};
+
+     VerifyProgram(singleQuoted, wordStat);
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["a\\"]);
+
+     // The counter is not reset between runs, so the second program is expected to bring it to 2.
+     VerifyProgram(doubleQuoted, wordStat);
+     UNIT_ASSERT_VALUES_EQUAL(2, wordStat["a\\"]);
+ }
+
+ // Literals delimited by @@ that contain further runs of '@' must translate successfully.
+ Y_UNIT_TEST(StringMultiLineLiteralWithEscapes) {
+     for (const char* query : {"SELECT @@@foo@@@@bar@@@", "SELECT @@@@@@@@@"}) {
+         UNIT_ASSERT(SqlToYql(query).IsOk());
+     }
+ }
+
+ // Runs of 1 to 10 consecutive '@' characters: only lengths 4, 5, 8 and 9 are expected to translate.
+ Y_UNIT_TEST(StringMultiLineLiteralConsequitiveAt) {
+     UNIT_ASSERT(!SqlToYql("SELECT @").IsOk());
+     UNIT_ASSERT(!SqlToYql("SELECT @@").IsOk());
+     UNIT_ASSERT(!SqlToYql("SELECT @@@").IsOk());
+     UNIT_ASSERT(SqlToYql("SELECT @@@@").IsOk());
+     UNIT_ASSERT(SqlToYql("SELECT @@@@@").IsOk());
+
+     UNIT_ASSERT(!SqlToYql("SELECT @@@@@@").IsOk());
+     UNIT_ASSERT(!SqlToYql("SELECT @@@@@@@").IsOk());
+
+     UNIT_ASSERT(SqlToYql("SELECT @@@@@@@@").IsOk());
+     UNIT_ASSERT(SqlToYql("SELECT @@@@@@@@@").IsOk());
+     UNIT_ASSERT(!SqlToYql("SELECT @@@@@@@@@@").IsOk());
+ }
+
+ // ListCreate / DictCreate / SetCreate passed as lambda arguments next to an aggregate must translate.
+ Y_UNIT_TEST(ConstnessForListDictSetCreate) {
+     const TString query =
+         "$foo = ($x, $y) -> (\"aaaa\");\n"
+         "\n"
+         "select\n"
+         " $foo(sum(key), ListCreate(String)),\n"
+         " $foo(sum(key), DictCreate(String, String)),\n"
+         " $foo(sum(key), SetCreate(String)),\n"
+         "from (select 1 as key);";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // Empty tuples are allowed among the PARTITION COMPACT BY entries of a window definition.
+ Y_UNIT_TEST(CanUseEmptyTupleInWindowPartitionBy) {
+     const TString query =
+         "select sum(key) over w\n"
+         "from plato.Input\n"
+         "window w as (partition compact by (), (subkey), (), value || value as dvalue);";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(DenyAnsiOrderByLimitLegacyMode) {
+ auto req = "pragma DisableAnsiOrderByLimitInUnionAll;\n"
+ "use plato;\n"
+ "\n"
+ "select * from Input order by key limit 10\n"
+ "union all\n"
+ "select * from Input order by key limit 1;";
+
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:8: Error: DisableAnsiOrderByLimitInUnionAll pragma is deprecated and no longer supported\n");
+ }
+
+ // REDUCE ... USING and REDUCE ... USING ALL must both accept a UDF referenced through a named binding.
+ Y_UNIT_TEST(ReduceUsingUdfWithShortcutsWorks) {
+     const auto reduceUsing =
+         "use plato;\n"
+         "\n"
+         "$arg = 'foo';\n"
+         "$func = XXX::YYY($arg);\n"
+         "\n"
+         "REDUCE Input ON key using $func(subkey);\n"
+         "REDUCE Input ON key using $func(UUU::VVV(TableRow()));\n";
+     UNIT_ASSERT(SqlToYql(reduceUsing).IsOk());
+
+     const auto reduceUsingAll =
+         "use plato;\n"
+         "\n"
+         "$arg = 'foo';\n"
+         "$func = XXX::YYY($arg);\n"
+         "\n"
+         "REDUCE Input ON key using all $func(subkey);\n"
+         "REDUCE Input ON key using all $func(UUU::VVV(TableRow()));";
+     UNIT_ASSERT(SqlToYql(reduceUsingAll).IsOk());
+ }
+
+ // pragma yson.DisableStrict is accepted both with an explicit value and without one.
+ Y_UNIT_TEST(YsonDisableStrict) {
+     for (const char* query : {"pragma yson.DisableStrict = \"false\";", "pragma yson.DisableStrict;"}) {
+         UNIT_ASSERT(SqlToYql(query).IsOk());
+     }
+ }
+
+ // pragma yson.Strict is accepted both with an explicit value and without one.
+ Y_UNIT_TEST(YsonStrict) {
+     for (const char* query : {"pragma yson.Strict = \"false\";", "pragma yson.Strict;"}) {
+         UNIT_ASSERT(SqlToYql(query).IsOk());
+     }
+ }
+
+ // A join condition comparing AsTuple(...) of each side must translate successfully.
+ Y_UNIT_TEST(JoinByTuple) {
+     const TString query =
+         "use plato;\n"
+         "\n"
+         "select * from T1 as a\n"
+         "join T2 as b\n"
+         "on AsTuple(a.key, a.subkey) = AsTuple(b.key, b.subkey);";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A join condition comparing AsStruct(...) of each side must translate successfully.
+ Y_UNIT_TEST(JoinByStruct) {
+     const TString query =
+         "use plato;\n"
+         "\n"
+         "select * from T1 as a\n"
+         "join T2 as b\n"
+         "on AsStruct(a.key as k, a.subkey as sk) = AsStruct(b.key as k, b.subkey as sk);";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A join condition that applies a UDF call to one side must translate successfully.
+ Y_UNIT_TEST(JoinByUdf) {
+     const TString query =
+         "use plato;\n"
+         "\n"
+         "select a.align\n"
+         "from T1 as a\n"
+         "join T2 as b\n"
+         "on Yson::SerializeJsonEncodeUtf8(a.align)=b.align;";
+     const auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A backtick-escaped lambda argument containing a space must appear verbatim in the printed lambda header.
+ Y_UNIT_TEST(EscapedIdentifierAsLambdaArg) {
+     const TString query =
+         "$f = ($`foo bar`, $x) -> { return $`foo bar` + $x; };\n"
+         "\n"
+         "select $f(1, 2);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(lambda '(\"$foo bar\" \"$x\")";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(<callable>) with no extra arguments: the printed program must contain the expected SqlCall with an empty user type tuple.
+ Y_UNIT_TEST(UdfSyntaxSugarOnlyCallable) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString)('2022-01-01');");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType)))";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(...) with user types and a TypeConfig but no RunConfig: the SqlCall must end with the type config atom.
+ Y_UNIT_TEST(UdfSyntaxSugarTypeNoRun) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig)('2022-01-01');");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\")";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(...) with a RunConfig but no TypeConfig: the SqlCall must carry an empty type config atom followed by (Void).
+ Y_UNIT_TEST(UdfSyntaxSugarRunNoType) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, Void() as RunConfig)('2022-01-01');");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"\" (Void))";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(...) with user types, TypeConfig and a Void() RunConfig: the SqlCall must carry all three.
+ Y_UNIT_TEST(UdfSyntaxSugarFullTest) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, Void() As RunConfig)('2022-01-01');");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(...) with a string literal as RunConfig: the SqlCall must end with the corresponding String node.
+ Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, '55' As RunConfig)('2022-01-01');");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (String '\"55\"))";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(...) with a nested tuple/struct expression as RunConfig: the SqlCall must end with the printed tuple.
+ Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs2) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, AsTuple(32, 'no', AsStruct(1e-9 As SomeFloat)) As RunConfig)('2022-01-01');");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" '((Int32 '\"32\") (String '\"no\") (AsStruct '('\"SomeFloat\" (Double '\"1e-9\")))))";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // Udf(...) with optional (`?`, `??`) user types: they must be printed as nested OptionalType nodes.
+ Y_UNIT_TEST(UdfSyntaxSugarOptional) {
+     auto parsed = SqlToYql("SELECT Udf(DateTime::FromString, String?, Int32??, Tuple<Int32, Float>, \"foo\" as TypeConfig, Void() As RunConfig)(\"2022-01-01\");");
+     UNIT_ASSERT(parsed.Root);
+
+     const auto program = GetPrettyPrint(parsed);
+     const auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (OptionalType (DataType 'String)) (OptionalType (OptionalType (DataType 'Int32))) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))";
+     UNIT_ASSERT(TString::npos != program.find(expected));
+ }
+
+ // CREATE TABLE ... WITH (COMPACTION_POLICY = ...): the single Write line must carry the setting name and its value.
+ Y_UNIT_TEST(CompactionPolicyParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( COMPACTION_POLICY = "SomeCompactionPreset" );)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("compactionPolicy"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SomeCompactionPreset"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (AUTO_PARTITIONING_BY_SIZE = ENABLED): the single Write line must carry the setting and its value.
+ Y_UNIT_TEST(AutoPartitioningBySizeParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( AUTO_PARTITIONING_BY_SIZE = ENABLED );)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("autoPartitioningBySize"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (UNIFORM_PARTITIONS = 16): the single Write line must carry the setting and its value.
+ Y_UNIT_TEST(UniformPartitionsParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( UNIFORM_PARTITIONS = 16 );)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("uniformPartitions"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("16"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // TTL = Interval("P1D") on a Timestamp column: the single Write line must carry setTtlSettings, expireAfter and 86400000.
+ Y_UNIT_TEST(DateTimeTtlParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt);)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // TTL on an integer column with `AS SECONDS`: besides the TTL settings, the Write line must carry columnUnit and seconds.
+ Y_UNIT_TEST(IntTtlParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt AS SECONDS);)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (TIERING = 'my_tiering'): the single Write line must carry the setting name and its value.
+ Y_UNIT_TEST(TieringParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( TIERING = 'my_tiering' );)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiering"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("my_tiering"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // CREATE TABLE ... WITH (STORE_EXTERNAL_BLOBS = ENABLED): the single Write line must carry the setting and its value.
+ Y_UNIT_TEST(StoreExternalBlobsParseCorrect) {
+     auto parsed = SqlToYql(
+         R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, Value String, PRIMARY KEY (Key))
+ WITH ( STORE_EXTERNAL_BLOBS = ENABLED );)"
+     );
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storeExternalBlobs"));
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("ENABLED"));
+     };
+
+     TWordCountHive wordStat = { {TString("Write"), 0} };
+     VerifyProgram(parsed, wordStat, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordStat["Write"]);
+ }
+
+ // Column DEFAULT clauses given as a builtin call and as a named lambda: the printed program must mention
+ // both default expressions and the columnConstrains setting, and Write must be counted exactly once.
+ Y_UNIT_TEST(DefaultValueColumn2) {
+     auto res = SqlToYql(R"( use plato;
+ $lambda = () -> {
+ RETURN CAST(RandomUuid(2) as String)
+ };
+
+ CREATE TABLE tableName (
+ Key Uint32 DEFAULT RandomNumber(1),
+ Value String DEFAULT $lambda,
+ PRIMARY KEY (Key)
+ );
+ )");
+
+     UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+     const auto program = GetPrettyPrint(res);
+
+     UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomNumber"));
+     UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("RandomUuid"));
+     // "columnConstrains" is the spelling this test searches for in the printed program; checked once.
+     UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("columnConstrains"));
+     UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, program.find("Write"));
+
+#if 0
+ Cerr << program << Endl;
+#endif
+
+     TWordCountHive elementStat = { {TString("Write"), 0} };
+     VerifyProgram(res, elementStat);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // A DEFAULT expression that refers to another column of the same table is rejected
+ // with a "Column reference ... is not allowed in current scope" error.
+ Y_UNIT_TEST(DefaultValueColumn3) {
+ auto parsed = SqlToYql(R"( use plato;
+
+ CREATE TABLE tableName (
+ database_id Utf8,
+ cloud_id Utf8,
+ global_id Utf8 DEFAULT database_id || "=====",
+ PRIMARY KEY (database_id)
+ );
+ )");
+
+ const TString expectedError = "<main>:6:40: Error: Column reference \"database_id\" is not allowed in current scope\n";
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(parsed), expectedError);
+ UNIT_ASSERT(!parsed.Root);
+ }
+
+ // Columns with both FAMILY and literal DEFAULT clauses: the "Write" line must
+ // mention the default value, the column constraints and the column families.
+ Y_UNIT_TEST(DefaultValueColumn) {
+ auto parsed = SqlToYql(R"( use plato;
+ CREATE TABLE tableName (
+ Key Uint32 FAMILY cold DEFAULT 5,
+ Value String FAMILY default DEFAULT "empty",
+ PRIMARY KEY (Key),
+ FAMILY default (
+ DATA = "test",
+ COMPRESSION = "lz4"
+ ),
+ FAMILY cold (
+ DATA = "test",
+ COMPRESSION = "off"
+ )
+ );
+ )");
+
+ UNIT_ASSERT_C(parsed.Root, Err2Str(parsed));
+
+#if 0
+ const auto program = GetPrettyPrint(parsed);
+ Cerr << program << Endl;
+#endif
+
+ TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("default"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnConstrains"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnFamilies"));
+ };
+
+ TWordCountHive wordStats = { {TString("Write"), 0} };
+ VerifyProgram(parsed, wordStats, checkWriteLine);
+
+ // Exactly one "Write" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, wordStats["Write"]);
+ }
+
+ // CREATE TABLE with a CHANGEFEED clause: every changefeed setting name and the
+ // literal values checked below must appear on the "Write" line.
+ Y_UNIT_TEST(ChangefeedParseCorrect) {
+ auto parsed = SqlToYql(R"( USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (
+ MODE = 'KEYS_ONLY',
+ FORMAT = 'json',
+ INITIAL_SCAN = TRUE,
+ VIRTUAL_TIMESTAMPS = FALSE,
+ RESOLVED_TIMESTAMPS = Interval("PT1S"),
+ RETENTION_PERIOD = Interval("P1D"),
+ TOPIC_MIN_ACTIVE_PARTITIONS = 10,
+ AWS_REGION = 'aws:region'
+ )
+ );
+ )");
+ UNIT_ASSERT_C(parsed.Root, Err2Str(parsed));
+
+ TVerifyLineFunc checkWriteLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ // Checked in the same order as the settings are listed in the query.
+ for (const char* needle : {
+ "changefeed",
+ "mode",
+ "KEYS_ONLY",
+ "format",
+ "json",
+ "initial_scan",
+ "true",
+ "virtual_timestamps",
+ "false",
+ "resolved_timestamps",
+ "retention_period",
+ "topic_min_active_partitions",
+ "aws_region",
+ "aws:region",
+ }) {
+ UNIT_ASSERT_C(line.find(needle) != TString::npos, needle);
+ }
+ };
+
+ TWordCountHive wordStats = { {TString("Write"), 0} };
+ VerifyProgram(parsed, wordStats, checkWriteLine);
+
+ // Exactly one "Write" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, wordStats["Write"]);
+ }
+
+ // GROUP BY CUBE over an AS_TABLE source must translate successfully.
+ Y_UNIT_TEST(CloneForAsTableWorksWithCube) {
+ const auto parsed = SqlToYql("SELECT * FROM AS_TABLE([<|k1:1, k2:1|>]) GROUP BY CUBE(k1, k2);");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A back-quoted partition column containing spaces must appear as a quoted
+ // string on the CalcOverWindow line.
+ Y_UNIT_TEST(WindowPartitionByColumnProperlyEscaped) {
+ NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input WINDOW w AS (PARTITION BY `column with space`);");
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "CalcOverWindow") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("\"column with space\""));
+ };
+
+ TWordCountHive elementStat = { {TString("CalcOverWindow"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "CalcOverWindow" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]);
+ }
+
+ // PARTITION BY an unaliased expression: the expression is expected to be added
+ // to the row as member 'group_w_0, which CalcOverWindow then partitions by.
+ Y_UNIT_TEST(WindowPartitionByExpressionWithoutAliasesAreAllowed) {
+ NYql::TAstParseResult res = SqlToYql("SELECT SUM(key) OVER w FROM plato.Input as i WINDOW w AS (PARTITION BY ii.subkey);");
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "AddMember") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("AddMember row 'group_w_0 (SqlAccess 'struct (Member row '\"ii\")"));
+ }
+ if (word == "CalcOverWindow") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("CalcOverWindow core '('\"group_w_0\")"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("CalcOverWindow"), 0}, {TString("AddMember"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Each of the two words is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["CalcOverWindow"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["AddMember"]);
+ }
+
+ // The PqReadBy pragma is rejected when it names a cluster different from the one
+ // fixed by a preceding plain USE; the other combinations below are accepted.
+ Y_UNIT_TEST(PqReadByAfterUse) {
+ const auto conflictingQuery = "use plato; pragma PqReadBy='plato2';";
+ const auto conflictError = "<main>:1:28: Error: Cluster in PqReadPqBy pragma differs from cluster specified in USE statement: plato2 != plato\n";
+ ExpectFailWithError(conflictingQuery, conflictError);
+
+ const auto translates = [](const char* query) {
+ return SqlToYql(query).IsOk();
+ };
+ UNIT_ASSERT(translates("pragma PqReadBy='plato2';"));
+ UNIT_ASSERT(translates("pragma PqReadBy='plato2'; use plato;"));
+ UNIT_ASSERT(translates("$x='plato'; use rtmr:$x; pragma PqReadBy='plato2';"));
+ UNIT_ASSERT(translates("use plato; pragma PqReadBy='dq';"));
+ }
+
+ // plato.object(...) WITH SCHEMA: checks the exact MrObject call (path, format and
+ // named settings) and the exact userschema setting in the generated program.
+ Y_UNIT_TEST(MrObject) {
+ NYql::TAstParseResult res = SqlToYql(
+ "declare $path as String;\n"
+ "select * from plato.object($path, `format`, \"comp\" || \"ression\" as compression, 1 as bar) with schema (Int32 as y, String as x)"
+ );
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "MrObject") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((MrObject (EvaluateAtom "$path") '"format" '('('"compression" (Concat (String '"comp") (String '"ression"))) '('"bar" (Int32 '"1")))))__"));
+ } else if (word == "userschema") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__('('('"userschema" (StructType '('"y" (DataType 'Int32)) '('"x" (DataType 'String))) '('"y" '"x"))))__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("MrObject"), 0}, {TString("userschema"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Each of the two words is expected to be counted exactly once.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrObject"]);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]);
+ }
+
+ // Runs the same "select * from bindings.foo" query under each bindings mode:
+ // - settings from GetSettingsWithS3Binding: the binding expands to its path and settings;
+ // - DISABLED: translation fails with error code 4601;
+ // - DROP: the "bindings." prefix is dropped and table "foo" is read;
+ // - DROP_WITH_WARNING: same program as DROP plus warning code 4538.
+ Y_UNIT_TEST(TableBindings) {
+ const auto query = "select * from bindings.foo";
+
+ NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo");
+ NYql::TAstParseResult res = SqlToYqlWithSettings(query, settings);
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // The checker keys on "MrTableConcat", the only word counted below; keyed on
+ // any other word the assertion would never run.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "MrTableConcat") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((MrTableConcat (Key '('table (String '"path")))) (Void) '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__"));
+ }
+ };
+
+ TWordCountHive elementStat = {{TString("MrTableConcat"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["MrTableConcat"]);
+
+ settings.DefaultCluster = "plato";
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED;
+ res = SqlToYqlWithSettings(query, settings);
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n");
+ UNIT_ASSERT(!res.Root);
+
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP;
+ res = SqlToYqlWithSettings(query, settings);
+
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // Shared by the DROP and DROP_WITH_WARNING runs: plain table "foo", empty settings.
+ TVerifyLineFunc verifyLine2 = [](const TString& word, const TString& line) {
+ if (word == "MrTableConcat") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((MrTableConcat (Key '('table (String '"foo")))) (Void) '())))__"));
+ }
+ };
+
+ TWordCountHive elementStat2 = {{TString("MrTableConcat"), 0}};
+ VerifyProgram(res, elementStat2, verifyLine2);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat2["MrTableConcat"]);
+
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING;
+ res = SqlToYqlWithSettings(query, settings);
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(res), "<main>:1:15: Warning: Please remove 'bindings.' from your query, the support for this syntax will be dropped soon, code: 4538\n");
+ UNIT_ASSERT(res.Root);
+
+ TWordCountHive elementStat3 = {{TString("MrTableConcat"), 0}};
+ VerifyProgram(res, elementStat3, verifyLine2);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat3["MrTableConcat"]);
+ }
+
+ // Runs the same INSERT ... WITH TRUNCATE into bindings.foo under each bindings mode:
+ // binding expansion, DISABLED (error 4601), DROP (plain table "foo") and
+ // DROP_WITH_WARNING (same program as DROP plus warning 4538).
+ Y_UNIT_TEST(TableBindingsWithInsert) {
+ const auto insertQuery = "insert into bindings.foo with truncate (x, y) values (1, 2);";
+
+ NSQLTranslation::TTranslationSettings settings = GetSettingsWithS3Binding("foo");
+ NYql::TAstParseResult parsed = SqlToYqlWithSettings(insertQuery, settings);
+ UNIT_ASSERT(parsed.Root);
+
+ TVerifyLineFunc checkBoundWrite = [](const TString& word, const TString& line) {
+ if (word != "Write!") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((Write! world sink (Key '('table (String '"path"))) values '('('"bar" '"1") '('"compression" '"ccompression") '('"format" '"format") '('"partitionedby" '"key" '"subkey") '('"userschema" (SqlTypeFromYson)__"));
+ };
+
+ TWordCountHive boundStats = {{TString("Write!"), 0}};
+ VerifyProgram(parsed, boundStats, checkBoundWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, boundStats["Write!"]);
+
+ settings.DefaultCluster = "plato";
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DISABLED;
+ parsed = SqlToYqlWithSettings(insertQuery, settings);
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(parsed), "<main>:1:13: Error: Please remove 'bindings.' from your query, the support for this syntax has ended, code: 4601\n");
+ UNIT_ASSERT(!parsed.Root);
+
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP;
+ parsed = SqlToYqlWithSettings(insertQuery, settings);
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(parsed), "");
+ UNIT_ASSERT(parsed.Root);
+
+ // Shared by the DROP and DROP_WITH_WARNING runs: write into plain table "foo" in renew mode.
+ TVerifyLineFunc checkPlainWrite = [](const TString& word, const TString& line) {
+ if (word != "Write!") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+ line.find(R"__((Write! world sink (Key '('table (String '"foo"))) values '('('mode 'renew)))__"));
+ };
+
+ TWordCountHive dropStats = {{TString("Write!"), 0}};
+ VerifyProgram(parsed, dropStats, checkPlainWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, dropStats["Write!"]);
+
+ settings.BindingsMode = NSQLTranslation::EBindingsMode::DROP_WITH_WARNING;
+ parsed = SqlToYqlWithSettings(insertQuery, settings);
+ UNIT_ASSERT_VALUES_EQUAL(Err2Str(parsed), "<main>:1:13: Warning: Please remove 'bindings.' from your query, the support for this syntax will be dropped soon, code: 4538\n");
+ UNIT_ASSERT(parsed.Root);
+
+ TWordCountHive warnStats = {{TString("Write!"), 0}};
+ VerifyProgram(parsed, warnStats, checkPlainWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, warnStats["Write!"]);
+ }
+
+ // A trailing comma after the WITHOUT column list is accepted, including
+ // keyword-like and qualified column names.
+ Y_UNIT_TEST(TrailingCommaInWithout) {
+ const auto translates = [](const char* query) {
+ return SqlToYql(query).IsOk();
+ };
+ UNIT_ASSERT(translates("SELECT * WITHOUT stream, FROM plato.Input"));
+ UNIT_ASSERT(translates("SELECT a.* WITHOUT a.intersect, FROM plato.Input AS a"));
+ UNIT_ASSERT(translates("SELECT a.* WITHOUT col1, col2, a.col3, FROM plato.Input AS a"));
+ }
+
+ // Builds a CASE expression with 20000 WHEN branches and expects it to translate.
+ Y_UNIT_TEST(NoStackOverflowOnBigCaseStatement) {
+ TStringBuilder query;
+ query << "select case 1 + 123";
+ size_t branch = 0;
+ while (branch < 20000) {
+ query << " when " << branch << " then " << branch + 1;
+ ++branch;
+ }
+ query << " else 100500 end;";
+ const auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // An aggregate over a DISTINCT expression inside a list literal must translate.
+ Y_UNIT_TEST(CollectPreaggregatedInListLiteral) {
+ const auto parsed = SqlToYql("SELECT [COUNT(DISTINCT a+b)] FROM plato.Input");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A parenthesized column list directly after GROUP BY must translate.
+ Y_UNIT_TEST(SmartParenInGroupByClause) {
+ const auto parsed = SqlToYql("SELECT * FROM plato.Input GROUP BY (k, v)");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // ALTER TABLE ... RENAME TO must translate.
+ Y_UNIT_TEST(AlterTableRenameToIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table RENAME TO moved");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // One ALTER TABLE may mix ADD COLUMN, DROP COLUMN and the short ADD form.
+ Y_UNIT_TEST(AlterTableAddDropColumnIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table ADD COLUMN addc uint64, DROP COLUMN dropc, ADD addagain uint64");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // ALTER TABLE ... SET (TTL = ...) is accepted both without and with an "AS SECONDS" unit.
+ Y_UNIT_TEST(AlterTableSetTTLIsCorrect) {
+ const auto withoutUnit = SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column)");
+ UNIT_ASSERT(withoutUnit.IsOk());
+ const auto withUnit = SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column AS SECONDS)");
+ UNIT_ASSERT(withUnit.IsOk());
+ }
+
+ // ALTER TABLE ... SET (TIERING = ...) must translate.
+ Y_UNIT_TEST(AlterTableSetTieringIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table SET (TIERING = 'my_tiering')");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // ALTER TABLE ... ADD CHANGEFEED with MODE and FORMAT settings must translate.
+ Y_UNIT_TEST(AlterTableAddChangefeedIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table ADD CHANGEFEED feed WITH (MODE = 'UPDATES', FORMAT = 'json')");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // ALTER CHANGEFEED ... DISABLE is accepted, while altering FORMAT is reported as unsupported.
+ Y_UNIT_TEST(AlterTableAlterChangefeedIsCorrect) {
+ const auto disabled = SqlToYql("USE plato; ALTER TABLE table ALTER CHANGEFEED feed DISABLE");
+ UNIT_ASSERT(disabled.IsOk());
+
+ const auto formatQuery = "USE plato; ALTER TABLE table ALTER CHANGEFEED feed SET (FORMAT = 'proto');";
+ const auto formatError = "<main>:1:57: Error: FORMAT alter is not supported\n";
+ ExpectFailWithError(formatQuery, formatError);
+ }
+
+ // ALTER TABLE ... DROP CHANGEFEED must translate.
+ Y_UNIT_TEST(AlterTableDropChangefeedIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table DROP CHANGEFEED feed");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // ALTER TABLE ... SET (AUTO_PARTITIONING_BY_SIZE = DISABLED) must translate.
+ Y_UNIT_TEST(AlterTableSetPartitioningIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table SET (AUTO_PARTITIONING_BY_SIZE = DISABLED)");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A WITH clause on a plain GLOBAL index is reported as not implemented.
+ Y_UNIT_TEST(AlterTableAddIndexWithIsNotSupported) {
+ const auto query = "USE plato; ALTER TABLE table ADD INDEX idx GLOBAL ON (col) WITH (a=b)";
+ const auto expectedError = "<main>:1:40: Error: with: alternative is not implemented yet: \n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ // A LOCAL index is reported as not implemented.
+ Y_UNIT_TEST(AlterTableAddIndexLocalIsNotSupported) {
+ const auto query = "USE plato; ALTER TABLE table ADD INDEX idx LOCAL ON (col)";
+ const auto expectedError = "<main>:1:40: Error: local: alternative is not implemented yet: \n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ // CREATE TABLE with a vector_kmeans_tree index, COVER columns and a WITH list
+ // ending in a trailing comma must translate.
+ Y_UNIT_TEST(CreateTableAddIndexVector) {
+ const auto parsed = SqlToYql(R"(USE plato;
+ CREATE TABLE table (
+ pk INT32 NOT NULL,
+ col String,
+ INDEX idx GLOBAL USING vector_kmeans_tree
+ ON (col) COVER (col)
+ WITH (distance=cosine, vector_type=float, vector_dimension=1024,),
+ PRIMARY KEY (pk))
+ )");
+ const bool translated = parsed.IsOk();
+ UNIT_ASSERT_C(translated, parsed.Issues.ToString());
+ }
+
+ // ALTER TABLE ... ADD INDEX with a vector_kmeans_tree index must translate;
+ // vector_type is given as a quoted string here.
+ Y_UNIT_TEST(AlterTableAddIndexVector) {
+ const auto parsed = SqlToYql(R"(USE plato;
+ ALTER TABLE table ADD INDEX idx
+ GLOBAL USING vector_kmeans_tree
+ ON (col) COVER (col)
+ WITH (distance=cosine, vector_type="float", vector_dimension=1024)
+ )");
+ const bool translated = parsed.IsOk();
+ UNIT_ASSERT_C(translated, parsed.Issues.ToString());
+ }
+
+ // An unrecognized index subtype after USING is rejected.
+ Y_UNIT_TEST(AlterTableAddIndexUnknownSubtype) {
+ const auto query = "USE plato; ALTER TABLE table ADD INDEX idx GLOBAL USING unknown ON (col)";
+ const auto expectedError = "<main>:1:57: Error: UNKNOWN index subtype is not supported\n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ // A vector index whose WITH list omits vector_dimension is rejected.
+ Y_UNIT_TEST(AlterTableAddIndexMissedParameter) {
+ const auto query = R"(USE plato;
+ ALTER TABLE table ADD INDEX idx
+ GLOBAL USING vector_kmeans_tree
+ ON (col)
+ WITH (distance=cosine, vector_type=float)
+ )";
+ const auto expectedError = "<main>:5:52: Error: vector_dimension should be set\n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ // ALTER INDEX ... SET with a single unparenthesized partitioning setting must translate.
+ Y_UNIT_TEST(AlterTableAlterIndexSetPartitioningIsCorrect) {
+ const auto parsed = SqlToYql("USE plato; ALTER TABLE table ALTER INDEX index SET AUTO_PARTITIONING_MIN_PARTITIONS_COUNT 10");
+ const bool translated = parsed.IsOk();
+ UNIT_ASSERT_C(translated, parsed.Issues.ToString());
+ }
+
+ // ALTER INDEX ... SET with a parenthesized list of several partitioning settings must translate.
+ Y_UNIT_TEST(AlterTableAlterIndexSetMultiplePartitioningSettings) {
+ const auto query = "USE plato; ALTER TABLE table ALTER INDEX index SET "
+ "(AUTO_PARTITIONING_BY_LOAD = ENABLED, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10)";
+ const auto parsed = SqlToYql(query);
+ UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
+ }
+
+ // ALTER INDEX ... RESET of a partitioning setting is reported as unsupported.
+ Y_UNIT_TEST(AlterTableAlterIndexResetPartitioningIsNotSupported) {
+ const auto query = "USE plato; ALTER TABLE table ALTER INDEX index RESET (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT)";
+ const auto expectedError = "<main>:1:55: Error: AUTO_PARTITIONING_MIN_PARTITIONS_COUNT reset is not supported\n";
+ ExpectFailWithError(query, expectedError);
+ }
+
+ // ALTER COLUMN ... DROP NOT NULL: the line containing the alter mode marker must be
+ // the exact Write! call carrying a 'drop_not_null column constraint change.
+ Y_UNIT_TEST(AlterTableAlterColumnDropNotNullAstCorrect) {
+ auto parsed = SqlToYql(R"(
+ USE plato;
+ CREATE TABLE tableName (
+ id Uint32,
+ val Uint32 NOT NULL,
+ PRIMARY KEY (id)
+ );
+
+ COMMIT;
+ ALTER TABLE tableName ALTER COLUMN val DROP NOT NULL;
+ )");
+
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Root);
+
+ // Only one word is counted, so the reported word itself is not inspected.
+ TVerifyLineFunc checkAlterLine = [](const TString&, const TString& line) {
+ const auto position = line.find(
+ R"(let world (Write! world sink (Key '('tablescheme (String '"tableName"))) (Void) '('('mode 'alter) '('actions '('('alterColumns '('('"val" '('changeColumnConstraints '('('drop_not_null)))))))))))"
+ );
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, position);
+ };
+
+ const TString alterMarker = "\'mode \'alter";
+ TWordCountHive wordStats({alterMarker});
+ VerifyProgram(parsed, wordStats, checkAlterLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, wordStats[alterMarker]);
+ }
+
+ // Aliases written without AS are accepted for columns, join inputs and VALUES sources.
+ Y_UNIT_TEST(OptionalAliases) {
+ const auto translates = [](const char* query) {
+ return SqlToYql(query).IsOk();
+ };
+ UNIT_ASSERT(translates("USE plato; SELECT foo FROM (SELECT key foo FROM Input);"));
+ UNIT_ASSERT(translates("USE plato; SELECT a.x FROM Input1 a JOIN Input2 b ON a.key = b.key;"));
+ UNIT_ASSERT(translates("USE plato; SELECT a.x FROM (VALUES (1,2), (3,4)) a(x,key) JOIN Input b ON a.key = b.key;"));
+ }
+
+ // TableName() with an explicit path argument may sit next to an aggregate;
+ // TableName() without arguments is rejected in an aggregating query.
+ Y_UNIT_TEST(TableNameConstness) {
+ const auto onePathArg = SqlToYql("USE plato; $path = 'foo'; SELECT TableName($path), count(*) FROM Input;");
+ UNIT_ASSERT(onePathArg.IsOk());
+ const auto pathAndSystem = SqlToYql("$path = 'foo'; SELECT TableName($path, 'yt'), count(*) FROM plato.Input;");
+ UNIT_ASSERT(pathAndSystem.IsOk());
+
+ const auto noArgsQuery = "USE plato; SELECT TableName(), count(*) FROM plato.Input;";
+ const auto noArgsError = "<main>:1:19: Error: Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery\n";
+ ExpectFailWithError(noArgsQuery, noArgsError);
+ }
+
+ // The keyword "use" is usable as a column name and alias.
+ Y_UNIT_TEST(UseShouldWorkAsColumnName) {
+ const auto parsed = SqlToYql("select use from (select 1 as use);");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // "true" / "False" may be used as named-node identifiers after '$'.
+ Y_UNIT_TEST(TrueFalseWorkAfterDollar) {
+ const auto spacedTrue = SqlToYql("$ true = false; SELECT $ true or false;");
+ UNIT_ASSERT(spacedTrue.IsOk());
+ const auto capitalFalse = SqlToYql("$False = 0; SELECT $False;");
+ UNIT_ASSERT(capitalFalse.IsOk());
+ }
+
+ // Both "with schema <type>" and "with columns = <type>" hint forms are accepted.
+ Y_UNIT_TEST(WithSchemaEquals) {
+ const auto schemaForm = SqlToYql("select * from plato.T with schema Struct<a:Int32, b:String>;");
+ UNIT_ASSERT(schemaForm.IsOk());
+ const auto columnsForm = SqlToYql("select * from plato.T with columns = Struct<a:Int32, b:String>;");
+ UNIT_ASSERT(columnsForm.IsOk());
+ }
+
+ // For a cluster mapped to the S3 provider, a parenthesized schema may mix
+ // "name Type" and "Type as name" column forms.
+ Y_UNIT_TEST(WithNonStructSchemaS3) {
+ NSQLTranslation::TTranslationSettings settings;
+ settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName;
+ const auto query = "select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);";
+ const auto parsed = SqlToYql(query, settings);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // GROUP BY an expression that contains a tuple: the whole expression is expected
+ // to become the single grouping key "group0" of Aggregate.
+ Y_UNIT_TEST(AllowNestedTuplesInGroupBy) {
+ NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by 1 + (x, y, z);");
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // Only "Aggregate" is counted, so the reported word is not inspected; nothing is captured.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"group0\")"));
+ };
+
+ TWordCountHive elementStat({"Aggregate"});
+ VerifyProgram(res, elementStat, verifyLine);
+ // VALUES_EQUAL prints the actual count on mismatch.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ // GROUP BY with a parenthesized list: each element becomes its own grouping key,
+ // and an "as" alias inside the list replaces the column name.
+ Y_UNIT_TEST(AllowGroupByWithParens) {
+ NYql::TAstParseResult res = SqlToYql("select count(*) from plato.Input group by (x, y as alias1, z);");
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ // Only "Aggregate" is counted, so the reported word is not inspected; nothing is captured.
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Aggregate core '('\"x\" '\"alias1\" '\"z\")"));
+ };
+
+ TWordCountHive elementStat({"Aggregate"});
+ VerifyProgram(res, elementStat, verifyLine);
+ // VALUES_EQUAL prints the actual count on mismatch.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ // CREATE ASYNC REPLICATION: the "Write" line must mention the replication name,
+ // the create mode, every source/target table and every connection setting with its value.
+ Y_UNIT_TEST(CreateAsyncReplicationParseCorrect) {
+ auto req = R"(
+ USE plato;
+ CREATE ASYNC REPLICATION MyReplication
+ FOR table1 AS table2, table3 AS table4
+ WITH (
+ CONNECTION_STRING = "grpc://localhost:2135/?database=/MyDatabase",
+ ENDPOINT = "localhost:2135",
+ DATABASE = "/MyDatabase"
+ );
+ )";
+ auto res = SqlToYql(req);
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("create"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table1"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table2"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table3"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("table4"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("connection_string"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("grpc://localhost:2135/?database=/MyDatabase"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("endpoint"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("localhost:2135"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("database"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("/MyDatabase"));
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Write" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // STATE and FAILOVER_MODE are rejected in CREATE ASYNC REPLICATION. The reported
+ // column is computed as 20 plus the length of the setting name.
+ Y_UNIT_TEST(CreateAsyncReplicationUnsupportedSettings) {
+ auto queryTemplate = R"(
+ USE plato;
+ CREATE ASYNC REPLICATION MyReplication
+ FOR table1 AS table2, table3 AS table4
+ WITH (
+ %s = "%s"
+ )
+ )";
+
+ const THashMap<TString, TString> rejected = {
+ {"STATE", "DONE"},
+ {"FAILOVER_MODE", "FORCE"},
+ };
+
+ for (const auto& [name, value] : rejected) {
+ auto parsed = SqlToYql(Sprintf(queryTemplate, name.c_str(), value.c_str()));
+ UNIT_ASSERT(!parsed.Root);
+ const auto expectedError = Sprintf("<main>:6:%zu: Error: %s is not supported in CREATE\n", 20 + name.size(), name.c_str());
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expectedError);
+ }
+ }
+
+ // ALTER ASYNC REPLICATION ... SET (STATE, FAILOVER_MODE): the "Write" line must
+ // mention the replication name, the alter mode and both settings with their values.
+ Y_UNIT_TEST(AlterAsyncReplicationParseCorrect) {
+ auto req = R"(
+ USE plato;
+ ALTER ASYNC REPLICATION MyReplication
+ SET (
+ STATE = "DONE",
+ FAILOVER_MODE = "FORCE"
+ );
+ )";
+ auto res = SqlToYql(req);
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("state"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DONE"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("failover_mode"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("FORCE"));
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Write" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // Substitutes each connection/credential setting into ALTER ASYNC REPLICATION ... SET
+ // and checks that it translates and that the "Write" line carries the key and value.
+ // NOTE(review): despite "Unsupported" in the name, the assertions below require
+ // every one of these settings to be accepted - confirm the intended name.
+ Y_UNIT_TEST(AlterAsyncReplicationUnsupportedSettings) {
+ auto reqTpl = R"(
+ USE plato;
+ ALTER ASYNC REPLICATION MyReplication
+ SET (
+ %s = "%s"
+ )
+ )";
+
+ auto settings = THashMap<TString, TString>{
+ {"connection_string", "grpc://localhost:2135/?database=/MyDatabase"},
+ {"endpoint", "localhost:2135"},
+ {"database", "/MyDatabase"},
+ {"token", "foo"},
+ {"token_secret_name", "foo_secret_name"},
+ {"user", "user"},
+ {"password", "bar"},
+ {"password_secret_name", "bar_secret_name"},
+ };
+
+ for (const auto& setting : settings) {
+ // Plain references rather than structured bindings, since both are captured by the lambda below.
+ auto& key = setting.first;
+ auto& value = setting.second;
+ auto req = Sprintf(reqTpl, key.c_str(), value.c_str());
+ auto res = SqlToYql(req);
+ // Name the failing setting and attach the translator issues.
+ UNIT_ASSERT_C(res.Root, key << ": " << Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [&key, &value](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(key));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(value));
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Write" is expected to be counted per setting.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+ }
+
+ Y_UNIT_TEST(AsyncReplicationInvalidSettings) {
+ auto req = R"(
+ USE plato;
+ ALTER ASYNC REPLICATION MyReplication SET (FOO = "BAR");
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:62: Error: Unknown replication setting: FOO\n");
+ }
+
+ // DROP ASYNC REPLICATION: the "Write" line must mention the replication name and the drop mode.
+ Y_UNIT_TEST(DropAsyncReplicationParseCorrect) {
+ auto req = R"(
+ USE plato;
+ DROP ASYNC REPLICATION MyReplication;
+ )";
+ auto res = SqlToYql(req);
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("MyReplication"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop"));
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Write" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // DROP ASYNC REPLICATION ... CASCADE: the "Write" line must carry the dropCascade mode.
+ Y_UNIT_TEST(DropAsyncReplicationCascade) {
+ auto req = R"(
+ USE plato;
+ DROP ASYNC REPLICATION MyReplication CASCADE;
+ )";
+ auto res = SqlToYql(req);
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("dropCascade"));
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Write" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ // PRAGMA CompactGroupBy: the Aggregate line must carry the 'compact setting.
+ Y_UNIT_TEST(PragmaCompactGroupBy) {
+ auto req = "PRAGMA CompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP BY key;";
+ auto res = SqlToYql(req);
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "Aggregate") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('compact)"));
+ };
+
+ TWordCountHive elementStat = { {TString("Aggregate"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Aggregate" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ // PRAGMA DisableCompactGroupBy: even with an explicit compact() hint in the query,
+ // the Aggregate line must NOT carry the 'compact setting.
+ Y_UNIT_TEST(PragmaDisableCompactGroupBy) {
+ auto req = "PRAGMA DisableCompactGroupBy; SELECT key, COUNT(*) FROM plato.Input GROUP /*+ compact() */ BY key;";
+ auto res = SqlToYql(req);
+ // Attach the translator issues so a failure explains itself.
+ UNIT_ASSERT_C(res.Root, Err2Str(res));
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word != "Aggregate") {
+ return;
+ }
+ // VALUES_EQUAL against npos: the marker has to be absent.
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'('compact)"));
+ };
+
+ TWordCountHive elementStat = { {TString("Aggregate"), 0}};
+ VerifyProgram(res, elementStat, verifyLine);
+
+ // Exactly one "Aggregate" is expected to be counted.
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Aggregate"]);
+ }
+
+ // SAMPLE applied to a named subquery must translate.
+ Y_UNIT_TEST(AutoSampleWorksWithNamedSubquery) {
+ const auto parsed = SqlToYql("$src = select * from plato.Input; select * from $src sample 0.2");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // SAMPLE applied to an inline subquery must translate.
+ Y_UNIT_TEST(AutoSampleWorksWithSubquery) {
+ const auto parsed = SqlToYql("select * from (select * from plato.Input) sample 0.2");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ // A trailing comma is accepted at the end of the CREATE TABLE element list,
+ // after a PRIMARY KEY clause as well as after a column definition.
+ Y_UNIT_TEST(CreateTableTrailingComma) {
+ const auto afterPrimaryKey = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32, PRIMARY KEY (Key),);");
+ UNIT_ASSERT(afterPrimaryKey.IsOk());
+ const auto afterColumn = SqlToYql("USE plato; CREATE TABLE tableName (Key Uint32,);");
+ UNIT_ASSERT(afterColumn.IsOk());
+ }
+
+ // BETWEEN accepts the SYMMETRIC / ASYMMETRIC modifiers, and "and" is still usable
+ // as a column name inside a BETWEEN expression.
+ Y_UNIT_TEST(BetweenSymmetric) {
+ const auto translates = [](const char* query) {
+ return SqlToYql(query).IsOk();
+ };
+ UNIT_ASSERT(translates("select 3 between symmetric 5 and 4;"));
+ UNIT_ASSERT(translates("select 3 between asymmetric 5 and 4;"));
+ UNIT_ASSERT(translates("use plato; select key between symmetric and and and from Input;"));
+ UNIT_ASSERT(translates("use plato; select key between and and and from Input;"));
+ }
+}
+
+// Tests for PROCESS ... USING EXTERNAL FUNCTION: accepted argument/WITH combinations
+// and the diagnostics produced for invalid ones.
+Y_UNIT_TEST_SUITE(ExternalFunction) {
+ // Every query below must translate without errors.
+ Y_UNIT_TEST(ValidUseFunctions) {
+ const auto translates = [](const char* query) {
+ return SqlToYql(query).IsOk();
+ };
+
+ // three arguments, OPTIMIZE_FOR given as a string
+ UNIT_ASSERT(translates(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|a: 123, b: a + 641|>)"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " CONCURRENCY=3, OPTIMIZE_FOR='CALLS'"));
+
+ // use CALLS without quotes, as keyword
+ UNIT_ASSERT(translates(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " OPTIMIZE_FOR=CALLS"));
+
+ // TableRow() as the third argument
+ UNIT_ASSERT(translates(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', TableRow())"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " CONCURRENCY=3"));
+
+ // BATCH_SIZE, CONNECTION and INIT settings
+ UNIT_ASSERT(translates(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo')"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,"
+ " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con',"
+ " INIT=[0, 900]"));
+
+ // setting names not used by the other queries are accepted too
+ UNIT_ASSERT(translates(
+ "PROCESS plato.Input"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'bar', TableRow())"
+ " WITH UNKNOWN_PARAM_1='837747712', UNKNOWN_PARAM_2=Tuple<Uint16, Utf8>,"
+ " INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>"));
+ }
+
+
+ // Every query below must fail with exactly the listed diagnostics.
+ Y_UNIT_TEST(InValidUseFunctions) {
+ // WITH block on an ordinary (non external) function
+ const auto withOnUdf = "PROCESS plato.Input USING some::udf(*) WITH INPUT_TYPE=Struct<a:Int32>";
+ ExpectFailWithError(withOnUdf,
+ "<main>:1:33: Error: PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block\n");
+
+ // ASSUME block together with an external function
+ const auto withAssume = "PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'jhhjfh88134d')"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>"
+ " ASSUME ORDER BY key";
+ ExpectFailWithError(withAssume,
+ "<main>:1:129: Error: PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block\n");
+
+ // too many arguments
+ const auto fourArgs = "PROCESS plato.Input USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', 'bar', 'baz')";
+ ExpectFailWithError(fourArgs,
+ "<main>:1:15: Error: EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: 4\n");
+
+ // duplicated WITH clauses: one error per repeated clause
+ const auto repeatedClauses = "PROCESS plato.Input\n"
+ " USING EXTERNAL FUNCTION('YANDEX-CLOUD', 'foo', <|field_1: a1, field_b: b1|>)\n"
+ " WITH INPUT_TYPE=Struct<a:Int32>, OUTPUT_TYPE=Struct<b:Int32>,\n"
+ " CONCURRENCY=3, BATCH_SIZE=1000000, CONNECTION='yc-folder34fse-con',\n"
+ " CONCURRENCY=5, INPUT_TYPE=Struct<b:Bool>,\n"
+ " INIT=[0, 900]\n";
+ ExpectFailWithError(repeatedClauses,
+ "<main>:5:2: Error: WITH \"CONCURRENCY\" clause should be specified only once\n"
+ "<main>:5:17: Error: WITH \"INPUT_TYPE\" clause should be specified only once\n");
+ }
+}
+
+Y_UNIT_TEST_SUITE(SqlToYQLErrors) {
+ Y_UNIT_TEST(UdfSyntaxSugarMissingCall) {
+ auto req = "SELECT Udf(DateTime::FromString, \"foo\" as RunConfig);";
+ auto res = SqlToYql(req);
+ TString a1 = Err2Str(res);
+ TString a2("<main>:1:8: Error: Abstract Udf Node can't be used as a part of expression.\n");
+ UNIT_ASSERT_NO_DIFF(a1, a2);
+ }
+
+ // The first argument of Udf(...) must be a callable reference, not a literal.
+ Y_UNIT_TEST(UdfSyntaxSugarIsNotCallable) {
+ auto parsed = SqlToYql("SELECT Udf(123, \"foo\" as RunConfig);");
+ const TString actual = Err2Str(parsed);
+ const TString expected = "<main>:1:8: Error: Udf: first argument must be a callable, like Foo::Bar\n";
+ UNIT_ASSERT_NO_DIFF(actual, expected);
+ }
+
+ Y_UNIT_TEST(UdfSyntaxSugarNoArgs) {
+ auto req = "SELECT Udf()();";
+ auto res = SqlToYql(req);
+ TString a1 = Err2Str(res);
+ TString a2("<main>:1:8: Error: Udf: expected at least one argument\n");
+ UNIT_ASSERT_NO_DIFF(a1, a2);
+ }
+
+ // A stray non-ASCII letter in a cluster name must produce an "Unknown cluster" error.
+ Y_UNIT_TEST(StrayUTF8) {
+ /// The first letter of the cluster name "сedar" below is a Cyrillic 'с', not a Latin 'c';
+ /// the expected error names the cluster as "edar", i.e. without that letter.
+ NYql::TAstParseResult res = SqlToYql("select * from сedar.Input");
+ UNIT_ASSERT(!res.Root);
+
+ TString a1 = Err2Str(res);
+ TString a2(R"foo(<main>:1:16: Error: Unknown cluster: edar
+)foo");
+
+ UNIT_ASSERT_NO_DIFF(a1, a2);
+ }
+
+ // An escaped backslash immediately before a quote ends the literal, so the trailing
+ // b is a syntax error. Checked for both single- and double-quoted literals.
+ Y_UNIT_TEST(IvalidStringLiteralWithEscapedBackslash) {
+ NYql::TAstParseResult singleQuoted = SqlToYql(R"foo($bar = 'a\\'b';)foo");
+ NYql::TAstParseResult doubleQuoted = SqlToYql(R"foo($bar = "a\\"b";)foo");
+ UNIT_ASSERT(!singleQuoted.Root);
+ UNIT_ASSERT(!doubleQuoted.Root);
+
+ // Both quote styles are expected to yield the same diagnostic.
+ const TString expectedError = "<main>:1:12: Error: mismatched input 'b' expecting {<EOF>, ';'}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(singleQuoted), expectedError);
+ UNIT_ASSERT_NO_DIFF(Err2Str(doubleQuoted), expectedError);
+ }
+
+ Y_UNIT_TEST(InvalidHexInStringLiteral) {
+ // "\x1" is an incomplete hex escape inside the string literal.
+ auto parsed = SqlToYql("select \"foo\\x1\\xfe\"");
+ UNIT_ASSERT(!parsed.Root);
+
+ const TString expected = "<main>:1:15: Error: Failed to parse string literal: Invalid hexadecimal value\n";
+ const TString actual = Err2Str(parsed);
+ UNIT_ASSERT_NO_DIFF(actual, expected);
+ }
+
+ Y_UNIT_TEST(InvalidOctalInMultilineStringLiteral) {
+ // The bad octal escape sits on the third line of a multi-line literal;
+ // the reported position must point to that line.
+ const auto query =
+ "select \"foo\n"
+ "bar\n"
+ "\\01\"";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+
+ const TString expected = "<main>:3:4: Error: Failed to parse string literal: Invalid octal value\n";
+ const TString actual = Err2Str(parsed);
+ UNIT_ASSERT_NO_DIFF(actual, expected);
+ }
+
+ Y_UNIT_TEST(InvalidDoubleAtString) {
+ // Six '@' in a row leave a dangling '@' token after the literal.
+ auto parsed = SqlToYql("select @@@@@@");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:12: Error: extraneous input '@' expecting {<EOF>, ';'}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InvalidDoubleAtStringWhichWasAcceptedEarlier) {
+ // A lone '@' between two @@-quoted strings is a syntax error.
+ auto parsed = SqlToYql("SELECT @@foo@@ @ @@bar@@");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:15: Error: mismatched input '@' expecting {<EOF>, ';'}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InvalidStringFromTable) {
+ // An unterminated second double-quoted literal is reported at its opening quote.
+ auto parsed = SqlToYql("select \"FOO\"\"BAR from plato.foo");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:12: Error: mismatched input '\"' expecting {<EOF>, ';'}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InvalidDoubleAtStringFromTable) {
+ // Same six-'@' input as above, but followed by a FROM clause.
+ auto parsed = SqlToYql("select @@@@@@ from plato.foo");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:12: Error: mismatched input '@' expecting {<EOF>, ';'}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectInvalidSyntax) {
+ // "form" instead of "from": the token after it is reported as extraneous.
+ auto parsed = SqlToYql("select 1 form Wat");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:14: Error: extraneous input 'Wat' expecting {<EOF>, ';'}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectNoCluster) {
+ // A table reference without a cluster prefix and without a default cluster fails.
+ auto parsed = SqlToYql("select foo from bar");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: No cluster name given and no default cluster is selected\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectDuplicateColumns) {
+ // Selecting the same column twice collides on the output name.
+ auto parsed = SqlToYql("select a, a from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:11: Error: Unable to use duplicate column names. Collision in name: a\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectDuplicateLabels) {
+ // Two different columns given the same alias collide on that alias.
+ auto parsed = SqlToYql("select a as foo, b as foo from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:18: Error: Unable to use duplicate column names. Collision in name: foo\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectCaseWithoutThen) {
+ // CASE WHEN without THEN (and without END) yields two syntax errors.
+ auto parsed = SqlToYql("select case when true 1;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected =
+ "<main>:1:22: Error: missing THEN at \'1\'\n"
+ "<main>:1:23: Error: extraneous input \';\' expecting {ELSE, END, WHEN}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectComplexCaseWithoutThen) {
+ // Missing THEN inside a multi-line WHERE ... CASE; only the issue text is checked.
+ const auto query =
+ "SELECT *\n"
+ "FROM plato.Input AS a\n"
+ "WHERE CASE WHEN a.key = \"foo\" a.subkey ELSE a.value END\n";
+ auto parsed = SqlToYql(query);
+ const TString expected = "<main>:3:30: Error: missing THEN at 'a'\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectCaseWithoutEnd) {
+ // A CASE expression with no ELSE branch is reported as an error.
+ auto parsed = SqlToYql("select case a when b then c end from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:8: Error: ELSE is required\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregationNoInput) {
+ // Without FROM, every column reference (a, b, c) produces a pair of issues.
+ auto parsed = SqlToYql("select a, Min(b), c");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected =
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:8: Error: Column reference 'a'\n"
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:15: Error: Column reference 'b'\n"
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:19: Error: Column reference 'c'\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregation) {
+ // A plain column next to count(*) with no GROUP BY is rejected.
+ const auto query = "select count(*), 1 + key from plato.Input";
+ const auto expected = "<main>:1:22: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregatedTerms) {
+ // subkey is neither grouped by nor aggregated.
+ const auto query = "select key, 2 * subkey from plato.Input group by key";
+ const auto expected = "<main>:1:17: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SelectDistinctWithBadAggregation) {
+ // DISTINCT variants of the two non-aggregated-column cases.
+ const auto withoutGroupBy = "select distinct count(*), 1 + key from plato.Input";
+ ExpectFailWithError(withoutGroupBy,
+ "<main>:1:31: Error: Column `key` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ const auto withGroupBy = "select distinct key, 2 * subkey from plato.Input group by key";
+ ExpectFailWithError(withGroupBy,
+ "<main>:1:26: Error: Column `subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregationInHaving) {
+ // HAVING refers to a column that is neither grouped by nor aggregated.
+ const auto query =
+ "select key from plato.Input group by key\n"
+ "having \"f\" || value == \"foo\"";
+ const auto expected = "<main>:2:15: Error: Column `value` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(JoinWithNonAggregatedColumnInProjection) {
+ // Non-aggregated column of the joined table: the column is named in the message.
+ const auto plainColumn =
+ "select a.key, 1 + b.subkey\n"
+ "from plato.Input1 as a join plato.Input2 as b using(key)\n"
+ "group by a.key;";
+ ExpectFailWithError(plainColumn,
+ "<main>:1:19: Error: Column `b.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ // Member access on that column: the message carries no column name.
+ const auto memberAccess =
+ "select a.key, 1 + b.subkey.x\n"
+ "from plato.Input1 as a join plato.Input2 as b using(key)\n"
+ "group by a.key;";
+ ExpectFailWithError(memberAccess,
+ "<main>:1:19: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregatedTermsWithSources) {
+ // Qualified non-aggregated column: the qualified name appears in the message.
+ const auto plainColumn =
+ "select key, 1 + a.subkey\n"
+ "from plato.Input1 as a\n"
+ "group by a.key;";
+ ExpectFailWithError(plainColumn,
+ "<main>:1:17: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ // Member access on that column: the message carries no column name.
+ const auto memberAccess =
+ "select key, 1 + a.subkey.x\n"
+ "from plato.Input1 as a\n"
+ "group by a.key;";
+ ExpectFailWithError(memberAccess,
+ "<main>:1:17: Error: Column must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(WarnForAggregationBySelectAlias) {
+ // Both queries still translate, but emit two warnings (code 4532) about
+ // GROUP BY using the source column rather than the SELECT alias.
+ {
+ auto parsed = SqlToYql(
+ "select c + 1 as c from plato.Input\n"
+ "group by c");
+ UNIT_ASSERT(parsed.Root);
+ const TString expected =
+ "<main>:2:11: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ {
+ auto parsed = SqlToYql(
+ "select c + 1 as c from plato.Input\n"
+ "group by Math::Floor(c + 2) as c;");
+ UNIT_ASSERT(parsed.Root);
+ const TString expected =
+ "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+ }
+
+ Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenAggrFunctionsAreUsedInAlias) {
+ // Alias built from an aggregate function: no issues expected.
+ {
+ auto parsed = SqlToYql(
+ "select\n"
+ " cast(avg(val) as int) as value,\n"
+ " value as key\n"
+ "from\n"
+ " plato.Input\n"
+ "group by value");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ // Alias built from a window aggregate: no issues expected either.
+ {
+ auto parsed = SqlToYql(
+ "select\n"
+ " cast(avg(val) over w as int) as value,\n"
+ " value as key\n"
+ "from\n"
+ " plato.Input\n"
+ "group by value\n"
+ "window w as ()");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+ }
+
+ Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenQualifiedNameIsUsed) {
+ // GROUP BY uses a qualified name (a.key), with a join: no issues expected.
+ {
+ auto parsed = SqlToYql(
+ "select\n"
+ " Unwrap(a.key) as key\n"
+ "from plato.Input as a\n"
+ "join plato.Input2 as b using(k)\n"
+ "group by a.key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ // Same, with a single source.
+ {
+ auto parsed = SqlToYql(
+ "select Unwrap(a.key) as key\n"
+ "from plato.Input as a\n"
+ "group by a.key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+ }
+
+ Y_UNIT_TEST(NoWarnForAggregationBySelectAliasWhenTrivialRenamingIsUsed) {
+ // The alias merely renames a qualified column to its own name: no issues expected.
+ {
+ auto parsed = SqlToYql(
+ "select a.key as key\n"
+ "from plato.Input as a\n"
+ "group by key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ // The alias is identical to the unqualified column name: no issues expected.
+ {
+ auto parsed = SqlToYql(
+ "select key as key\n"
+ "from plato.Input\n"
+ "group by key;");
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+ }
+
+ Y_UNIT_TEST(ErrorByAggregatingByExpressionWithSameExpressionInSelect) {
+ // Repeating the GROUP BY expression in SELECT is still reported for column k.
+ const auto query = "select k * 2 from plato.Input group by k * 2";
+ const auto expected = "<main>:1:8: Error: Column `k` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrorForAggregationBySelectAlias) {
+ // Grouping by a SELECT alias: two warnings followed by the GROUP BY error.
+ const auto groupByAlias =
+ "select key, Math::Floor(1.1 + a.subkey) as foo\n"
+ "from plato.Input as a\n"
+ "group by a.key, foo;";
+ ExpectFailWithError(groupByAlias,
+ "<main>:3:17: Warning: GROUP BY will aggregate by column `foo` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:19: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"
+ "<main>:1:31: Error: Column `a.subkey` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+
+ // Grouping by an unnamed expression over the aliased column.
+ const auto groupByExpr =
+ "select c + 1 as c from plato.Input\n"
+ "group by Math::Floor(c + 2);";
+ ExpectFailWithError(groupByExpr,
+ "<main>:2:22: Warning: GROUP BY will aggregate by column `c` instead of aggregating by SELECT expression with same alias, code: 4532\n"
+ "<main>:1:10: Warning: You should probably use alias in GROUP BY instead of using it here. Please consult documentation for more details, code: 4532\n"
+ "<main>:1:8: Error: Column `c` must either be a key column in GROUP BY or it should be used in aggregation function\n");
+ }
+
+ Y_UNIT_TEST(SelectWithDuplicateGroupingColumns) {
+ // The same column listed twice in GROUP BY is an error.
+ auto parsed = SqlToYql("select c from plato.Input group by c, c");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:8: Error: Duplicate grouping column: c\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectWithBadAggregationInGrouping) {
+ // GROUP BY without FROM: the column reference in GROUP BY is what gets reported.
+ auto parsed = SqlToYql("select a, Min(b), c group by c");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected =
+ "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:30: Error: Column reference 'c'\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectWithOpOnBadAggregation) {
+ // A raw column mixed into an expression with an aggregate is rejected.
+ const auto query = "select 1 + a + Min(b) from plato.Input";
+ const auto expected = "<main>:1:12: Error: Column `a` must either be a key column in GROUP BY or it should be used in aggregation function\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SelectOrderByConstantNum) {
+ // ORDER BY a numeric literal is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by 1");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:36: Error: Unable to ORDER BY constant expression\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectOrderByConstantExpr) {
+ // ORDER BY an expression made only of literals is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by 1 * 42");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:38: Error: Unable to ORDER BY constant expression\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectOrderByConstantString) {
+ // ORDER BY a string literal is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by \"nest\"");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:36: Error: Unable to ORDER BY constant expression\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectOrderByAggregated) {
+ // ORDER BY an aggregate in a non-aggregating select is rejected.
+ auto parsed = SqlToYql("select a from plato.Input order by min(a)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:36: Error: Unable to ORDER BY aggregated values\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ErrorInOrderByExpresison) {
+ // An aliased expression in parentheses is not a valid ORDER BY item.
+ auto parsed = SqlToYql("select key, value from plato.Input order by (key as zey)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:45: Error: You should use in ORDER BY column name, qualified field, callable function or expression\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ErrorsInOrderByWhenColumnIsMissingInProjection) {
+ // ORDER BY references that cannot be resolved against the source must fail.
+ ExpectFailWithError(
+ "select subkey from (select 1 as subkey) order by key",
+ "<main>:1:50: Error: Column key is not in source column set\n");
+ ExpectFailWithError(
+ "select subkey from plato.Input as a order by x.key",
+ "<main>:1:46: Error: Unknown correlation name: x\n");
+ ExpectFailWithError(
+ "select distinct a, b from plato.Input order by c",
+ "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n");
+ ExpectFailWithError(
+ "select count(*) as a from plato.Input order by c",
+ "<main>:1:48: Error: Column c is not in source column set. Did you mean a?\n");
+ ExpectFailWithError(
+ "select count(*) as a, b, from plato.Input group by b order by c",
+ "<main>:1:63: Error: Column c is not in source column set. Did you mean a?\n");
+
+ // The plain select (no DISTINCT, no aggregation) with the same ORDER BY is accepted.
+ auto plainSelect = SqlToYql("select a, b from plato.Input order by c");
+ UNIT_ASSERT(plainSelect.IsOk());
+ }
+
+ Y_UNIT_TEST(SelectAggregatedWhere) {
+ // Aggregate functions are not allowed in WHERE.
+ auto parsed = SqlToYql("select * from plato.Input where count(key)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:33: Error: Can not use aggregated values in filtering\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(DoubleFrom) {
+ // FROM both before and after SELECT is rejected.
+ auto parsed = SqlToYql("from plato.Input select * from plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:27: Error: Only one FROM clause is allowed\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinMissingCorrName) {
+ // The right-hand side of the join predicate uses an unqualified column.
+ auto parsed = SqlToYql("select * from plato.Input1 as a join plato.Input2 as b on a.key == key");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:65: Error: JOIN: column requires correlation name\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinMissingCorrName1) {
+ // Joining a named subquery ($foo) without an alias via USING.
+ const auto query =
+ "use plato;\n"
+ "$foo = select * from Input1;\n"
+ "select * from Input2 join $foo USING(key);\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:3:27: Error: JOIN: missing correlation name for source\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinMissingCorrName2) {
+ // Cross-joining a named subquery ($foo) without an alias.
+ const auto query =
+ "use plato;\n"
+ "$foo = select * from Input1;\n"
+ "select * from Input2 cross join $foo;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:3:33: Error: JOIN: missing correlation name for source\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinEmptyCorrNames) {
+ // Neither side of the join has a correlation name.
+ const auto query =
+ "$left = (SELECT * FROM plato.Input1 LIMIT 2);\n"
+ "$right = (SELECT * FROM plato.Input2 LIMIT 2);\n"
+ "SELECT * FROM $left FULL JOIN $right USING (key);\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:3:45: Error: At least one correlation name is required in join\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinSameCorrNames) {
+ // Both sides of the equality predicate refer to the same source name.
+ auto parsed = SqlToYql("SELECT Input.key FROM plato.Input JOIN plato.Input1 ON Input.key == Input.subkey\n");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:66: Error: JOIN: different correlation names are required for joined tables\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinConstPredicateArg) {
+ // One equality in the ON clause compares a column with a string constant.
+ auto parsed = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey == \"wtf\"\n");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:87: Error: JOIN: each equality predicate argument must depend on exactly one JOIN input\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectJoinNonEqualityPredicate) {
+ // The ON clause contains a '>' comparison alongside an equality.
+ auto parsed = SqlToYql("SELECT * FROM plato.Input1 as A JOIN plato.Input2 as B ON A.key == B.key AND A.subkey > B.subkey\n");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:87: Error: JOIN ON expression must be a conjunction of equality predicates\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinCorrNameOutOfScope) {
+ // The first ON clause mentions source C, which is only joined later.
+ const auto query =
+ "PRAGMA equijoin;\n"
+ "SELECT * FROM plato.A JOIN plato.B ON A.key == C.key JOIN plato.C ON A.subkey == C.subkey;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:2:45: Error: JOIN: can not use source: C in equality predicate, it is out of current join scope\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinNoRightSource) {
+ // The second ON clause never references the table being joined (C).
+ const auto query =
+ "PRAGMA equijoin;\n"
+ "SELECT * FROM plato.A JOIN plato.B ON A.key == B.key JOIN plato.C ON A.subkey == B.subkey;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:2:79: Error: JOIN ON equality predicate must have one of its arguments from the rightmost source\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinOuterWithoutType) {
+ // OUTER JOIN without LEFT/RIGHT/FULL is rejected.
+ const auto query = "SELECT * FROM plato.A Outer JOIN plato.B ON A.key == B.key;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:23: Error: Invalid join type: OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectEquiJoinOuterWithWrongType) {
+ // OUTER after LEFT SEMI is rejected.
+ const auto query = "SELECT * FROM plato.A LEFT semi OUTER JOIN plato.B ON A.key == B.key;\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:33: Error: Invalid join type: LEFT SEMI OUTER JOIN. OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InsertNoCluster) {
+ // INSERT target without a cluster prefix and without a default cluster fails.
+ auto parsed = SqlToYql("insert into Output (foo) values (1)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: No cluster name given and no default cluster is selected\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InsertValuesNoLabels) {
+ // INSERT ... VALUES without a column list is rejected.
+ auto parsed = SqlToYql("insert into plato.Output values (1)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:19: Error: INSERT INTO ... VALUES requires specification of table columns\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UpsertValuesNoLabelsKikimr) {
+ // UPSERT ... VALUES without a column list, using the Kikimr provider.
+ auto parsed = SqlToYql("upsert into plato.Output values (1)", 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:19: Error: UPSERT INTO ... VALUES requires specification of table columns\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ReplaceValuesNoLabelsKikimr) {
+ // REPLACE ... VALUES without a column list, using the Kikimr provider.
+ auto parsed = SqlToYql("replace into plato.Output values (1)", 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:20: Error: REPLACE INTO ... VALUES requires specification of table columns\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InsertValuesInvalidLabels) {
+ // One column listed, two values supplied.
+ auto parsed = SqlToYql("insert into plato.Output (foo) values (1, 2)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:27: Error: VALUES have 2 columns, INSERT INTO expects: 1\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(BuiltinFileOpNoArgs) {
+ // FilePath() called with no arguments.
+ auto parsed = SqlToYql("select FilePath()");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:8: Error: FilePath() requires exactly 1 arguments, given: 0\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ProcessWithHaving) {
+ // PROCESS combined with HAVING is reported as unsupported.
+ auto parsed = SqlToYql("process plato.Input using some::udf(value) having value == 1");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:15: Error: PROCESS does not allow HAVING yet! You may request it on yql@ maillist.\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ReduceNoBy) {
+ // REDUCE without an ON clause is a syntax error at USING.
+ auto parsed = SqlToYql("reduce plato.Input using some::udf(value)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:19: Error: mismatched input 'using' expecting {',', ON, PRESORT}\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ReduceDistinct) {
+ // DISTINCT inside the UDF call of REDUCE is rejected.
+ auto parsed = SqlToYql("reduce plato.Input on key using some::udf(distinct value)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:43: Error: DISTINCT can not be used in PROCESS/REDUCE\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(CreateTableWithView) {
+ // A ':view' suffix on the table name in CREATE TABLE is a syntax error.
+ auto parsed = SqlToYql("CREATE TABLE plato.foo:bar (key INT);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:22: Error: mismatched input ':' expecting '('\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(AsteriskWithSomethingAfter) {
+ // Plain '*' followed by another projection item.
+ auto parsed = SqlToYql("select *, LENGTH(value) from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:8: Error: Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(AsteriskWithSomethingBefore) {
+ // Plain '*' preceded by another projection item.
+ auto parsed = SqlToYql("select LENGTH(value), * from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:23: Error: Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(DuplicatedQualifiedAsterisk) {
+ // The same qualified asterisk (in.*) appears twice in the projection.
+ auto parsed = SqlToYql("select in.*, key, in.* from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:19: Error: Unable to use twice same quialified asterisk. Invalid source: in\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(BrokenLabel) {
+ // A column alias containing '.' is rejected.
+ auto parsed = SqlToYql("select in.*, key as `funny.label` from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:14: Error: Unable to use '.' in column name. Invalid column name: funny.label\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(KeyConflictDetect0) {
+ // Unqualified key plus a qualified column aliased to key.
+ auto parsed = SqlToYql("select key, in.key as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:13: Error: Unable to use duplicate column names. Collision in name: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(KeyConflictDetect1) {
+ // An expression aliased to key plus the key column itself.
+ auto parsed = SqlToYql("select length(key) as key, key from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:28: Error: Unable to use duplicate column names. Collision in name: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(KeyConflictDetect2) {
+ // Unqualified and qualified references to the same column, no aliases.
+ auto parsed = SqlToYql("select key, in.key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: Duplicate column: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict1) {
+ // An explicit alias "column0" next to an unnamed expression must still translate.
+ auto parsed = SqlToYql("select LENGTH(Value), key as column0 from plato.Input;");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AutogenerationAliasWithCollisionConflict2) {
+ // An explicit alias "column1" before an unnamed expression must still translate.
+ auto parsed = SqlToYql("select key as column1, LENGTH(Value) from plato.Input;");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnSimpleSelect) {
+ // "Intop.*" names a source that does not exist in the FROM clause.
+ auto parsed = SqlToYql("use plato; select Intop.*, Input.key from plato.Input;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:19: Error: Unknown correlation name: Intop\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(MissedSourceTableForQualifiedAsteriskOnJoin) {
+ // "tmissed.*" names a source that is not part of the join.
+ auto parsed = SqlToYql("use plato; select tmissed.*, t2.*, t1.key from plato.Input as t1 join plato.Input as t2 on t1.key==t2.key;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:19: Error: Unknown correlation name for asterisk: tmissed\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UnableToReferenceOnNotExistSubcolumn) {
+ // The subquery projects only key, so b.subkey cannot be resolved.
+ auto parsed = SqlToYql("select b.subkey from (select key from plato.Input as a) as b;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:8: Error: Column subkey is not in source column set\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify0) {
+ // in.key together with in.key aliased as key.
+ auto parsed = SqlToYql("select in.key, in.key as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: Duplicate column: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify1) {
+ // in.key together with an expression aliased as key.
+ auto parsed = SqlToYql("select in.key, length(key) as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: Duplicate column: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify2) {
+ // Unqualified key together with in.key.
+ auto parsed = SqlToYql("select key, in.key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: Duplicate column: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ConflictOnSameNameWithQualify3) {
+ // in.key together with a different column aliased as key.
+ auto parsed = SqlToYql("select in.key, subkey as key from plato.Input as in;");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:1: Error: Duplicate column: key\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenBySameColumns) {
+ // The same column is listed twice in FLATTEN BY (once with an alias).
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key, key as kk)");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:46: Error: Duplicate column name found: key in FlattenBy section\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenBySameAliases) {
+ // Two FLATTEN BY columns share the alias kk.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as kk);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:52: Error: Duplicate alias found: kk in FlattenBy section\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenByExprSameAliases) {
+ // A column and an expression in FLATTEN BY share the alias kk.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as kk);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:52: Error: Collision between alias and column name: kk in FlattenBy section\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias0) {
+ // The alias of the second item equals the name of the first column.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key, subkey as key);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:46: Error: Collision between alias and column name: key in FlattenBy section\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenByConflictNameAndAlias1) {
+ // The alias of the second item equals the source column of the first item.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, subkey as key);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:52: Error: Collision between alias and column name: key in FlattenBy section\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenByExprConflictNameAndAlias1) {
+ // An expression aliased as key while key is already a FLATTEN BY column.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key as kk, ListSkip(subkey,1) as key);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:52: Error: Duplicate column name found: key in FlattenBy section\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(SelectFlattenByUnnamedExpr) {
+ // An expression in FLATTEN BY without an alias is rejected.
+ auto parsed = SqlToYql("select key from plato.Input flatten by (key, ListSkip(key, 1))");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:46: Error: Unnamed expression after FLATTEN BY is not allowed\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UseInOnStrings) {
+ // IN with a string literal on the right-hand side is rejected.
+ auto parsed = SqlToYql("select * from plato.Input where \"foo\" in \"foovalue\";");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected =
+ "<main>:1:42: Error: Unable to use IN predicate with string argument, it won't search substring - "
+ "expecting tuple, list, dict or single column table source\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UseSubqueryInScalarContextInsideIn) {
+ // A parenthesised subquery after IN translates, but with a warning (code 4501).
+ auto parsed = SqlToYql("$q = (select key from plato.Input); select * from plato.Input where subkey in ($q);");
+ UNIT_ASSERT(parsed.Root);
+ const TString expected =
+ "<main>:1:79: Warning: Using subrequest in scalar context after IN, "
+ "perhaps you should remove parenthesis here, code: 4501\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InHintsWithKeywordClash) {
+ // COMPACT appears as a column, as an IN hint, and as a quoted call name.
+ auto parsed = SqlToYql("SELECT COMPACT FROM plato.Input WHERE COMPACT IN COMPACT `COMPACT`(1,2,3)");
+ UNIT_ASSERT(!parsed.Root);
+ // The last, quoted COMPACT should be parsed as a call expression.
+ const TString expected = "<main>:1:58: Error: Unknown builtin: COMPACT\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ErrorColumnPosition) {
+ // The unknown column sits at the start of line 3; the issue position must match.
+ const auto query =
+ "USE plato;\n"
+ "SELECT \n"
+ "value FROM (\n"
+ "select key from Input\n"
+ ");\n";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:3:1: Error: Column value is not in source column set\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(PrimaryViewAbortMapReduce) {
+ // VIEW PRIMARY KEY is reported as unsupported for yt tables.
+ auto parsed = SqlToYql("SELECT key FROM plato.Input VIEW PRIMARY KEY");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:17: Error: primary view is not supported for yt tables\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InsertAbortMapReduce) {
+ // INSERT OR ABORT is reported as unsupported for yt tables.
+ auto parsed = SqlToYql("INSERT OR ABORT INTO plato.Output SELECT key FROM plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:0: Error: INSERT OR ABORT INTO is not supported for yt tables\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ReplaceIntoMapReduce) {
+ // REPLACE INTO on a yt table points the user to INSERT ... WITH TRUNCATE.
+ auto parsed = SqlToYql("REPLACE INTO plato.Output SELECT key FROM plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:0: Error: Meaning of REPLACE INTO has been changed, now you should use INSERT INTO <table> WITH TRUNCATE ... for yt\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UpsertIntoMapReduce) {
+ // UPSERT INTO is reported as unsupported for yt tables.
+ auto parsed = SqlToYql("UPSERT INTO plato.Output SELECT key FROM plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:0: Error: UPSERT INTO is not supported for yt tables\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UpdateMapReduce) {
+ // UPDATE is reported as unsupported for yt.
+ auto parsed = SqlToYql("UPDATE plato.Output SET value = value + 1 WHERE key < 1");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:0: Error: UPDATE is unsupported for yt\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(DeleteMapReduce) {
+ // DELETE is reported as unsupported for yt.
+ auto parsed = SqlToYql("DELETE FROM plato.Output WHERE key < 1");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:0: Error: DELETE is unsupported for yt\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(ReplaceIntoWithTruncate) {
+ // REPLACE INTO cannot be combined with WITH TRUNCATE.
+ auto parsed = SqlToYql("REPLACE INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:32: Error: Unable REPLACE INTO with truncate mode\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UpsertIntoWithTruncate) {
+ // UPSERT INTO cannot be combined with WITH TRUNCATE.
+ auto parsed = SqlToYql("UPSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:31: Error: Unable UPSERT INTO with truncate mode\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InsertIntoWithTruncateKikimr) {
+ // INSERT ... WITH TRUNCATE under the Kikimr provider is reported as unsupported.
+ auto parsed = SqlToYql("INSERT INTO plato.Output WITH TRUNCATE SELECT key FROM plato.Input", 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:0: Error: INSERT INTO WITH TRUNCATE is not supported for kikimr tables\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(InsertIntoWithWrongArgumentCount) {
+ // Three columns listed, four values supplied, in WITH TRUNCATE mode.
+ auto parsed = SqlToYql("insert into plato.Output with truncate (key, value, subkey) values (5, '1', '2', '3');");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:53: Error: VALUES have 4 columns, INSERT INTO ... WITH TRUNCATE expects: 3\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(UpsertWithWrongArgumentCount) {
+ // Three columns listed, two values supplied, under the Kikimr provider.
+ auto parsed = SqlToYql("upsert into plato.Output (key, value, subkey) values (2, '3');", 10, TString(NYql::KikimrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:39: Error: VALUES have 2 columns, UPSERT INTO expects: 3\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(GroupingSetByExprWithoutAlias) {
+ // An unnamed cast expression inside GROUPING SETS is rejected.
+ auto parsed = SqlToYql("SELECT key FROM plato.Input GROUP BY GROUPING SETS (cast(key as uint32), subkey);");
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:1:53: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(GroupingSetByExprWithoutAlias2) {
+ // Same as above, with the unnamed expression on the second line of the query.
+ const auto query =
+ "SELECT key FROM plato.Input GROUP BY subkey || subkey, GROUPING SETS (\n"
+ "cast(key as uint32), subkey);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ const TString expected = "<main>:2:1: Error: Unnamed expressions are not supported in GROUPING SETS. Please use '<expr> AS <name>'.\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+
+ Y_UNIT_TEST(CubeByExprWithoutAlias) {
+     // An unnamed expression inside CUBE is expected to be rejected.
+     const TString query = "SELECT key FROM plato.Input GROUP BY CUBE (key, subkey / key);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:56: Error: Unnamed expressions are not supported in CUBE. Please use '<expr> AS <name>'.\n");
+ }
+
+ Y_UNIT_TEST(RollupByExprWithoutAlias) {
+     // An unnamed expression inside ROLLUP is expected to be rejected.
+     const TString query = "SELECT key FROM plato.Input GROUP BY ROLLUP (subkey / key);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:53: Error: Unnamed expressions are not supported in ROLLUP. Please use '<expr> AS <name>'.\n");
+ }
+
+ Y_UNIT_TEST(GroupByHugeCubeDeniedNoPragma) {
+     // A CUBE over six columns without any pragma is expected to exceed the reported limit of five.
+     const TString query = "SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub, key + val as keyval);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:119: Error: GROUP BY CUBE is allowed only for 5 columns, but you use 6\n");
+ }
+
+ Y_UNIT_TEST(GroupByInvalidPragma) {
+     // A negative value for PRAGMA GroupByCubeLimit is expected to be rejected.
+     const TString query = "PRAGMA GroupByCubeLimit = '-4';";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:27: Error: Expected unsigned integer literal as a single argument for: GroupByCubeLimit\n");
+ }
+
+ Y_UNIT_TEST(GroupByHugeCubeDeniedPragme) {
+     // With GroupByCubeLimit set to 4, a CUBE over five columns is expected to be rejected.
+     const TString query = "PRAGMA GroupByCubeLimit = '4'; SELECT key FROM plato.Input GROUP BY CUBE (key, subkey, value, key + subkey as sum, key - subkey as sub);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:132: Error: GROUP BY CUBE is allowed only for 4 columns, but you use 5\n");
+ }
+
+ Y_UNIT_TEST(GroupByFewBigCubes) {
+     // Two CUBE clauses together are expected to exceed the reported limit of 64 groups.
+     const TString query = "SELECT key FROM plato.Input GROUP BY CUBE(key, subkey, key + subkey as sum), CUBE(value, value + key + subkey as total);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:1: Error: Unable to GROUP BY more than 64 groups, you try use 80 groups\n");
+ }
+
+ Y_UNIT_TEST(GroupByFewBigCubesWithPragmaLimit) {
+     // With GroupByLimit set to 16, GROUPING SETS plus ROLLUP are expected to exceed the limit.
+     const TString query = "PRAGMA GroupByLimit = '16'; SELECT key FROM plato.Input GROUP BY GROUPING SETS(key, subkey, key + subkey as sum), ROLLUP(value, value + key + subkey as total);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:29: Error: Unable to GROUP BY more than 16 groups, you try use 18 groups\n");
+ }
+
+ Y_UNIT_TEST(NoGroupingColumn0) {
+     // grouping() referencing a column that is not among the grouping sets must be reported.
+     const TString query =
+         "select count(1), key_first, val_first, grouping(key_first, val_first, nomind) as group\n"
+         "from plato.Input group by grouping sets (cast(key as uint32) /100 as key_first, Substring(value, 1, 1) as val_first);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:71: Error: Column 'nomind' is not a grouping column\n");
+ }
+
+ Y_UNIT_TEST(NoGroupingColumn1) {
+     // grouping() referencing 'value', which is not part of the CUBE, must be reported.
+     const TString query = "select count(1), grouping(key, value) as group_duo from plato.Input group by cube (key, subkey);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:32: Error: Column 'value' is not a grouping column\n");
+ }
+
+ Y_UNIT_TEST(EmptyAccess0) {
+     // An empty column reference nested in AsList() inside VALUES is expected to be rejected.
+     const TString query = "insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(``));";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:73: Error: Column reference \"\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(EmptyAccess1) {
+     // An empty column reference used directly as a VALUES item is expected to be rejected.
+     const TString query = "insert into plato.Output (list0, list1) values (AsList(0, 1, 2), ``);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:66: Error: Column reference \"\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(UseUnknownColumnInInsert) {
+     // A column reference inside VALUES is expected to be rejected.
+     const TString query = "insert into plato.Output (list0, list1) values (AsList(0, 1, 2), AsList(`test`));";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:73: Error: Column reference \"test\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(GroupByEmptyColumn) {
+     // GROUP BY with an empty column name is expected to be rejected.
+     const TString query = "select count(1) from plato.Input group by ``;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:43: Error: Column name can not be empty\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberOutOfBase) {
+     // An octal literal containing the digit 8 is expected to be rejected.
+     const TString query = "select 0o80l;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Failed to parse number from string: 0o80l, char: '8' is out of base: 8\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberOutOfRangeForInt64ButFitsInUint64) {
+     // A hex literal with the 'l' suffix whose value does not fit Int64 is expected to be rejected.
+     const TString query = "select 0xc000000000000000l;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Failed to parse 13835058055282163712 as integer literal of Int64 type: value out of range for Int64\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberOutOfRangeUint64) {
+     // Both a hex and a decimal literal that overflow the number limit are expected to be rejected.
+     auto hexParsed = SqlToYql("select 0xc0000000000000000l;");
+     UNIT_ASSERT(!hexParsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(hexParsed), "<main>:1:8: Error: Failed to parse number from string: 0xc0000000000000000l, number limit overflow\n");
+
+     auto decParsed = SqlToYql("select 1234234543563435151456;\n");
+     UNIT_ASSERT(!decParsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(decParsed), "<main>:1:8: Error: Failed to parse number from string: 1234234543563435151456, number limit overflow\n");
+ }
+
+ Y_UNIT_TEST(ConvertNumberNegativeOutOfRange) {
+     // Only the second statement (-9223372036854775809) is expected to produce an error.
+     const TString query =
+         "select -9223372036854775808;\n"
+         "select -9223372036854775809;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:8: Error: Failed to parse negative integer: -9223372036854775809, number limit overflow\n");
+ }
+
+ Y_UNIT_TEST(InvaildUsageReal0) {
+     // A real literal without a leading digit is expected to produce a parse error.
+     const TString query = "select .0;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_STRING_CONTAINS(Err2Str(parsed), "<main>:1:7: Error: extraneous input '.' expecting {");
+ }
+
+ Y_UNIT_TEST(InvaildUsageReal1) {
+     // Same as above, with an 'f' suffix on the literal.
+     const TString query = "select .0f;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_STRING_CONTAINS(Err2Str(parsed), "<main>:1:7: Error: extraneous input '.' expecting {");
+ }
+
+ Y_UNIT_TEST(InvaildUsageWinFunctionWithoutWindow) {
+     // lead() used without a window specification is expected to be rejected.
+     const TString query = "select lead(key, 2) from plato.Input;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Failed to use window function Lead without window specification\n");
+ }
+
+ Y_UNIT_TEST(DropTableWithIfExists) {
+     // DROP TABLE IF EXISTS must translate, and the single "Write" line must mention drop_if_exists.
+     auto parsed = SqlToYql("DROP TABLE IF EXISTS plato.foo;");
+     UNIT_ASSERT(parsed.Root);
+
+     TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+         if (word != "Write") {
+             return;
+         }
+         UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("drop_if_exists"));
+     };
+
+     TWordCountHive wordCounts = {{TString("Write"), 0}};
+     VerifyProgram(parsed, wordCounts, checkLine);
+
+     UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+ }
+
+ Y_UNIT_TEST(TooManyErrors) {
+     // SqlToYql is called with 10 as its second argument; the expected output is nine
+     // column errors followed by a final "Too many issues" entry.
+     const char* q = R"(
+ USE plato;
+ select A, B, C, D, E, F, G, H, I, J, K, L, M, N from (select b from `abc`);
+)";
+
+     NYql::TAstParseResult res = SqlToYql(q, 10);
+     UNIT_ASSERT(!res.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(res),
+ R"(<main>:3:16: Error: Column A is not in source column set. Did you mean b?
+<main>:3:19: Error: Column B is not in source column set. Did you mean b?
+<main>:3:22: Error: Column C is not in source column set. Did you mean b?
+<main>:3:25: Error: Column D is not in source column set. Did you mean b?
+<main>:3:28: Error: Column E is not in source column set. Did you mean b?
+<main>:3:31: Error: Column F is not in source column set. Did you mean b?
+<main>:3:34: Error: Column G is not in source column set. Did you mean b?
+<main>:3:37: Error: Column H is not in source column set. Did you mean b?
+<main>:3:40: Error: Column I is not in source column set. Did you mean b?
+<main>: Error: Too many issues, code: 1
+)");
+ }
+
+ Y_UNIT_TEST(ShouldCloneBindingForNamedParameter) {
+     // A lambda that uses the same named node ($value_type) twice must translate successfully.
+     const TString query = R"($f = () -> {
+ $value_type = TypeOf(1);
+ $pair_type = StructType(
+ TypeOf("2") AS key,
+ $value_type AS value
+ );
+
+ RETURN TupleType(
+ ListType($value_type),
+ $pair_type);
+};
+
+select FormatType($f());
+)";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+ }
+
+ Y_UNIT_TEST(BlockedInvalidFrameBounds) {
+     // Appends each frame clause to a common window definition and expects the given error.
+     auto expectFrameError = [](const TString& frame, const TString& err) {
+         const TString prefix = "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n";
+         const TString query = prefix + frame + ")";
+         auto parsed = SqlToYql(query);
+         UNIT_ASSERT(!parsed.Root);
+         UNIT_ASSERT_NO_DIFF(Err2Str(parsed), err);
+     };
+
+     expectFrameError("ROWS UNBOUNDED FOLLOWING", "<main>:2:5: Error: Frame cannot start from UNBOUNDED FOLLOWING\n");
+     expectFrameError("ROWS BETWEEN 5 PRECEDING AND UNBOUNDED PRECEDING", "<main>:2:29: Error: Frame cannot end with UNBOUNDED PRECEDING\n");
+     expectFrameError("ROWS BETWEEN CURRENT ROW AND 5 PRECEDING", "<main>:2:13: Error: Frame cannot start from CURRENT ROW and end with PRECEDING\n");
+     expectFrameError("ROWS BETWEEN 5 FOLLOWING AND CURRENT ROW", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with CURRENT ROW\n");
+     expectFrameError("ROWS BETWEEN 5 FOLLOWING AND 5 PRECEDING", "<main>:2:14: Error: Frame cannot start from FOLLOWING and end with PRECEDING\n");
+ }
+
+ Y_UNIT_TEST(BlockedRangeValueWithoutSingleOrderBy) {
+     // RANGE frames bounded only by UNBOUNDED / CURRENT ROW are accepted without ORDER BY.
+     UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM plato.Input").IsOk());
+     UNIT_ASSERT(SqlToYql("SELECT COUNT(*) OVER (RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) FROM plato.Input").IsOk());
+
+     // RANGE with an offset and no ORDER BY expression.
+     auto noOrderBy = SqlToYql("SELECT COUNT(*) OVER (RANGE 5 PRECEDING) FROM plato.Input");
+     UNIT_ASSERT(!noOrderBy.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(noOrderBy), "<main>:1:29: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n");
+
+     // RANGE with an offset and two ORDER BY expressions.
+     auto twoOrderBy = SqlToYql("SELECT COUNT(*) OVER (ORDER BY key, value RANGE 5 PRECEDING) FROM plato.Input");
+     UNIT_ASSERT(!twoOrderBy.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(twoOrderBy), "<main>:1:49: Error: RANGE with <offset> PRECEDING/FOLLOWING requires exactly one expression in ORDER BY partition clause\n");
+ }
+
+ Y_UNIT_TEST(NoColumnsInFrameBounds) {
+     // Column references inside frame bound expressions are expected to be rejected.
+     const TString query =
+         "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (ROWS BETWEEN\n"
+         " 1 + key PRECEDING AND 2 + key FOLLOWING);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:6: Error: Column reference \"key\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(WarnOnEmptyFrameBounds) {
+     // A frame from 10 FOLLOWING to 5 FOLLOWING translates, but with an empty-frame warning.
+     const TString query =
+         "SELECT SUM(x) OVER w FROM plato.Input WINDOW w AS (PARTITION BY key ORDER BY subkey\n"
+         "ROWS BETWEEN 10 FOLLOWING AND 5 FOLLOWING)";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:14: Warning: Used frame specification implies empty window frame, code: 4520\n");
+ }
+
+ Y_UNIT_TEST(WarnOnRankWithUnorderedWindow) {
+     // RANK() over a window without ORDER BY translates, but with a warning.
+     const TString query = "SELECT RANK() OVER w FROM plato.Input WINDOW w AS ()";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Warning: Rank() is used with unordered window - all rows will be considered equal to each other, code: 4521\n");
+ }
+
+ Y_UNIT_TEST(WarnOnRankExprWithUnorderedWindow) {
+     // RANK(<expr>) over a window without ORDER BY translates, but with a warning.
+     const TString query = "SELECT RANK(key) OVER w FROM plato.Input WINDOW w AS ()";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Warning: Rank(<expression>) is used with unordered window - the result is likely to be undefined, code: 4521\n");
+ }
+
+ Y_UNIT_TEST(AnyAsTableName) {
+     // 'any' used where a table name is expected must produce a parse error.
+     const TString query = "use plato; select * from any;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:28: Error: no viable alternative at input 'any;'\n");
+ }
+
+ Y_UNIT_TEST(IncorrectOrderOfLambdaOptionalArgs) {
+     // In a lambda, a required argument after an optional one is expected to be rejected.
+     const TString query = "$f = ($x?, $y)->($x + $y); select $f(1);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:12: Error: Non-optional argument can not follow optional one\n");
+ }
+
+ Y_UNIT_TEST(IncorrectOrderOfActionOptionalArgs) {
+     // In an action, a required argument after an optional one is expected to be rejected.
+     const TString query = "define action $f($x?, $y) as select $x,$y; end define; do $f(1);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:23: Error: Non-optional argument can not follow optional one\n");
+ }
+
+ Y_UNIT_TEST(NotAllowedQuestionOnNamedNode) {
+     // A trailing '?' after a named node in an expression is expected to be rejected.
+     const TString query = "$f = 1; select $f?;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:18: Error: Unexpected token '?' at the end of expression\n");
+ }
+
+ Y_UNIT_TEST(AnyAndCrossJoin) {
+     // ANY on either side of an explicit CROSS JOIN is expected to be rejected.
+     auto anyOnLeft = SqlToYql("use plato; select * from any Input1 cross join Input2");
+     UNIT_ASSERT(!anyOnLeft.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(anyOnLeft), "<main>:1:26: Error: ANY should not be used with Cross JOIN\n");
+
+     auto anyOnRight = SqlToYql("use plato; select * from Input1 cross join any Input2");
+     UNIT_ASSERT(!anyOnRight.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(anyOnRight), "<main>:1:44: Error: ANY should not be used with Cross JOIN\n");
+ }
+
+ Y_UNIT_TEST(AnyWithCartesianProduct) {
+     // With AnsiImplicitCrossJoin, ANY on either side of a comma join is expected to be rejected.
+     auto anyOnLeft = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from any Input1, Input2");
+     UNIT_ASSERT(!anyOnLeft.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(anyOnLeft), "<main>:1:56: Error: ANY should not be used with Cross JOIN\n");
+
+     auto anyOnRight = SqlToYql("pragma AnsiImplicitCrossJoin; use plato; select * from Input1, any Input2");
+     UNIT_ASSERT(!anyOnRight.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(anyOnRight), "<main>:1:64: Error: ANY should not be used with Cross JOIN\n");
+ }
+
+ Y_UNIT_TEST(ErrorPlainEndAsInlineActionTerminator) {
+     // An inline action closed with a bare 'end' (no 'do') is expected to produce a parse error.
+     const TString query =
+         "do begin\n"
+         " select 1\n"
+         "; end\n";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:0: Error: missing DO at '<EOF>'\n");
+ }
+
+ Y_UNIT_TEST(ErrorMultiWayJoinWithUsing) {
+     // A three-way join whose first link uses USING is expected to be rejected.
+     const TString query =
+         "USE plato;\n"
+         "PRAGMA DisableSimpleColumns;\n"
+         "SELECT *\n"
+         "FROM Input1 AS a\n"
+         "JOIN Input2 AS b USING(key)\n"
+         "JOIN Input3 AS c ON a.key = c.key;\n";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+         "<main>:5:24: Error: Multi-way JOINs should be connected with ON clause instead of USING clause\n");
+ }
+
+ Y_UNIT_TEST(RequireLabelInFlattenByWithDot) {
+     // FLATTEN BY over a dotted name without an alias is expected to be rejected.
+     const TString query = "select * from plato.Input flatten by x.y";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:40: Error: Unnamed expression after FLATTEN BY is not allowed\n");
+ }
+
+ Y_UNIT_TEST(WarnUnnamedColumns) {
+     // With PRAGMA WarnUnnamedColumns, an unnamed expression yields an autogenerated-name warning.
+     const TString query =
+         "PRAGMA WarnUnnamedColumns;\n"
+         "\n"
+         "SELECT key, subkey, key || subkey FROM plato.Input ORDER BY subkey;\n";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:28: Warning: Autogenerated column name column2 will be used for expression, code: 4516\n");
+ }
+
+ Y_UNIT_TEST(WarnSourceColumnMismatch) {
+     // SELECT column names that differ from the INSERT column list produce a warning, not an error.
+     const TString query =
+         "insert into plato.Output (key, subkey, new_value, one_more_value) select key as Key, subkey, value, \"x\" from plato.Input;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:51: Warning: Column names in SELECT don't match column specification in parenthesis. \"key\" doesn't match \"Key\". \"new_value\" doesn't match \"value\", code: 4517\n");
+ }
+
+ Y_UNIT_TEST(YtCaseInsensitive) {
+     // The cluster name spelled 'PlatO' is accepted both as a table prefix and in USE.
+     auto viaPrefix = SqlToYql("select * from PlatO.foo;");
+     UNIT_ASSERT(viaPrefix.Root);
+
+     auto viaUse = SqlToYql("use PlatO; select * from foo;");
+     UNIT_ASSERT(viaUse.Root);
+ }
+
+ Y_UNIT_TEST(KikimrCaseSensitive) {
+     // With the "kikimr" provider, the cluster name spelled 'PlatO' is reported as unknown.
+     auto viaPrefix = SqlToYql("select * from PlatO.foo;", 10, "kikimr");
+     UNIT_ASSERT(!viaPrefix.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(viaPrefix), "<main>:1:15: Error: Unknown cluster: PlatO\n");
+
+     auto viaUse = SqlToYql("use PlatO; select * from foo;", 10, "kikimr");
+     UNIT_ASSERT(!viaUse.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(viaUse), "<main>:1:5: Error: Unknown cluster: PlatO\n");
+ }
+
+ Y_UNIT_TEST(DiscoveryModeForbidden) {
+     // Each table function below is expected to be rejected when translating in DISCOVERY mode.
+     auto expectForbidden = [](const TString& query, const TString& error) {
+         auto parsed = SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::DISCOVERY);
+         UNIT_ASSERT(!parsed.Root);
+         UNIT_ASSERT_NO_DIFF(Err2Str(parsed), error);
+     };
+
+     expectForbidden("insert into plato.Output select * from plato.range(\"\", Input1, Input4)",
+         "<main>:1:40: Error: range is not allowed in Discovery mode, code: 4600\n");
+
+     expectForbidden("insert into plato.Output select * from plato.like(\"\", \"Input%\")",
+         "<main>:1:40: Error: like is not allowed in Discovery mode, code: 4600\n");
+
+     expectForbidden("insert into plato.Output select * from plato.regexp(\"\", \"Input.\")",
+         "<main>:1:40: Error: regexp is not allowed in Discovery mode, code: 4600\n");
+
+     expectForbidden("insert into plato.Output select * from plato.filter(\"\", ($name) -> { return find($name, \"Input\") is not null; })",
+         "<main>:1:40: Error: filter is not allowed in Discovery mode, code: 4600\n");
+
+     expectForbidden("select Path from plato.folder(\"\") where Type == \"table\"",
+         "<main>:1:18: Error: folder is not allowed in Discovery mode, code: 4600\n");
+ }
+
+ Y_UNIT_TEST(YsonFuncWithoutArgs) {
+     // Yson::From() called without arguments must translate without errors.
+     auto parsed = SqlToYql("SELECT Yson::SerializeText(Yson::From());");
+     UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(CanNotUseOrderByInNonLastSelectInUnionAllChain) {
+     // ORDER BY on the first SELECT of a UNION ALL chain is expected to be rejected.
+     const char* const query =
+         "pragma AnsiOrderByLimitInUnionAll;\n"
+         "use plato;\n"
+         "\n"
+         "select * from Input order by key\n"
+         "union all\n"
+         "select * from Input order by key limit 1;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(CanNotUseLimitInNonLastSelectInUnionAllChain) {
+     // LIMIT on the first SELECT of a UNION ALL chain is expected to be rejected.
+     const char* const query =
+         "pragma AnsiOrderByLimitInUnionAll;\n"
+         "use plato;\n"
+         "\n"
+         "select * from Input limit 1\n"
+         "union all\n"
+         "select * from Input order by key limit 1;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(CanNotUseDiscardInNonFirstSelectInUnionAllChain) {
+     // DISCARD on the second SELECT of a UNION ALL chain is expected to be rejected.
+     const char* const query =
+         "pragma AnsiOrderByLimitInUnionAll;\n"
+         "use plato;\n"
+         "\n"
+         "select * from Input\n"
+         "union all\n"
+         "discard select * from Input;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:6:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n");
+ }
+
+ Y_UNIT_TEST(CanNotUseIntoResultInNonLastSelectInUnionAllChain) {
+     // INTO RESULT on the first (non-last) SELECT of a UNION ALL chain is expected to be rejected.
+     // The query previously used DISCARD on the second SELECT, which duplicated
+     // CanNotUseDiscardInNonFirstSelectInUnionAllChain and left INTO RESULT untested under
+     // the AnsiOrderByLimitInUnionAll pragma.
+     // NOTE(review): the expected position mirrors ErrUnionAllWithIntoResultWithoutExplicitLegacyMode,
+     // shifted to line 4 because of the extra pragma line - confirm against an actual run.
+     auto req = "use plato;\n"
+                "pragma AnsiOrderByLimitInUnionAll;\n"
+                "\n"
+                "select * from Input into result aaa\n"
+                "union all\n"
+                "select * from Input;";
+
+     auto res = SqlToYql(req);
+     UNIT_ASSERT(!res.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:21: Error: INTO RESULT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(YsonStrictInvalidPragma) {
+     // A value other than true/false for pragma yson.Strict is expected to be rejected.
+     const TString query = "pragma yson.Strict = \"wrong\";";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:22: Error: Expected 'true', 'false' or no parameter for: Strict\n");
+ }
+
+ Y_UNIT_TEST(WarnTableNameInSomeContexts) {
+     // Forms of TableName() that are accepted without issues.
+     UNIT_ASSERT(SqlToYql("use plato; select TableName() from Input;").IsOk());
+     UNIT_ASSERT(SqlToYql("use plato; select TableName(\"aaaa\");").IsOk());
+     UNIT_ASSERT(SqlToYql("select TableName(\"aaaa\", \"yt\");").IsOk());
+
+     // No current cluster and no service argument: hard error.
+     auto noCluster = SqlToYql("select TableName() from plato.Input;");
+     UNIT_ASSERT(!noCluster.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(noCluster), "<main>:1:8: Error: TableName requires either service name as second argument or current cluster name\n");
+
+     // Used together with JOIN: translates, with a warning.
+     auto withJoin = SqlToYql(
+         "use plato;\n"
+         "select TableName() from Input1 as a join Input2 as b using(key);");
+     UNIT_ASSERT(withJoin.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(withJoin), "<main>:2:8: Warning: TableName() may produce empty result when used in ambiguous context (with JOIN), code: 4525\n");
+
+     // Used inside an aggregation: translates, with a warning.
+     auto withAggregation = SqlToYql(
+         "use plato;\n"
+         "select SOME(TableName()), key from Input group by key;");
+     UNIT_ASSERT(withAggregation.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(withAggregation),
+         "<main>:2:13: Warning: TableName() will produce empty result when used with aggregation.\n"
+         "Please consult documentation for possible workaround, code: 4525\n");
+ }
+
+ Y_UNIT_TEST(WarnOnDistincWithHavingWithoutAggregations) {
+     // SELECT DISTINCT with HAVING and no aggregation translates, with a deprecation warning.
+     const TString query = "select distinct key from plato.Input having key != '0';";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:49: Warning: The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. Please use WHERE instead., code: 4526\n");
+ }
+
+ Y_UNIT_TEST(FlattenByExprWithNestedNull) {
+     // An unknown named node nested inside a FLATTEN BY expression must be reported.
+     const TString query =
+         "USE plato;\n"
+         "\n"
+         "SELECT * FROM (SELECT 1 AS region_id)\n"
+         "FLATTEN BY (\n"
+         " CAST($unknown(region_id) AS List<String>) AS region\n"
+         ")";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:10: Error: Unknown name: $unknown\n");
+ }
+
+ Y_UNIT_TEST(EmptySymbolNameIsForbidden) {
+     // A named node with an empty name ($``) is expected to be rejected.
+     const char* const query = " $`` = 1; select $``;";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:5: Error: Empty symbol name is not allowed\n");
+ }
+
+ Y_UNIT_TEST(WarnOnBinaryOpWithNullArg) {
+     // Comparing with NULL via != translates, with a warning.
+     auto comparison = SqlToYql("select * from plato.Input where cast(key as Int32) != NULL");
+     UNIT_ASSERT(comparison.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(comparison), "<main>:1:52: Warning: Binary operation != will return NULL here, code: 4529\n");
+
+     // 'or' with a NULL operand produces no issues at all.
+     auto logicalOr = SqlToYql("select 1 or null");
+     UNIT_ASSERT(logicalOr.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(logicalOr), "");
+ }
+
+ Y_UNIT_TEST(ErrorIfTableSampleArgUsesColumns) {
+     // A column reference inside the TABLESAMPLE argument is expected to be rejected.
+     const char* const query = "SELECT key FROM plato.Input TABLESAMPLE BERNOULLI(MIN_OF(100.0, CAST(subkey as Int32)));";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:70: Error: Column reference \"subkey\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(DerivedColumnListForSelectIsNotSupportedYet) {
+     // A derived column list on a plain table source is expected to be rejected.
+     const char* const query = "SELECT a,b,c FROM plato.Input as t(x,y,z);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:35: Error: Derived column list is only supported for VALUES\n");
+ }
+
+ Y_UNIT_TEST(ErrorIfValuesHasDifferentCountOfColumns) {
+     // VALUES rows of different sizes (3 and 2) are expected to be rejected.
+     const char* const query = "VALUES (1,2,3), (4,5);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:17: Error: All VALUES items should have same size: expecting 3, got 2\n");
+ }
+
+ Y_UNIT_TEST(ErrorIfDerivedColumnSizeExceedValuesColumnCount) {
+     // Three derived column names for two-column VALUES are expected to be rejected.
+     const char* const query = "SELECT * FROM(VALUES (1,2), (3,4)) as t(x,y,z);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:40: Error: Derived column list size exceeds column count in VALUES\n");
+ }
+
+ Y_UNIT_TEST(WarnoOnAutogeneratedNamesForValues) {
+     // Two names for four VALUES columns: translates, with a warning about autogenerated names.
+     const char* const query =
+         "PRAGMA WarnUnnamedColumns;\n"
+         "SELECT * FROM (VALUES (1,2,3,4), (5,6,7,8)) as t(x,y);";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:16: Warning: Autogenerated column names column2...column3 will be used here, code: 4516\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithOrderByWithoutExplicitLegacyMode) {
+     // Without any pragma, ORDER BY on the first SELECT of a UNION ALL is expected to be rejected.
+     const char* const query =
+         "use plato;\n"
+         "\n"
+         "select * from Input order by key\n"
+         "union all\n"
+         "select * from Input order by key;";
+
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:21: Error: ORDER BY within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithLimitWithoutExplicitLegacyMode) {
+     // Without any pragma, LIMIT on the first SELECT of a UNION ALL is expected to be rejected.
+     const char* const query =
+         "use plato;\n"
+         "\n"
+         "select * from Input limit 10\n"
+         "union all\n"
+         "select * from Input limit 1;";
+
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:21: Error: LIMIT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithIntoResultWithoutExplicitLegacyMode) {
+     // Without any pragma, INTO RESULT on the first SELECT of a UNION ALL is expected to be rejected.
+     const char* const query =
+         "use plato;\n"
+         "\n"
+         "select * from Input into result aaa\n"
+         "union all\n"
+         "select * from Input;";
+
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:21: Error: INTO RESULT within UNION ALL is only allowed after last subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllWithDiscardWithoutExplicitLegacyMode) {
+     // Without any pragma, DISCARD on the second SELECT of a UNION ALL is expected to be rejected.
+     const char* const query =
+         "use plato;\n"
+         "\n"
+         "select * from Input\n"
+         "union all\n"
+         "discard select * from Input;";
+
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:1: Error: DISCARD within UNION ALL is only allowed before first subquery\n");
+ }
+
+ Y_UNIT_TEST(ErrUnionAllKeepsIgnoredOrderByWarning) {
+     // The "ORDER BY ignored" warning is expected to be reported together with the error.
+     const char* const query =
+         "use plato;\n"
+         "\n"
+         "SELECT * FROM (\n"
+         " SELECT * FROM Input\n"
+         " UNION ALL\n"
+         " SELECT t.* FROM Input AS t ORDER BY t.key\n"
+         ");";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+         "<main>:4:3: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n"
+         "<main>:6:39: Error: Unknown correlation name: t\n");
+ }
+
+ Y_UNIT_TEST(ErrOrderByIgnoredButCheckedForMissingColumns) {
+     // Source columns are known (inline subquery): ORDER BY on a missing column is an error.
+     ExpectFailWithError(
+         "$src = SELECT key FROM (SELECT 1 as key, 2 as subkey) ORDER BY x; SELECT * FROM $src;",
+         "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n"
+         "<main>:1:64: Error: Column x is not in source column set\n");
+
+     // Source is a table: only the "ORDER BY ignored" warning is expected.
+     auto fromTable = SqlToYql("$src = SELECT key FROM plato.Input ORDER BY x; SELECT * FROM $src;");
+     UNIT_ASSERT(fromTable.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(fromTable), "<main>:1:8: Warning: ORDER BY without LIMIT in subquery will be ignored, code: 4504\n");
+ }
+
+ Y_UNIT_TEST(InvalidTtlInterval) {
+     // A non-Interval literal as the TTL value is expected to produce two errors.
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key))
+ WITH (TTL = 1 On CreatedAt);
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed),
+         "<main>:4:25: Error: Literal of Interval type is expected for TTL\n"
+         "<main>:4:25: Error: Invalid TTL settings\n");
+ }
+
+ Y_UNIT_TEST(InvalidTtlUnit) {
+     // An unsupported TTL unit keyword (PICOSECONDS) is expected to produce a parse error.
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt AS PICOSECONDS);
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_STRING_CONTAINS(Err2Str(parsed), "mismatched input 'PICOSECONDS' expecting {MICROSECONDS, MILLISECONDS, NANOSECONDS, SECONDS}");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedSink) {
+     // An unknown changefeed SINK_TYPE value ("S3") is expected to be rejected.
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (SINK_TYPE = "S3", MODE = "KEYS_ONLY", FORMAT = "json")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:55: Error: Unknown changefeed sink type: S3\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedSettings) {
+     // An unknown changefeed setting name (FOO) is expected to be rejected.
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (SINK_TYPE = "local", FOO = "bar")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:64: Error: Unknown changefeed setting: FOO\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedInitialScan) {
+     // A string value for INITIAL_SCAN is expected to be rejected (Bool literal required).
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", INITIAL_SCAN = "foo")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:95: Error: Literal of Bool type is expected for INITIAL_SCAN\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedVirtualTimestamps) {
+     // A string value for VIRTUAL_TIMESTAMPS is expected to be rejected (Bool literal required).
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", VIRTUAL_TIMESTAMPS = "foo")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:101: Error: Literal of Bool type is expected for VIRTUAL_TIMESTAMPS\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedResolvedTimestamps) {
+     // A string value for RESOLVED_TIMESTAMPS is expected to be rejected (Interval literal required).
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RESOLVED_TIMESTAMPS = "foo")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:102: Error: Literal of Interval type is expected for RESOLVED_TIMESTAMPS\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedRetentionPeriod) {
+     // A string value for RETENTION_PERIOD is expected to be rejected (Interval literal required).
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RETENTION_PERIOD = "foo")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:99: Error: Literal of Interval type is expected for RETENTION_PERIOD\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedTopicPartitions) {
+     // A string value for TOPIC_MIN_ACTIVE_PARTITIONS is expected to be rejected (integer literal required).
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", TOPIC_MIN_ACTIVE_PARTITIONS = "foo")
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:110: Error: Literal of integer type is expected for TOPIC_MIN_ACTIVE_PARTITIONS\n");
+ }
+
+ Y_UNIT_TEST(InvalidChangefeedAwsRegion) {
+     // A Bool value for AWS_REGION is expected to be rejected (String literal required).
+     const char* const sql = R"(
+ USE plato;
+ CREATE TABLE tableName (
+ Key Uint32, PRIMARY KEY (Key),
+ CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", AWS_REGION = true)
+ );
+ )";
+     auto parsed = SqlToYql(sql);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:5:93: Error: Literal of String type is expected for AWS_REGION\n");
+ }
+
+ Y_UNIT_TEST(ErrJoinWithGroupingSetsWithoutCorrelationName) {
+     // A GROUPING SETS key without a correlation name over a JOIN is expected to be rejected.
+     const char* const query =
+         "USE plato;\n"
+         "\n"
+         "SELECT k1, k2, subkey\n"
+         "FROM T1 AS a JOIN T2 AS b USING (key)\n"
+         "GROUP BY GROUPING SETS(\n"
+         " (a.key as k1, b.subkey as k2),\n"
+         " (k1),\n"
+         " (subkey)\n"
+         ");";
+     auto parsed = SqlToYql(query);
+     UNIT_ASSERT(!parsed.Root);
+     UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:8:4: Error: Columns in grouping sets should have correlation name, error in key: subkey\n");
+ }
+
+ Y_UNIT_TEST(ErrJoinWithGroupByWithoutCorrelationName) {
+     // A GROUP BY key without a correlation name over a JOIN is expected to be rejected.
+     const char* const query =
+         "USE plato;\n"
+         "\n"
+         "SELECT k1, k2,\n"
+         " value\n"
+         "FROM T1 AS a JOIN T2 AS b USING (key)\n"
+         "GROUP BY a.key as k1, b.subkey as k2,\n"
+         " value;";
+     ExpectFailWithError(query,
+         "<main>:7:5: Error: Columns in GROUP BY should have correlation name, error in key: value\n");
+ }
+
+ Y_UNIT_TEST(ErrWithMissingFrom) {
+     // Each query below has no FROM clause and is expected to fail with the given error text.
+     auto expectError = [](const char* query, const char* error) {
+         auto parsed = SqlToYql(query);
+         UNIT_ASSERT(!parsed.Root);
+         UNIT_ASSERT_NO_DIFF(Err2Str(parsed), error);
+     };
+
+     expectError("select 1 as key where 1 > 1;",
+         "<main>:1:25: Error: Filtering is not allowed without FROM\n");
+
+     expectError("select 1 + count(*);",
+         "<main>:1:12: Error: Aggregation is not allowed without FROM\n");
+
+     expectError("select 1 as key, subkey + value;",
+         "<main>:1:1: Error: Column references are not allowed without FROM\n"
+         "<main>:1:18: Error: Column reference 'subkey'\n"
+         "<main>:1:1: Error: Column references are not allowed without FROM\n"
+         "<main>:1:27: Error: Column reference 'value'\n");
+
+     expectError("select count(1) group by key;",
+         "<main>:1:1: Error: Column references are not allowed without FROM\n"
+         "<main>:1:26: Error: Column reference 'key'\n");
+ }
+
+ Y_UNIT_TEST(ErrWithMissingFromForWindow) {
+ // Aggregate over a named window inside a lambda body.
+ auto query = "$c = () -> (1 + count(1) over w);\n"
+ "select $c();";
+ ExpectFailWithError(query,
+ "<main>:1:9: Error: Window and aggregation functions are not allowed in this context\n"
+ "<main>:1:17: Error: Failed to use aggregation function Count without window specification or in wrong place\n");
+
+ // Pure window function over a named window inside a lambda body.
+ query = "$c = () -> (1 + lead(1) over w);\n"
+ "select $c();";
+ ExpectFailWithError(query,
+ "<main>:1:17: Error: Window functions are not allowed in this context\n"
+ "<main>:1:17: Error: Failed to use window function Lead without window specification or in wrong place\n");
+
+ // Aggregate over a named window in a SELECT that has no FROM.
+ ExpectFailWithError("select 1 + count(1) over w window w as ();",
+ "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use aggregation function Count without window specification or in wrong place\n");
+
+ // Pure window function over a named window in a SELECT that has no FROM.
+ ExpectFailWithError("select 1 + lead(1) over w window w as ();",
+ "<main>:1:12: Error: Window functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n");
+ }
+
+ Y_UNIT_TEST(ErrWithMissingFromForInplaceWindow) {
+ // Aggregate with an inline (anonymous) window inside a lambda body.
+ auto query = "$c = () -> (1 + count(1) over ());\n"
+ "select $c();";
+ ExpectFailWithError(query,
+ "<main>:1:26: Error: Window and aggregation functions are not allowed in this context\n");
+
+ // Window function with an inline frame specification inside a lambda body.
+ query = "$c = () -> (1 + lead(1) over (rows between unbounded preceding and current row));\n"
+ "select $c();";
+ ExpectFailWithError(query,
+ "<main>:1:25: Error: Window and aggregation functions are not allowed in this context\n");
+
+ // Aggregate with an inline window in a SELECT that has no FROM.
+ ExpectFailWithError("select 1 + count(1) over ();",
+ "<main>:1:1: Error: Window and aggregation functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use aggregation function Count without window specification or in wrong place\n");
+
+ // Window function with an inline frame in a SELECT that has no FROM.
+ ExpectFailWithError("select 1 + lead(1) over (rows between current row and unbounded following);",
+ "<main>:1:12: Error: Window functions are not allowed without FROM\n"
+ "<main>:1:12: Error: Failed to use window function Lead without window specification or in wrong place\n");
+ }
+
+ Y_UNIT_TEST(ErrDistinctInWrongPlace) {
+ // DISTINCT passed to a UDF call.
+ ExpectFailWithError("select Some::Udf(distinct key) from plato.Input;",
+ "<main>:1:18: Error: DISTINCT can only be used in aggregation functions\n");
+ // DISTINCT passed when calling the result of an aggregation.
+ ExpectFailWithError("select sum(key)(distinct foo) from plato.Input;",
+ "<main>:1:17: Error: DISTINCT can only be used in aggregation functions\n");
+
+ // DISTINCT passed to a non-aggregating builtin.
+ ExpectFailWithError("select len(distinct foo) from plato.Input;",
+ "<main>:1:8: Error: DISTINCT can only be used in aggregation functions\n");
+
+ // DISTINCT passed to a user lambda.
+ ExpectFailWithError("$foo = ($x) -> ($x); select $foo(distinct key) from plato.Input;",
+ "<main>:1:34: Error: DISTINCT can only be used in aggregation functions\n");
+ }
+
+ Y_UNIT_TEST(ErrForNotSingleChildInInlineAST) {
+ // Empty and whitespace-only inline YQL both yield a root with zero children.
+ auto noChildError = "<main>:1:8: Error: Failed to parse YQL: expecting AST root node with single child, but got 0\n";
+ ExpectFailWithError("select YQL::\"\"", noChildError);
+ ExpectFailWithError("select YQL::@@ \t@@", noChildError);
+
+ // Two top-level lambdas in one inline YQL snippet yield a root with two children.
+ auto twoLambdas = "$lambda = YQL::@@(lambda '(x)(+ x x)) (lambda '(y)(+ y y))@@;\n"
+ "select ListMap([1, 2, 3], $lambda);";
+ ExpectFailWithError(twoLambdas,
+ "<main>:1:11: Error: Failed to parse YQL: expecting AST root node with single child, but got 2\n");
+ }
+
+ Y_UNIT_TEST(ErrEmptyColumnName) {
+ // The same two queries are checked with the default lexer and with the ANSI lexer.
+ auto doubleQuoted = "select * without \"\" from plato.Input";
+ auto backQuoted = "select * without `` from plato.Input;";
+ auto emptyNameError = "<main>:1:18: Error: Empty column name is not allowed\n";
+
+ // Default lexer: "" is a string literal, `` is an empty identifier.
+ ExpectFailWithError(doubleQuoted,
+ "<main>:1:18: Error: String literal can not be used here\n");
+
+ ExpectFailWithError(backQuoted, emptyNameError);
+
+ // ANSI lexer: both spellings are reported as an empty column name.
+ ExpectFailWithErrorForAnsiLexer(doubleQuoted, emptyNameError);
+
+ ExpectFailWithErrorForAnsiLexer(backQuoted, emptyNameError);
+ }
+
+ Y_UNIT_TEST(ErrOnNonZeroArgumentsForTableRows) {
+ // TableRows() is called with an argument inside PROCESS ... USING.
+ auto query = "$udf=\"\";process plato.Input using $udf(TableRows(k))";
+ auto expected = "<main>:1:40: Error: TableRows requires exactly 0 arguments\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrGroupByWithAggregationFunctionAndDistinctExpr) {
+ // GROUP BY key is itself an aggregation over a DISTINCT expression.
+ auto query = "select * from plato.Input group by count(distinct key|key)";
+ auto expected = "<main>:1:36: Error: Unable to GROUP BY aggregated values\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ // FIXME: test is compiled out; check if we can get the old behaviour back (the "Expected type after COLUMNS/SCHEMA" errors asserted below)
+#if 0
+ Y_UNIT_TEST(ErrWithSchemaWithColumnsWithoutType) {
+ ExpectFailWithError("select * from plato.Input with COLUMNs",
+ "<main>:1:32: Error: Expected type after COLUMNS\n"
+ "<main>:1:32: Error: Failed to parse table hints\n");
+
+ ExpectFailWithError("select * from plato.Input with scheMa",
+ "<main>:1:32: Error: Expected type after SCHEMA\n"
+ "<main>:1:32: Error: Failed to parse table hints\n");
+ }
+#endif
+
+ Y_UNIT_TEST(ErrCollectPreaggregatedInListLiteralWithoutFrom) {
+ // A DISTINCT aggregate nested in a list literal, with no FROM clause at all.
+ auto query = "SELECT([VARIANCE(DISTINCT[])])";
+ auto expected = "<main>:1:1: Error: Column references are not allowed without FROM\n"
+ "<main>:1:9: Error: Column reference '_yql_preagg_Variance0'\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrGroupBySmartParenAsTuple) {
+ // A trailing comma inside the parenthesised GROUP BY list must be reported.
+ auto query = "SELECT * FROM plato.Input GROUP BY (k, v,)";
+ auto expected = "<main>:1:41: Error: Unexpected trailing comma in grouping elements list\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(HandleNestedSmartParensInGroupBy) {
+ // Nested parentheses in GROUP BY must produce a regular translation error.
+ auto query = "SELECT * FROM plato.Input GROUP BY (+() as k)";
+ auto expected = "<main>:1:37: Error: Unable to GROUP BY constant expression\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrRenameWithAddColumn) {
+ // RENAME TO comes first and is followed by another ALTER TABLE action.
+ auto query = "USE plato; ALTER TABLE table RENAME TO moved, ADD COLUMN addc uint64";
+ auto expected = "<main>:1:40: Error: RENAME TO can not be used together with another table action\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrAddColumnAndRename) {
+ // RENAME TO comes after another ALTER TABLE action.
+ // FIXME: fix positions in ALTER TABLE
+ auto query = "USE plato; ALTER TABLE table ADD COLUMN addc uint64, RENAME TO moved";
+ auto expected = "<main>:1:46: Error: RENAME TO can not be used together with another table action\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(InvalidUuidValue) {
+ // Literal that uses letters where the dashes are expected.
+ auto noDashes = "SELECT Uuid('123e4567ae89ba12d3aa456a426614174ab0')";
+ ExpectFailWithError(noDashes,
+ "<main>:1:8: Error: Invalid value \"123e4567ae89ba12d3aa456a426614174ab0\" for type Uuid\n");
+ // Literal whose first dash is missing.
+ auto misplacedDash = "SELECT Uuid('123e4567ae89b-12d3-a456-426614174000')";
+ ExpectFailWithError(misplacedDash,
+ "<main>:1:8: Error: Invalid value \"123e4567ae89b-12d3-a456-426614174000\" for type Uuid\n");
+ }
+
+ Y_UNIT_TEST(WindowFunctionWithoutOver) {
+ // The same error is expected with and without GROUP BY.
+ auto missingOver = "<main>:1:8: Error: Can't use window function LastValue without window specification (OVER keyword is missing)\n";
+ ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input", missingOver);
+ ExpectFailWithError("SELECT LAST_VALUE(foo) FROM plato.Input GROUP BY key", missingOver);
+ }
+
+ Y_UNIT_TEST(CreateAlterUserWithoutCluster) {
+ // CREATE USER and ALTER USER without a preceding USE statement.
+ auto createUser = "\n CREATE USER user ENCRYPTED PASSWORD 'foobar';";
+ ExpectFailWithError(createUser, "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n");
+ auto alterUser = "ALTER USER CURRENT_USER RENAME TO $foo;";
+ ExpectFailWithError(alterUser, "<main>:1:1: Error: USE statement is missing - no default cluster is selected\n");
+ }
+
+ Y_UNIT_TEST(ModifyPermissionsWithoutCluster) {
+ // GRANT and REVOKE without a preceding USE statement report the same error.
+ auto noCluster = "<main>:2:2: Error: USE statement is missing - no default cluster is selected\n";
+ ExpectFailWithError("\n GRANT CONNECT ON `/Root` TO user;", noCluster);
+ ExpectFailWithError("\n REVOKE MANAGE ON `/Root` FROM user;", noCluster);
+ }
+
+ Y_UNIT_TEST(ReservedRoleNames) {
+ // System role names are rejected as the user to create and as a rename target.
+ auto createReserved = "USE plato; CREATE USER current_User;";
+ ExpectFailWithError(createReserved, "<main>:1:24: Error: System role CURRENT_USER can not be used here\n");
+ auto renameToReserved = "USE plato; ALTER USER current_User RENAME TO Current_role";
+ ExpectFailWithError(renameToReserved, "<main>:1:46: Error: System role CURRENT_ROLE can not be used here\n");
+ // The same name is accepted in a DROP GROUP list.
+ auto dropped = SqlToYql("USE plato; DROP GROUP IF EXISTS a, b, c, current_User;");
+ UNIT_ASSERT(dropped.IsOk());
+ }
+
+ Y_UNIT_TEST(DisableClassicDivisionWithError) {
+ // Division by a literal with ClassicDivision disabled and an undefined left operand.
+ auto query = "pragma ClassicDivision = 'false'; select $foo / 30;";
+ auto expected = "<main>:1:42: Error: Unknown name: $foo\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(AggregationOfAgrregatedDistinctExpr) {
+ // An aggregate applied to another aggregate over a DISTINCT expression.
+ auto query = "select sum(sum(distinct x + 1)) from plato.Input";
+ auto expected = "<main>:1:12: Error: Aggregation of aggregated values is forbidden\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(WarnForUnusedSqlHint) {
+ // Translation succeeds and the only issue is the warning about the `foo` hint.
+ auto query = "select * from plato.Input1 as a join /*+ merge() */ plato.Input2 as b using(key);\n"
+ "select --+ foo(bar)\n"
+ " 1;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:23: Warning: Hint foo will not be used, code: 4534\n");
+ }
+
+ Y_UNIT_TEST(WarnForDeprecatedSchema) {
+ // Map the `s3bucket` cluster to the S3 provider, then use the `type AS column` schema form.
+ NSQLTranslation::TTranslationSettings s3Settings;
+ s3Settings.ClusterMapping["s3bucket"] = NYql::S3ProviderName;
+ auto query = "select * from s3bucket.`foo` with schema (col1 Int32, String as col2, Int64 as col3);";
+ auto parsed = SqlToYql(query, s3Settings);
+ UNIT_ASSERT(parsed.Root);
+ // The query still translates; only the deprecation warning has to be present among the issues.
+ UNIT_ASSERT_STRING_CONTAINS(parsed.Issues.ToString(), "Warning: Deprecated syntax for positional schema: please use 'column type' instead of 'type AS column', code: 4535\n");
+ }
+
+ Y_UNIT_TEST(ErrorOnColumnNameInMaxByLimit) {
+ // A column (`subkey`) is passed as the extra AggregationFactory argument for MAX_BY.
+ auto query = "SELECT AGGREGATE_BY(AsTuple(value, key), AggregationFactory(\"MAX_BY\", subkey)) FROM plato.Input;";
+ auto expected = "<main>:1:42: Error: Source does not allow column references\n"
+ "<main>:1:71: Error: Column reference 'subkey'\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(ErrorInLibraryWithTopLevelNamedSubquery) {
+ // A top-level named subquery that is never referenced is accepted in LIBRARY mode.
+ TString unusedTopLevel = "$unused = select max(key) from plato.Input;\n"
+ "\n"
+ "define subquery $foo() as\n"
+ " $count = select count(*) from plato.Input;\n"
+ " select * from plato.Input limit $count / 2;\n"
+ "end define;\n"
+ "export $foo;\n";
+ auto accepted = SqlToYqlWithMode(unusedTopLevel, NSQLTranslation::ESqlMode::LIBRARY);
+ UNIT_ASSERT(accepted.IsOk());
+
+ // The same subquery referenced from inside the exported subquery is rejected.
+ TString usedTopLevel = "$count = select count(*) from plato.Input;\n"
+ "\n"
+ "define subquery $foo() as\n"
+ " select * from plato.Input limit $count / 2;\n"
+ "end define;\n"
+ "export $foo;\n";
+ auto rejected = SqlToYqlWithMode(usedTopLevel, NSQLTranslation::ESqlMode::LIBRARY);
+ UNIT_ASSERT(!rejected.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(rejected), "<main>:1:17: Error: Named subquery can not be used as a top level statement in libraries\n");
+ }
+
+ Y_UNIT_TEST(SessionStartAndSessionStateShouldSurviveSessionWindowArgsError) {
+ // The broken $init lambda must be the only reported problem: the expected output
+ // contains no additional issues for SessionStart()/SessionState().
+ TString sessionQuery = R"(
+ $init = ($_row) -> (min(1, 2)); -- error: aggregation func min() can not be used here
+ $calculate = ($_row, $_state) -> (1);
+ $update = ($_row, $_state) -> (2);
+ SELECT
+ SessionStart() over w as session_start,
+ SessionState() over w as session_state,
+ FROM plato.Input as t
+ WINDOW w AS (
+ PARTITION BY user, SessionWindow(ts + 1, $init, $update, $calculate)
+ )
+ )";
+ auto expected = "<main>:2:33: Error: Aggregation function Min requires exactly 1 argument(s), given: 2\n";
+ ExpectFailWithError(sessionQuery, expected);
+ }
+}
+
+// Translates `req`, requires it to succeed and requires the issue text to be exactly
+// one "Symbol <symbol> is not used" warning (code 4527) located at row:col.
+void CheckUnused(const TString& req, const TString& symbol, unsigned row, unsigned col) {
+ auto parsed = SqlToYql(req);
+ UNIT_ASSERT(parsed.Root);
+
+ const TString expectedWarning = TStringBuilder()
+ << "<main>:" << row << ":" << col
+ << ": Warning: Symbol " << symbol << " is not used, code: 4527\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expectedWarning);
+}
+
+// Checks of the "Symbol ... is not used" warning (code 4527) for each way a named node can be introduced.
+Y_UNIT_TEST_SUITE(WarnUnused) {
+ Y_UNIT_TEST(ActionOrSubquery) {
+ // Shared tail: the definition of $a followed by a statement that never calls it.
+ TString tail = " $a()\n"
+ "as select 1;\n"
+ "end define;\n"
+ "\n"
+ "select 1;";
+ CheckUnused("define action\n" + tail, "$a", 2, 3);
+ CheckUnused("define subquery\n" + tail, "$a", 2, 3);
+ }
+
+ Y_UNIT_TEST(Import) {
+ // Imported symbol under its own name.
+ TString query = "import lib1 symbols\n"
+ " $sqr;\n"
+ "select 1;";
+ CheckUnused(query, "$sqr", 2, 3);
+
+ // Imported symbol under an alias: the alias is what gets reported.
+ query = "import lib1 symbols\n"
+ " $sqr as\n"
+ " $sq;\n"
+ "select 1;";
+ CheckUnused(query, "$sq", 3, 5);
+ }
+
+ Y_UNIT_TEST(NamedNodeStatement) {
+ // The same name bound twice in one tuple assignment.
+ TString query = " $a, $a = AsTuple(1, 2);\n"
+ "select $a;";
+ CheckUnused(query, "$a", 1, 2);
+ // Second tuple element is never read.
+ query = "$a, $b = AsTuple(1, 2);\n"
+ "select $a;";
+ CheckUnused(query, "$b", 1, 6);
+ // Name is rebound before its first value is read.
+ CheckUnused(" $a = 1; $a = 2; select $a;", "$a", 1, 2);
+ }
+
+ Y_UNIT_TEST(Declare) {
+ TString query = "declare $a as String;select 1;";
+ CheckUnused(query, "$a", 1, 9);
+ }
+
+ Y_UNIT_TEST(ActionParams) {
+ // $y is an action parameter that the body never reads.
+ TString query = "define action $a($x, $y) as\n"
+ " select $x;\n"
+ "end define;\n"
+ "\n"
+ "do $a(1,2);";
+ CheckUnused(query, "$y", 1, 22);
+ }
+
+ Y_UNIT_TEST(SubqueryParams) {
+ // $x is a subquery parameter that the body never reads.
+ TString query = "use plato;\n"
+ "define subquery $q($name, $x) as\n"
+ " select * from $name;\n"
+ "end define;\n"
+ "\n"
+ "select * from $q(\"Input\", 1);";
+ CheckUnused(query, "$x", 2, 27);
+ }
+
+ Y_UNIT_TEST(For) {
+ // The loop variable $i is never read by the loop body.
+ TString query = "define action $a() as\n"
+ " select 1;\n"
+ "end define;\n"
+ "\n"
+ "for $i in ListFromRange(1, 10)\n"
+ "do $a();";
+ CheckUnused(query, "$i", 5, 5);
+ }
+
+ Y_UNIT_TEST(LambdaParams) {
+ // $y is a lambda parameter that the body never reads.
+ TString query = "$lambda = ($x, $y) -> ($x);\n"
+ "select $lambda(1, 2);";
+ CheckUnused(query, "$y", 1, 16);
+ }
+
+ Y_UNIT_TEST(InsideLambdaBody) {
+ // Local binding that is never read.
+ TString query = "$lambda = () -> {\n"
+ " $x = 1; return 1;\n"
+ "};\n"
+ "select $lambda();";
+ CheckUnused(query, "$x", 2, 3);
+ // Local binding that is rebound before it is read.
+ query = "$lambda = () -> {\n"
+ " $x = 1; $x = 2; return $x;\n"
+ "};\n"
+ "select $lambda();";
+ CheckUnused(query, "$x", 2, 3);
+ }
+
+ Y_UNIT_TEST(InsideAction) {
+ // Local binding that is never read.
+ TString query = "define action $a() as\n"
+ " $x = 1; select 1;\n"
+ "end define;\n"
+ "\n"
+ "do $a();";
+ CheckUnused(query, "$x", 2, 3);
+ // Local binding that is rebound before it is read.
+ query = "define action $a() as\n"
+ " $x = 1; $x = 2; select $x;\n"
+ "end define;\n"
+ "\n"
+ "do $a();";
+ CheckUnused(query, "$x", 2, 3);
+ }
+
+ Y_UNIT_TEST(NoWarnOnNestedActions) {
+ // Warning 4527 is escalated to an error, so IsOk() proves it was not raised
+ // for $b, which is read only from the nested action.
+ auto query = "pragma warning(\"error\", \"4527\");\n"
+ "define action $action($b) as\n"
+ " define action $aaa() as\n"
+ " select $b;\n"
+ " end define;\n"
+ " do $aaa();\n"
+ "end define;\n"
+ "\n"
+ "do $action(1);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(NoWarnForUsageAfterSubquery) {
+ // Warning 4527 is escalated to an error; $a is read only after the subquery definition.
+ auto query = "use plato;\n"
+ "pragma warning(\"error\", \"4527\");\n"
+ "\n"
+ "$a = 1;\n"
+ "\n"
+ "define subquery $q($table) as\n"
+ " select * from $table;\n"
+ "end define;\n"
+ "\n"
+ "select * from $q(\"Input\");\n"
+ "select $a;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+}
+
+// Rules for the anonymous name `$_`: it may be bound, but not referenced, declared, imported or exported.
+Y_UNIT_TEST_SUITE(AnonymousNames) {
+ Y_UNIT_TEST(ReferenceAnonymousVariableIsForbidden) {
+ // Plain spelling.
+ auto query = "$_ = 1; select $_;";
+
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:16: Error: Unable to reference anonymous name $_\n");
+
+ // Back-quoted spelling is reported as the same name.
+ query = "$`_` = 1; select $`_`;";
+ parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:18: Error: Unable to reference anonymous name $_\n");
+ }
+
+ Y_UNIT_TEST(Declare) {
+ auto parsed = SqlToYql("declare $_ as String;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:9: Error: Can not use anonymous name '$_' in DECLARE statement\n");
+ }
+
+ Y_UNIT_TEST(ActionSubquery) {
+ // As an ACTION name.
+ auto query = "define action $_() as select 1; end define;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:15: Error: Can not use anonymous name '$_' as ACTION name\n");
+
+ // As a SUBQUERY name.
+ query = "define subquery $_() as select 1; end define;";
+ parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:17: Error: Can not use anonymous name '$_' as SUBQUERY name\n");
+ }
+
+ Y_UNIT_TEST(Import) {
+ auto parsed = SqlToYql("import lib symbols $sqr as $_;");
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:28: Error: Can not import anonymous name $_\n");
+ }
+
+ Y_UNIT_TEST(Export) {
+ // This statement is translated in LIBRARY mode.
+ auto query = "export $_;";
+ auto parsed = SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::LIBRARY);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:8: Error: Can not export anonymous name $_\n");
+ }
+
+ // The remaining tests escalate warning 4527 to an error, so IsOk() also proves
+ // that anonymous names do not trigger it.
+
+ Y_UNIT_TEST(AnonymousInActionArgs) {
+ auto query = "pragma warning(\"error\", \"4527\");\n"
+ "define action $a($_, $y, $_) as\n"
+ " select $y;\n"
+ "end define;\n"
+ "\n"
+ "do $a(1,2,3);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AnonymousInSubqueryArgs) {
+ auto query = "use plato;\n"
+ "pragma warning(\"error\", \"4527\");\n"
+ "define subquery $q($_, $y, $_) as\n"
+ " select * from $y;\n"
+ "end define;\n"
+ "\n"
+ "select * from $q(1,\"Input\",3);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AnonymousInLambdaArgs) {
+ auto query = "pragma warning(\"error\", \"4527\");\n"
+ "$lambda = ($_, $x, $_) -> ($x);\n"
+ "select $lambda(1,2,3);";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AnonymousInFor) {
+ auto query = "pragma warning(\"error\", \"4527\");\n"
+ "evaluate for $_ in ListFromRange(1, 10) do begin select 1; end do;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(Assignment) {
+ auto query = "pragma warning(\"error\", \"4527\");\n"
+ "$_ = 1;\n"
+ "$_, $x, $_ = AsTuple(1,2,3);\n"
+ "select $x;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonValue) {
+ Y_UNIT_TEST(JsonValueArgumentCount) {
+ NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json));");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: mismatched input ')' expecting ','\n");
+ }
+
+ Y_UNIT_TEST(JsonValueJsonPathMustBeLiteralString) {
+ NYql::TAstParseResult res = SqlToYql("$jsonPath = \"strict $.key\"; select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), $jsonPath);");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:79: Error: mismatched input '$' expecting STRING_VALUE\n");
+ }
+
+ Y_UNIT_TEST(JsonValueTranslation) {
+ NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\");");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\""));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json"));
+ };
+
+ TWordCountHive elementStat({"JsonValue"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["JsonValue"]);
+ }
+
+ Y_UNIT_TEST(JsonValueReturningSection) {
+ for (const auto& typeName : {"Bool", "Int64", "Double", "String"}) {
+ NYql::TAstParseResult res = SqlToYql(
+ TStringBuilder() << "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" RETURNING " << typeName << ");"
+ );
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'\"strict $.key\""));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("SafeCast"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("DataType 'Json"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(TStringBuilder() << "DataType '" << typeName));
+ };
+
+ TWordCountHive elementStat({typeName});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat[typeName] > 0);
+ }
+ }
+
+ Y_UNIT_TEST(JsonValueInvalidReturningType) {
+ NYql::TAstParseResult res = SqlToYql("select JSON_VALUE(CAST(@@{'key': 1238}@@ as Json), 'strict $.key' RETURNING invalid);");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Unknown simple type 'invalid'\n");
+ }
+
+ Y_UNIT_TEST(JsonValueAndReturningInExpressions) {
+ NYql::TAstParseResult res = SqlToYql(
+ "USE plato\n;"
+ "$json_value = \"some string\";\n"
+ "SELECT $json_value;\n"
+ "SELECT 1 as json_value;\n"
+ "SELECT $json_value as json_value;\n"
+ "$returning = \"another string\";\n"
+ "SELECT $returning;\n"
+ "SELECT 1 as returning;\n"
+ "SELECT $returning as returning;\n"
+ );
+
+ UNIT_ASSERT(res.Root);
+ }
+
+ Y_UNIT_TEST(JsonValueValidCaseHandlers) {
+ const TVector<std::pair<TString, TString>> testCases = {
+ {"", "'DefaultValue (Null)"},
+ {"NULL", "'DefaultValue (Null)"},
+ {"ERROR", "'Error (Null)"},
+ {"DEFAULT 123", "'DefaultValue (Int32 '\"123\")"},
+ };
+
+ for (const auto& onEmpty : testCases) {
+ for (const auto& onError : testCases) {
+ TStringBuilder query;
+ query << "$json = CAST(@@{\"key\": 1238}@@ as Json);\n"
+ << "SELECT JSON_VALUE($json, \"strict $.key\"";
+ if (!onEmpty.first.empty()) {
+ query << " " << onEmpty.first << " ON EMPTY";
+ }
+ if (!onError.first.empty()) {
+ query << " " << onError.first << " ON ERROR";
+ }
+ query << ");\n";
+
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(onEmpty.second + " " + onError.second));
+ };
+
+ TWordCountHive elementStat({"JsonValue"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonValue"] > 0);
+ }
+ }
+ }
+
+ Y_UNIT_TEST(JsonValueTooManyCaseHandlers) {
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON ERROR NULL ON EMPTY);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: Only 1 ON EMPTY and/or 1 ON ERROR clause is expected\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueTooManyOnEmpty) {
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON EMPTY NULL ON EMPTY);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: Only 1 ON EMPTY clause is expected\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueTooManyOnError) {
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON ERROR);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: Only 1 ON ERROR clause is expected\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueOnEmptyAfterOnError) {
+ NYql::TAstParseResult res = SqlToYql(
+ "select JSON_VALUE(CAST(@@{\"key\": 1238}@@ as Json), \"strict $.key\" NULL ON ERROR NULL ON EMPTY);\n"
+ );
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(
+ Err2Str(res),
+ "<main>:1:52: Error: ON EMPTY clause must be before ON ERROR clause\n"
+ );
+ }
+
+ Y_UNIT_TEST(JsonValueNullInput) {
+ NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_VALUE(NULL, "strict $.key");)");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))"));
+ };
+
+ TWordCountHive elementStat({"JsonValue"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonValue"] > 0);
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonExists) {
+ Y_UNIT_TEST(JsonExistsValidHandlers) {
+ const TVector<std::pair<TString, TString>> testCases = {
+ {"", "(Just (Bool '\"false\"))"},
+ {"TRUE ON ERROR", "(Just (Bool '\"true\"))"},
+ {"FALSE ON ERROR", "(Just (Bool '\"false\"))"},
+ {"UNKNOWN ON ERROR", "(Nothing (OptionalType (DataType 'Bool)))"},
+ // NOTE: in this case we expect arguments of JsonExists callable to end immediately
+ // after variables. This parenthesis at the end of the expression is left on purpose
+ {"ERROR ON ERROR", "(Utf8 '\"strict $.key\") (JsonVariables))"},
+ };
+
+ for (const auto& item : testCases) {
+ NYql::TAstParseResult res = SqlToYql(
+ TStringBuilder() << R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT JSON_EXISTS($json, "strict $.key" )" << item.first << ");\n"
+ );
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(item.second));
+ };
+
+ TWordCountHive elementStat({"JsonExists"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonExists"] > 0);
+ }
+ }
+
+ Y_UNIT_TEST(JsonExistsInvalidHandler) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ $default = false;
+ SELECT JSON_EXISTS($json, "strict $.key" $default ON ERROR);
+ )");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:53: Error: mismatched input '$' expecting {')', ERROR, FALSE, TRUE, UNKNOWN}\n");
+ }
+
+ Y_UNIT_TEST(JsonExistsNullInput) {
+ NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_EXISTS(NULL, "strict $.key");)");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))"));
+ };
+
+ TWordCountHive elementStat({"JsonExists"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonExists"] > 0);
+ }
+}
+
+Y_UNIT_TEST_SUITE(JsonQuery) {
+ Y_UNIT_TEST(JsonQueryValidHandlers) {
+ using TTestSuite = const TVector<std::pair<TString, TString>>;
+ TTestSuite wrapCases = {
+ {"", "'NoWrap"},
+ {"WITHOUT WRAPPER", "'NoWrap"},
+ {"WITHOUT ARRAY WRAPPER", "'NoWrap"},
+ {"WITH WRAPPER", "'Wrap"},
+ {"WITH ARRAY WRAPPER", "'Wrap"},
+ {"WITH UNCONDITIONAL WRAPPER", "'Wrap"},
+ {"WITH UNCONDITIONAL ARRAY WRAPPER", "'Wrap"},
+ {"WITH CONDITIONAL WRAPPER", "'ConditionalWrap"},
+ {"WITH CONDITIONAL ARRAY WRAPPER", "'ConditionalWrap"},
+ };
+ TTestSuite handlerCases = {
+ {"", "'Null"},
+ {"ERROR", "'Error"},
+ {"NULL", "'Null"},
+ {"EMPTY ARRAY", "'EmptyArray"},
+ {"EMPTY OBJECT", "'EmptyObject"},
+ };
+
+ for (const auto& wrap : wrapCases) {
+ for (const auto& onError : handlerCases) {
+ for (const auto& onEmpty : handlerCases) {
+ TStringBuilder query;
+ query << R"($json = CAST(@@{"key": [123]}@@ as Json);
+ SELECT JSON_QUERY($json, "strict $.key" )" << wrap.first;
+ if (!onEmpty.first.empty()) {
+ if (wrap.first.StartsWith("WITH ")) {
+ continue;
+ }
+ query << " " << onEmpty.first << " ON EMPTY";
+ }
+ if (!onError.first.empty()) {
+ query << " " << onError.first << " ON ERROR";
+ }
+ query << ");\n";
+
+ NYql::TAstParseResult res = SqlToYql(query);
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ const TString args = TStringBuilder() << wrap.second << " " << onEmpty.second << " " << onError.second;
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(args));
+ };
+
+ Cout << wrap.first << " " << onEmpty.first << " " << onError.first << Endl;
+
+ TWordCountHive elementStat({"JsonQuery"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonQuery"] > 0);
+ }
+ }
+ }
+ }
+
+ Y_UNIT_TEST(JsonQueryOnEmptyWithWrapper) {
+ NYql::TAstParseResult res = SqlToYql(R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT JSON_QUERY($json, "strict $" WITH ARRAY WRAPPER EMPTY ARRAY ON EMPTY);
+ )");
+
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:3:38: Error: ON EMPTY is prohibited because WRAPPER clause is specified\n");
+ }
+
+ Y_UNIT_TEST(JsonQueryNullInput) {
+ NYql::TAstParseResult res = SqlToYql(R"(SELECT JSON_QUERY(NULL, "strict $.key");)");
+
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+ Y_UNUSED(word);
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("(Nothing (OptionalType (DataType 'Json)))"));
+ };
+
+ TWordCountHive elementStat({"JsonQuery"});
+ VerifyProgram(res, elementStat, verifyLine);
+ UNIT_ASSERT(elementStat["JsonQuery"] > 0);
+ }
+}
+
+// PASSING clause shared by JSON_EXISTS, JSON_VALUE and JSON_QUERY.
+Y_UNIT_TEST_SUITE(JsonPassing) {
+ Y_UNIT_TEST(SupportedVariableTypes) {
+ const TVector<TString> jsonFunctions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"};
+
+ for (const auto& function : jsonFunctions) {
+ // %s is replaced with the name of the function under test.
+ const auto query = Sprintf(R"(
+ pragma CompactNamedExprs;
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT %s(
+ $json,
+ "strict $.key"
+ PASSING
+ "string" as var1,
+ 1.234 as var2,
+ CAST(1 as Int64) as var3,
+ true as var4,
+ $json as var5
+ ))",
+ function.data()
+ );
+ auto parsed = SqlToYql(query);
+
+ UNIT_ASSERT(parsed.Root);
+
+ // Every passed variable must appear as a (name, value) pair in the program.
+ TVerifyLineFunc checkLine = [&](const TString& /*word*/, const TString& line) {
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"string")))"), "Cannot find `var1`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var2" (Double '"1.234")))"), "Cannot find `var2`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (SafeCast (Int32 '"1") (DataType 'Int64))))"), "Cannot find `var3`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var4" (Bool '"true")))"), "Cannot find `var4`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var5" namedexprnode0))"), "Cannot find `var5`");
+ };
+
+ TWordCountHive counters({"JsonVariables"});
+ VerifyProgram(parsed, counters, checkLine);
+ UNIT_ASSERT(counters["JsonVariables"] > 0);
+ }
+ }
+
+ Y_UNIT_TEST(ValidVariableNames) {
+ const TVector<TString> jsonFunctions = {"JSON_EXISTS", "JSON_VALUE", "JSON_QUERY"};
+
+ for (const auto& function : jsonFunctions) {
+ // Variable names are given as a bare identifier, a double-quoted string,
+ // a back-quoted identifier and a mixed-case identifier.
+ const auto query = Sprintf(R"(
+ $json = CAST(@@{"key": 1238}@@ as Json);
+ SELECT %s(
+ $json,
+ "strict $.key"
+ PASSING
+ "one" as var1,
+ "two" as "VaR2",
+ "three" as `var3`,
+ "four" as VaR4
+ ))",
+ function.data()
+ );
+ auto parsed = SqlToYql(query);
+
+ UNIT_ASSERT(parsed.Root);
+
+ // The expected names keep the letter case used in the query.
+ TVerifyLineFunc checkLine = [&](const TString& /*word*/, const TString& line) {
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var1" (String '"one")))"), "Cannot find `var1`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR2" (String '"two")))"), "Cannot find `VaR2`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"var3" (String '"three")))"), "Cannot find `var3`");
+ UNIT_ASSERT_VALUES_UNEQUAL_C(TString::npos, line.find(R"('('"VaR4" (String '"four")))"), "Cannot find `VaR4`");
+ };
+
+ TWordCountHive counters({"JsonVariables"});
+ VerifyProgram(parsed, counters, checkLine);
+ UNIT_ASSERT(counters["JsonVariables"] > 0);
+ }
+ }
+}
+
+// Deprecation notice for the Json UDF module.
+Y_UNIT_TEST_SUITE(MigrationToJsonApi) {
+ Y_UNIT_TEST(WarningOnDeprecatedJsonUdf) {
+ // Calling Json::Parse still translates, but produces warning 4506.
+ auto parsed = SqlToYql(R"(
+ $json = CAST(@@{"key": 1234}@@ as Json);
+ SELECT Json::Parse($json);
+ )");
+
+ UNIT_ASSERT(parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:3:26: Warning: Json UDF is deprecated. Please use JSON API instead, code: 4506\n");
+ }
+}
+
+// ANSI lexer mode: how it is switched on and how quoting and nested comments are treated in it.
+Y_UNIT_TEST_SUITE(AnsiIdentsNegative) {
+ Y_UNIT_TEST(EnableAnsiLexerFromRequestSpecialComments) {
+ // The `--!ansi_lexer` comment is among the leading comments of the request.
+ auto query = "\n"
+ "\t --!ansi_lexer \n"
+ "-- Some comment\n"
+ "-- another comment\n"
+ "pragma SimpleColumns;\n"
+ "\n"
+ "select 1, '''' as empty;";
+
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ Y_UNIT_TEST(AnsiLexerShouldNotBeEnabledHere) {
+ // Here `--!ansi_lexer` sits inside a string literal.
+ auto query = "$str = '\n"
+ "--!ansi_lexer\n"
+ "--!syntax_v1\n"
+ "';\n"
+ "\n"
+ "select 1, $str, \"\" as empty;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+ }
+
+ Y_UNIT_TEST(DoubleQuotesInDictsTuplesOrLists) {
+ // With the ANSI lexer a double-quoted token is reported as a column reference.
+
+ // Dict literal.
+ auto query = "$d = { 'a': 1, \"b\": 2, 'c': 3,};";
+
+ auto parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:16: Error: Column reference \"b\" is not allowed in current scope\n");
+
+ // Tuple literal.
+ query = "$t = (1, 2, \"a\");";
+
+ parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:13: Error: Column reference \"a\" is not allowed in current scope\n");
+
+ // List literal.
+ query = "$l = ['a', 'b', \"c\"];";
+
+ parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:17: Error: Column reference \"c\" is not allowed in current scope\n");
+ }
+
+ Y_UNIT_TEST(MultilineComments) {
+ // Each request is translated twice: with the default lexer and with the ANSI lexer.
+
+ // A nested comment that is opened and closed inside the outer one.
+ auto query = "/*/**/ select 1;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ // A comment opener that follows `--` inside a multiline comment.
+ query = "/*\n"
+ "--/*\n"
+ "*/ select 1;";
+ parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.Root);
+ parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ // Two openers and two closers: rejected by the default lexer, accepted by the ANSI lexer.
+ query = "/*\n"
+ "/*\n"
+ "--*/\n"
+ "*/ select 1;";
+ parsed = SqlToYql(query);
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:4:0: Error: mismatched input '*' expecting {';', '(', '$', ALTER, ANALYZE, BACKUP, COMMIT, CREATE, DECLARE, DEFINE, DELETE, DISCARD, DO, DROP, EVALUATE, EXPLAIN, EXPORT, FOR, FROM, GRANT, IF, IMPORT, INSERT, PARALLEL, PRAGMA, PROCESS, REDUCE, REPLACE, RESTORE, REVOKE, ROLLBACK, SELECT, UPDATE, UPSERT, USE, VALUES}\n");
+ parsed = SqlToYqlWithAnsiLexer(query);
+ UNIT_ASSERT(parsed.Root);
+ }
+}
+
+ // Column aliases written without AS, with the AnsiOptionalAs pragma switched on and off.
+ Y_UNIT_TEST_SUITE(AnsiOptionalAs) {
+ Y_UNIT_TEST(OptionalAsInProjection) {
+ // With the pragma on, `a b` is accepted.
+ auto accepted = SqlToYql("PRAGMA AnsiOptionalAs; SELECT a b, c FROM plato.Input;");
+ UNIT_ASSERT(accepted.IsOk());
+
+ // With the pragma explicitly disabled, the same projection is an error.
+ const TString rejectedQuery =
+ "PRAGMA DisableAnsiOptionalAs;\n"
+ "SELECT a b, c FROM plato.Input;";
+ ExpectFailWithError(
+ rejectedQuery,
+ "<main>:2:10: Error: Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility\n");
+ }
+
+ Y_UNIT_TEST(OptionalAsWithKeywords) {
+ // The aliases used here are `type`, `data` and `source`.
+ auto accepted = SqlToYql("PRAGMA AnsiOptionalAs; SELECT a type, b data, c source FROM plato.Input;");
+ UNIT_ASSERT(accepted.IsOk());
+ }
+ }
+
+ // Negative cases for SessionWindow / SessionStart / SessionState placement and arguments.
+ // SessionStart and SessionState share their error texts, so paired cases loop over both names.
+ Y_UNIT_TEST_SUITE(SessionWindowNegative) {
+ Y_UNIT_TEST(SessionWindowWithoutSource) {
+ const TString query = "SELECT 1 + SessionWindow(ts, 32);";
+ ExpectFailWithError(query, "<main>:1:12: Error: SessionWindow requires data source\n");
+ }
+
+ Y_UNIT_TEST(SessionWindowInProjection) {
+ const TString query = "SELECT 1 + SessionWindow(ts, 32) from plato.Input;";
+ ExpectFailWithError(query, "<main>:1:12: Error: SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression\n");
+ }
+
+ Y_UNIT_TEST(SessionWindowWithNonConstSecondArg) {
+ const TString query =
+ "SELECT key, session_start FROM plato.Input\n"
+ "GROUP BY SessionWindow(ts, 32 + subkey) as session_start, key;";
+ const TString expected =
+ "<main>:2:10: Error: Source does not allow column references\n"
+ "<main>:2:33: Error: Column reference 'subkey'\n";
+ ExpectFailWithError(query, expected);
+ }
+
+ Y_UNIT_TEST(SessionWindowWithWrongNumberOfArgs) {
+ // Zero and three arguments are both rejected with the same message.
+ const TString expected = "<main>:1:36: Error: SessionWindow requires either two or four arguments\n";
+ ExpectFailWithError("SELECT * FROM plato.Input GROUP BY SessionWindow()", expected);
+ ExpectFailWithError("SELECT * FROM plato.Input GROUP BY SessionWindow(key, subkey, 100)", expected);
+ }
+
+ Y_UNIT_TEST(DuplicateSessionWindow) {
+ // Two SessionWindow entries in GROUP BY.
+ const TString groupByQuery =
+ "SELECT\n"
+ " *\n"
+ "FROM plato.Input\n"
+ "GROUP BY\n"
+ " SessionWindow(ts, 10),\n"
+ " user,\n"
+ " SessionWindow(ts, 20)\n"
+ ";";
+ ExpectFailWithError(
+ groupByQuery,
+ "<main>:7:5: Error: Duplicate session window specification:\n"
+ "<main>:5:5: Error: Previous session window is declared here\n");
+
+ // Two SessionWindow entries in a window's PARTITION BY.
+ const TString partitionByQuery =
+ "SELECT\n"
+ " MIN(key) over w\n"
+ "FROM plato.Input\n"
+ "WINDOW w AS (\n"
+ " PARTITION BY SessionWindow(ts, 10), user,\n"
+ " SessionWindow(ts, 20)\n"
+ ");";
+ ExpectFailWithError(
+ partitionByQuery,
+ "<main>:6:5: Error: Duplicate session window specification:\n"
+ "<main>:5:18: Error: Previous session window is declared here\n");
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithoutSource) {
+ for (const TString func : {"SessionStart", "SessionState"}) {
+ ExpectFailWithError(
+ "SELECT 1 + " + func + "();",
+ "<main>:1:12: Error: " + func + " requires data source\n");
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithoutGroupByOrWindow) {
+ for (const TString func : {"SessionStart", "SessionState"}) {
+ ExpectFailWithError(
+ "SELECT 1 + " + func + "() from plato.Input;",
+ "<main>:1:12: Error: " + func + " can not be used without aggregation by SessionWindow\n");
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithGroupByWithoutSession) {
+ for (const TString func : {"SessionStart", "SessionState"}) {
+ ExpectFailWithError(
+ "SELECT 1 + " + func + "() from plato.Input group by user;",
+ "<main>:1:12: Error: " + func + " can not be used here: SessionWindow specification is missing in GROUP BY\n");
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithoutOverWithWindowWithoutSession) {
+ for (const TString func : {"SessionStart", "SessionState"}) {
+ ExpectFailWithError(
+ "SELECT 1 + " + func + "(), MIN(key) over w from plato.Input window w as ()",
+ "<main>:1:12: Error: " + func + " can not be used without aggregation by SessionWindow. Maybe you forgot to add OVER `window_name`?\n");
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithWindowWithoutSession) {
+ for (const TString func : {"SessionStart", "SessionState"}) {
+ ExpectFailWithError(
+ "SELECT 1 + " + func + "() over w, MIN(key) over w from plato.Input window w as ()",
+ "<main>:1:12: Error: " + func + " can not be used with window w: SessionWindow specification is missing in PARTITION BY\n");
+ }
+ }
+
+ Y_UNIT_TEST(SessionStartStateWithSessionedWindow) {
+ // The query text deliberately keeps its trailing space.
+ for (const TString func : {"SessionStart", "SessionState"}) {
+ ExpectFailWithError(
+ "SELECT 1 + " + func + "(), MIN(key) over w from plato.Input group by key window w as (partition by SessionWindow(ts, 1)) ",
+ "<main>:1:12: Error: " + func + " can not be used here: SessionWindow specification is missing in GROUP BY. Maybe you forgot to add OVER `window_name`?\n");
+ }
+ }
+
+ Y_UNIT_TEST(AggregationBySessionStateIsNotSupportedYet) {
+ const TString query = "SELECT SOME(1 + SessionState()), key from plato.Input group by key, SessionWindow(ts, 1);";
+ ExpectFailWithError(query, "<main>:1:17: Error: SessionState with GROUP BY is not supported yet\n");
+ }
+
+ Y_UNIT_TEST(SessionWindowInRtmr) {
+ // Both queries are translated with the RTMR provider name.
+ {
+ auto parsed = SqlToYql(
+ "SELECT * FROM plato.Input GROUP BY SessionWindow(ts, 10);",
+ 10, TString(NYql::RtmrProviderName));
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:1:54: Error: Streaming group by query must have a hopping window specification.\n");
+ }
+
+ {
+ auto parsed = SqlToYql(R"(
+ SELECT key, SUM(value) AS value FROM plato.Input
+ GROUP BY key, HOP(subkey, "PT10S", "PT30S", "PT20S"), SessionWindow(ts, 10);
+ )", 10, TString(NYql::RtmrProviderName));
+
+ UNIT_ASSERT(!parsed.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), "<main>:2:13: Error: SessionWindow is unsupported for streaming sources\n");
+ }
+ }
+ }
+
+ // Translation of MakeLibraPreprocessor calls for various `$settings` structs.
+ Y_UNIT_TEST_SUITE(LibraSqlSugar) {
+ // Appends a fixed two-UDF PROCESS ... UNION ALL query to the given `$settings` definition
+ // and translates the result.
+ auto makeResult = [](TStringBuf settings) {
+ TStringBuilder query;
+ query << settings;
+ query << "\n$udf1 = MyLibra::MakeLibraPreprocessor($settings);";
+ query << "\n$udf2 = CustomLibra::MakeLibraPreprocessor($settings);";
+ query << "\nPROCESS plato.Input USING $udf1(TableRow())";
+ query << "\nUNION ALL";
+ query << "\nPROCESS plato.Input USING $udf2(TableRow());";
+ return SqlToYql(query);
+ };
+
+ Y_UNIT_TEST(EmptySettings) {
+ auto parsed = makeResult(R"(
+ $settings = AsStruct();
+ )");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(OnlyEntities) {
+ auto parsed = makeResult(R"(
+ $settings = AsStruct(
+ AsList("A", "B", "C") AS Entities
+ );
+ )");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(EntitiesWithStrategy) {
+ auto parsed = makeResult(R"(
+ $settings = AsStruct(
+ AsList("A", "B", "C") AS Entities,
+ "blacklist" AS EntitiesStrategy
+ );
+ )");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(AllSettings) {
+ auto parsed = makeResult(R"(
+ $settings = AsStruct(
+ AsList("A", "B", "C") AS Entities,
+ "whitelist" AS EntitiesStrategy,
+ "path" AS BlockstatDict,
+ false AS ParseWithFat,
+ "map" AS Mode
+ );
+ )");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(BadStrategy) {
+ auto parsed = makeResult(R"(
+ $settings = AsStruct("bad" AS EntitiesStrategy);
+ )");
+ const TString errors = Err2Str(parsed);
+ UNIT_ASSERT_STRING_CONTAINS(
+ errors,
+ "Error: MakeLibraPreprocessor got invalid entities strategy: expected 'whitelist' or 'blacklist'"
+ );
+ }
+
+ Y_UNIT_TEST(BadEntities) {
+ auto parsed = makeResult(R"(
+ $settings = AsStruct(AsList("A", 1) AS Entities);
+ )");
+ const TString errors = Err2Str(parsed);
+ UNIT_ASSERT_STRING_CONTAINS(errors, "Error: MakeLibraPreprocessor entity must be string literal");
+ }
+ }
+
+Y_UNIT_TEST_SUITE(TrailingQuestionsNegative) {
+ Y_UNIT_TEST(Basic) {
+ ExpectFailWithError("SELECT 1?;", "<main>:1:9: Error: Unexpected token '?' at the end of expression\n");
+ ExpectFailWithError("SELECT 1? + 1;", "<main>:1:10: Error: mismatched input '+' expecting {<EOF>, ';'}\n");
+ ExpectFailWithError("SELECT 1 + 1??? < 2", "<main>:1:13: Error: Unexpected token '?' at the end of expression\n");
+ ExpectFailWithError("SELECT 1? > 2? > 3?",
+ "<main>:1:11: Error: Unexpected token '?' at the end of expression\n"
+ "<main>:1:16: Error: Unexpected token '?' at the end of expression\n"
+ "<main>:1:21: Error: Unexpected token '?' at the end of expression\n");
+ }
+
+ Y_UNIT_TEST(SmartParen) {
+ ExpectFailWithError("$x = 1; SELECT (Int32?, $x?)", "<main>:1:27: Error: Unexpected token '?' at the end of expression\n");
+ ExpectFailWithError("SELECT (Int32, foo?)", "<main>:1:19: Error: Unexpected token '?' at the end of expression\n");
+ }
+
+ Y_UNIT_TEST(LambdaOptArgs) {
+ ExpectFailWithError("$l = ($x, $y?, $z??, $t?) -> ($x);", "<main>:1:18: Error: Expecting at most one '?' token here (for optional lambda parameters), but got 2\n");
+ }
+}
+
+ // Queries that use type names (int32, text, uuid, string, String) as ordinary identifiers
+ // or arguments under PRAGMA FlexibleTypes; each must translate successfully.
+ Y_UNIT_TEST_SUITE(FlexibleTypes) {
+ Y_UNIT_TEST(AssumeOrderByType) {
+ auto parsed = SqlToYql("PRAGMA FlexibleTypes; SELECT 1 AS int32 ASSUME ORDER BY int32");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(GroupingSets) {
+ auto parsed = SqlToYql("PRAGMA FlexibleTypes; SELECT COUNT(*) AS cnt, text, uuid FROM plato.Input GROUP BY GROUPING SETS((uuid), (uuid, text));");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(WeakField) {
+ auto parsed = SqlToYql("PRAGMA FlexibleTypes; SELECT WeakField(text, string) as text FROM plato.Input");
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(Aggregation1) {
+ const TString query =
+ "PRAGMA FlexibleTypes;\n"
+ "$foo = ($x, $const, $type) -> ($x || $const || FormatType($type));\n"
+ "SELECT $foo(SOME(x), 'aaa', String) FROM plato.Input GROUP BY y;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+
+ Y_UNIT_TEST(Aggregation2) {
+ const TString query =
+ "PRAGMA FlexibleTypes;\n"
+ "SELECT 1 + String + MAX(key) FROM plato.Input;";
+ auto parsed = SqlToYql(query);
+ UNIT_ASSERT(parsed.IsOk());
+ }
+ }
+
+ // Named expressions declared through TTranslationSettings::DeclaredNamedExprs
+ // rather than by a DECLARE statement in the query text.
+ Y_UNIT_TEST_SUITE(ExternalDeclares) {
+ Y_UNIT_TEST(BasicUsage) {
+ NSQLTranslation::TTranslationSettings translationSettings;
+ translationSettings.DeclaredNamedExprs["foo"] = "String";
+ auto parsed = SqlToYqlWithSettings("select $foo;", translationSettings);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ TVerifyLineFunc checkDeclare = [](const TString& word, const TString& line) {
+ if (word != "declare") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"__((declare "$foo" (DataType 'String)))__");
+ };
+
+ TWordCountHive declareStat = {{TString("declare"), 0}};
+ VerifyProgram(parsed, declareStat, checkDeclare);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, declareStat["declare"]);
+ }
+
+ Y_UNIT_TEST(DeclareOverrides) {
+ // Settings say String, the query text says Int32; the emitted declare must carry Int32.
+ NSQLTranslation::TTranslationSettings translationSettings;
+ translationSettings.DeclaredNamedExprs["foo"] = "String";
+ auto parsed = SqlToYqlWithSettings("declare $foo as Int32; select $foo;", translationSettings);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ TVerifyLineFunc checkDeclare = [](const TString& word, const TString& line) {
+ if (word != "declare") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"__((declare "$foo" (DataType 'Int32)))__");
+ };
+
+ TWordCountHive declareStat = {{TString("declare"), 0}};
+ VerifyProgram(parsed, declareStat, checkDeclare);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, declareStat["declare"]);
+ }
+
+ Y_UNIT_TEST(UnusedDeclareDoesNotProduceWarning) {
+ // The query never references $foo; a declare is still expected and no issues may be reported.
+ NSQLTranslation::TTranslationSettings translationSettings;
+ translationSettings.DeclaredNamedExprs["foo"] = "String";
+ auto parsed = SqlToYqlWithSettings("select 1;", translationSettings);
+ UNIT_ASSERT(parsed.IsOk());
+ UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+ TVerifyLineFunc checkDeclare = [](const TString& word, const TString& line) {
+ if (word != "declare") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"__((declare "$foo" (DataType 'String)))__");
+ };
+
+ TWordCountHive declareStat = {{TString("declare"), 0}};
+ VerifyProgram(parsed, declareStat, checkDeclare);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, declareStat["declare"]);
+ }
+
+ Y_UNIT_TEST(DeclaresWithInvalidTypesFails) {
+ NSQLTranslation::TTranslationSettings translationSettings;
+ translationSettings.DeclaredNamedExprs["foo"] = "List<BadType>";
+ auto parsed = SqlToYqlWithSettings("select 1;", translationSettings);
+ UNIT_ASSERT(!parsed.Root);
+
+ const TString expected =
+ "<main>:0:5: Error: Unknown type: 'BadType'\n"
+ "<main>: Error: Failed to parse type for externally declared name 'foo'\n";
+ UNIT_ASSERT_NO_DIFF(Err2Str(parsed), expected);
+ }
+ }
+
+Y_UNIT_TEST_SUITE(ExternalDataSource) {
+ // CREATE EXTERNAL DATA SOURCE with AUTH_METHOD="NONE": exactly one Write node,
+ // carrying the three given options and the createObject mode.
+ Y_UNIT_TEST(CreateExternalDataSourceWithAuthNone) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ // Print the collected issues when translation fails, as the ALTER test in this suite does.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE EXTERNAL DATA SOURCE with AUTH_METHOD="SERVICE_ACCOUNT" plus its id and secret name.
+ Y_UNIT_TEST(CreateExternalDataSourceWithAuthServiceAccount) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"SERVICE_ACCOUNT") '('"location" '"my-bucket") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE EXTERNAL DATA SOURCE with AUTH_METHOD="BASIC" plus login and password secret name.
+ Y_UNIT_TEST(CreateExternalDataSourceWithBasic) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="BASIC",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"source_type" '"PostgreSQL"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE EXTERNAL DATA SOURCE with AUTH_METHOD="MDB_BASIC": service-account and login/password options together.
+ Y_UNIT_TEST(CreateExternalDataSourceWithMdbBasic) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"MDB_BASIC") '('"location" '"protocol://host:port/") '('"login" '"admin") '('"password_secret_name" '"secret_name") '('"service_account_id" '"sa") '('"service_account_secret_name" '"sa_secret_name") '('"source_type" '"PostgreSQL"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE EXTERNAL DATA SOURCE with AUTH_METHOD="AWS": both key secret names and the region.
+ Y_UNIT_TEST(CreateExternalDataSourceWithAws) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name",
+ AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name",
+ AWS_REGION="ru-central-1"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"AWS") '('"aws_access_key_id_secret_name" '"secred_id_name") '('"aws_region" '"ru-central-1") '('"aws_secret_access_key_secret_name" '"secret_key_name") '('"location" '"protocol://host:port/") '('"source_type" '"PostgreSQL"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE EXTERNAL DATA SOURCE with AUTH_METHOD="TOKEN" and a token secret name.
+ Y_UNIT_TEST(CreateExternalDataSourceWithToken) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="YT",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="TOKEN",
+ TOKEN_SECRET_NAME="token_name"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"TOKEN") '('"location" '"protocol://host:port/") '('"source_type" '"YT") '('"token_secret_name" '"token_name"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // With `pragma TablePathPrefix='/aba'` the Write node must reference /aba/MyDataSource.
+ Y_UNIT_TEST(CreateExternalDataSourceWithTablePrefix) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // IF NOT EXISTS must switch the Write mode to createObjectIfNotExists.
+ Y_UNIT_TEST(CreateExternalDataSourceIfNotExists) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE IF NOT EXISTS MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObjectIfNotExists");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // ALTER EXTERNAL DATA SOURCE mixing SET (...), SET <name> <value> and RESET (...):
+ // the Write node must carry the alterObject mode, the set features and the reset list.
+ Y_UNIT_TEST(AlterExternalDataSource) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER EXTERNAL DATA SOURCE MyDataSource
+ SET (SOURCE_TYPE = "ObjectStorage", Login = "Admin"),
+ SET Location "bucket",
+ RESET (Auth_Method, Service_Account_Id, Service_Account_Secret_Name);
+ )sql");
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"location" '"bucket") '('"login" '"Admin") '('"source_type" '"ObjectStorage"))))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"auth_method" '"service_account_id" '"service_account_secret_name")))#");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE OR REPLACE must switch the Write mode to createObjectOrReplace.
+ Y_UNIT_TEST(CreateExternalDataSourceOrReplace) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE OR REPLACE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"auth_method" '"NONE") '('"location" '"my-bucket") '('"source_type" '"ObjectStorage"))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "createObjectOrReplace");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE OR REPLACE on ordinary tables (a plain one and one with STORE = COLUMN) is rejected
+ // with the same message at the same position.
+ Y_UNIT_TEST(CreateOrReplaceForUnsupportedTableTypesShouldFail) {
+ const TString expectedError = "<main>:3:23: Error: OR REPLACE feature is supported only for EXTERNAL DATA SOURCE and EXTERNAL TABLE\n";
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE OR REPLACE TABLE t (a int32 not null, primary key(a, a));
+ )sql", expectedError);
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE OR REPLACE TABLE t (
+ Key Uint64,
+ Value1 String,
+ PRIMARY KEY (Key)
+ )
+ WITH (
+ STORE = COLUMN,
+ AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10
+ );
+ )sql", expectedError);
+ }
+
+ Y_UNIT_TEST(CreateExternalDataSourceWithBadArguments) { // Each case omits or corrupts one WITH option and pins the exact positioned error.
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource;
+ )sql" , "<main>:3:56: Error: mismatched input ';' expecting WITH\n"); // no WITH clause at all
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE"
+ );
+ )sql" , "<main>:5:33: Error: SOURCE_TYPE requires key\n"); // SOURCE_TYPE omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket"
+ );
+ )sql" , "<main>:5:30: Error: AUTH_METHOD requires key\n"); // AUTH_METHOD omitted
+
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="NONE1"
+ );
+ )sql" , "<main>:6:33: Error: Unknown AUTH_METHOD = NONE1\n"); // unrecognised AUTH_METHOD value
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT"
+ );
+ )sql" , "<main>:6:33: Error: SERVICE_ACCOUNT_ID requires key\n"); // SERVICE_ACCOUNT: both id and secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT",
+ SERVICE_ACCOUNT_ID="s1"
+ );
+ )sql" , "<main>:7:40: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n"); // SERVICE_ACCOUNT: secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="ObjectStorage",
+ LOCATION="my-bucket",
+ AUTH_METHOD="SERVICE_ACCOUNT",
+ SERVICE_ACCOUNT_SECRET_NAME="s1"
+ );
+ )sql" , "<main>:7:49: Error: SERVICE_ACCOUNT_ID requires key\n"); // SERVICE_ACCOUNT: id omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="BASIC",
+ LOGIN="admin"
+ );
+ )sql" , "<main>:7:27: Error: PASSWORD_SECRET_NAME requires key\n"); // BASIC: password secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="BASIC",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:7:42: Error: LOGIN requires key\n"); // BASIC: login omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_ID requires key\n"); // MDB_BASIC: service account id omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ LOGIN="admin",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:9:42: Error: SERVICE_ACCOUNT_SECRET_NAME requires key\n"); // MDB_BASIC: service account secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ PASSWORD_SECRET_NAME="secret_name"
+ );
+ )sql" , "<main>:9:42: Error: LOGIN requires key\n"); // MDB_BASIC: login omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="MDB_BASIC",
+ SERVICE_ACCOUNT_ID="sa",
+ SERVICE_ACCOUNT_SECRET_NAME="sa_secret_name",
+ LOGIN="admin"
+ );
+ )sql" , "<main>:9:27: Error: PASSWORD_SECRET_NAME requires key\n"); // MDB_BASIC: password secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name",
+ AWS_REGION="ru-central-1"
+ );
+ )sql" , "<main>:8:32: Error: AWS_ACCESS_KEY_ID_SECRET_NAME requires key\n"); // AWS: access key id secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name",
+ AWS_REGION="ru-central-1"
+ );
+ )sql" , "<main>:8:32: Error: AWS_SECRET_ACCESS_KEY_SECRET_NAME requires key\n"); // AWS: secret access key secret name omitted
+
+ ExpectFailWithError(R"sql(
+ USE plato;
+ CREATE EXTERNAL DATA SOURCE MyDataSource WITH (
+ SOURCE_TYPE="PostgreSQL",
+ LOCATION="protocol://host:port/",
+ AUTH_METHOD="AWS",
+ AWS_SECRET_ACCESS_KEY_SECRET_NAME="secret_key_name",
+ AWS_ACCESS_KEY_ID_SECRET_NAME="secred_id_name"
+ );
+ )sql" , "<main>:8:51: Error: AWS_REGION requires key\n"); // AWS: region omitted
+ }
+
+ // Plain DROP EXTERNAL DATA SOURCE: one Write node in dropObject mode with no 'features entry.
+ // (The names of this test and the next were previously attached to each other's bodies.)
+ Y_UNIT_TEST(DropExternalDataSource) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL DATA SOURCE MyDataSource;
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features"));
+ UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // Same DROP under `pragma TablePathPrefix='/aba'`: the Write node must reference /aba/MyDataSource.
+ Y_UNIT_TEST(DropExternalDataSourceWithTablePrefix) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ DROP EXTERNAL DATA SOURCE MyDataSource;
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyDataSource");
+ UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("'features"));
+ UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // IF EXISTS must switch the Write mode to dropObjectIfExists.
+ Y_UNIT_TEST(DropExternalDataSourceIfExists) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL DATA SOURCE IF EXISTS MyDataSource;
+ )sql");
+ // Print the collected issues when translation fails.
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, "MyDataSource");
+ UNIT_ASSERT_STRING_CONTAINS(line, "dropObjectIfExists");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+}
+
+Y_UNIT_TEST_SUITE(ExternalTable) {
+ // CREATE EXTERNAL TABLE: one Write node with the data source path, location,
+ // the externalTable table type and a tablescheme marker.
+ Y_UNIT_TEST(CreateExternalTable) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql");
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#");
+ UNIT_ASSERT_STRING_CONTAINS(line, "tablescheme");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // With `pragma TablePathPrefix='/aba'` both the data source and the table are expected under /aba.
+ Y_UNIT_TEST(CreateExternalTableWithTablePrefix) {
+ auto parsed = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql");
+ UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+ TVerifyLineFunc checkWrite = [](const TString& word, const TString& line) {
+ if (word != "Write") {
+ return;
+ }
+ UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mydatasource");
+ UNIT_ASSERT_STRING_CONTAINS(line, "/aba/mytable");
+ UNIT_ASSERT_STRING_CONTAINS(line, "tablescheme");
+ };
+
+ TWordCountHive writeStat = {{TString("Write"), 0}};
+ VerifyProgram(parsed, writeStat, checkWrite);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, writeStat["Write"]);
+ }
+
+ // CREATE EXTERNAL TABLE with a format, back-quoted projection.* options and a storage
+ // location template; only successful translation is checked.
+ // NOTE(review): `PARTITONED_BY` looks like a misspelling of PARTITIONED_BY - confirm whether
+ // the spelling is intentional before touching the query text.
+ Y_UNIT_TEST(CreateExternalTableObjectStorage) {
+ NYql::TAstParseResult parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int,
+ year Int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*",
+ FORMAT="json_as_string",
+ `projection.enabled`="true",
+ `projection.year.type`="integer",
+ `projection.year.min`="2010",
+ `projection.year.max`="2022",
+ `projection.year.interval`="1",
+ `projection.month.type`="integer",
+ `projection.month.min`="1",
+ `projection.month.max`="12",
+ `projection.month.interval`="1",
+ `projection.month.digits`="2",
+ `storage.location.template`="${year}/${month}",
+ PARTITONED_BY = "[year, month]"
+ );
+ )sql");
+ UNIT_ASSERT_C(parsed.IsOk(), parsed.Issues.ToString());
+ }
+
+    // CREATE EXTERNAL TABLE IF NOT EXISTS: the Write line must carry the external
+    // table settings and mention create_if_not_exists.
+    Y_UNIT_TEST(CreateExternalTableIfNotExists) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE IF NOT EXISTS mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "create_if_not_exists");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // CREATE OR REPLACE EXTERNAL TABLE: the Write line must carry the external
+    // table settings and mention create_or_replace.
+    Y_UNIT_TEST(CreateExternalTableOrReplace) {
+        auto parsed = SqlToYql(R"(
+ USE plato;
+ CREATE OR REPLACE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('data_source_path (String '"/Root/mydatasource")) '('location (String '"/folder1/*")))) '('tableType 'externalTable)))))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "create_or_replace");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // ALTER EXTERNAL TABLE combining ADD COLUMN with RESET (LOCATION): both
+    // actions must appear on the single Write line, in 'alter mode.
+    Y_UNIT_TEST(AlterExternalTableAddColumn) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER EXTERNAL TABLE mytable
+ ADD COLUMN my_column int32,
+ RESET (LOCATION);
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('addColumns '('('"my_column" (AsOptionalType (DataType 'Int32))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location)))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // ALTER EXTERNAL TABLE combining DROP COLUMN with both SET forms: the settings
+    // from "SET (...)" and "SET x 'y'" must be merged into one setTableSettings list.
+    Y_UNIT_TEST(AlterExternalTableDropColumn) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER EXTERNAL TABLE mytable
+ DROP COLUMN my_column,
+ SET (Location = "abc", Other_Prop = "42"),
+ SET x 'y';
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToOneLineString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('actions '('('dropColumns '('"my_column")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('setTableSettings '('('location (String '"abc")) '('Other_Prop (String '"42")) '('x (String '"y")))))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('tableType 'externalTable))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // Each malformed CREATE EXTERNAL TABLE statement below must be rejected with
+    // exactly the diagnostic paired with it. Cases run in declaration order.
+    Y_UNIT_TEST(CreateExternalTableWithBadArguments) {
+        const std::pair<TString, TString> badStatements[] = {
+            // No column list at all.
+            {R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable;
+ )sql", "<main>:3:45: Error: mismatched input ';' expecting '('\n"},
+            // No WITH clause.
+            {R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ );
+ )sql", "<main>:4:23: Error: DATA_SOURCE requires key\n"},
+            // DATA_SOURCE without LOCATION.
+            {R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource"
+ );
+ )sql", "<main>:6:33: Error: LOCATION requires key\n"},
+            // LOCATION without DATA_SOURCE.
+            {R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int
+ ) WITH (
+ LOCATION="/folder1/*"
+ );
+ )sql", "<main>:6:30: Error: DATA_SOURCE requires key\n"},
+            // PRIMARY KEY declared on an external table.
+            {R"sql(
+ USE plato;
+ CREATE EXTERNAL TABLE mytable (
+ a int,
+ PRIMARY KEY(a)
+ ) WITH (
+ DATA_SOURCE="/Root/mydatasource",
+ LOCATION="/folder1/*"
+ );
+ )sql", "<main>:8:30: Error: PRIMARY KEY is not supported for external table\n"},
+        };
+
+        for (const auto& [query, expectedError] : badStatements) {
+            ExpectFailWithError(query, expectedError);
+        }
+    }
+
+    // DROP EXTERNAL TABLE: the generated Write line must not mention a table scheme.
+    Y_UNIT_TEST(DropExternalTable) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL TABLE MyExternalTable;
+ )sql");
+        UNIT_ASSERT(parsed.Root);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT(!line.Contains("tablescheme"));
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0}};
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // DROP EXTERNAL TABLE under TablePathPrefix: the prefixed path must appear on
+    // the Write line and no 'tablescheme entry may be present.
+    Y_UNIT_TEST(DropExternalTableWithTablePrefix) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ pragma TablePathPrefix='/aba';
+ DROP EXTERNAL TABLE MyExternalTable;
+ )sql");
+        UNIT_ASSERT(parsed.Root);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, "/aba/MyExternalTable");
+            UNIT_ASSERT(!line.Contains("'tablescheme"));
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0}};
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // DROP EXTERNAL TABLE IF EXISTS: the Write line must mention drop_if_exists
+    // and must not mention a table scheme.
+    Y_UNIT_TEST(DropExternalTableIfExists) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP EXTERNAL TABLE IF EXISTS MyExternalTable;
+ )sql");
+        UNIT_ASSERT(parsed.Root);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT(!line.Contains("tablescheme"));
+            UNIT_ASSERT_STRING_CONTAINS(line, "drop_if_exists");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0}};
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+}
+
+// Translation-level checks for CREATE / ALTER / DROP TOPIC statements.
+Y_UNIT_TEST_SUITE(TopicsDDL) {
+    // Prepends "use plato;" to `query`, translates it with "kikimr" passed as the
+    // provider argument, and asserts that translation succeeds (expectOk == true)
+    // or fails (expectOk == false).
+    void TestQuery(const TString& query, bool expectOk = true) {
+        TStringBuilder fullQuery;
+        fullQuery << "use plato;" << Endl << query;
+
+        auto res = SqlToYql(fullQuery, 10, "kikimr");
+        if (!expectOk) {
+            UNIT_ASSERT(!res.IsOk());
+            return;
+        }
+        UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString());
+    }
+
+    // Bare topic, backtick-quoted name, and a WITH clause with mixed value kinds.
+    Y_UNIT_TEST(CreateTopicSimple) {
+        TestQuery(R"(
+ CREATE TOPIC topic1;
+ )");
+        TestQuery(R"(
+ CREATE TOPIC `cluster1.topic1`;
+ )");
+        TestQuery(R"(
+ CREATE TOPIC topic1 WITH (metering_mode = "str_value", partition_count_limit = 123, retention_period = Interval('PT1H'));
+ )");
+    }
+
+    // Consumers declared inline, with and without per-consumer settings.
+    Y_UNIT_TEST(CreateTopicConsumer) {
+        TestQuery(R"(
+ CREATE TOPIC topic1 (CONSUMER cons1);
+ )");
+        TestQuery(R"(
+ CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false));
+ )");
+        TestQuery(R"(
+ CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons2 WITH (important = false)) WITH (supported_codecs = "1,2,3");
+ )");
+    }
+
+    // SET and RESET of topic-level settings, alone and combined.
+    Y_UNIT_TEST(AlterTopicSimple) {
+        TestQuery(R"(
+ ALTER TOPIC topic1 SET (retention_period = Interval('PT1H'));
+ )");
+        TestQuery(R"(
+ ALTER TOPIC topic1 SET (retention_storage_mb = 3, partition_count_limit = 50);
+ )");
+        TestQuery(R"(
+ ALTER TOPIC topic1 RESET (supported_codecs, retention_period);
+ )");
+        TestQuery(R"(
+ ALTER TOPIC topic1 RESET (partition_write_speed_bytes_per_second),
+ SET (partition_write_burst_bytes = 11111, min_active_partitions = 1);
+ )");
+    }
+
+    // One ALTER TOPIC mixing consumer add/alter/drop with topic-level SET/RESET.
+    Y_UNIT_TEST(AlterTopicConsumer) {
+        TestQuery(R"(
+ ALTER TOPIC topic1 ADD CONSUMER consumer1,
+ ADD CONSUMER consumer2 WITH (important = false, supported_codecs = "RAW"),
+ ALTER CONSUMER consumer3 SET (important = false, read_from = 1),
+ ALTER CONSUMER consumer3 RESET (supported_codecs),
+ DROP CONSUMER consumer4,
+ SET (partition_count_limit = 11, retention_period = Interval('PT1H')),
+ RESET(metering_mode)
+ )");
+    }
+
+    Y_UNIT_TEST(DropTopic) {
+        TestQuery(R"(
+ DROP TOPIC topic1;
+ )");
+    }
+
+    // Every statement here is expected to be rejected by the translator.
+    Y_UNIT_TEST(TopicBadRequests) {
+        // Empty consumer list.
+        TestQuery(R"(
+ CREATE TOPIC topic1();
+ )", false);
+        // SET used with CREATE.
+        TestQuery(R"(
+ CREATE TOPIC topic1 SET setting1 = value1;
+ )", false);
+        // SET / RESET without a parenthesized list.
+        TestQuery(R"(
+ ALTER TOPIC topic1 SET setting1 value1;
+ )", false);
+        TestQuery(R"(
+ ALTER TOPIC topic1 RESET setting1;
+ )", false);
+
+        // DROP CONSUMER followed by a WITH clause.
+        TestQuery(R"(
+ ALTER TOPIC topic1 DROP CONSUMER consumer4 WITH (k1 = v1);
+ )", false);
+
+        // retention_period given as a plain number.
+        TestQuery(R"(
+ CREATE TOPIC topic1 WITH (retention_period = 123);
+ )", false);
+        // Same consumer name declared twice.
+        TestQuery(R"(
+ CREATE TOPIC topic1 (CONSUMER cons1, CONSUMER cons1 WITH (important = false));
+ )", false);
+        // Consumer option named bad_option.
+        TestQuery(R"(
+ CREATE TOPIC topic1 (CONSUMER cons1 WITH (bad_option = false));
+ )", false);
+        // ADD and ALTER of the same consumer in one statement.
+        TestQuery(R"(
+ ALTER TOPIC topic1 ADD CONSUMER cons1, ALTER CONSUMER cons1 RESET (important);
+ )", false);
+        // supported_codecs both SET and RESET on the same consumer.
+        TestQuery(R"(
+ ALTER TOPIC topic1 ADD CONSUMER consumer1,
+ ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1),
+ ALTER CONSUMER consumer3 RESET (supported_codecs);
+ )", false);
+        // read_from SET twice on the same consumer.
+        TestQuery(R"(
+ ALTER TOPIC topic1 ADD CONSUMER consumer1,
+ ALTER CONSUMER consumer3 SET (supported_codecs = "RAW", read_from = 1),
+ ALTER CONSUMER consumer3 SET (read_from = 2);
+ )", false);
+    }
+
+    // TablePathPrefix must be prepended to the topic path in the generated program.
+    Y_UNIT_TEST(TopicWithPrefix) {
+        auto parsed = SqlToYql(R"(
+ USE plato;
+ PRAGMA TablePathPrefix = '/database/path/to/tables';
+ ALTER TOPIC `my_table/my_feed` ADD CONSUMER `my_consumer`;
+ )");
+        UNIT_ASSERT(parsed.Root);
+
+        TWordCountHive wordCounts = {{TString("/database/path/to/tables/my_table/my_feed"), 0}, {"topic", 0}};
+        VerifyProgram(parsed, wordCounts);
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["topic"]);
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["/database/path/to/tables/my_table/my_feed"]);
+    }
+}
+
+// Checks for the BlockEngine pragma.
+Y_UNIT_TEST_SUITE(BlockEnginePragma) {
+    // Every accepted value must translate. Lines mentioning "BlockEngine" must hold
+    // the Configure! call with that value; one such line is expected for "auto"
+    // and "force", none for "disable".
+    Y_UNIT_TEST(Basic) {
+        const TVector<TString> modes = {"auto", "force", "disable"};
+        for (const auto& mode : modes) {
+            const auto query = TStringBuilder() << "pragma Blockengine='" << mode << "'; select 1;";
+            auto parsed = SqlToYql(query);
+            UNIT_ASSERT(parsed.Root);
+
+            const TString expectedCall = TStringBuilder() << R"(Configure! world (DataSource '"config") '"BlockEngine" '")" << mode << "\"";
+            TVerifyLineFunc checkLine = [&expectedCall](const TString&, const TString& line) {
+                UNIT_ASSERT_STRING_CONTAINS(line, expectedCall);
+            };
+
+            TWordCountHive wordCounts({"BlockEngine"});
+            VerifyProgram(parsed, wordCounts, checkLine);
+
+            const unsigned expectedHits = (mode == "disable") ? 0 : 1;
+            UNIT_ASSERT(wordCounts["BlockEngine"] == expectedHits);
+        }
+    }
+
+    // A value outside disable|auto|force must be rejected with a precise message.
+    Y_UNIT_TEST(UnknownSetting) {
+        ExpectFailWithError("use plato; pragma BlockEngine='foo';",
+            "<main>:1:31: Error: Expected `disable|auto|force' argument for: BlockEngine\n");
+    }
+}
+
+// Translation checks for CREATE VIEW / DROP VIEW, plus two tests on reading
+// tables (WITH schema(...) and the VIEW clause of SELECT).
+Y_UNIT_TEST_SUITE(TViewSyntaxTest) {
+    // Smallest accepted CREATE VIEW statement; only successful translation is checked.
+    Y_UNIT_TEST(CreateViewSimple) {
+        NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato;
+ CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
+ )"
+        );
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+    }
+
+    // CREATE VIEW over a table: the Write! line must carry the view path and the
+    // createObject mode.
+    Y_UNIT_TEST(CreateViewFromTable) {
+        constexpr const char* path = "/PathPrefix/TheView";
+        constexpr const char* query = R"(
+ SELECT * FROM SomeTable
+ )";
+
+        NYql::TAstParseResult res = SqlToYql(std::format(R"(
+ USE plato;
+ CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
+ )",
+                path,
+                query
+            )
+        );
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+            if (word == "Write!") {
+                UNIT_ASSERT_STRING_CONTAINS(line, path);
+                UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+            }
+        };
+        TWordCountHive elementStat = { {"Write!"} };
+        VerifyProgram(res, elementStat, verifyLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
+    }
+
+    // Intended to check that the stored query text equals the re-tokenized
+    // SELECT that was passed to CREATE VIEW.
+    Y_UNIT_TEST(CheckReconstructedQuery) {
+        constexpr const char* path = "/PathPrefix/TheView";
+        constexpr const char* query = R"(
+ SELECT * FROM FirstTable JOIN SecondTable ON FirstTable.key == SecondTable.key
+ )";
+
+        NYql::TAstParseResult res = SqlToYql(std::format(R"(
+ USE plato;
+ CREATE VIEW `{}` WITH (security_invoker = TRUE) AS {};
+ )",
+                path,
+                query
+            )
+        );
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+        TString reconstructedQuery = ToString(Tokenize(query));
+        // NOTE(review): the callback presumably fires only for keys present in
+        // elementStat, which holds just "Write!", so the "query_text" branch below
+        // looks unreachable. Confirm against VerifyProgram and add "query_text" to
+        // the counters if the reconstructed text is really meant to be verified.
+        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+            if (word == "query_text") {
+                UNIT_ASSERT_STRING_CONTAINS(line, reconstructedQuery);
+            }
+        };
+        TWordCountHive elementStat = { {"Write!"} };
+        VerifyProgram(res, elementStat, verifyLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
+    }
+
+    // DROP VIEW: the Write! line must carry the view path and the dropObject mode.
+    Y_UNIT_TEST(DropView) {
+        constexpr const char* path = "/PathPrefix/TheView";
+        NYql::TAstParseResult res = SqlToYql(std::format(R"(
+ USE plato;
+ DROP VIEW `{}`;
+ )",
+                path
+            )
+        );
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+        TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
+            if (word == "Write!") {
+                UNIT_ASSERT_STRING_CONTAINS(line, path);
+                UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+            }
+        };
+        TWordCountHive elementStat = { {"Write!"} };
+        VerifyProgram(res, elementStat, verifyLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
+    }
+
+    // CREATE VIEW with a relative name: TablePathPrefix must be prepended.
+    Y_UNIT_TEST(CreateViewWithTablePrefix) {
+        NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato;
+ PRAGMA TablePathPrefix='/PathPrefix';
+ CREATE VIEW TheView WITH (security_invoker = TRUE) AS SELECT 1;
+ )"
+        );
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+            if (word == "Write!") {
+                UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
+                UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+            }
+        };
+
+        TWordCountHive elementStat = { {"Write!"} };
+        VerifyProgram(res, elementStat, verifyLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
+    }
+
+    // DROP VIEW with a relative name: TablePathPrefix must be prepended.
+    Y_UNIT_TEST(DropViewWithTablePrefix) {
+        NYql::TAstParseResult res = SqlToYql(R"(
+ USE plato;
+ PRAGMA TablePathPrefix='/PathPrefix';
+ DROP VIEW TheView;
+ )"
+        );
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+        // The filter must use the same key as the counter below ("Write!"), as the
+        // sibling view tests do; filtering on "Write" left these checks unexecuted.
+        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+            if (word == "Write!") {
+                UNIT_ASSERT_STRING_CONTAINS(line, "/PathPrefix/TheView");
+                UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+            }
+        };
+
+        TWordCountHive elementStat = { {"Write!"} };
+        VerifyProgram(res, elementStat, verifyLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(elementStat["Write!"], 1);
+    }
+
+    // "WITH schema(...)" must produce a userschema struct in which a column
+    // without NOT NULL is wrapped in AsOptionalType.
+    Y_UNIT_TEST(YtAlternativeSchemaSyntax) {
+        NYql::TAstParseResult res = SqlToYql(R"(
+ SELECT * FROM plato.Input WITH schema(y Int32, x String not null);
+ )");
+        UNIT_ASSERT_C(res.Root, res.Issues.ToString());
+
+        TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+            if (word == "userschema") {
+                UNIT_ASSERT_VALUES_UNEQUAL(TString::npos,
+                    line.find(R"__('('('"userschema" (StructType '('"y" (AsOptionalType (DataType 'Int32))) '('"x" (DataType 'String))))))__"));
+            }
+        };
+
+        TWordCountHive elementStat = {{TString("userschema"), 0}};
+        VerifyProgram(res, elementStat, verifyLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["userschema"]);
+    }
+
+    // Selecting a table-qualified column from "Input VIEW ...": expects one
+    // projection item, one Read! and no SqlAccess node.
+    Y_UNIT_TEST(UseViewAndFullColumnId) {
+        NYql::TAstParseResult res = SqlToYql("USE plato; SELECT Input.x FROM Input VIEW uitzicht;");
+        UNIT_ASSERT(res.Root);
+
+        TWordCountHive elementStat = {{TString("SqlAccess"), 0}, {"SqlProjectItem", 0}, {"Read!", 0}};
+        VerifyProgram(res, elementStat);
+        UNIT_ASSERT_VALUES_EQUAL(0, elementStat["SqlAccess"]);
+        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["SqlProjectItem"]);
+        UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Read!"]);
+    }
+}
+
+// Checks for the CompactNamedExprs pragma and the (Disable)ValidateUnusedExprs pragmas.
+Y_UNIT_TEST_SUITE(CompactNamedExprs) {
+    // Source-dependent callables bound to a named expression outside of any
+    // source must be rejected with the callable-specific diagnostic.
+    Y_UNIT_TEST(SourceCallablesInWrongContext) {
+        // %s is replaced with the callable name by Sprintf below.
+        TString query = R"(
+ pragma CompactNamedExprs;
+ $foo = %s();
+ select $foo from plato.Input;
+ )";
+
+        THashMap<TString, TString> errs = {
+            {"TableRow", "<main>:3:20: Error: TableRow requires data source\n"},
+            {"JoinTableRow", "<main>:3:20: Error: JoinTableRow requires data source\n"},
+            {"TableRecordIndex", "<main>:3:20: Error: Unable to use function: TableRecord without source\n"},
+            {"TablePath", "<main>:3:20: Error: Unable to use function: TablePath without source\n"},
+            {"SystemMetadata", "<main>:3:20: Error: Unable to use function: SystemMetadata without source\n"},
+        };
+
+        for (TString callable : { "TableRow", "JoinTableRow", "TableRecordIndex", "TablePath", "SystemMetadata"}) {
+            auto req = Sprintf(query.c_str(), callable.c_str());
+            ExpectFailWithError(req, errs[callable]);
+        }
+    }
+
+    // With ValidateUnusedExprs, errors inside unused named expressions and unused
+    // subqueries must still be reported.
+    Y_UNIT_TEST(ValidateUnusedExprs) {
+        TString query = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma ValidateUnusedExprs;
+
+ $foo = count(1);
+ select 1;
+ )";
+        ExpectFailWithError(query, "<main>:6:20: Error: Aggregation is not allowed in this context\n");
+        query = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma ValidateUnusedExprs;
+
+ define subquery $x() as
+ select count(1, 2);
+ end define;
+ select 1;
+ )";
+        ExpectFailWithError(query, "<main>:7:24: Error: Aggregation function Count requires exactly 1 argument(s), given: 2\n");
+    }
+
+    // With DisableValidateUnusedExprs, the same two queries must translate
+    // successfully. The result is asserted explicitly: the bare IsOk() call used
+    // before discarded its value, so the test could never fail.
+    Y_UNIT_TEST(DisableValidateUnusedExprs) {
+        TString query = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma DisableValidateUnusedExprs;
+
+ $foo = count(1);
+ select 1;
+ )";
+        auto unusedExprResult = SqlToYql(query);
+        UNIT_ASSERT_C(unusedExprResult.IsOk(), unusedExprResult.Issues.ToString());
+        query = R"(
+ pragma warning("disable", "4527");
+ pragma CompactNamedExprs;
+ pragma DisableValidateUnusedExprs;
+
+ define subquery $x() as
+ select count(1, 2);
+ end define;
+ select 1;
+ )";
+        auto unusedSubqueryResult = SqlToYql(query);
+        UNIT_ASSERT_C(unusedSubqueryResult.IsOk(), unusedSubqueryResult.Issues.ToString());
+    }
+}
+
+// Translation checks for CREATE / ALTER / DROP RESOURCE POOL.
+Y_UNIT_TEST_SUITE(ResourcePool) {
+    // Settings must appear lower-cased on the Write line, with createObject mode.
+    Y_UNIT_TEST(CreateResourcePool) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL MyResourcePool WITH (
+ CONCURRENT_QUERY_LIMIT=20,
+ QUERY_CANCEL_AFTER_SECONDS=86400,
+ QUEUE_TYPE="FIFO"
+ );
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"concurrent_query_limit" (Int32 '"20")) '('"query_cancel_after_seconds" (Int32 '"86400")) '('"queue_type" '"FIFO"))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // Malformed statements paired with the exact diagnostic they must produce.
+    Y_UNIT_TEST(CreateResourcePoolWithBadArguments) {
+        const std::pair<TString, TString> badStatements[] = {
+            // WITH clause is missing.
+            {R"sql(
+ USE plato;
+ CREATE RESOURCE POOL MyResourcePool;
+ )sql", "<main>:3:51: Error: mismatched input ';' expecting WITH\n"},
+            // The same setting is given twice.
+            {R"sql(
+ USE plato;
+ CREATE RESOURCE POOL MyResourcePool WITH (
+ DUPLICATE_SETTING="first_value",
+ DUPLICATE_SETTING="second_value"
+ );
+ )sql", "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"},
+        };
+
+        for (const auto& [query, expectedError] : badStatements) {
+            ExpectFailWithError(query, expectedError);
+        }
+    }
+
+    // SET values go to 'features, RESET names go to 'resetFeatures, mode is alterObject.
+    Y_UNIT_TEST(AlterResourcePool) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER RESOURCE POOL MyResourcePool
+ SET (CONCURRENT_QUERY_LIMIT = 30, Weight = 5, QUEUE_TYPE = "UNORDERED"),
+ RESET (Query_Cancel_After_Seconds, Query_Count_Limit);
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"concurrent_query_limit" (Int32 '"30")) '('"queue_type" '"UNORDERED") '('"weight" (Int32 '"5")))))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"query_cancel_after_seconds" '"query_count_limit")))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // DROP must use dropObject mode and carry no 'features list.
+    Y_UNIT_TEST(DropResourcePool) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP RESOURCE POOL MyResourcePool;
+ )sql");
+        UNIT_ASSERT(parsed.Root);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT(!line.Contains("'features"));
+            UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0}};
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+}
+
+// Translation checks for CREATE / ALTER / DROP BACKUP COLLECTION.
+Y_UNIT_TEST_SUITE(BackupCollection) {
+    // Collection with settings only: name, lower-cased settings and 'create must
+    // all be present on the Write line.
+    Y_UNIT_TEST(CreateBackupCollection) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection WITH (
+ STORAGE="local",
+ TAG="test" -- for testing purposes, not a real thing
+ );
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'create");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // The DATABASE keyword must add a database-typed entry.
+    Y_UNIT_TEST(CreateBackupCollectionWithDatabase) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection DATABASE WITH (
+ STORAGE="local",
+ TAG="test" -- for testing purposes, not a real thing
+ );
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'create");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'('type 'database)");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // An explicit TABLE list must add one table-typed entry per listed path.
+    Y_UNIT_TEST(CreateBackupCollectionWithTables) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection (
+ TABLE someTable,
+ TABLE `prefix/anotherTable`
+ ) WITH (
+ STORAGE="local",
+ TAG="test" -- for testing purposes, not a real thing
+ );
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"storage" '"local") '('"tag" '"test"))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'create");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'table) '('path '"someTable")))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'table) '('path '"prefix/anotherTable")))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // Malformed statements paired with the exact diagnostic they must produce.
+    Y_UNIT_TEST(CreateBackupCollectionWithBadArguments) {
+        const std::pair<TString, TString> badStatements[] = {
+            // Neither an entry list, DATABASE, nor WITH follows the name.
+            {R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection;
+ )sql", "<main>:3:55: Error: mismatched input ';' expecting {'(', DATABASE, WITH}\n"},
+            // TABLE placed before the collection name.
+            {R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TABLE TestCollection;
+ )sql", "<main>:3:47: Error: mismatched input 'TestCollection' expecting {'(', DATABASE, WITH}\n"},
+            // DATABASE placed before the collection name.
+            {R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION DATABASE `test` TestCollection;
+ )sql", "<main>:3:50: Error: mismatched input '`test`' expecting {'(', DATABASE, WITH}\n"},
+            // The same setting is given twice.
+            {R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection WITH (
+ DUPLICATE_SETTING="first_value",
+ DUPLICATE_SETTING="second_value"
+ );
+ )sql", "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"},
+            // A setting value that is not a string literal.
+            {R"sql(
+ USE plato;
+ CREATE BACKUP COLLECTION TestCollection WITH (
+ INT_SETTING=1
+ );
+ )sql", "<main>:4:21: Error: INT_SETTING value should be a string literal\n"},
+        };
+
+        for (const auto& [query, expectedError] : badStatements) {
+            ExpectFailWithError(query, expectedError);
+        }
+    }
+
+    // Several SET clauses are merged into 'settings; RESET names go to 'resetSettings.
+    Y_UNIT_TEST(AlterBackupCollection) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER BACKUP COLLECTION TestCollection
+ SET (STORAGE="remote"), -- also just for test
+ SET (TAG1 = "123"),
+ RESET (TAG2, TAG3);
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('settings '('('"storage" '"remote") '('"tag1" '"123"))))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetSettings '('"tag2" '"tag3")))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // DROP TABLE / ADD DATABASE entries must be listed under 'alterEntries with
+    // their respective actions.
+    Y_UNIT_TEST(AlterBackupCollectionEntries) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER BACKUP COLLECTION TestCollection
+ DROP TABLE `test`,
+ ADD DATABASE;
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alter))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('alterEntries)#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'table) '('path '"test") '('action 'drop)))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('type 'database) '('action 'add)))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // DROP: the Write line must name the collection and mention "drop".
+    Y_UNIT_TEST(DropBackupCollection) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP BACKUP COLLECTION TestCollection;
+ )sql");
+        UNIT_ASSERT(parsed.Root);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "drop");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0}};
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+}
+
+// Translation checks for CREATE / ALTER / DROP RESOURCE POOL CLASSIFIER, plus a
+// lexer check for backtick-quoted identifiers.
+Y_UNIT_TEST_SUITE(ResourcePoolClassifier) {
+    // Settings must appear lower-cased on the Write line, with createObject mode.
+    Y_UNIT_TEST(CreateResourcePoolClassifier) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH (
+ RANK=20,
+ RESOURCE_POOL='wgUserQueries',
+ MEMBER_NAME='yandex_query@abc'
+ );
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('('"member_name" '"yandex_query@abc") '('"rank" (Int32 '"20")) '('"resource_pool" '"wgUserQueries"))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "createObject");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // Malformed statements paired with the exact diagnostic they must produce.
+    Y_UNIT_TEST(CreateResourcePoolClassifierWithBadArguments) {
+        const std::pair<TString, TString> badStatements[] = {
+            // WITH clause is missing.
+            {R"sql(
+ USE plato;
+ CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;
+ )sql", "<main>:3:72: Error: mismatched input ';' expecting WITH\n"},
+            // The same setting is given twice.
+            {R"sql(
+ USE plato;
+ CREATE RESOURCE POOL CLASSIFIER MyResourcePoolClassifier WITH (
+ DUPLICATE_SETTING="first_value",
+ DUPLICATE_SETTING="second_value"
+ );
+ )sql", "<main>:5:21: Error: DUPLICATE_SETTING duplicate keys\n"},
+        };
+
+        for (const auto& [query, expectedError] : badStatements) {
+            ExpectFailWithError(query, expectedError);
+        }
+    }
+
+    // SET values go to 'features, RESET names go to 'resetFeatures, mode is alterObject.
+    Y_UNIT_TEST(AlterResourcePoolClassifier) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ ALTER RESOURCE POOL CLASSIFIER MyResourcePoolClassifier
+ SET (RANK = 30, Weight = 5, MEMBER_NAME = "test@user"),
+ RESET (Resource_Pool);
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#(('mode 'alterObject))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('features '('('"member_name" '"test@user") '('"rank" (Int32 '"30")) '('"weight" (Int32 '"5")))))#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('('resetFeatures '('"resource_pool")))#");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // DROP must use dropObject mode and carry no 'features list.
+    Y_UNIT_TEST(DropResourcePoolClassifier) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ DROP RESOURCE POOL CLASSIFIER MyResourcePoolClassifier;
+ )sql");
+        UNIT_ASSERT(parsed.Root);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT(!line.Contains("'features"));
+            UNIT_ASSERT_STRING_CONTAINS(line, "dropObject");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0}};
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // Backtick-quoted identifiers with escaped or doubled backticks, and a comment
+    // containing a stray backtick, must translate without any issues under both
+    // the default and the ANSI lexer.
+    Y_UNIT_TEST(BacktickMatching) {
+        const auto expectClean = [](NYql::TAstParseResult& result) {
+            UNIT_ASSERT(result.Root);
+            UNIT_ASSERT(result.IsOk());
+            UNIT_ASSERT(result.Issues.Size() == 0);
+        };
+
+        const TString queries[] = {
+            "select\n"
+            " 1 as `Schema has \\`RealCost\\``\n"
+            " -- foo`bar",
+            "select 1 as `a``b`, 2 as ````, 3 as `\\x60a\\x60`, 4 as ```b```, 5 as `\\`c\\``",
+        };
+
+        for (const auto& req : queries) {
+            auto withDefaultLexer = SqlToYql(req);
+            expectClean(withDefaultLexer);
+
+            auto withAnsiLexer = SqlToYqlWithAnsiLexer(req);
+            expectClean(withAnsiLexer);
+        }
+    }
+}
+
+// Translation checks for the BACKUP statement.
+Y_UNIT_TEST_SUITE(Backup) {
+    // Plain BACKUP: the Write line names the collection, mentions 'backup and
+    // must not mention 'incremental.
+    Y_UNIT_TEST(Simple) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ BACKUP TestCollection;
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT(!line.Contains("'incremental"));
+            UNIT_ASSERT_STRING_CONTAINS(line, "'backup");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // BACKUP ... INCREMENTAL: same as above, but 'incremental must be present.
+    Y_UNIT_TEST(Incremental) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ BACKUP TestCollection INCREMENTAL;
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'incremental");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'backup");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+}
+
+// Translation checks for the RESTORE statement.
+Y_UNIT_TEST_SUITE(Restore) {
+    // Plain RESTORE: the Write line names the collection and mentions 'restore.
+    Y_UNIT_TEST(Simple) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ RESTORE TestCollection;
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'restore");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+
+    // RESTORE ... AT '<point>': the literal must be passed through as the 'at value.
+    Y_UNIT_TEST(AtPoint) {
+        auto parsed = SqlToYql(R"sql(
+ USE plato;
+ RESTORE TestCollection AT '2024-06-16_20-14-02';
+ )sql");
+        UNIT_ASSERT_C(parsed.Root, parsed.Issues.ToString());
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('"TestCollection")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, R"#('at '"2024-06-16_20-14-02")#");
+            UNIT_ASSERT_STRING_CONTAINS(line, "'restore");
+        };
+
+        TWordCountHive wordCounts = { {TString("Write"), 0} };
+        VerifyProgram(parsed, wordCounts, checkLine);
+
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+    }
+}
+
+// Translation checks for column family settings in CREATE TABLE.
+Y_UNIT_TEST_SUITE(ColumnFamily) {
+    // Two families each declare COMPRESSION_LEVEL; the setting must show up twice
+    // in the printed program and translation must produce no issues at all.
+    Y_UNIT_TEST(CompressionLevel) {
+        auto parsed = SqlToYql(R"( use plato;
+ CREATE TABLE tableName (
+ Key Uint32 FAMILY default,
+ Value String FAMILY family1,
+ PRIMARY KEY (Key),
+ FAMILY default (
+ DATA = "test",
+ COMPRESSION = "lz4",
+ COMPRESSION_LEVEL = 5
+ ),
+ FAMILY family1 (
+ DATA = "test",
+ COMPRESSION = "lz4",
+ COMPRESSION_LEVEL = 3
+ )
+ );
+ )");
+        UNIT_ASSERT(parsed.IsOk());
+        UNIT_ASSERT(parsed.Issues.Size() == 0);
+
+        TVerifyLineFunc checkLine = [](const TString& word, const TString& line) {
+            if (word != "Write") {
+                return;
+            }
+            UNIT_ASSERT_STRING_CONTAINS(line, "compression_level");
+            // NOTE(review): single-digit probes are weak; a bare "3" may also match
+            // unrelated text on the line (e.g. inside a type name). Consider
+            // asserting the full setting once its printed form is confirmed.
+            UNIT_ASSERT_STRING_CONTAINS(line, "5");
+            UNIT_ASSERT_STRING_CONTAINS(line, "3");
+        };
+
+        TWordCountHive wordCounts = { { TString("Write"), 0 }, { TString("compression_level"), 0 } };
+        VerifyProgram(parsed, wordCounts, checkLine);
+        UNIT_ASSERT_VALUES_EQUAL(1, wordCounts["Write"]);
+        UNIT_ASSERT_VALUES_EQUAL(2, wordCounts["compression_level"]);
+    }
+}
diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.h b/yql/essentials/sql/v1/sql_ut_antlr4.h
new file mode 100644
index 00000000000..9a0029b67ad
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_ut_antlr4.h
@@ -0,0 +1,226 @@
+
+#include <yql/essentials/providers/common/provider/yql_provider_names.h>
+#include <yql/essentials/sql/sql.h>
+#include <util/generic/map.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/string/split.h>
+#include <deque>
+#include <unordered_set>
+using namespace NSQLTranslation;
+
+// Selects whether the helpers in this header echo translation issues for debugging.
+enum class EDebugOutput {
+ // Do not print anything.
+ None,
+ // Print the rendered issues to Cerr (see Err2Str).
+ ToCerr,
+};
+
+// Combination of AST print flags: PerLine, ShortQuote and AdaptArbitraryContent.
+// NOTE(review): GetPrettyPrint in this header passes only PerLine | ShortQuote and
+// does not use this constant — confirm which users rely on it.
+const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
+ NYql::TAstPrintFlags::AdaptArbitraryContent;
+
+// Renders every issue stored in `res` into a single string and returns it.
+// With `debug == EDebugOutput::ToCerr` the same text is additionally written to Cerr.
+inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) {
+    TStringStream issuesText;
+    res.Issues.PrintTo(issuesText);
+
+    const bool echoToCerr = (debug == EDebugOutput::ToCerr);
+    if (echoToCerr) {
+        Cerr << issuesText.Str() << Endl;
+    }
+    return issuesText.Str();
+}
+
+// Translates `query` with the ANTLR4 parser enabled and syntax version 1.
+//   mode      - translation mode stored into the settings;
+//   maxErrors - value stored into settings.MaxErrors;
+//   provider  - service mapped to the "plato" cluster (NYql::YtProviderName when empty);
+//   debug     - with ToCerr the resulting issues are printed to Cerr;
+//   ansiLexer - value stored into settings.AnsiLexer;
+//   settings  - base settings (taken by value) that the fields above are written into.
+inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {},
+    EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {})
+{
+    // Cluster routing used by the tests.
+    const TString platoService = provider ? provider : TString(NYql::YtProviderName);
+    settings.ClusterMapping["plato"] = platoService;
+    settings.ClusterMapping["hahn"] = NYql::YtProviderName;
+    settings.ClusterMapping["mon"] = NYql::SolomonProviderName;
+
+    // Parser configuration; the arena lives until this function returns.
+    google::protobuf::Arena arena;
+    settings.Arena = &arena;
+    settings.Antlr4Parser = true;
+    settings.SyntaxVersion = 1;
+    settings.AnsiLexer = ansiLexer;
+    settings.Mode = mode;
+    settings.MaxErrors = maxErrors;
+
+    auto parsed = SqlToYql(query, settings);
+    if (EDebugOutput::ToCerr == debug) {
+        Err2Str(parsed, debug);
+    }
+    return parsed;
+}
+
+// Shortcut for SqlToYqlWithMode in QUERY mode; the remaining arguments are forwarded unchanged.
+inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
+    const auto queryMode = NSQLTranslation::ESqlMode::QUERY;
+    return SqlToYqlWithMode(query, queryMode, maxErrors, provider, debug);
+}
+
+// Translates `query` in QUERY mode starting from caller-supplied `settings`:
+// at most 10 errors, default provider, no debug output, non-ANSI lexer.
+inline NYql::TAstParseResult SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
+    const size_t maxErrors = 10;
+    const TString defaultProvider;
+    return SqlToYqlWithMode(
+        query,
+        NSQLTranslation::ESqlMode::QUERY,
+        maxErrors,
+        defaultProvider,
+        EDebugOutput::None,
+        /* ansiLexer = */ false,
+        settings);
+}
+
+// Asserts that translating `query` yields no AST root and that the rendered
+// issues are exactly `error`.
+inline void ExpectFailWithError(const TString& query, const TString& error) {
+    auto parsed = SqlToYql(query);
+    UNIT_ASSERT(!parsed.Root);
+
+    const TString rendered = Err2Str(parsed);
+    UNIT_ASSERT_NO_DIFF(rendered, error);
+}
+
+// Same as SqlToYql, but asks SqlToYqlWithMode to enable the ANSI lexer.
+inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
+    return SqlToYqlWithMode(
+        query,
+        NSQLTranslation::ESqlMode::QUERY,
+        maxErrors,
+        provider,
+        debug,
+        /* ansiLexer = */ true);
+}
+
+// ANSI-lexer counterpart of ExpectFailWithError: translation must produce no
+// AST root and the rendered issues must be exactly `error`.
+inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) {
+    auto parsed = SqlToYqlWithAnsiLexer(query);
+    UNIT_ASSERT(!parsed.Root);
+
+    const TString rendered = Err2Str(parsed);
+    UNIT_ASSERT_NO_DIFF(rendered, error);
+}
+
+// Pretty-prints the AST root of `res` using the PerLine and ShortQuote flags.
+// `res.Root` is dereferenced unconditionally, so it must be non-null.
+inline TString GetPrettyPrint(const NYql::TAstParseResult& res) {
+    const ui32 printFlags = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote;
+    TStringStream printed;
+    res.Root->PrettyPrintTo(printed, printFlags);
+    return printed.Str();
+}
+
+// Wraps `str` as '"<str>" (apostrophe followed by the double-quoted text).
+inline TString Quote(const char* str) {
+    TStringBuilder quoted;
+    quoted << "'\"" << str << "\"";
+    return quoted;
+}
+
+// Map from a word to its occurrence counter; filled in by VerifyProgram.
+class TWordCountHive: public TMap<TString, unsigned> {
+public:
+    // Registers every given word with a zero counter.
+    TWordCountHive(std::initializer_list<TString> strings) {
+        for (const TString& word : strings) {
+            this->emplace(word, 0);
+        }
+    }
+
+    // Takes explicit (word, initial count) pairs.
+    TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list)
+        : TMap(list)
+    {}
+};
+
+// Callback invoked by VerifyProgram for each occurrence of a counted word in a program line.
+using TVerifyLineFunc = std::function<void (const TString& word, const TString& line)>;
+
+// Pretty-prints the AST held by `res`, splits it into lines and, for every key of
+// `wordCounter`, adds the number of non-overlapping occurrences found in each line
+// to that key's counter.
+//   verifyLine - optional callback; when set it is called once per occurrence with
+//                the word and the whole line containing it.
+// Returns the pretty-printed program text.
+inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) {
+    const auto programm = GetPrettyPrint(res);
+    TVector<TString> yqlProgram;
+    Split(programm, "\n", yqlProgram);
+    for (const auto& line: yqlProgram) {
+        for (auto& counterIter: wordCounter) {
+            const auto& word = counterIter.first;
+            if (word.empty()) {
+                // An empty word matches at every offset and never advances `pos`,
+                // so the search loop below would not terminate. Skip it.
+                continue;
+            }
+            auto pos = line.find(word);
+            while (pos != TString::npos) {
+                ++counterIter.second;
+                if (verifyLine) {
+                    verifyLine(word, line);
+                }
+                // Continue after the current match so occurrences do not overlap.
+                pos = line.find(word, pos + word.length());
+            }
+        }
+    }
+    return programm;
+}
+
+// Translates `query` (optionally prefixed with an ANSI IN pragma) and checks every
+// program line containing "SqlIn":
+//   - `ansi` undefined: no pragma is added and the line must contain '('warnNoAnsi);
+//   - `ansi` true:  the enabling pragma is added and the line must contain '('ansi);
+//   - `ansi` false: the disabling pragma is added, no extra marker is required;
+//   - in all cases every entry of `expectedHints` must be present in the line.
+inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) {
+    const TString pragma = !ansi.Defined()
+        ? ""
+        : (*ansi ? "PRAGMA AnsiInForEmptyOrNullableItemsCollections;"
+                 : "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;");
+
+    NYql::TAstParseResult parsed = SqlToYql(pragma + query);
+    UNIT_ASSERT(parsed.Root);
+
+    const TVerifyLineFunc checkSqlInLine = [&](const TString& word, const TString& line) {
+        Y_UNUSED(word);
+        const bool ansiUnset = !ansi.Defined();
+        if (ansiUnset) {
+            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)"));
+        } else if (*ansi) {
+            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)"));
+        }
+        for (auto& hint : expectedHints) {
+            UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(hint));
+        }
+    };
+
+    TWordCountHive wordStats = {{TString("SqlIn"), 0}};
+    VerifyProgram(parsed, wordStats, checkSqlInLine);
+}
+
+// Runs the three-argument VerifySqlInHints twice: first with the ANSI pragma
+// disabled, then with it enabled.
+inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) {
+    for (const bool ansiEnabled : {false, true}) {
+        VerifySqlInHints(query, expectedHints, ansiEnabled);
+    }
+}
+
+// Returns translation settings that contain a single table binding registered under
+// `name`. The binding has cluster type "s3" and fixed placeholder values for
+// cluster, path, format, compression, bar, schema and partitioned_by.
+inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) {
+ NSQLTranslation::TTranslationSettings settings;
+ NSQLTranslation::TTableBindingSettings bindSettings;
+ bindSettings.ClusterType = "s3";
+ bindSettings.Settings["cluster"] = "cluster";
+ bindSettings.Settings["path"] = "path";
+ bindSettings.Settings["format"] = "format";
+ // NOTE(review): "ccompression" looks like a typo of "compression"; tests may
+ // compare against this exact value, so confirm before changing it.
+ bindSettings.Settings["compression"] = "ccompression";
+ bindSettings.Settings["bar"] = "1";
+ // schema is not validated in this test but should be valid YSON text
+ // NOTE(review): the literal below starts with `"[` (a double quote before the
+ // opening bracket) and has no matching closing quote — confirm this is intended.
+ bindSettings.Settings["schema"] = R"__("[
+ "StructType";
+ [
+ [
+ "key";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "subkey";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "value";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]])__";
+ bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]";
+ settings.Bindings[name] = bindSettings;
+ return settings;
+}
+
+// Breadth-first walk over the AST starting at `root`.
+//   visitor - called for each node in BFS order; returning false stops the whole
+//             traversal immediately.
+// Children of list nodes are enqueued in index order. Each distinct node is
+// visited at most once.
+inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) {
+    std::deque<NYql::TAstNode const*> wishList{ root };
+    // A node is marked as soon as it is enqueued. Marking only after the visit
+    // would let a node that is reachable several times (shared or repeated
+    // child) enter the queue, and reach the visitor, more than once.
+    std::unordered_set<NYql::TAstNode const*> visited{ root };
+    while (!wishList.empty()) {
+        auto v = wishList.front();
+        wishList.pop_front();
+        if (!visitor(v)) {
+            return;
+        }
+        if (!v->IsList()) {
+            continue;
+        }
+        for (ui32 i = 0; i != v->GetChildrenCount(); ++i) {
+            auto child = v->GetChild(i);
+            // insert().second is true only for nodes not seen before.
+            if (visited.insert(child).second) {
+                wishList.push_back(child);
+            }
+        }
+    }
+}
+
+// Returns the first node, in AstBfs order, that is a list with more than `childIndex`
+// children and whose child at `childIndex` is an atom with content equal to `name`.
+// Returns nullptr when no such node exists.
+inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){
+    const auto isMatch = [childIndex, name](const NYql::TAstNode* node) {
+        if (!node->IsList() || node->GetChildrenCount() <= childIndex) {
+            return false;
+        }
+        const auto* candidate = node->GetChild(childIndex);
+        return candidate->IsAtom() && candidate->GetContent() == name;
+    };
+
+    const NYql::TAstNode* found = nullptr;
+    AstBfs(root, [&found, &isMatch](const NYql::TAstNode* node) {
+        if (!isMatch(node)) {
+            return true;   // keep walking
+        }
+        found = node;
+        return false;      // stop at the first hit
+    });
+    return found;
+}
diff --git a/yql/essentials/sql/v1/sql_values.cpp b/yql/essentials/sql/v1/sql_values.cpp
new file mode 100644
index 00000000000..c035489387f
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_values.cpp
@@ -0,0 +1,151 @@
+#include "sql_values.h"
+#include "sql_group_by.h"
+#include "sql_query.h"
+#include "sql_select.h"
+#include "sql_expression.h"
+#include "source.h"
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translates a VALUES statement into a source node.
+//   node              - parsed values_stmt rule;
+//   valuesPos         - output: set to the context position taken right after the
+//                       first token of the statement has been processed;
+//   derivedColumns    - optional explicit column names; may be shorter than the row width;
+//   derivedColumnsPos - position used for the "too many names" error and for the label atoms.
+// Returns nullptr when the rows cannot be built or when more names than columns are given.
+TSourcePtr TSqlValues::Build(const TRule_values_stmt& node, TPosition& valuesPos, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos) {
+ Token(node.GetToken1());
+ valuesPos = Ctx.Pos();
+
+ TVector<TVector<TNodePtr>> rows;
+ const auto& rowList = node.GetRule_values_source_row_list2();
+ if (!BuildRows(rowList, rows)) {
+ return nullptr;
+ }
+
+ // BuildRows succeeded, so there is at least one row; the last row's size is taken as the column count.
+ YQL_ENSURE(!rows.empty());
+ const size_t columnsCount = rows.back().size();
+ if (derivedColumns.size() > columnsCount) {
+ Ctx.Error(derivedColumnsPos) << "Derived column list size exceeds column count in VALUES";
+ return nullptr;
+ }
+
+ // Names not supplied by the caller are generated as "column<index>"; warn about it if enabled.
+ auto columns = derivedColumns;
+ if (Ctx.WarnUnnamedColumns && columns.size() < columnsCount) {
+ Ctx.Warning(valuesPos, TIssuesIds::YQL_UNNAMED_COLUMN)
+ << "Autogenerated column names column" << columns.size() << "...column" << columnsCount - 1 << " will be used here";
+ }
+
+ while (columns.size() < columnsCount) {
+ columns.push_back(TStringBuilder() << "column" << columns.size());
+ }
+
+ TVector<TNodePtr> labels;
+ for (size_t i = 0; i < columnsCount; ++i) {
+ labels.push_back(BuildQuotedAtom(derivedColumnsPos, columns[i]));
+ }
+
+ // Every row becomes an ordered structure keyed by the labels above.
+ TVector<TNodePtr> items;
+ for (auto& row : rows) {
+ YQL_ENSURE(!row.empty());
+ YQL_ENSURE(row.size() == columnsCount);
+ items.push_back(BuildOrderedStructure(row.front()->GetPos(), row, labels));
+ }
+ // Wrap the rows: AsListMayWarn -> PersistableRepr -> AssumeColumnOrder(labels).
+ auto list = new TCallNodeImpl(valuesPos, "AsListMayWarn", items);
+ list = new TCallNodeImpl(valuesPos, "PersistableRepr", { list });
+ list = new TCallNodeImpl(valuesPos, "AssumeColumnOrder", { list, BuildTuple(valuesPos, labels) });
+ auto result = BuildNodeSource(valuesPos, list, false);
+ result->AllColumns();
+ return result;
+}
+
+// Fills `rows` with the expression lists of every row in `node`.
+// `rows` is reset first. The first row defines the expected width; a later row of
+// a different width is reported as an error at that row's first token.
+// Returns false on any row translation failure or width mismatch.
+bool TSqlValues::BuildRows(const TRule_values_source_row_list& node, TVector<TVector<TNodePtr>>& rows) {
+    rows.clear();
+    rows.push_back({});
+    if (!BuildRow(node.GetRule_values_source_row1(), rows.back())) {
+        return false;
+    }
+    const size_t expectedSize = rows.back().size();
+
+    for (const auto& block : node.GetBlock2()) {
+        const auto& rowNode = block.GetRule_values_source_row2();
+        rows.push_back({});
+        auto& currentRow = rows.back();
+        if (!BuildRow(rowNode, currentRow)) {
+            return false;
+        }
+        if (currentRow.size() == expectedSize) {
+            continue;
+        }
+        Token(rowNode.GetToken1());
+        Error() << "All VALUES items should have same size: expecting " << expectedSize << ", got " << currentRow.size();
+        return false;
+    }
+    return true;
+}
+
+// Translates the expression list of a single VALUES row into `outRow`.
+// Returns the result of ExprList.
+bool TSqlValues::BuildRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow) {
+    const auto& exprList = inRow.GetRule_expr_list2();
+    TSqlExpression exprTranslator(Ctx, Mode);
+    return ExprList(exprTranslator, outRow, exprList);
+}
+
+// Builds the data source of a write operation from a values_source rule.
+//   node          - either a VALUES statement (alt 1) or a SELECT statement (alt 2);
+//   columnsHint   - column names forwarded to BuildWriteValues;
+//   operationName - operation name forwarded to BuildWriteValues for the VALUES alternative.
+// Returns nullptr when the rows or the select cannot be built, or for an unknown alternative.
+TSourcePtr TSqlValues::ValuesSource(const TRule_values_source& node, const TVector<TString>& columnsHint,
+ const TString& operationName)
+{
+ Ctx.IncrementMonCounter("sql_features", "ValuesSource");
+ TPosition pos(Ctx.Pos());
+ switch (node.Alt_case()) {
+ case TRule_values_source::kAltValuesSource1: {
+ // Literal rows: translate them and hand the row matrix over.
+ TVector<TVector<TNodePtr>> rows {{}};
+ const auto& rowList = node.GetAlt_values_source1().GetRule_values_stmt1().GetRule_values_source_row_list2();
+ if (!BuildRows(rowList, rows)) {
+ return nullptr;
+ }
+ return BuildWriteValues(pos, operationName, columnsHint, rows);
+ }
+ case TRule_values_source::kAltValuesSource2: {
+ // Rows come from a SELECT statement.
+ TSqlSelect select(Ctx, Mode);
+ TPosition selectPos;
+ auto source = select.Build(node.GetAlt_values_source2().GetRule_select_stmt1(), selectPos);
+ if (!source) {
+ return nullptr;
+ }
+ // NOTE(review): the operation name is the literal "UPDATE" here, not
+ // `operationName` as in the VALUES branch — confirm this is intentional.
+ return BuildWriteValues(pos, "UPDATE", columnsHint, std::move(source));
+ }
+ default:
+ Ctx.IncrementMonCounter("sql_errors", "UnknownValuesSource");
+ AltNotImplemented("values_source", node);
+ return nullptr;
+ }
+}
+
+// Builds the source for an into_values_source rule.
+//   node          - parsed rule; only alternative 1 (optional column list followed by
+//                   a values source) is supported;
+//   operationName - forwarded to ValuesSource.
+// Any other alternative increments the sql_errors/DefaultValuesOrOther counter,
+// reports it through AltNotImplemented and returns nullptr.
+TSourcePtr TSqlIntoValues::Build(const TRule_into_values_source& node, const TString& operationName) {
+    switch (node.Alt_case()) {
+        case TRule_into_values_source::kAltIntoValuesSource1: {
+            // Bind by const reference: a by-value `auto` here would copy the whole
+            // parsed sub-message although it is only read.
+            const auto& alt = node.GetAlt_into_values_source1();
+            TVector<TString> columnsHint;
+            if (alt.HasBlock1()) {
+                // The explicit column list is optional.
+                PureColumnListStr(alt.GetBlock1().GetRule_pure_column_list1(), *this, columnsHint);
+            }
+            return ValuesSource(alt.GetRule_values_source2(), columnsHint, operationName);
+        }
+        default:
+            Ctx.IncrementMonCounter("sql_errors", "DefaultValuesOrOther");
+            AltNotImplemented("into_values_source", node);
+            return nullptr;
+    }
+}
+
+// Builds the source for an "AS" values source. Only the SELECT alternative is
+// accepted and delegated to ValuesSource with an empty column hint. The VALUES
+// alternative and unknown alternatives increment sql_errors/UnknownValuesSource,
+// report an error and return nullptr.
+TSourcePtr TSqlAsValues::Build(const TRule_values_source& node, const TString& operationName) {
+    const auto alt = node.Alt_case();
+    if (alt == TRule_values_source::kAltValuesSource2) {
+        return ValuesSource(node, {}, operationName);
+    }
+
+    // Both remaining paths are failures counted under the same monitoring key.
+    Ctx.IncrementMonCounter("sql_errors", "UnknownValuesSource");
+    if (alt == TRule_values_source::kAltValuesSource1) {
+        Error() << "AS VALUES statement is not supported for " << operationName << ".";
+    } else {
+        AltNotImplemented("values_source", node);
+    }
+    return nullptr;
+}
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/sql_values.h b/yql/essentials/sql/v1/sql_values.h
new file mode 100644
index 00000000000..7e19d1d8a0d
--- /dev/null
+++ b/yql/essentials/sql/v1/sql_values.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "sql_translation.h"
+#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
+
+namespace NSQLTranslationV1 {
+
+using namespace NSQLv1Generated;
+
+// Translator for VALUES constructs: turns parsed VALUES rules into source nodes.
+class TSqlValues: public TSqlTranslation {
+public:
+ TSqlValues(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlTranslation(ctx, mode)
+ {
+ }
+
+ // Builds a source from a values_stmt rule. `valuesPos` is an output parameter;
+ // `derivedColumns` optionally names the columns, and `derivedColumnsPos` is the
+ // position used when reporting problems with that list.
+ TSourcePtr Build(const TRule_values_stmt& node, TPosition& valuesPos, const TVector<TString>& derivedColumns = {}, TPosition derivedColumnsPos = TPosition());
+protected:
+ // Translates every row of the list into `rows`; returns false on failure.
+ bool BuildRows(const TRule_values_source_row_list& node, TVector<TVector<TNodePtr>>& rows);
+
+ // Builds the data source of a write operation from a values_source rule.
+ TSourcePtr ValuesSource(const TRule_values_source& node, const TVector<TString>& columnsHint,
+ const TString& operationName);
+
+private:
+ // Translates a single row's expression list into `outRow`.
+ bool BuildRow(const TRule_values_source_row& inRow, TVector<TNodePtr>& outRow);
+};
+
+// Translator for the into_values_source rule (optional column list plus a values source).
+class TSqlIntoValues: public TSqlValues {
+public:
+ TSqlIntoValues(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlValues(ctx, mode)
+ {
+ }
+
+ // Builds the source for `node`; `operationName` is passed on to ValuesSource.
+ TSourcePtr Build(const TRule_into_values_source& node, const TString& operationName);
+};
+
+// Translator for a values_source used in "AS" form; its Build accepts only the
+// SELECT alternative and reports an error for the VALUES alternative.
+class TSqlAsValues: public TSqlValues {
+public:
+ TSqlAsValues(TContext& ctx, NSQLTranslation::ESqlMode mode)
+ : TSqlValues(ctx, mode)
+ {
+ }
+
+ // Builds the source for `node`; `operationName` is used in the error message
+ // and passed on to ValuesSource.
+ TSourcePtr Build(const TRule_values_source& node, const TString& operationName);
+};
+
+} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/ut/ya.make b/yql/essentials/sql/v1/ut/ya.make
new file mode 100644
index 00000000000..f7dddb9af18
--- /dev/null
+++ b/yql/essentials/sql/v1/ut/ya.make
@@ -0,0 +1,21 @@
+# Unit tests for yql/essentials/sql/v1 built from sql_ut.cpp and
+# sql_match_recognize_ut.cpp. The sibling ut_antlr4 module uses
+# sql_ut_antlr4.cpp in place of sql_ut.cpp.
+UNITTEST_FOR(yql/essentials/sql/v1)
+
+SRCS(
+ sql_ut.cpp
+ sql_match_recognize_ut.cpp
+)
+
+PEERDIR(
+ library/cpp/regex/pcre
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/core/sql_types
+ yql/essentials/sql
+ yql/essentials/sql/pg_dummy
+ yql/essentials/sql/v1/format
+)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+END()
diff --git a/yql/essentials/sql/v1/ut_antlr4/ya.make b/yql/essentials/sql/v1/ut_antlr4/ya.make
new file mode 100644
index 00000000000..211ebf7fe2e
--- /dev/null
+++ b/yql/essentials/sql/v1/ut_antlr4/ya.make
@@ -0,0 +1,21 @@
+# Unit tests for yql/essentials/sql/v1 built from sql_ut_antlr4.cpp and
+# sql_match_recognize_ut.cpp. Dependencies, timeout and size are the same as
+# in the sibling ut module; only the first source file differs.
+UNITTEST_FOR(yql/essentials/sql/v1)
+
+SRCS(
+ sql_ut_antlr4.cpp
+ sql_match_recognize_ut.cpp
+)
+
+PEERDIR(
+ library/cpp/regex/pcre
+ yql/essentials/public/udf/service/exception_policy
+ yql/essentials/core/sql_types
+ yql/essentials/sql
+ yql/essentials/sql/pg_dummy
+ yql/essentials/sql/v1/format
+)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+END()
diff --git a/yql/essentials/sql/v1/ya.make b/yql/essentials/sql/v1/ya.make
new file mode 100644
index 00000000000..3da22939466
--- /dev/null
+++ b/yql/essentials/sql/v1/ya.make
@@ -0,0 +1,69 @@
+# Library module for yql/essentials/sql/v1.
+LIBRARY()
+
+# Modules this library depends on.
+PEERDIR(
+ library/cpp/charset
+ library/cpp/enumbitset
+ library/cpp/json
+ library/cpp/yson/node
+ yql/essentials/minikql
+ yql/essentials/public/udf
+ yql/essentials/sql/settings
+ yql/essentials/core/issue
+ yql/essentials/core/issue/protos
+ yql/essentials/core/sql_types
+ yql/essentials/parser/lexer_common
+ yql/essentials/parser/proto_ast/collect_issues
+ yql/essentials/parser/proto_ast/gen/v1
+ yql/essentials/parser/proto_ast/gen/v1_ansi
+ yql/essentials/parser/proto_ast/gen/v1_proto_split
+ yql/essentials/parser/proto_ast/gen/v1_antlr4
+ yql/essentials/parser/proto_ast/gen/v1_ansi_antlr4
+ yql/essentials/parser/pg_catalog
+ yql/essentials/sql/v1/lexer
+ yql/essentials/sql/v1/proto_parser
+)
+
+# Sources compiled into the library.
+SRCS(
+ aggregation.cpp
+ builtin.cpp
+ context.cpp
+ join.cpp
+ insert.cpp
+ list_builtin.cpp
+ match_recognize.cpp
+ node.cpp
+ select.cpp
+ source.cpp
+ sql.cpp
+ sql_call_expr.cpp
+ sql_expression.cpp
+ sql_group_by.cpp
+ sql_match_recognize.cpp
+ sql_into_tables.cpp
+ sql_query.cpp
+ sql_select.cpp
+ sql_translation.cpp
+ sql_values.cpp
+ query.cpp
+ object_processing.cpp
+)
+
+YQL_LAST_ABI_VERSION()
+
+# Presumably generates enum <-> string conversion code for the enums declared in
+# these headers — confirm against the build system documentation.
+GENERATE_ENUM_SERIALIZATION(match_recognize.h)
+GENERATE_ENUM_SERIALIZATION(node.h)
+GENERATE_ENUM_SERIALIZATION(sql_call_param.h)
+
+END()
+
+# Nested modules.
+RECURSE(
+ format
+ lexer
+ perf
+ proto_parser
+)
+
+# Test modules (the two parser variants of the unit tests).
+RECURSE_FOR_TESTS(
+ ut
+ ut_antlr4
+)