diff options
author | Alexander Smirnov <alex@ydb.tech> | 2024-11-22 09:21:01 +0000 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-11-22 09:21:01 +0000 |
commit | a7cac42c868ca5722777ccee944217410812e72c (patch) | |
tree | 7bff7ce2a3ade72f6f15dfc634490d13628066ee /yql/essentials | |
parent | a18f18d81996ca8e681bb6cabd441b52833d99bf (diff) | |
parent | 9478cfdab4217d3710b96329466825bf47111d7d (diff) | |
download | ydb-a7cac42c868ca5722777ccee944217410812e72c.tar.gz |
Merge branch 'rightlib' into mergelibs-241122-0919
Diffstat (limited to 'yql/essentials')
40 files changed, 1699 insertions, 57 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp index bc25d612db..ace0141cb2 100644 --- a/yql/essentials/core/common_opt/yql_co_flow2.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp @@ -31,16 +31,7 @@ bool AllowComplexFiltersOverAggregatePushdown(const TOptimizeContext& optCtx) { optCtx.Types->MaxAggPushdownPredicates > 0; } -TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprContext& ctx, const TParentsMap& parentsMap) { - auto inputType = node.Input().Ref().GetTypeAnn(); - auto structType = inputType->GetKind() == ETypeAnnotationKind::List - ? inputType->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>() - : inputType->Cast<TStreamExprType>()->GetItemType()->Cast<TStructExprType>(); - - if (structType->GetSize() == 0) { - return node.Ptr(); - } - +THashSet<TStringBuf> GetAggregationInputKeys(const TCoAggregate& node) { TMaybe<TStringBuf> sessionColumn; const auto sessionSetting = GetSetting(node.Settings().Ref(), "session"); if (sessionSetting) { @@ -58,13 +49,28 @@ TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprCon } } - TSet<TStringBuf> usedFields; + THashSet<TStringBuf> result; for (const auto& x : node.Keys()) { if (x.Value() != sessionColumn && x.Value() != hoppingColumn) { - usedFields.insert(x.Value()); + result.insert(x.Value()); } } + return result; +} + +TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprContext& ctx, const TParentsMap& parentsMap) { + auto inputType = node.Input().Ref().GetTypeAnn(); + auto structType = inputType->GetKind() == ETypeAnnotationKind::List + ? inputType->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>() + : inputType->Cast<TStreamExprType>()->GetItemType()->Cast<TStructExprType>(); + + if (structType->GetSize() == 0) { + return node.Ptr(); + } + + THashSet<TStringBuf> usedFields = GetAggregationInputKeys(node); + if (usedFields.size() == structType->GetSize()) { return node.Ptr(); } @@ -96,7 +102,7 @@ TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprCon } } - if (hoppingSetting) { + if (auto hoppingSetting = GetSetting(node.Settings().Ref(), "hopping")) { auto traitsNode = hoppingSetting->ChildPtr(1); if (traitsNode->IsList()) { traitsNode = traitsNode->ChildPtr(1); @@ -120,7 +126,7 @@ TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprCon } } - if (sessionSetting) { + if (auto sessionSetting = GetSetting(node.Settings().Ref(), "session")) { TCoSessionWindowTraits traits(sessionSetting->Child(1)->ChildPtr(1)); auto usedType = traits.ListType().Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TListExprType>()-> @@ -1326,10 +1332,7 @@ TExprBase FilterOverAggregate(const TCoFlatMapBase& node, TExprContext& ctx, TOp TCoConditionalValueBase body = node.Lambda().Body().Cast<TCoConditionalValueBase>(); const TCoAggregate agg = node.Input().Cast<TCoAggregate>(); - THashSet<TStringBuf> keyColumns; - for (auto key : agg.Keys()) { - keyColumns.insert(key.Value()); - } + const THashSet<TStringBuf> keyColumns = GetAggregationInputKeys(agg); TExprNodeList andComponents; if (auto maybeAnd = body.Predicate().Maybe<TCoAnd>()) { diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index be5a98c0c5..00432ffc5b 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -3675,6 +3675,28 @@ bool IsEarlyExpandOfSkipNullAllowed(const TOptimizeContext& optCtx) { return optCtx.Types->OptimizerFlags.contains(skipNullFlags); } +TExprNode::TPtr ReplaceFuncWithImpl(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + auto exportsPtr = optCtx.Types->Modules->GetModule("/lib/yql/core.yql"); + YQL_ENSURE(exportsPtr); + const auto& exports = exportsPtr->Symbols(); + const auto ex = exports.find(TString(node->Content()) + "Impl"); + YQL_ENSURE(exports.cend() != ex); + TNodeOnNodeOwnedMap deepClones; + auto lambda = ctx.DeepCopy(*ex->second, exportsPtr->ExprCtx(), deepClones, true, false); + + YQL_CLOG(DEBUG, Core) << "Replace " << node->Content() << " with implementation"; + return ctx.Builder(node->Pos()) + .Apply(lambda) + .Do([&node](TExprNodeReplaceBuilder& builder) -> TExprNodeReplaceBuilder& { + for (size_t i = 0; i < node->ChildrenSize(); i++) { + builder.With(i, node->ChildPtr(i)); + } + return builder; + }) + .Seal() + .Build(); +} + } // namespace void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { @@ -4897,6 +4919,65 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return node; }; + map["ListSample"] = map["ListSampleN"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + if (node->Child(0)->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) { + YQL_CLOG(DEBUG, Core) << "Handle optional list in " << node->Content(); + return ctx.Builder(node->Pos()) + .Callable("Map") + .Add(0, node->Child(0)) + .Lambda(1) + .Param("list") + .Callable(node->Content()) + .Arg(0, "list") + .Add(1, node->Child(1)) + .Add(2, node->Child(2)) + .Seal() + .Seal() + .Seal() + .Build(); + } + + if (node->Child(1)->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) { + YQL_CLOG(DEBUG, Core) << "Handle optional prob arg in " << node->Content(); + return ctx.Builder(node->Pos()) + .Callable("IfPresent") + .Add(0, node->Child(1)) + .Lambda(1) + .Param("probArg") + .Callable(node->Content()) + .Add(0, node->Child(0)) + .Arg(1, "probArg") + .Add(2, node->Child(2)) + .Seal() + .Seal() + .Add(2, node->Child(0)) + .Seal() + .Build(); + } + + return ReplaceFuncWithImpl(node, ctx, optCtx); + }; + + map["ListShuffle"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + if (node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) { + YQL_CLOG(DEBUG, Core) << "Handle optionals args in " << node->Content(); + return ctx.Builder(node->Pos()) + .Callable("Map") + .Add(0, node->Child(0)) + .Lambda(1) + .Param("list") + .Callable(node->Content()) + .Arg(0, "list") + .Add(1, node->Child(1)) + .Seal() + .Seal() + .Seal() + .Build(); + } + + return ReplaceFuncWithImpl(node, ctx, optCtx); + }; + map["OptionalReduce"] = std::bind(&RemoveOptionalReduceOverData, _1, _2); map["Fold"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index ea068f3f4a..810f5f70be 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -12594,6 +12594,9 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["ListTopSort"] = &ListTopSortWrapper; Functions["ListTopSortAsc"] = &ListTopSortWrapper; Functions["ListTopSortDesc"] = &ListTopSortWrapper; + Functions["ListSample"] = &ListSampleWrapper; + Functions["ListSampleN"] = &ListSampleNWrapper; + Functions["ListShuffle"] = &ListShuffleWrapper; Functions["ExpandMap"] = &ExpandMapWrapper; Functions["WideMap"] = &WideMapWrapper; diff --git a/yql/essentials/core/type_ann/type_ann_list.cpp b/yql/essentials/core/type_ann/type_ann_list.cpp index 2dcc8c83da..674260bf35 100644 --- a/yql/essentials/core/type_ann/type_ann_list.cpp +++ b/yql/essentials/core/type_ann/type_ann_list.cpp @@ -1524,6 +1524,115 @@ namespace { return OptListWrapperImpl<1U>(input, output, ctx, "Collect"); } + IGraphTransformer::TStatus ListSampleWrapperCommon(const TExprNode::TPtr& input, TExprNode::TPtr& output, NUdf::EDataSlot probArgDataType, TContext& ctx) { + if (!EnsureMinMaxArgsCount(*input, 2, 3, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (IsNull(input->Head())) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + if (!EnsureComputable(input->Head(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + auto type = input->Head().GetTypeAnn(); + if (type->GetKind() == ETypeAnnotationKind::Optional) { + type = type->Cast<TOptionalExprType>()->GetItemType(); + } + + if (type->GetKind() != ETypeAnnotationKind::List && type->GetKind() != ETypeAnnotationKind::EmptyList) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Head().Pos()), TStringBuilder() + << "Expected (empty) list or optional of (empty) list, but got: " << *input->Head().GetTypeAnn())); + return IGraphTransformer::TStatus::Error; + } + + if (type->GetKind() == ETypeAnnotationKind::EmptyList) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + if (IsNull(*input->Child(1))) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + if (!EnsureSpecificDataType(*input->Child(1), probArgDataType, ctx.Expr, true)) { + return IGraphTransformer::TStatus::Error; + } + + if (input->ChildrenSize() == 2) { + auto children = input->ChildrenList(); + children.push_back(ctx.Expr.NewCallable(input->Pos(), "Null", {})); + output = ctx.Expr.ChangeChildren(*input, std::move(children)); + return IGraphTransformer::TStatus::Repeat; + } + YQL_ENSURE(input->ChildrenSize() == 3); + + if (!EnsureComputable(*input->Child(2), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; + } + + IGraphTransformer::TStatus ListSampleWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + return ListSampleWrapperCommon(input, output, NUdf::EDataSlot::Double, ctx); + } + + IGraphTransformer::TStatus ListSampleNWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + return ListSampleWrapperCommon(input, output, NUdf::EDataSlot::Uint64, ctx); + } + + IGraphTransformer::TStatus ListShuffleWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (IsNull(input->Head())) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + if (!EnsureComputable(input->Head(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + auto type = input->Head().GetTypeAnn(); + if (type->GetKind() == ETypeAnnotationKind::Optional) { + type = type->Cast<TOptionalExprType>()->GetItemType(); + } + + if (type->GetKind() != ETypeAnnotationKind::List && type->GetKind() != ETypeAnnotationKind::EmptyList) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Head().Pos()), TStringBuilder() + << "Expected (empty) list or optional of (empty) list, but got: " << *input->Head().GetTypeAnn())); + return IGraphTransformer::TStatus::Error; + } + + if (type->GetKind() == ETypeAnnotationKind::EmptyList) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + if (input->ChildrenSize() == 1) { + auto children = input->ChildrenList(); + children.push_back(ctx.Expr.NewCallable(input->Pos(), "Null", {})); + output = ctx.Expr.ChangeChildren(*input, std::move(children)); + return IGraphTransformer::TStatus::Repeat; + } + YQL_ENSURE(input->ChildrenSize() == 2); + + if (!EnsureComputable(*input->Child(1), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; + } + IGraphTransformer::TStatus OptListFold1WrapperImpl(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx, TExprNode::TPtr&& updateLambda) { if (IsNull(input->Head())) { output = input->HeadPtr(); diff --git a/yql/essentials/core/type_ann/type_ann_list.h b/yql/essentials/core/type_ann/type_ann_list.h index 6b15965d62..7071b67461 100644 --- a/yql/essentials/core/type_ann/type_ann_list.h +++ b/yql/essentials/core/type_ann/type_ann_list.h @@ -41,6 +41,9 @@ namespace NTypeAnnImpl { IGraphTransformer::TStatus ListTopSortWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus ListExtractWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus ListCollectWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus ListSampleWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus ListSampleNWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus ListShuffleWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus FoldMapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus Fold1MapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus Chain1MapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); diff --git a/yql/essentials/minikql/computation/mkql_computation_node_pack.h b/yql/essentials/minikql/computation/mkql_computation_node_pack.h index 5dc47ada2d..6e79b1a1a3 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_pack.h +++ b/yql/essentials/minikql/computation/mkql_computation_node_pack.h @@ -90,6 +90,10 @@ public: return !ItemCount_; } + bool IsBlock() const { + return IsBlock_; + } + void Clear(); NYql::TChunkedBuffer Finish(); diff --git a/yql/essentials/mount/lib/yql/core.yql b/yql/essentials/mount/lib/yql/core.yql index 88aeb0e7df..be99a7f87e 100755 --- a/yql/essentials/mount/lib/yql/core.yql +++ b/yql/essentials/mount/lib/yql/core.yql @@ -479,6 +479,56 @@ def signature(script, name): (lambda '() (Apply ListToTupleImpl list n))) )) +(let ListSampleImpl (lambda '(list probability dependsOn) + (Filter list (lambda '(x) (< (Random (DependsOn '(x probability dependsOn))) probability))) +)) + +(let ListSampleNImpl (lambda '(list count dependsOn) (block '( + (let value_type (ListItemType (TypeOf list))) + + (let UdfVectorCreate (Udf 'Vector.Create (Void) (TupleType (TupleType value_type (DataType 'Uint64)) (StructType) value_type))) + + (let resource_type (TypeOf (Apply UdfVectorCreate (Uint32 '0)))) + + (let UdfVectorEmplace (Udf 'Vector.Emplace (Void) (TupleType (TupleType resource_type (DataType 'Uint64) value_type) (StructType) value_type))) + (let UdfVectorSwap (Udf 'Vector.Swap (Void) (TupleType (TupleType resource_type (DataType 'Uint64) (DataType 'Uint64)) (StructType) value_type))) + (let UdfVectorGetResult (Udf 'Vector.GetResult (Void) (TupleType (TupleType resource_type) (StructType) value_type))) + + (return (Apply UdfVectorGetResult (Fold + (Skip (Enumerate list) count) + (Fold + (Take list count) + (NamedApply UdfVectorCreate '(count) (AsStruct) (DependsOn '(list dependsOn))) + (lambda '(x y) (Apply UdfVectorEmplace y count x)) + ) + (lambda '(x y) (block '( + (let pos (Coalesce (% (RandomNumber (DependsOn '(x count dependsOn))) (+ (Nth x '0) (Uint64 '1))) (Uint64 '0))) + (return (If (< pos count) (Apply UdfVectorEmplace y pos (Nth x '1)) y)) + ))) + ))) +)))) + +(let ListShuffleImpl (lambda '(list dependsOn) (block '( + (let value_type (ListItemType (TypeOf list))) + + (let UdfVectorCreate (Udf 'Vector.Create (Void) (TupleType (TupleType value_type (DataType 'Uint64)) (StructType) value_type))) + + (let resource_type (TypeOf (Apply UdfVectorCreate (Uint32 '0)))) + + (let UdfVectorEmplace (Udf 'Vector.Emplace (Void) (TupleType (TupleType resource_type (DataType 'Uint64) value_type) (StructType) value_type))) + (let UdfVectorSwap (Udf 'Vector.Swap (Void) (TupleType (TupleType resource_type (DataType 'Uint64) (DataType 'Uint64)) (StructType) value_type))) + (let UdfVectorGetResult (Udf 'Vector.GetResult (Void) (TupleType (TupleType resource_type) (StructType) value_type))) + + (return (Apply UdfVectorGetResult (Fold + (Enumerate list) + (NamedApply UdfVectorCreate '((Uint32 '1)) (AsStruct) (DependsOn '(list dependsOn))) + (lambda '(x y) (block '( + (let pos (Coalesce (% (RandomNumber (DependsOn '(x dependsOn))) (+ (Nth x '0) (Uint64 '1))) (Uint64 '0))) + (return (Apply UdfVectorSwap (Apply UdfVectorEmplace y (Nth x '0) (Nth x '1)) pos (Nth x '0))) + ))) + ))) +)))) + (export Equals) (export Unequals) (export FindIndex) @@ -516,4 +566,7 @@ def signature(script, name): (export ForceSpreadMembers) (export ListFromTuple) (export ListToTuple) +(export ListSampleImpl) +(export ListSampleNImpl) +(export ListShuffleImpl) ) diff --git a/yql/essentials/sql/v1/SQLv1.g.in b/yql/essentials/sql/v1/SQLv1.g.in index 9369b111a9..1b645f0b89 100644 --- a/yql/essentials/sql/v1/SQLv1.g.in +++ b/yql/essentials/sql/v1/SQLv1.g.in @@ -75,6 +75,7 @@ sql_stmt_core: | drop_resource_pool_classifier_stmt | backup_stmt | restore_stmt + | alter_sequence_stmt ; expr: @@ -791,10 +792,16 @@ table_setting_value: | STRING_VALUE | integer | split_boundaries - | expr ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))? + | ttl_tier_list ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))? | bool_value ; +ttl_tier_list: expr (ttl_tier_action (COMMA expr ttl_tier_action)*)?; +ttl_tier_action: + TO EXTERNAL DATA SOURCE an_id + | DELETE +; + family_entry: FAMILY an_id family_settings; family_settings: LPAREN (family_settings_entry (COMMA family_settings_entry)*)? RPAREN; family_settings_entry: an_id EQUALS family_setting_value; @@ -1048,6 +1055,14 @@ analyze_table: simple_table_ref (LPAREN column_list RPAREN)?; analyze_table_list: analyze_table (COMMA analyze_table)* COMMA?; analyze_stmt: ANALYZE analyze_table_list; +alter_sequence_stmt: ALTER SEQUENCE (IF EXISTS)? object_ref alter_sequence_action+; +alter_sequence_action: + START WITH? integer + | RESTART WITH? integer + | RESTART + | INCREMENT BY? integer +; + // Special rules that allow to use certain keywords as identifiers. identifier: ID_PLAIN | ID_QUOTED; id: identifier | keyword; @@ -1333,6 +1348,7 @@ keyword_as_compat: | IMMEDIATE | IMPORT | IN + | INCREMENT | INCREMENTAL | INDEX | INDEXED @@ -1407,6 +1423,7 @@ keyword_as_compat: | REPLICATION | RESET | RESPECT + | RESTART | RESTORE | RESTRICT // | RESULT @@ -1424,7 +1441,9 @@ keyword_as_compat: | SETS | SHOW | SKIP + | SEQUENCE | SOURCE + | START | SUBQUERY | SUBSET | SYMBOLS @@ -1551,6 +1570,7 @@ keyword_compat: ( | IMMEDIATE | IMPORT | IN + | INCREMENT | INCREMENTAL | INDEX | INDEXED @@ -1625,6 +1645,7 @@ keyword_compat: ( | REPLICATION | RESET | RESPECT + | RESTART | RESTORE | RESTRICT | RESULT @@ -1642,7 +1663,9 @@ keyword_compat: ( | SETS | SHOW | SKIP + | SEQUENCE | SOURCE + | START | SUBQUERY | SUBSET | SYMBOLS @@ -1897,6 +1920,7 @@ ILIKE: I L I K E; IMMEDIATE: I M M E D I A T E; IMPORT: I M P O R T; IN: I N; +INCREMENT: I N C R E M E N T; INCREMENTAL: I N C R E M E N T A L; INDEX: I N D E X; INDEXED: I N D E X E D; @@ -1986,6 +2010,7 @@ REPLICATION: R E P L I C A T I O N; RESET: R E S E T; RESOURCE: R E S O U R C E; RESPECT: R E S P E C T; +RESTART: R E S T A R T; RESTORE: R E S T O R E; RESTRICT: R E S T R I C T; RESULT: R E S U L T; @@ -2010,7 +2035,9 @@ SET: S E T; SETS: S E T S; SHOW: S H O W; SKIP: S K I P; +SEQUENCE: S E Q U E N C E; SOURCE: S O U R C E; +START: S T A R T; STREAM: S T R E A M; STRUCT: S T R U C T; SUBQUERY: S U B Q U E R Y; diff --git a/yql/essentials/sql/v1/SQLv1Antlr4.g.in b/yql/essentials/sql/v1/SQLv1Antlr4.g.in index 229b15dfae..5435535336 100644 --- a/yql/essentials/sql/v1/SQLv1Antlr4.g.in +++ b/yql/essentials/sql/v1/SQLv1Antlr4.g.in @@ -74,6 +74,7 @@ sql_stmt_core: | drop_resource_pool_classifier_stmt | backup_stmt | restore_stmt + | alter_sequence_stmt ; expr: @@ -790,10 +791,16 @@ table_setting_value: | STRING_VALUE | integer | split_boundaries - | expr ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))? + | ttl_tier_list ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))? | bool_value ; +ttl_tier_list: expr (ttl_tier_action (COMMA expr ttl_tier_action)*)?; +ttl_tier_action: + TO EXTERNAL DATA SOURCE an_id + | DELETE +; + family_entry: FAMILY an_id family_settings; family_settings: LPAREN (family_settings_entry (COMMA family_settings_entry)*)? RPAREN; family_settings_entry: an_id EQUALS family_setting_value; @@ -1047,6 +1054,14 @@ analyze_table: simple_table_ref (LPAREN column_list RPAREN)?; analyze_table_list: analyze_table (COMMA analyze_table)* COMMA?; analyze_stmt: ANALYZE analyze_table_list; +alter_sequence_stmt: ALTER SEQUENCE (IF EXISTS)? object_ref alter_sequence_action+; +alter_sequence_action: + START WITH? integer + | RESTART WITH? integer + | RESTART + | INCREMENT BY? integer +; + // Special rules that allow to use certain keywords as identifiers. identifier: ID_PLAIN | ID_QUOTED; id: identifier | keyword; @@ -1332,6 +1347,7 @@ keyword_as_compat: | IMMEDIATE | IMPORT | IN + | INCREMENT | INCREMENTAL | INDEX | INDEXED @@ -1406,6 +1422,7 @@ keyword_as_compat: | REPLICATION | RESET | RESPECT + | RESTART | RESTORE | RESTRICT // | RESULT @@ -1423,7 +1440,9 @@ keyword_as_compat: | SETS | SHOW | TSKIP + | SEQUENCE | SOURCE + | START | SUBQUERY | SUBSET | SYMBOLS @@ -1550,6 +1569,7 @@ keyword_compat: ( | IMMEDIATE | IMPORT | IN + | INCREMENT | INCREMENTAL | INDEX | INDEXED @@ -1624,6 +1644,7 @@ keyword_compat: ( | REPLICATION | RESET | RESPECT + | RESTART | RESTORE | RESTRICT | RESULT @@ -1641,7 +1662,9 @@ keyword_compat: ( | SETS | SHOW | TSKIP + | SEQUENCE | SOURCE + | START | SUBQUERY | SUBSET | SYMBOLS @@ -1896,6 +1919,7 @@ ILIKE: I L I K E; IMMEDIATE: I M M E D I A T E; IMPORT: I M P O R T; IN: I N; +INCREMENT: I N C R E M E N T; INCREMENTAL: I N C R E M E N T A L; INDEX: I N D E X; INDEXED: I N D E X E D; @@ -1985,6 +2009,7 @@ REPLICATION: R E P L I C A T I O N; RESET: R E S E T; RESOURCE: R E S O U R C E; RESPECT: R E S P E C T; +RESTART: R E S T A R T; RESTORE: R E S T O R E; RESTRICT: R E S T R I C T; RESULT: R E S U L T; @@ -2009,7 +2034,9 @@ SET: S E T; SETS: S E T S; SHOW: S H O W; TSKIP: S K I P; +SEQUENCE: S E Q U E N C E; SOURCE: S O U R C E; +START: S T A R T; STREAM: S T R E A M; STRUCT: S T R U C T; SUBQUERY: S U B Q U E R Y; diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp index e327c2d2ea..94d8a3bc16 100644 --- a/yql/essentials/sql/v1/builtin.cpp +++ b/yql/essentials/sql/v1/builtin.cpp @@ -2916,6 +2916,9 @@ struct TBuiltinFuncData { {"listtopsort", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSort", 2, 3)}, {"listtopsortasc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSortAsc", 2, 3)}, {"listtopsortdesc", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTopSortDesc", 2, 3)}, + {"listsample", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSample", 2, 3)}, + {"listsamplen", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSampleN", 2, 3)}, + {"listshuffle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListShuffle", 1, 2)}, // Dict builtins {"dictlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)}, diff --git a/yql/essentials/sql/v1/format/sql_format.cpp b/yql/essentials/sql/v1/format/sql_format.cpp index 80ce5d139e..3cc18b141b 100644 --- a/yql/essentials/sql/v1/format/sql_format.cpp +++ b/yql/essentials/sql/v1/format/sql_format.cpp @@ -962,6 +962,12 @@ private: VisitAllFields(TRule_use_stmt::GetDescriptor(), msg); } + void VisitAlterSequence(const TRule_alter_sequence_stmt& msg) { + PosFromToken(msg.GetToken1()); + NewLine(); + VisitAllFields(TRule_alter_sequence_stmt::GetDescriptor(), msg); + } + void VisitIntoTable(const TRule_into_table_stmt& msg) { switch (msg.GetBlock1().Alt_case()) { case TRule_into_table_stmt_TBlock1::AltCase::kAlt1: @@ -2495,6 +2501,66 @@ private: Visit(msg.GetToken5()); } + void VisitTableSettingValue(const TRule_table_setting_value& msg) { + switch (msg.GetAltCase()) { + case TRule_table_setting_value::kAltTableSettingValue5: { + // | ttl_tier_list ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))? + const auto& ttlSettings = msg.GetAlt_table_setting_value5(); + const auto& tierList = ttlSettings.GetRule_ttl_tier_list1(); + const bool needIndent = tierList.HasBlock2() && tierList.GetBlock2().Block2Size() > 0; // more then one tier + if (needIndent) { + NewLine(); + PushCurrentIndent(); + Visit(tierList.GetRule_expr1()); + VisitTtlTierAction(tierList.GetBlock2().GetRule_ttl_tier_action1()); + + for (const auto& tierEntry : tierList.GetBlock2().GetBlock2()) { + Visit(tierEntry.GetToken1()); // comma + NewLine(); + Visit(tierEntry.GetRule_expr2()); + VisitTtlTierAction(tierEntry.GetRule_ttl_tier_action3()); + } + + PopCurrentIndent(); + NewLine(); + } else { + Visit(tierList.GetRule_expr1()); + if (tierList.HasBlock2()) { + VisitTtlTierAction(tierList.GetBlock2().GetRule_ttl_tier_action1()); + } + } + + VisitKeyword(ttlSettings.GetToken2()); + Visit(ttlSettings.GetRule_an_id3()); + if (ttlSettings.HasBlock4()) { + VisitKeyword(ttlSettings.GetBlock4().GetToken1()); + VisitKeyword(ttlSettings.GetBlock4().GetToken2()); + } + } break; + default: + VisitAllFields(TRule_table_setting_value::GetDescriptor(), msg); + } + } + + void VisitTtlTierAction(const TRule_ttl_tier_action& msg) { + switch (msg.GetAltCase()) { + case TRule_ttl_tier_action::kAltTtlTierAction1: + // | TO EXTERNAL DATA SOURCE an_id + VisitKeyword(msg.GetAlt_ttl_tier_action1().GetToken1()); + VisitKeyword(msg.GetAlt_ttl_tier_action1().GetToken2()); + VisitKeyword(msg.GetAlt_ttl_tier_action1().GetToken3()); + VisitKeyword(msg.GetAlt_ttl_tier_action1().GetToken4()); + Visit(msg.GetAlt_ttl_tier_action1().GetRule_an_id5()); + break; + case TRule_ttl_tier_action::kAltTtlTierAction2: + // | DELETE + VisitKeyword(msg.GetAlt_ttl_tier_action2().GetToken1()); + break; + case TRule_ttl_tier_action::ALT_NOT_SET: + break; + } + } + void VisitExpr(const TRule_expr& msg) { if (msg.HasAlt_expr2()) { Visit(msg.GetAlt_expr2()); @@ -2783,6 +2849,8 @@ TStaticData::TStaticData() {TRule_case_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitCaseExpr)}, {TRule_when_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWhenExpr)}, {TRule_with_table_settings::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitWithTableSettingsExpr)}, + {TRule_table_setting_value::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitTableSettingValue)}, + {TRule_ttl_tier_action::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitTtlTierAction)}, {TRule_expr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitExpr)}, {TRule_or_subexpr::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitOrSubexpr)}, @@ -2853,6 +2921,7 @@ TStaticData::TStaticData() {TRule_drop_resource_pool_classifier_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitDropResourcePoolClassifier)}, {TRule_backup_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitBackup)}, {TRule_restore_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitRestore)}, + {TRule_alter_sequence_stmt::GetDescriptor(), MakePrettyFunctor(&TPrettyVisitor::VisitAlterSequence)}, }) , ObfuscatingVisitDispatch({ {TToken::GetDescriptor(), MakeObfuscatingFunctor(&TObfuscatingVisitor::VisitToken)}, diff --git a/yql/essentials/sql/v1/format/sql_format_ut.h b/yql/essentials/sql/v1/format/sql_format_ut.h index 6d92bd29eb..fc919b38c4 100644 --- a/yql/essentials/sql/v1/format/sql_format_ut.h +++ b/yql/essentials/sql/v1/format/sql_format_ut.h @@ -115,6 +115,17 @@ Y_UNIT_TEST(AlterGroup) { setup.Run(cases); } +Y_UNIT_TEST(AlterSequence) { + TCases cases = { + {"use plato;alter sequence sequence start with 10 increment 2 restart with 5;","USE plato;\n\nALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5;\n"}, + {"use plato;alter sequence if exists sequence increment 1000 start 100 restart;","USE plato;\n\nALTER SEQUENCE IF EXISTS sequence INCREMENT 1000 START 100 RESTART;\n"}, + }; + + TSetup setup; + setup.Run(cases); +} + + Y_UNIT_TEST(Use) { TCases cases = { {"use user;","USE user;\n"}, @@ -226,6 +237,16 @@ Y_UNIT_TEST(CreateTable) { "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MICROSECONDS);\n"}, {"create table user(user int32) with (ttl=interval('P1D') on user as nAnOsEcOnDs)", "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS NANOSECONDS);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') delete on user as nAnOsEcOnDs)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') DELETE ON user AS NANOSECONDS);\n"}, + {"create table user(user int32) with (ttl=interval('P1D')to external data source tier1 ,interval('P10D')delete on user as seconds)", + "CREATE TABLE user (\n" + "\tuser int32\n" + ")\n" + "WITH (ttl =\n" + "\tinterval('P1D') TO EXTERNAL DATA SOURCE tier1,\n" + "\tinterval('P10D') DELETE\n" + "ON user AS SECONDS);\n"}, {"create table user(index user global unique sync on (user,user) with (user=user,user=user))", "CREATE TABLE user (\n\tINDEX user GLOBAL UNIQUE SYNC ON (user, user) WITH (user = user, user = user)\n);\n"}, {"create table user(index user global async on (user) with (user=user,))", diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp index c7cafda7a5..b285142909 100644 --- a/yql/essentials/sql/v1/node.cpp +++ b/yql/essentials/sql/v1/node.cpp @@ -1894,9 +1894,14 @@ TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, co (ctx.AnsiQuotedIdentifiers && input.StartsWith('"'))? EStringContentMode::AnsiIdent : EStringContentMode::Default); } -TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit) +TTtlSettings::TTierSettings::TTierSettings(const TNodePtr& evictionDelay, const std::optional<TIdentifier>& storageName) + : EvictionDelay(evictionDelay) + , StorageName(storageName) { +} + +TTtlSettings::TTtlSettings(const TIdentifier& columnName, const std::vector<TTierSettings>& tiers, const TMaybe<EUnit>& columnUnit) : ColumnName(columnName) - , Expr(expr) + , Tiers(tiers) , ColumnUnit(columnUnit) { } @@ -3131,10 +3136,10 @@ public: Y_DEBUG_ABORT_UNLESS(FuncNode); FuncNode->VisitTree(func, visited); } - + void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override { if (ctx.DistinctOverWindow) { - FuncNode->CollectPreaggregateExprs(ctx, src, exprs); + FuncNode->CollectPreaggregateExprs(ctx, src, exprs); } else { INode::CollectPreaggregateExprs(ctx, src, exprs); } @@ -3274,7 +3279,7 @@ TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TStri return nullptr; } - auto wrappedNode = new TAstListNodeImpl(pos, { + auto wrappedNode = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default), node }); @@ -3303,7 +3308,7 @@ void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDefer node = node->Y("Concat", node->Y("String", node->Q(prefix)), node); } - auto wrappedNode = new TAstListNodeImpl(pos, { + auto wrappedNode = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default), node }); @@ -3320,7 +3325,7 @@ TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node node = node->Y("Concat", node->Y("String", node->Q(prefix)), node); } - auto wrappedNode = new TAstListNodeImpl(pos, { + auto wrappedNode = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "EvaluateAtom", TNodeFlags::Default), node }); @@ -3462,7 +3467,7 @@ bool TVectorIndexSettings::Validate(TContext& ctx) const { if (!Distance && !Similarity) { ctx.Error() << "either distance or similarity should be set"; return false; - } + } if (!VectorType) { ctx.Error() << "vector_type should be set"; return false; diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h index 5805f92042..99f42f353c 100644 --- a/yql/essentials/sql/v1/node.h +++ b/yql/essentials/sql/v1/node.h @@ -1112,11 +1112,18 @@ namespace NSQLTranslationV1 { Nanoseconds /* "nanoseconds" */, }; + struct TTierSettings { + TNodePtr EvictionDelay; + std::optional<TIdentifier> StorageName; + + TTierSettings(const TNodePtr& evictionDelay, const std::optional<TIdentifier>& storageName = std::nullopt); + }; + TIdentifier ColumnName; - TNodePtr Expr; + std::vector<TTierSettings> Tiers; TMaybe<EUnit> ColumnUnit; - TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit = {}); + TTtlSettings(const TIdentifier& columnName, const std::vector<TTierSettings>& tiers, const TMaybe<EUnit>& columnUnit = {}); }; struct TTableSettings { @@ -1293,6 +1300,14 @@ namespace NSQLTranslationV1 { TVector<TDeferredAtom> Roles; }; + struct TSequenceParameters { + bool MissingOk = false; + TMaybe<TDeferredAtom> StartValue; + bool IsRestart = false; + TMaybe<TDeferredAtom> RestartValue; + TMaybe<TDeferredAtom> Increment; + }; + struct TTopicConsumerSettings { struct TLocalSinkSettings { // no special settings diff --git a/yql/essentials/sql/v1/query.cpp b/yql/essentials/sql/v1/query.cpp index 56dd8bd63d..8e71da3146 100644 --- a/yql/essentials/sql/v1/query.cpp +++ b/yql/essentials/sql/v1/query.cpp @@ -240,7 +240,17 @@ static INode::TPtr CreateTableSettings(const TTableSettings& tableSettings, ETab auto opts = Y(); opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name)))); - opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr))); + + auto tiersDesc = Y(); + for (const auto& tier : ttlSettings.Tiers) { + auto tierDesc = Y(); + tierDesc = L(tierDesc, Q(Y(Q("evictionDelay"), tier.EvictionDelay))); + if (tier.StorageName) { + tierDesc = L(tierDesc, Q(Y(Q("storageName"), BuildQuotedAtom(tier.StorageName->Pos, tier.StorageName->Name)))); + } + tiersDesc = L(tiersDesc, Q(tierDesc)); + } + opts = L(opts, Q(Y(Q("tiers"), Q(tiersDesc)))); if (ttlSettings.ColumnUnit) { opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit))))); @@ -2039,6 +2049,110 @@ TNodePtr BuildAlterUser(TPosition pos, const TString& service, const TDeferredAt return new TAlterUser(pos, service, cluster, name, params, scoped); } +class TAlterSequence final: public TAstListNode { +public: + TAlterSequence(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& id, const TSequenceParameters& params, TScopedStatePtr scoped) + : TAstListNode(pos) + , Service(service) + , Cluster(cluster) + , Id(id) + , Params(params) + , Scoped(scoped) + { + FakeSource = BuildFakeSource(pos); + scoped->UseCluster(service, cluster); + } + + bool DoInit(TContext& ctx, ISource* src) override { + Y_UNUSED(src); + + TNodePtr cluster = Scoped->WrapCluster(Cluster, ctx); + + if (!cluster->Init(ctx, FakeSource.Get())) { + return false; + } + + auto options = Y(); + TString mode = Params.MissingOk ? "alter_if_exists" : "alter"; + options = L(options, Q(Y(Q("mode"), Q(mode)))); + + if (Params.IsRestart) { + if (Params.RestartValue) { + TString strValue = Params.RestartValue->Build()->GetLiteralValue(); + ui64 value = FromString<ui64>(strValue); + ui64 maxValue = ui64(std::numeric_limits<i64>::max()); + ui64 minValue = 1; + if (value > maxValue) { + ctx.Error(Pos) << "Restart value: " << value << " cannot be greater than max value: " << maxValue; + return false; + } + if (value < minValue) { + ctx.Error(Pos) << "Restart value: " << value << " cannot be less than min value: " << minValue; + return false; + } + options = L(options, Q(Y(Q("restart"), Q(ToString(value))))); + } else { + options = L(options, Q(Y(Q("restart"), Q(TString())))); + } + } + if (Params.StartValue) { + TString strValue = Params.StartValue->Build()->GetLiteralValue(); + ui64 value = FromString<ui64>(strValue); + ui64 maxValue = ui64(std::numeric_limits<i64>::max()); + ui64 minValue = 1; + if (value > maxValue) { + ctx.Error(Pos) << "Start value: " << value << " cannot be greater than max value: " << maxValue; + return false; + } + if (value < minValue) { + ctx.Error(Pos) << "Start value: " << value << " cannot be less than min value: " << minValue; + return false; + } + options = L(options, Q(Y(Q("start"), Q(ToString(value))))); + } + + if (Params.Increment) { + TString strValue = Params.Increment->Build()->GetLiteralValue(); + ui64 value = FromString<ui64>(strValue); + ui64 maxValue = ui64(std::numeric_limits<i64>::max()); + if (value > maxValue) { + ctx.Error(Pos) << "Increment: " << value << " cannot be greater than max value: " << maxValue; + return false; + } + if (value == 0) { + ctx.Error(Pos) << "Increment must not be zero"; + return false; + } + options = L(options, Q(Y(Q("increment"), Q(ToString(value))))); + } + + Add("block", Q(Y( + Y("let", "sink", Y("DataSink", BuildQuotedAtom(Pos, TString(KikimrProviderName)), + Scoped->WrapCluster(Cluster, ctx))), + Y("let", "world", Y(TString(WriteName), "world", "sink", Y("Key", Q(Y(Q("sequence"), Y("String", BuildQuotedAtom(Pos, Id))))), Y("Void"), Q(options))), + Y("return", ctx.PragmaAutoCommit ? Y(TString(CommitName), "world", "sink") : AstNode("world")) + ))); + + return TAstListNode::DoInit(ctx, src); + } + + TPtr DoClone() const final { + return {}; + } +private: + const TString Service; + TDeferredAtom Cluster; + TString Id; + const TSequenceParameters Params; + + TScopedStatePtr Scoped; + TSourcePtr FakeSource; +}; + +TNodePtr BuildAlterSequence(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& id, const TSequenceParameters& params, TScopedStatePtr scoped) { + return new TAlterSequence(pos, service, cluster, id, params, scoped); +} + class TRenameRole final: public TAstListNode { public: TRenameRole(TPosition pos, bool isUser, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TDeferredAtom& newName, TScopedStatePtr scoped) diff --git a/yql/essentials/sql/v1/source.h b/yql/essentials/sql/v1/source.h index 35129fffbb..ba904d6c21 100644 --- a/yql/essentials/sql/v1/source.h +++ b/yql/essentials/sql/v1/source.h @@ -312,6 +312,7 @@ namespace NSQLTranslationV1 { TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options, TScopedStatePtr scoped); TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped); + TNodePtr BuildAlterSequence(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& id, const TSequenceParameters& params, TScopedStatePtr scoped); TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster, TNodePtr node, const TString& view = {}); void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix = {}); diff --git a/yql/essentials/sql/v1/sql.cpp b/yql/essentials/sql/v1/sql.cpp index 506b3950d5..3e5dba78f3 100644 --- a/yql/essentials/sql/v1/sql.cpp +++ b/yql/essentials/sql/v1/sql.cpp @@ -140,22 +140,22 @@ bool NeedUseForAllStatements(const TRule_sql_stmt_core::AltCase& subquery) { case TRule_sql_stmt_core::kAltSqlStmtCore17: // do case TRule_sql_stmt_core::kAltSqlStmtCore19: // if case TRule_sql_stmt_core::kAltSqlStmtCore20: // for - case TRule_sql_stmt_core::kAltSqlStmtCore21: // values + case TRule_sql_stmt_core::kAltSqlStmtCore21: // values case TRule_sql_stmt_core::kAltSqlStmtCore22: // create user case TRule_sql_stmt_core::kAltSqlStmtCore23: // alter user case TRule_sql_stmt_core::kAltSqlStmtCore24: // create group - case TRule_sql_stmt_core::kAltSqlStmtCore25: // alter group - case TRule_sql_stmt_core::kAltSqlStmtCore26: // drop role - case TRule_sql_stmt_core::kAltSqlStmtCore27: // create object - case TRule_sql_stmt_core::kAltSqlStmtCore28: // alter object - case TRule_sql_stmt_core::kAltSqlStmtCore29: // drop object - case TRule_sql_stmt_core::kAltSqlStmtCore30: // create external data source - case TRule_sql_stmt_core::kAltSqlStmtCore31: // alter external data source - case TRule_sql_stmt_core::kAltSqlStmtCore32: // drop external data source - case TRule_sql_stmt_core::kAltSqlStmtCore33: // create replication - case TRule_sql_stmt_core::kAltSqlStmtCore34: // drop replication - case TRule_sql_stmt_core::kAltSqlStmtCore35: // create topic - case TRule_sql_stmt_core::kAltSqlStmtCore36: // alter topic + case TRule_sql_stmt_core::kAltSqlStmtCore25: // alter group + case TRule_sql_stmt_core::kAltSqlStmtCore26: // drop role + case TRule_sql_stmt_core::kAltSqlStmtCore27: // create object + case TRule_sql_stmt_core::kAltSqlStmtCore28: // alter object + case TRule_sql_stmt_core::kAltSqlStmtCore29: // drop object + case TRule_sql_stmt_core::kAltSqlStmtCore30: // create external data source + case TRule_sql_stmt_core::kAltSqlStmtCore31: // alter external data source + case TRule_sql_stmt_core::kAltSqlStmtCore32: // drop external data source + case TRule_sql_stmt_core::kAltSqlStmtCore33: // create replication + case TRule_sql_stmt_core::kAltSqlStmtCore34: // drop replication + case TRule_sql_stmt_core::kAltSqlStmtCore35: // create topic + case TRule_sql_stmt_core::kAltSqlStmtCore36: // alter topic case TRule_sql_stmt_core::kAltSqlStmtCore37: // drop topic case TRule_sql_stmt_core::kAltSqlStmtCore38: // grant permissions case TRule_sql_stmt_core::kAltSqlStmtCore39: // revoke permissions @@ -176,6 +176,7 @@ bool NeedUseForAllStatements(const TRule_sql_stmt_core::AltCase& subquery) { case TRule_sql_stmt_core::kAltSqlStmtCore54: // drop resource pool classifier case TRule_sql_stmt_core::kAltSqlStmtCore55: // backup case TRule_sql_stmt_core::kAltSqlStmtCore56: // restore + case TRule_sql_stmt_core::kAltSqlStmtCore57: // alter sequence return false; } } diff --git a/yql/essentials/sql/v1/sql_query.cpp b/yql/essentials/sql/v1/sql_query.cpp index 5f2f31d776..4d9f30a452 100644 --- a/yql/essentials/sql/v1/sql_query.cpp +++ b/yql/essentials/sql/v1/sql_query.cpp @@ -1674,6 +1674,50 @@ bool TSqlQuery::Statement(TVector<TNodePtr>& blocks, const TRule_sql_stmt_core& context)); break; } + case TRule_sql_stmt_core::kAltSqlStmtCore57: { + // alter_sequence_stmt: ALTER SEQUENCE (IF EXISTS)? object_ref alter_sequence_action (COMMA alter_sequence_action)*; + Ctx.BodyPart(); + auto& node = core.GetAlt_sql_stmt_core57().GetRule_alter_sequence_stmt1(); + + Ctx.Token(node.GetToken1()); + const TPosition pos = Ctx.Pos(); + + TString service = Ctx.Scoped->CurrService; + TDeferredAtom cluster = Ctx.Scoped->CurrCluster; + if (cluster.Empty()) { + Error() << "USE statement is missing - no default cluster is selected"; + return false; + } + TObjectOperatorContext context(Ctx.Scoped); + + if (node.GetRule_object_ref4().HasBlock1()) { + if (!ClusterExpr(node.GetRule_object_ref4().GetBlock1().GetRule_cluster_expr1(), + false, context.ServiceId, context.Cluster)) { + return false; + } + } + + const TString id = Id(node.GetRule_object_ref4().GetRule_id_or_at2(), *this).second; + + TSequenceParameters params; + + if (node.HasBlock3()) { // IF EXISTS + params.MissingOk = true; + Y_DEBUG_ABORT_UNLESS( + IS_TOKEN(node.GetBlock3().GetToken1().GetId(), IF) && + IS_TOKEN(node.GetBlock3().GetToken2().GetId(), EXISTS) + ); + } + + for (const auto& block : node.GetBlock5()) { + if (!AlterSequenceAction(block.GetRule_alter_sequence_action1(), params)) { + return false; + } + } + + AddStatementToBlocks(blocks, BuildAlterSequence(pos, service, cluster, id, params, Ctx.Scoped)); + break; + } case TRule_sql_stmt_core::ALT_NOT_SET: Ctx.IncrementMonCounter("sql_errors", "UnknownStatement" + internalStatementName); AltNotImplemented("sql_stmt_core", core); @@ -2177,6 +2221,63 @@ bool TSqlQuery::AlterTableAlterIndex(const TRule_alter_table_alter_index& node, return true; } +bool TSqlQuery::AlterSequenceAction(const TRule_alter_sequence_action& node, TSequenceParameters& params) { + switch (node.Alt_case()) { + case TRule_alter_sequence_action::kAltAlterSequenceAction1: { + if (params.StartValue) { + Ctx.Error(Ctx.Pos()) << "Start value defined more than once"; + return false; + } + auto literalNumber = LiteralNumber(Ctx, node.GetAlt_alter_sequence_action1().GetRule_integer3()); + if (literalNumber) { + params.StartValue = TDeferredAtom(literalNumber, Ctx); + } else { + return false; + } + break; + } + case TRule_alter_sequence_action::kAltAlterSequenceAction2: { + if (params.IsRestart) { + Ctx.Error(Ctx.Pos()) << "Restart value defined more than once"; + return false; + } + auto literalNumber = LiteralNumber(Ctx, node.GetAlt_alter_sequence_action2().GetRule_integer3()); + if (literalNumber) { + params.IsRestart = true; + params.RestartValue = TDeferredAtom(literalNumber, Ctx); + } else { + return false; + } + break; + } + case TRule_alter_sequence_action::kAltAlterSequenceAction3: { + if (params.IsRestart) { + Ctx.Error(Ctx.Pos()) << "Restart value defined more than once"; + return false; + } + params.IsRestart = true; + break; + } + case TRule_alter_sequence_action::kAltAlterSequenceAction4: { + if (params.Increment) { + Ctx.Error(Ctx.Pos()) << "Increment defined more than once"; + return false; + } + auto literalNumber = LiteralNumber(Ctx, node.GetAlt_alter_sequence_action4().GetRule_integer3()); + if (literalNumber) { + params.Increment = TDeferredAtom(literalNumber, Ctx); + } else { + return false; + } + break; + } + case TRule_alter_sequence_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + return true; +} + bool TSqlQuery::AlterTableAlterColumnDropNotNull(const TRule_alter_table_alter_column_drop_not_null& node, TAlterTableParameters& params) { TString name = Id(node.GetRule_an_id3(), *this); const TPosition pos(Context().Pos()); diff --git a/yql/essentials/sql/v1/sql_query.h b/yql/essentials/sql/v1/sql_query.h index 99e1a9c4ef..03fd85df6b 100644 --- a/yql/essentials/sql/v1/sql_query.h +++ b/yql/essentials/sql/v1/sql_query.h @@ -42,6 +42,7 @@ private: void AlterTableDropChangefeed(const TRule_alter_table_drop_changefeed& node, TAlterTableParameters& params); void AlterTableRenameIndexTo(const TRule_alter_table_rename_index_to& node, TAlterTableParameters& params); bool AlterTableAlterIndex(const TRule_alter_table_alter_index& node, TAlterTableParameters& params); + bool AlterSequenceAction(const TRule_alter_sequence_action& node, TSequenceParameters& params); TNodePtr PragmaStatement(const TRule_pragma_stmt& stmt, bool& success); void AddStatementToBlocks(TVector<TNodePtr>& blocks, TNodePtr node); bool ParseTableStoreFeatures(std::map<TString, TDeferredAtom> & result, const TRule_alter_table_store_action & actions); @@ -64,7 +65,7 @@ private: if (!Ctx.Settings.Antlr4Parser) { const auto pos = descr.find(": "); Y_DEBUG_ABORT_UNLESS(pos != TString::npos); - Split(TString(descr.begin() + pos + 2, descr.end()), "_", parts); + Split(TString(descr.begin() + pos + 2, descr.end()), "_", parts); } else { Split(descr, "_", parts); } diff --git a/yql/essentials/sql/v1/sql_translation.cpp b/yql/essentials/sql/v1/sql_translation.cpp index b298eb8abb..61a273b260 100644 --- a/yql/essentials/sql/v1/sql_translation.cpp +++ b/yql/essentials/sql/v1/sql_translation.cpp @@ -1963,19 +1963,68 @@ namespace { return true; } - bool StoreTtlSettings(const TRule_table_setting_value& from, TResetableSetting<TTtlSettings, void>& to, - TSqlExpression& expr, TContext& ctx, TTranslation& txc) { + bool FillTieringInterval(const TRule_expr& from, TNodePtr& tieringInterval, TSqlExpression& expr, TContext& ctx) { + auto exprNode = expr.Build(from); + if (!exprNode) { + return false; + } + + if (exprNode->GetOpName() != "Interval") { + ctx.Error() << "Literal of Interval type is expected for TTL"; + return false; + } + + tieringInterval = exprNode; + return true; + } + + bool FillTierAction(const TRule_ttl_tier_action& from, std::optional<TIdentifier>& storageName, TTranslation& txc) { + switch (from.GetAltCase()) { + case TRule_ttl_tier_action::kAltTtlTierAction1: + storageName = IdEx(from.GetAlt_ttl_tier_action1().GetRule_an_id5(), txc); + break; + case TRule_ttl_tier_action::kAltTtlTierAction2: + storageName.reset(); + break; + case TRule_ttl_tier_action::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + return true; + } + + bool StoreTtlSettings(const TRule_table_setting_value& from, TResetableSetting<TTtlSettings, void>& to, TSqlExpression& expr, TContext& ctx, + TTranslation& txc) { switch (from.Alt_case()) { case TRule_table_setting_value::kAltTableSettingValue5: { auto columnName = IdEx(from.GetAlt_table_setting_value5().GetRule_an_id3(), txc); - auto exprNode = expr.Build(from.GetAlt_table_setting_value5().GetRule_expr1()); - if (!exprNode) { + auto tiersLiteral = from.GetAlt_table_setting_value5().GetRule_ttl_tier_list1(); + + TNodePtr firstInterval; + if (!FillTieringInterval(tiersLiteral.GetRule_expr1(), firstInterval, expr, ctx)) { return false; } - if (exprNode->GetOpName() != "Interval") { - ctx.Error() << "Literal of Interval type is expected for TTL"; - return false; + std::vector<TTtlSettings::TTierSettings> tiers; + if (!tiersLiteral.HasBlock2()) { + tiers.emplace_back(firstInterval); + } else { + std::optional<TIdentifier> firstStorageName; + if (!FillTierAction(tiersLiteral.GetBlock2().GetRule_ttl_tier_action1(), firstStorageName, txc)) { + return false; + } + tiers.emplace_back(firstInterval, firstStorageName); + + for (const auto& tierLiteral : tiersLiteral.GetBlock2().GetBlock2()) { + TNodePtr intervalExpr; + if (!FillTieringInterval(tierLiteral.GetRule_expr2(), intervalExpr, expr, ctx)) { + return false; + } + std::optional<TIdentifier> storageName; + if (!FillTierAction(tierLiteral.GetRule_ttl_tier_action3(), storageName, txc)) { + return false; + } + tiers.emplace_back(intervalExpr, storageName); + } } TMaybe<TTtlSettings::EUnit> columnUnit; @@ -1988,7 +2037,7 @@ namespace { } } - to.Set(TTtlSettings(columnName, exprNode, columnUnit)); + to.Set(TTtlSettings(columnName, tiers, columnUnit)); break; } default: diff --git a/yql/essentials/sql/v1/sql_ut.cpp b/yql/essentials/sql/v1/sql_ut.cpp index 65ec39af2c..45272879c0 100644 --- a/yql/essentials/sql/v1/sql_ut.cpp +++ b/yql/essentials/sql/v1/sql_ut.cpp @@ -2053,7 +2053,8 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { if (word == "Write") { UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); } }; @@ -2075,7 +2076,8 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { if (word == "Write") { UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); @@ -2088,6 +2090,80 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); } + Y_UNIT_TEST(TtlTieringParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = + Interval("P1D") TO EXTERNAL DATA SOURCE Tier1, + Interval("P2D") TO EXTERNAL DATA SOURCE Tier2, + Interval("P30D") DELETE + ON CreatedAt AS SECONDS);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storageName")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier1")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier2")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("172800000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("2592000000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(TtlTieringWithOtherActionsParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + ALTER TABLE tableName + ADD FAMILY cold (DATA = "rot"), + SET TTL + Interval("P1D") TO EXTERNAL DATA SOURCE Tier1, + Interval("P2D") TO EXTERNAL DATA SOURCE Tier2, + Interval("P30D") DELETE + ON CreatedAt, + ALTER COLUMN payload_v2 SET FAMILY cold, + ALTER FAMILY default SET DATA "ssd" + ;)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("addColumnFamilies")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("cold")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alterColumnFamilies")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("default")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storageName")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier1")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier2")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("172800000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("2592000000")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + Y_UNIT_TEST(TieringParseCorrect) { NYql::TAstParseResult res = SqlToYql( R"( USE plato; @@ -2614,6 +2690,153 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_EQUAL(1, elementStat["\'mode \'alter"]); } + Y_UNIT_TEST(AlterSequence) { + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence INCREMENT 2; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence INCREMENT 2 START 1000; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence RESTART START 1000; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE IF EXISTS sequence INCREMENT 1000 START 100 RESTART; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE IF EXISTS sequence RESTART 1000 START WITH 100 INCREMENT BY 7; + )").IsOk()); + } + + Y_UNIT_TEST(AlterSequenceIncorrect) { + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5 RESTART;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:75: Error: Restart value defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 START 100 RESTART WITH 5;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:60: Error: Start value defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence INCREMENT BY 7 START WITH 10 INCREMENT 2 RESTART WITH 5 RESTART;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:62: Error: Increment defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 100 START WITH 10 INCREMENT 2 RESTART WITH 5;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Restart value defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1234234543563435151456 START WITH 10 INCREMENT 2;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: Failed to parse number from string: 1234234543563435151456, number limit overflow\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 9223372036854775817 INCREMENT 4;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Start value: 9223372036854775817 cannot be greater than max value: 9223372036854775807\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 9223372036854775827 START WITH 5 INCREMENT 4;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Restart value: 9223372036854775827 cannot be greater than max value: 9223372036854775807\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 4 INCREMENT 0;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Increment must not be zero\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 0 START WITH 4 INCREMENT 1;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Restart value: 0 cannot be less than min value: 1\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 0 INCREMENT 1;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Start value: 0 cannot be less than min value: 1\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 1 INCREMENT 9223372036854775837;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Increment: 9223372036854775837 cannot be greater than max value: 9223372036854775807\n"); + } + } + + Y_UNIT_TEST(AlterSequenceCorrect) { + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("sequence")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("alter_if_exists")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("start")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("increment")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("restart")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE IF EXISTS sequence INCREMENT 2 RESTART;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("sequence")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter_if_exists")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("increment")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("restart")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE IF EXISTS sequence START 10 INCREMENT BY 2;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("sequence")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter_if_exists")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("start")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("restart")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("increment")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + } + Y_UNIT_TEST(OptionalAliases) { UNIT_ASSERT(SqlToYql("USE plato; SELECT foo FROM (SELECT key foo FROM Input);").IsOk()); UNIT_ASSERT(SqlToYql("USE plato; SELECT a.x FROM Input1 a JOIN Input2 b ON a.key = b.key;").IsOk()); diff --git a/yql/essentials/sql/v1/sql_ut_antlr4.cpp b/yql/essentials/sql/v1/sql_ut_antlr4.cpp index e2a05cc229..91b85e92e8 100644 --- a/yql/essentials/sql/v1/sql_ut_antlr4.cpp +++ b/yql/essentials/sql/v1/sql_ut_antlr4.cpp @@ -2053,7 +2053,8 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { if (word == "Write") { UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); } }; @@ -2075,7 +2076,8 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { if (word == "Write") { UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); - UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); @@ -2088,6 +2090,80 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); } + Y_UNIT_TEST(TtlTieringParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = + Interval("P1D") TO EXTERNAL DATA SOURCE Tier1, + Interval("P2D") TO EXTERNAL DATA SOURCE Tier2, + Interval("P30D") DELETE + On CreatedAt AS SECONDS);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storageName")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier1")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier2")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("172800000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("2592000000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(TtlTieringWithOtherActionsParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + ALTER TABLE tableName + ADD FAMILY cold (DATA = "rot"), + SET TTL + Interval("P1D") TO EXTERNAL DATA SOURCE Tier1, + Interval("P2D") TO EXTERNAL DATA SOURCE Tier2, + Interval("P30D") DELETE + ON CreatedAt, + ALTER COLUMN payload_v2 SET FAMILY cold, + ALTER FAMILY default SET DATA "ssd" + ;)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("addColumnFamilies")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("cold")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alterColumnFamilies")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("default")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("tiers")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("evictionDelay")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("storageName")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier1")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("Tier2")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("172800000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("2592000000")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + Y_UNIT_TEST(TieringParseCorrect) { NYql::TAstParseResult res = SqlToYql( R"( USE plato; @@ -2614,6 +2690,153 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_EQUAL(1, elementStat["\'mode \'alter"]); } + Y_UNIT_TEST(AlterSequence) { + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence INCREMENT 2; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence INCREMENT 2 START 1000; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE sequence RESTART START 1000; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE IF EXISTS sequence INCREMENT 1000 START 100 RESTART; + )").IsOk()); + UNIT_ASSERT(SqlToYql(R"( + USE plato; + ALTER SEQUENCE IF EXISTS sequence RESTART 1000 START WITH 100 INCREMENT BY 7; + )").IsOk()); + } + + Y_UNIT_TEST(AlterSequenceIncorrect) { + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5 RESTART;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:75: Error: Restart value defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 START 100 RESTART WITH 5;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:60: Error: Start value defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence INCREMENT BY 7 START WITH 10 INCREMENT 2 RESTART WITH 5 RESTART;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:62: Error: Increment defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 100 START WITH 10 INCREMENT 2 RESTART WITH 5;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:77: Error: Restart value defined more than once\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1234234543563435151456 START WITH 10 INCREMENT 2;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:49: Error: Failed to parse number from string: 1234234543563435151456, number limit overflow\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 9223372036854775817 INCREMENT 4;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Start value: 9223372036854775817 cannot be greater than max value: 9223372036854775807\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 9223372036854775827 START WITH 5 INCREMENT 4;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Restart value: 9223372036854775827 cannot be greater than max value: 9223372036854775807\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 4 INCREMENT 0;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Increment must not be zero\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 0 START WITH 4 INCREMENT 1;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Restart value: 0 cannot be less than min value: 1\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 0 INCREMENT 1;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Start value: 0 cannot be less than min value: 1\n"); + } + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence RESTART WITH 1 START WITH 1 INCREMENT 9223372036854775837;"); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:1:12: Error: Increment: 9223372036854775837 cannot be greater than max value: 9223372036854775807\n"); + } + } + + Y_UNIT_TEST(AlterSequenceCorrect) { + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE sequence START WITH 10 INCREMENT 2 RESTART WITH 5;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("sequence")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("alter_if_exists")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("start")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("increment")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("restart")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE IF EXISTS sequence INCREMENT 2 RESTART;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("sequence")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter_if_exists")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("increment")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("restart")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + { + NYql::TAstParseResult res = SqlToYql("USE plato; ALTER SEQUENCE IF EXISTS sequence START 10 INCREMENT BY 2;"); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("sequence")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("alter_if_exists")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("start")); + UNIT_ASSERT_VALUES_EQUAL(TString::npos, line.find("restart")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("increment")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0}}; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + } + Y_UNIT_TEST(OptionalAliases) { UNIT_ASSERT(SqlToYql("USE plato; SELECT foo FROM (SELECT key foo FROM Input);").IsOk()); UNIT_ASSERT(SqlToYql("USE plato; SELECT a.x FROM Input1 a JOIN Input2 b ON a.key = b.key;").IsOk()); diff --git a/yql/essentials/tests/common/test_framework/udfs_deps/ya.make b/yql/essentials/tests/common/test_framework/udfs_deps/ya.make index 16b320bc3b..7eddcac827 100644 --- a/yql/essentials/tests/common/test_framework/udfs_deps/ya.make +++ b/yql/essentials/tests/common/test_framework/udfs_deps/ya.make @@ -19,6 +19,7 @@ SET( yql/essentials/udfs/common/url_base yql/essentials/udfs/common/unicode_base yql/essentials/udfs/common/streaming + yql/essentials/udfs/common/vector yql/essentials/udfs/examples/callables yql/essentials/udfs/examples/dicts yql/essentials/udfs/examples/dummylog diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index ef8ca7ee6c..c178530713 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -2463,6 +2463,13 @@ "uri": "https://{canondata_backend}/1924537/1ab444909086b08bd4fe21c5a43f5e183c647e0a/resource.tar.gz#test_sql2yql.test_aggregate-group_by_session_extended_tuple_/sql.yql" } ], + "test_sql2yql.test[aggregate-group_by_session_nopush]": [ + { + "checksum": "09f9e4a178067f6aaa81b9e9959b4cec", + "size": 3177, + "uri": "https://{canondata_backend}/212715/fe819b0081800cfcbf6e2512d273e760949a6cc7/resource.tar.gz#test_sql2yql.test_aggregate-group_by_session_nopush_/sql.yql" + } + ], "test_sql2yql.test[aggregate-group_by_session_only]": [ { "checksum": "0c22dd1ef887ea533c6e0621c0937ffa", @@ -6446,6 +6453,27 @@ "uri": "https://{canondata_backend}/1784117/d56ae82ad9d30397a41490647be1bd2124718f98/resource.tar.gz#test_sql2yql.test_expr-list_replicate_fail_/sql.yql" } ], + "test_sql2yql.test[expr-list_sample]": [ + { + "checksum": "922f4c9c5a2fe848f40272dd15cfde42", + "size": 10843, + "uri": "https://{canondata_backend}/1924537/278b77accb7596bd976e3e218425469d4b97dcf9/resource.tar.gz#test_sql2yql.test_expr-list_sample_/sql.yql" + } + ], + "test_sql2yql.test[expr-list_sample_n]": [ + { + "checksum": "5ce08b8b61ef8b2863f931bc1b986679", + "size": 7573, + "uri": "https://{canondata_backend}/1924537/278b77accb7596bd976e3e218425469d4b97dcf9/resource.tar.gz#test_sql2yql.test_expr-list_sample_n_/sql.yql" + } + ], + "test_sql2yql.test[expr-list_shuffle]": [ + { + "checksum": "3cd4f632706daf9ac8962369e7d0eac3", + "size": 4413, + "uri": "https://{canondata_backend}/1777230/f0ec95d2b2a3a38fc99b00afc1f2d60d2b3e8548/resource.tar.gz#test_sql2yql.test_expr-list_shuffle_/sql.yql" + } + ], "test_sql2yql.test[expr-list_takeskipwhile]": [ { "checksum": "827d6c45ccb33ccc641531600fa839ce", @@ -22322,6 +22350,13 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_aggregate-group_by_session_extended_tuple_/formatted.sql" } ], + "test_sql_format.test[aggregate-group_by_session_nopush]": [ + { + "checksum": "382f93f1c899dd2d1d5ea6b04575cfef", + "size": 372, + "uri": "https://{canondata_backend}/212715/fe819b0081800cfcbf6e2512d273e760949a6cc7/resource.tar.gz#test_sql_format.test_aggregate-group_by_session_nopush_/formatted.sql" + } + ], "test_sql_format.test[aggregate-group_by_session_only]": [ { "checksum": "531ee77369e54e2a1616411e89c86bb7", @@ -26305,6 +26340,27 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_expr-list_replicate_fail_/formatted.sql" } ], + "test_sql_format.test[expr-list_sample]": [ + { + "checksum": "a642f47aa5488ecfa6450c114a85903d", + "size": 1235, + "uri": "https://{canondata_backend}/1942525/0302d8428323e9211161c4db74348074ea0aab49/resource.tar.gz#test_sql_format.test_expr-list_sample_/formatted.sql" + } + ], + "test_sql_format.test[expr-list_sample_n]": [ + { + "checksum": "4b04a240db2a66eab919da4fbbf3cdea", + "size": 1128, + "uri": "https://{canondata_backend}/1942525/0302d8428323e9211161c4db74348074ea0aab49/resource.tar.gz#test_sql_format.test_expr-list_sample_n_/formatted.sql" + } + ], + "test_sql_format.test[expr-list_shuffle]": [ + { + "checksum": "73822288846e1fc180736baa4a9548c7", + "size": 612, + "uri": "https://{canondata_backend}/1942525/0302d8428323e9211161c4db74348074ea0aab49/resource.tar.gz#test_sql_format.test_expr-list_shuffle_/formatted.sql" + } + ], "test_sql_format.test[expr-list_takeskipwhile]": [ { "checksum": "fe413941b62655034d49cd2674f2c947", diff --git a/yql/essentials/tests/sql/suites/aggregate/group_by_session_nopush.cfg b/yql/essentials/tests/sql/suites/aggregate/group_by_session_nopush.cfg new file mode 100644 index 0000000000..9e0b837318 --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/group_by_session_nopush.cfg @@ -0,0 +1,2 @@ +in Input session1.txt +providers yt diff --git a/yql/essentials/tests/sql/suites/aggregate/group_by_session_nopush.sql b/yql/essentials/tests/sql/suites/aggregate/group_by_session_nopush.sql new file mode 100644 index 0000000000..fd8e46a071 --- /dev/null +++ b/yql/essentials/tests/sql/suites/aggregate/group_by_session_nopush.sql @@ -0,0 +1,14 @@ +/* syntax version 1 */ +/* postgres can not */ +/* yt can not */ + +SELECT * FROM ( + SELECT + user, + cast(session_start as Int64) as ss, + ListSort(AGGREGATE_LIST(ts)) as session, + COUNT(1) as session_len + FROM plato.Input + GROUP BY SessionWindow(ts, 10) as session_start, user +) +WHERE ss != 100500; -- should not push down diff --git a/yql/essentials/tests/sql/suites/expr/list_sample.cfg b/yql/essentials/tests/sql/suites/expr/list_sample.cfg new file mode 100644 index 0000000000..bb349dd8ab --- /dev/null +++ b/yql/essentials/tests/sql/suites/expr/list_sample.cfg @@ -0,0 +1 @@ +providers yt diff --git a/yql/essentials/tests/sql/suites/expr/list_sample.sql b/yql/essentials/tests/sql/suites/expr/list_sample.sql new file mode 100644 index 0000000000..fecd4febfa --- /dev/null +++ b/yql/essentials/tests/sql/suites/expr/list_sample.sql @@ -0,0 +1,36 @@ +/* yt can not */ +$list = ListFromRange(1, 101); +$test = ($probability, $dependsOn) -> { + $sample = ListCollect(ListSample($list, $probability, $dependsOn)); + RETURN + ( + ListSort(DictKeys(ToSet($sample))) == ListSort($sample), + (ListLength($sample), $probability * 100), + SetIncludes(ToSet($list), $sample) + ); +}; + +SELECT + ListSample(NULL , 1.0) IS NULL AS mustBeTrue1, + ListSample(Nothing(OptionalType(ListType(DataType("Uint64")))), 1.0) IS NULL AS mustBeTrue2, + ListSample([] , 1.0) == [] AS mustBeTrue3, + + ListSample($list, NULL ) == $list AS mustBeTrue4, + ListSample($list, Nothing(OptionalType(DataType("Double")))) == $list AS mustBeTrue5, + + ListSample($list, 0.5, 123) == ListSample($list, 0.5, 123) AS mustBeTrue6, + + $test(0.2, 1) AS result1, + $test(0.2, 2) AS result2, + $test(0.2, 3) AS result3, + $test(0.2, 4) AS result4, + $test(0.2, 5) AS result5, + $test(0.5, 6) AS result6, + $test(0.8, 7) AS result7, + $test(1.0, 8) AS result8, + $test(0.0, 9) AS result9, + + ListSample($list , 0.1 , 10) AS result10, + ListSample(Just($list), 0.1 , 11) AS result11, + ListSample($list , Just(0.1), 12) AS result12, + ListSample(Just($list), Just(0.1), 13) AS result13; diff --git a/yql/essentials/tests/sql/suites/expr/list_sample_n.cfg b/yql/essentials/tests/sql/suites/expr/list_sample_n.cfg new file mode 100644 index 0000000000..bb349dd8ab --- /dev/null +++ b/yql/essentials/tests/sql/suites/expr/list_sample_n.cfg @@ -0,0 +1 @@ +providers yt diff --git a/yql/essentials/tests/sql/suites/expr/list_sample_n.sql b/yql/essentials/tests/sql/suites/expr/list_sample_n.sql new file mode 100644 index 0000000000..5ab48e52ab --- /dev/null +++ b/yql/essentials/tests/sql/suites/expr/list_sample_n.sql @@ -0,0 +1,32 @@ +/* yt can not */ +$list = ListFromRange(1, 40); +$test = ($n, $dependsOn) -> { + $sample = ListCollect(ListSampleN($list, $n, $dependsOn)); + RETURN + ( + ListSort(DictKeys(ToSet($sample))) == ListSort($sample), + ListLength($sample) == ListMin(AsList($n, ListLength($list))), + SetIncludes(ToSet($list), $sample) + ); +}; + +SELECT + ListSampleN(NULL , 1ul) IS NULL AS mustBeTrue1, + ListSampleN(Nothing(OptionalType(ListType(DataType("Uint64")))), 1ul) IS NULL AS mustBeTrue2, + ListSampleN([] , 1ul) == [] AS mustBeTrue3, + + ListSampleN($list, NULL ) == $list AS mustBeTrue4, + ListSampleN($list, Nothing(OptionalType(DataType("Uint64")))) == $list AS mustBeTrue5, + + ListSampleN($list, 25ul, 123) == ListSampleN($list, 25ul, 123) AS mustBeTrue6, + + $test(5ul, 1) AS result1, + $test(10ul, 2) AS result2, + $test(20ul, 3) AS result3, + $test(0ul, 4) AS result4, + $test(100ul, 5) AS result5, + + ListSampleN($list , 10ul , 6) AS result6, + ListSampleN(Just($list), 10ul , 7) AS result7, + ListSampleN($list , Just(10ul), 8) AS result8, + ListSampleN(Just($list), Just(10ul), 9) AS result9; diff --git a/yql/essentials/tests/sql/suites/expr/list_shuffle.cfg b/yql/essentials/tests/sql/suites/expr/list_shuffle.cfg new file mode 100644 index 0000000000..bb349dd8ab --- /dev/null +++ b/yql/essentials/tests/sql/suites/expr/list_shuffle.cfg @@ -0,0 +1 @@ +providers yt diff --git a/yql/essentials/tests/sql/suites/expr/list_shuffle.sql b/yql/essentials/tests/sql/suites/expr/list_shuffle.sql new file mode 100644 index 0000000000..8051e76ad3 --- /dev/null +++ b/yql/essentials/tests/sql/suites/expr/list_shuffle.sql @@ -0,0 +1,21 @@ +/* yt can not */ +$list = ListFromRange(1, 40); +$test = ($c) -> { + $shuffle = ListCollect(ListShuffle($list, $c)); + RETURN ListSort($shuffle) == ListSort($list); +}; + +SELECT + ListShuffle(NULL ) IS NULL AS mustBeTrue1, + ListShuffle(Nothing(OptionalType(ListType(DataType("Uint64"))))) IS NULL AS mustBeTrue2, + ListShuffle([] ) == [] AS mustBeTrue3, + + ListShuffle($list, 123) == ListShuffle($list, 123) AS mustBeTrue4, + + $test(1) AS result1, + $test(2) AS result2, + $test(3) AS result3, + $test(4) AS result4, + + ListShuffle($list , 5) AS result5, + ListShuffle(Just($list), 6) AS result6; diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp index 3aa5537b21..078239962e 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp @@ -814,6 +814,27 @@ NUdf::TUnboxedValue FromPyDict( throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict."; } +TPyObjectPtr ToPyNull( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) +{ + if (!value.HasValue()) { + return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF); + } + throw yexception() << "Value is not null"; +} + +NUdf::TUnboxedValue FromPyNull( + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) +{ + if (value == Py_None) { + return NYql::NUdf::TUnboxedValuePod(); + } + throw yexception() << "Can't cast " << PyObjectRepr(value) << " to null."; +} + } // namespace TPyObjectPtr ToPyObject( @@ -832,6 +853,7 @@ TPyObjectPtr ToPyObject( case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value); case NUdf::ETypeKind::Stream: return ToPyStream(ctx, type, value); case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value); + case NUdf::ETypeKind::Null: return ToPyNull(ctx, type, value); default: { ::TStringBuilder sb; sb << "Failed to export: "; @@ -857,6 +879,7 @@ NUdf::TUnboxedValue FromPyObject( case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value); case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value); + case NUdf::ETypeKind::Null: return FromPyNull(ctx, type, value); default: { ::TStringBuilder sb; sb << "Failed to import: "; diff --git a/yql/essentials/udfs/common/vector/test/canondata/result.json b/yql/essentials/udfs/common/vector/test/canondata/result.json new file mode 100644 index 0000000000..9d8010bca3 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/canondata/result.json @@ -0,0 +1,7 @@ +{ + "test.test[Vector]": [ + { + "uri": "file://test.test_Vector_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt b/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt new file mode 100644 index 0000000000..f7bb0dbd8c --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt @@ -0,0 +1,65 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "test1"; + "test2"; + "test3" + ]; + [ + "test1"; + "test22"; + "test3" + ]; + [ + "test3"; + "test22"; + "test1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/vector/test/cases/Vector.sql b/yql/essentials/udfs/common/vector/test/cases/Vector.sql new file mode 100644 index 0000000000..d25c654420 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/cases/Vector.sql @@ -0,0 +1,23 @@ +/* syntax version 1 */ + +$typing = TupleType(VoidType(), VoidType(), String); + +$vectorCreate = YQL::Udf(AsAtom("Vector.Create"), Void(), $typing); +$vectorEmplace = YQL::Udf(AsAtom("Vector.Emplace"), Void(), $typing); +$vectorSwap = YQL::Udf(AsAtom("Vector.Swap"), Void(), $typing); +$vectorGetResult = YQL::Udf(AsAtom("Vector.GetResult"), Void(), $typing); + +$a = $vectorCreate(0); + +$a = $vectorEmplace($a, 0, "test1"); +$a = $vectorEmplace($a, 1, "test2"); +$a = $vectorEmplace($a, 2, "test3"); +$state1 = $vectorGetResult($a); + +$a = $vectorEmplace($a, 1, "test22"); +$state2 = $vectorGetResult($a); + +$a = $vectorSwap($a, 0, 2); +$state3 = $vectorGetResult($a); + +SELECT $state1, $state2, $state3; diff --git a/yql/essentials/udfs/common/vector/test/ya.make b/yql/essentials/udfs/common/vector/test/ya.make new file mode 100644 index 0000000000..0c80f4ea67 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/vector) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/vector/vector_udf.cpp b/yql/essentials/udfs/common/vector/vector_udf.cpp new file mode 100644 index 0000000000..e8b01e5a05 --- /dev/null +++ b/yql/essentials/udfs/common/vector/vector_udf.cpp @@ -0,0 +1,192 @@ +#include <yql/essentials/public/udf/udf_type_ops.h> +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <vector> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + +class TVector { +private: + std::vector<TUnboxedValue, TUnboxedValue::TAllocator> Vector; + +public: + TVector() + : Vector() + {} + + TUnboxedValue GetResult(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Vector.size(), values); + std::copy(Vector.begin(), Vector.end(), values); + + return list; + } + + void Emplace(const ui64 index, const TUnboxedValuePod& value) { + if (index < Vector.size()) { + Vector[index] = value; + } else { + Vector.push_back(value); + } + } + + void Swap(const ui64 a, const ui64 b) { + if (a < Vector.size() && b < Vector.size()) { + std::swap(Vector[a], Vector[b]); + } + } + + void Reserve(ui64 expectedSize) { + Vector.reserve(expectedSize); + } +}; + +extern const char VectorResourceName[] = "Vector.VectorResource"; +class TVectorResource: + public TBoxedResource<TVector, VectorResourceName> +{ +public: + template <typename... Args> + inline TVectorResource(Args&&... args) + : TBoxedResource(std::forward<Args>(args)...) + {} +}; + +TVectorResource* GetVectorResource(const TUnboxedValuePod& arg) { + TVectorResource::Validate(arg); + return static_cast<TVectorResource*>(arg.AsBoxed().Get()); +} + +class TVectorCreate: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = new TVectorResource; + resource->Get()->Reserve(args[0].Get<ui64>()); + return TUnboxedValuePod(resource); + } +}; + +class TVectorEmplace: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetVectorResource(args[0]); + resource->Get()->Emplace(args[1].Get<ui64>(), args[2]); + return TUnboxedValuePod(resource); + } +}; + +class TVectorSwap: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetVectorResource(args[0]); + resource->Get()->Swap(args[1].Get<ui64>(), args[2].Get<ui64>()); + return TUnboxedValuePod(resource); + } +}; + +class TVectorGetResult: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + return GetVectorResource(args[0])->Get()->GetResult(valueBuilder); + } +}; + +static const auto CreateName = TStringRef::Of("Create"); +static const auto EmplaceName = TStringRef::Of("Emplace"); +static const auto SwapName = TStringRef::Of("Swap"); +static const auto GetResultName = TStringRef::Of("GetResult"); + +class TVectorModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Vector"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(EmplaceName)->SetTypeAwareness(); + sink.Add(SwapName)->SetTypeAwareness(); + sink.Add(GetResultName)->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + try { + const bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); + + auto typeHelper = builder.TypeInfoHelper(); + + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } + + auto valueType = userTypeInspector.GetElementType(2); + TType* vectorType = builder.Resource(VectorResourceName); + + if (name == CreateName) { + builder.IsStrict(); + + builder.Args()->Add<ui64>().Done().Returns(vectorType); + + if (!typesOnly) { + builder.Implementation(new TVectorCreate); + } + } + + if (name == EmplaceName) { + builder.IsStrict(); + + builder.Args()->Add(vectorType).Add<ui64>().Add(valueType).Done().Returns(vectorType); + + if (!typesOnly) { + builder.Implementation(new TVectorEmplace); + } + } + + if (name == SwapName) { + builder.IsStrict(); + + builder.Args()->Add(vectorType).Add<ui64>().Add<ui64>().Done().Returns(vectorType); + + if (!typesOnly) { + builder.Implementation(new TVectorSwap); + } + } + + if (name == GetResultName) { + auto resultType = builder.List()->Item(valueType).Build(); + + builder.IsStrict(); + + builder.Args()->Add(vectorType).Done().Returns(resultType); + + if (!typesOnly) { + builder.Implementation(new TVectorGetResult); + } + } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } +}; + +} // namespace + +REGISTER_MODULES(TVectorModule) diff --git a/yql/essentials/udfs/common/vector/ya.make b/yql/essentials/udfs/common/vector/ya.make new file mode 100644 index 0000000000..a1403f62a6 --- /dev/null +++ b/yql/essentials/udfs/common/vector/ya.make @@ -0,0 +1,17 @@ +YQL_UDF_CONTRIB(vector_udf) + +YQL_ABI_VERSION( + 2 + 35 + 0 +) + +SRCS( + vector_udf.cpp +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/ya.make b/yql/essentials/udfs/common/ya.make index 29266857ed..415f9f9b38 100644 --- a/yql/essentials/udfs/common/ya.make +++ b/yql/essentials/udfs/common/ya.make @@ -21,6 +21,7 @@ RECURSE( topfreq unicode_base url_base + vector yson2 ) |