diff options
author | pcwur <pcwur@yandex-team.ru> | 2022-02-10 16:52:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:52:15 +0300 |
commit | 6ff7e18c1da18d253a9e78c767c7e7ccaa099f63 (patch) | |
tree | ab7fbbf3253d4c0e2793218f09378908beb025fb | |
parent | 65f827d1c96ae31877fbc999000309503d0d36c4 (diff) | |
download | ydb-6ff7e18c1da18d253a9e78c767c7e7ccaa099f63.tar.gz |
Restoring authorship annotation for <pcwur@yandex-team.ru>. Commit 2 of 2.
142 files changed, 8324 insertions, 8324 deletions
diff --git a/ydb/core/formats/program.h b/ydb/core/formats/program.h index bdd1b1f536e..ff15d30ebf3 100644 --- a/ydb/core/formats/program.h +++ b/ydb/core/formats/program.h @@ -140,12 +140,12 @@ public: , Constant(std::make_shared<arrow::StringScalar>(value)) {} - TAssign(const std::string& name, const std::shared_ptr<arrow::Scalar>& value) - : Name(name) - , Operation(EOperation::Constant) - , Constant(value) - {} - + TAssign(const std::string& name, const std::shared_ptr<arrow::Scalar>& value) + : Name(name) + , Operation(EOperation::Constant) + , Constant(value) + {} + bool IsConstant() const { return Operation == EOperation::Constant; } EOperation GetOperation() const { return Operation; } const std::vector<std::string>& GetArguments() const { return Arguments; } diff --git a/ydb/core/kqp/common/kqp_gateway.h b/ydb/core/kqp/common/kqp_gateway.h index e09f962fd22..1e06beca393 100644 --- a/ydb/core/kqp/common/kqp_gateway.h +++ b/ydb/core/kqp/common/kqp_gateway.h @@ -119,7 +119,7 @@ public: ui64 PerShardKeysSizeLimitBytes = 0; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::DQ_STATS_MODE_NONE; bool DisableLlvmForUdfStages = false; - bool LlvmEnabled = true; + bool LlvmEnabled = true; TKqpSnapshot Snapshot = TKqpSnapshot(); NKikimrKqp::EIsolationLevel IsolationLevel = NKikimrKqp::ISOLATION_LEVEL_UNDEFINED; TMaybe<NKikimrKqp::TRlPath> RlPath; diff --git a/ydb/core/kqp/common/kqp_yql.cpp b/ydb/core/kqp/common/kqp_yql.cpp index 76f1eb85482..4cd92ee10e1 100644 --- a/ydb/core/kqp/common/kqp_yql.cpp +++ b/ydb/core/kqp/common/kqp_yql.cpp @@ -125,24 +125,24 @@ NNodes::TCoNameValueTupleList TKqpPhyTxSettings::BuildNode(TExprContext& ctx, TP .Done(); } -namespace { - -template <typename TKqlReadOperation> -TKqpReadTableSettings ParseInternal(const TKqlReadOperation& node) { +namespace { + +template <typename TKqlReadOperation> +TKqpReadTableSettings ParseInternal(const TKqlReadOperation& node) { TKqpReadTableSettings settings; for (const auto& tuple : node.Settings()) { TStringBuf name = tuple.Name().Value(); - - if (name == TKqpReadTableSettings::SkipNullKeysSettingName) { - YQL_ENSURE(tuple.Value().template Maybe<TCoAtomList>()); - for (const auto& key : tuple.Value().template Cast<TCoAtomList>()) { + + if (name == TKqpReadTableSettings::SkipNullKeysSettingName) { + YQL_ENSURE(tuple.Value().template Maybe<TCoAtomList>()); + for (const auto& key : tuple.Value().template Cast<TCoAtomList>()) { settings.SkipNullKeys.emplace_back(TString(key.Value())); } - } else if (name == TKqpReadTableSettings::ItemsLimitSettingName) { + } else if (name == TKqpReadTableSettings::ItemsLimitSettingName) { YQL_ENSURE(tuple.Value().IsValid()); settings.ItemsLimit = tuple.Value().Cast().Ptr(); - } else if (name == TKqpReadTableSettings::ReverseSettingName) { + } else if (name == TKqpReadTableSettings::ReverseSettingName) { YQL_ENSURE(tuple.Ref().ChildrenSize() == 1); settings.Reverse = true; } else { @@ -153,16 +153,16 @@ TKqpReadTableSettings ParseInternal(const TKqlReadOperation& node) { return settings; } -} // anonymous namespace end - -TKqpReadTableSettings TKqpReadTableSettings::Parse(const TKqlReadTableBase& node) { - return ParseInternal(node); -} - -TKqpReadTableSettings TKqpReadTableSettings::Parse(const TKqlReadTableRangesBase& node) { - return ParseInternal(node); -} - +} // anonymous namespace end + +TKqpReadTableSettings TKqpReadTableSettings::Parse(const TKqlReadTableBase& node) { + return ParseInternal(node); +} + +TKqpReadTableSettings TKqpReadTableSettings::Parse(const TKqlReadTableRangesBase& node) { + return ParseInternal(node); +} + NNodes::TCoNameValueTupleList TKqpReadTableSettings::BuildNode(TExprContext& ctx, TPositionHandle pos) const { TVector<TCoNameValueTuple> settings; settings.reserve(3); @@ -248,103 +248,103 @@ NNodes::TCoNameValueTupleList TKqpUpsertRowsSettings::BuildNode(TExprContext& ct .Done(); } -TCoNameValueTupleList TKqpReadTableExplainPrompt::BuildNode(TExprContext& ctx, TPositionHandle pos) const { - TVector<TCoNameValueTuple> prompt; - prompt.reserve(2); - - TVector<TExprNodePtr> keys; - keys.reserve(UsedKeyColumns.size()); - - for (auto& key: UsedKeyColumns) { - keys.emplace_back(ctx.NewAtom(pos, key)); - } - - prompt.emplace_back( - Build<TCoNameValueTuple>(ctx, pos) - .Name() - .Build(UsedKeyColumnsName) - .Value<TCoAtomList>() - .Add(keys) - .Build() - .Done() - ); - - if (!ExpectedMaxRanges.empty()) { - prompt.emplace_back( - Build<TCoNameValueTuple>(ctx, pos) - .Name() - .Build(ExpectedMaxRangesName) - .Value<TCoAtom>() - .Build(ExpectedMaxRanges) - .Done() - ); - } - - return Build<TCoNameValueTupleList>(ctx, pos) - .Add(prompt) - .Done(); -} - -TKqpReadTableExplainPrompt TKqpReadTableExplainPrompt::Parse(const NNodes::TKqlReadTableRangesBase& node) { - TKqpReadTableExplainPrompt prompt; - - for (const auto& tuple : node.ExplainPrompt()) { - TStringBuf name = tuple.Name().Value(); - - if (name == TKqpReadTableExplainPrompt::UsedKeyColumnsName) { - for (const auto& key : tuple.Value().template Cast<TCoAtomList>()) { - prompt.UsedKeyColumns.emplace_back(TString(key.Value())); - } - - continue; - } - - if (name == TKqpReadTableExplainPrompt::ExpectedMaxRangesName) { - prompt.ExpectedMaxRanges = TString(tuple.Value().template Cast<TCoAtom>()); - continue; - } - - YQL_ENSURE(false, "Unknown KqpReadTableRanges explain prompt name '" << name << "'"); - } - - return prompt; -} - +TCoNameValueTupleList TKqpReadTableExplainPrompt::BuildNode(TExprContext& ctx, TPositionHandle pos) const { + TVector<TCoNameValueTuple> prompt; + prompt.reserve(2); + + TVector<TExprNodePtr> keys; + keys.reserve(UsedKeyColumns.size()); + + for (auto& key: UsedKeyColumns) { + keys.emplace_back(ctx.NewAtom(pos, key)); + } + + prompt.emplace_back( + Build<TCoNameValueTuple>(ctx, pos) + .Name() + .Build(UsedKeyColumnsName) + .Value<TCoAtomList>() + .Add(keys) + .Build() + .Done() + ); + + if (!ExpectedMaxRanges.empty()) { + prompt.emplace_back( + Build<TCoNameValueTuple>(ctx, pos) + .Name() + .Build(ExpectedMaxRangesName) + .Value<TCoAtom>() + .Build(ExpectedMaxRanges) + .Done() + ); + } + + return Build<TCoNameValueTupleList>(ctx, pos) + .Add(prompt) + .Done(); +} + +TKqpReadTableExplainPrompt TKqpReadTableExplainPrompt::Parse(const NNodes::TKqlReadTableRangesBase& node) { + TKqpReadTableExplainPrompt prompt; + + for (const auto& tuple : node.ExplainPrompt()) { + TStringBuf name = tuple.Name().Value(); + + if (name == TKqpReadTableExplainPrompt::UsedKeyColumnsName) { + for (const auto& key : tuple.Value().template Cast<TCoAtomList>()) { + prompt.UsedKeyColumns.emplace_back(TString(key.Value())); + } + + continue; + } + + if (name == TKqpReadTableExplainPrompt::ExpectedMaxRangesName) { + prompt.ExpectedMaxRanges = TString(tuple.Value().template Cast<TCoAtom>()); + continue; + } + + YQL_ENSURE(false, "Unknown KqpReadTableRanges explain prompt name '" << name << "'"); + } + + return prompt; +} + TString KqpExprToPrettyString(const TExprNode& expr, TExprContext& ctx) { - try { - TConvertToAstSettings settings; - settings.NoInlineFunc = [] (const TExprNode& exprNode) { - TExprBase node(&exprNode); - - if (node.Maybe<TDqStageBase>()) { - return true; - } - - if (node.Maybe<TDqConnection>()) { - return true; - } - - if (node.Maybe<TKqlReadTableBase>()) { - return true; - } - - if (node.Maybe<TKqlReadTableRangesBase>()) { - return true; - } - - return false; - }; - - auto ast = ConvertToAst(expr, ctx, settings); - TStringStream exprStream; - YQL_ENSURE(ast.Root); - ast.Root->PrettyPrintTo(exprStream, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote); - TString exprText = exprStream.Str(); - - return exprText; + try { + TConvertToAstSettings settings; + settings.NoInlineFunc = [] (const TExprNode& exprNode) { + TExprBase node(&exprNode); + + if (node.Maybe<TDqStageBase>()) { + return true; + } + + if (node.Maybe<TDqConnection>()) { + return true; + } + + if (node.Maybe<TKqlReadTableBase>()) { + return true; + } + + if (node.Maybe<TKqlReadTableRangesBase>()) { + return true; + } + + return false; + }; + + auto ast = ConvertToAst(expr, ctx, settings); + TStringStream exprStream; + YQL_ENSURE(ast.Root); + ast.Root->PrettyPrintTo(exprStream, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote); + TString exprText = exprStream.Str(); + + return exprText; } catch (const std::exception& e) { return TStringBuilder() << "Failed to render expression to pretty string: " << e.what(); - } + } } TString KqpExprToPrettyString(const TExprBase& expr, TExprContext& ctx) { diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h index 83b02e11e16..7de6cffe397 100644 --- a/ydb/core/kqp/common/kqp_yql.h +++ b/ydb/core/kqp/common/kqp_yql.h @@ -54,7 +54,7 @@ struct TKqpReadTableSettings { void SetReverse() { Reverse = true; } static TKqpReadTableSettings Parse(const NNodes::TKqlReadTableBase& node); - static TKqpReadTableSettings Parse(const NNodes::TKqlReadTableRangesBase& node); + static TKqpReadTableSettings Parse(const NNodes::TKqlReadTableRangesBase& node); NNodes::TCoNameValueTupleList BuildNode(TExprContext& ctx, TPositionHandle pos) const; }; @@ -69,25 +69,25 @@ struct TKqpUpsertRowsSettings { NNodes::TCoNameValueTupleList BuildNode(TExprContext& ctx, TPositionHandle pos) const; }; -struct TKqpReadTableExplainPrompt { - static constexpr TStringBuf UsedKeyColumnsName = "UsedKeyColumns"; - static constexpr TStringBuf ExpectedMaxRangesName = "ExpectedMaxRanges"; - - TVector<TString> UsedKeyColumns; - TString ExpectedMaxRanges; - - void SetUsedKeyColumns(TVector<TString> columns) { - UsedKeyColumns = columns; - } - - void SetExpectedMaxRanges(size_t count) { - ExpectedMaxRanges = ToString(count); - } - - NNodes::TCoNameValueTupleList BuildNode(TExprContext& ctx, TPositionHandle pos) const; - static TKqpReadTableExplainPrompt Parse(const NNodes::TKqlReadTableRangesBase& node); -}; - +struct TKqpReadTableExplainPrompt { + static constexpr TStringBuf UsedKeyColumnsName = "UsedKeyColumns"; + static constexpr TStringBuf ExpectedMaxRangesName = "ExpectedMaxRanges"; + + TVector<TString> UsedKeyColumns; + TString ExpectedMaxRanges; + + void SetUsedKeyColumns(TVector<TString> columns) { + UsedKeyColumns = columns; + } + + void SetExpectedMaxRanges(size_t count) { + ExpectedMaxRanges = ToString(count); + } + + NNodes::TCoNameValueTupleList BuildNode(TExprContext& ctx, TPositionHandle pos) const; + static TKqpReadTableExplainPrompt Parse(const NNodes::TKqlReadTableRangesBase& node); +}; + TString KqpExprToPrettyString(const TExprNode& expr, TExprContext& ctx); TString KqpExprToPrettyString(const NNodes::TExprBase& expr, TExprContext& ctx); diff --git a/ydb/core/kqp/compile/kqp_compile.cpp b/ydb/core/kqp/compile/kqp_compile.cpp index 65aa9ca5786..c7d305ab469 100644 --- a/ydb/core/kqp/compile/kqp_compile.cpp +++ b/ydb/core/kqp/compile/kqp_compile.cpp @@ -164,38 +164,38 @@ void FillReadRange(const TReader& read, const TKikimrTableMetadata& tableMeta, T readProto.SetReverse(settings.Reverse); } -template <typename TReader, typename TProto> -void FillReadRanges(const TReader& read, const TKikimrTableMetadata& tableMeta, TProto& readProto) -{ - Y_UNUSED(tableMeta); - - auto ranges = read.Ranges().template Maybe<TCoParameter>(); - - if (ranges.IsValid()) { - auto& rangesParam = *readProto.MutableKeyRanges(); - rangesParam.SetParamName(TString(ranges.Cast().Name())); - } else { - YQL_ENSURE( - TCoVoid::Match(read.Ranges().Raw()), +template <typename TReader, typename TProto> +void FillReadRanges(const TReader& read, const TKikimrTableMetadata& tableMeta, TProto& readProto) +{ + Y_UNUSED(tableMeta); + + auto ranges = read.Ranges().template Maybe<TCoParameter>(); + + if (ranges.IsValid()) { + auto& rangesParam = *readProto.MutableKeyRanges(); + rangesParam.SetParamName(TString(ranges.Cast().Name())); + } else { + YQL_ENSURE( + TCoVoid::Match(read.Ranges().Raw()), "Read ranges should be parameter or void, got: " << read.Ranges().Ptr()->Content() - ); - } - - auto settings = TKqpReadTableSettings::Parse(read); - - if (settings.ItemsLimit) { - TExprBase expr(settings.ItemsLimit); - - if (expr.template Maybe<TCoParameter>()) { - readProto.MutableItemsLimit()->SetParamName(TString(expr.template Cast<TCoParameter>().Name().Value())); - } else { - YQL_ENSURE(false, "Unexpected ItemsLimit callable " << expr.Ref().Content()); - } - } - - readProto.SetReverse(settings.Reverse); -} - + ); + } + + auto settings = TKqpReadTableSettings::Parse(read); + + if (settings.ItemsLimit) { + TExprBase expr(settings.ItemsLimit); + + if (expr.template Maybe<TCoParameter>()) { + readProto.MutableItemsLimit()->SetParamName(TString(expr.template Cast<TCoParameter>().Name().Value())); + } else { + YQL_ENSURE(false, "Unexpected ItemsLimit callable " << expr.Ref().Content()); + } + } + + readProto.SetReverse(settings.Reverse); +} + template <typename TEffectCallable, typename TEffectProto> void FillEffectRows(const TEffectCallable& callable, TEffectProto& proto, bool inplace) { if (auto maybeList = callable.Input().template Maybe<TCoIterator>().List()) { @@ -306,10 +306,10 @@ void FillLookup(const TKqpLookupTable& lookup, NKqpProto::TKqpPhyOpLookup& looku } } -void FillOlapProgram(const TCoLambda& process, const TKikimrTableMetadata& tableMeta, - NKqpProto::TKqpPhyOpReadOlapRanges& readProto) +void FillOlapProgram(const TCoLambda& process, const TKikimrTableMetadata& tableMeta, + NKqpProto::TKqpPhyOpReadOlapRanges& readProto) { - CompileOlapProgram(process, tableMeta, readProto); + CompileOlapProgram(process, tableMeta, readProto); } void FillConnection(const TDqConnection& connection, const TMap<ui64, ui32>& stagesMap, @@ -501,30 +501,30 @@ private: auto& tableOp = *stageProto.AddTableOps(); FillTable(deleteRows.Table(), *tableOp.MutableTable()); FillEffectRows(deleteRows, *tableOp.MutableDeleteRows(), false); - } else if (auto maybeWideReadTableRanges = node.Maybe<TKqpWideReadTableRanges>()) { - auto readTableRanges = maybeWideReadTableRanges.Cast(); - auto tableMeta = TablesData->ExistingTable(Cluster, readTableRanges.Table().Path()).Metadata; - YQL_ENSURE(tableMeta); - - auto& tableOp = *stageProto.AddTableOps(); - FillTable(readTableRanges.Table(), *tableOp.MutableTable()); - FillColumns(readTableRanges.Columns(), *tableMeta, tableOp, true); - FillReadRanges(readTableRanges, *tableMeta, *tableOp.MutableReadRanges()); - } else if (auto maybeReadWideTableRanges = node.Maybe<TKqpWideReadOlapTableRanges>()) { - auto readTableRanges = maybeReadWideTableRanges.Cast(); - auto tableMeta = TablesData->ExistingTable(Cluster, readTableRanges.Table().Path()).Metadata; - YQL_ENSURE(tableMeta); - - auto& tableOp = *stageProto.AddTableOps(); - FillTable(readTableRanges.Table(), *tableOp.MutableTable()); - FillColumns(readTableRanges.Columns(), *tableMeta, tableOp, true); - FillReadRanges(readTableRanges, *tableMeta, *tableOp.MutableReadOlapRange()); - FillOlapProgram(readTableRanges.Process(), *tableMeta, *tableOp.MutableReadOlapRange()); - } else if (node.Maybe<TCoSort>()) { + } else if (auto maybeWideReadTableRanges = node.Maybe<TKqpWideReadTableRanges>()) { + auto readTableRanges = maybeWideReadTableRanges.Cast(); + auto tableMeta = TablesData->ExistingTable(Cluster, readTableRanges.Table().Path()).Metadata; + YQL_ENSURE(tableMeta); + + auto& tableOp = *stageProto.AddTableOps(); + FillTable(readTableRanges.Table(), *tableOp.MutableTable()); + FillColumns(readTableRanges.Columns(), *tableMeta, tableOp, true); + FillReadRanges(readTableRanges, *tableMeta, *tableOp.MutableReadRanges()); + } else if (auto maybeReadWideTableRanges = node.Maybe<TKqpWideReadOlapTableRanges>()) { + auto readTableRanges = maybeReadWideTableRanges.Cast(); + auto tableMeta = TablesData->ExistingTable(Cluster, readTableRanges.Table().Path()).Metadata; + YQL_ENSURE(tableMeta); + + auto& tableOp = *stageProto.AddTableOps(); + FillTable(readTableRanges.Table(), *tableOp.MutableTable()); + FillColumns(readTableRanges.Columns(), *tableMeta, tableOp, true); + FillReadRanges(readTableRanges, *tableMeta, *tableOp.MutableReadOlapRange()); + FillOlapProgram(readTableRanges.Process(), *tableMeta, *tableOp.MutableReadOlapRange()); + } else if (node.Maybe<TCoSort>()) { hasSort = true; - } else if (node.Maybe<TCoMapJoinCore>()) { + } else if (node.Maybe<TCoMapJoinCore>()) { hasMapJoin = true; - } else if (node.Maybe<TCoUdf>()) { + } else if (node.Maybe<TCoUdf>()) { hasUdf = true; } else { YQL_ENSURE(!node.Maybe<TKqpReadTable>()); diff --git a/ydb/core/kqp/compile/kqp_mkql_compiler.cpp b/ydb/core/kqp/compile/kqp_mkql_compiler.cpp index 367ac180dd4..04130446986 100644 --- a/ydb/core/kqp/compile/kqp_mkql_compiler.cpp +++ b/ydb/core/kqp/compile/kqp_mkql_compiler.cpp @@ -180,20 +180,20 @@ TKqpKeyRange MakeKeyRange(const TKqlReadTableBase& readTable, const TKqlCompileC return keyRange; } -TKqpKeyRanges MakeComputedKeyRanges(const TKqlReadTableRangesBase& readTable, const TKqlCompileContext& ctx, - TMkqlBuildContext& buildCtx) -{ - auto settings = TKqpReadTableSettings::Parse(readTable); - - TKqpKeyRanges ranges = { - .Ranges = MkqlBuildExpr(readTable.Ranges().Ref(), buildCtx), - .ItemsLimit = settings.ItemsLimit ? MkqlBuildExpr(*settings.ItemsLimit, buildCtx) : ctx.PgmBuilder().NewNull(), - .Reverse = settings.Reverse, - }; - - return ranges; -} - +TKqpKeyRanges MakeComputedKeyRanges(const TKqlReadTableRangesBase& readTable, const TKqlCompileContext& ctx, + TMkqlBuildContext& buildCtx) +{ + auto settings = TKqpReadTableSettings::Parse(readTable); + + TKqpKeyRanges ranges = { + .Ranges = MkqlBuildExpr(readTable.Ranges().Ref(), buildCtx), + .ItemsLimit = settings.ItemsLimit ? MkqlBuildExpr(*settings.ItemsLimit, buildCtx) : ctx.PgmBuilder().NewNull(), + .Reverse = settings.Reverse, + }; + + return ranges; +} + } // namespace const TKikimrTableMetadata& TKqlCompileContext::GetTableMeta(const TKqpTable& table) const { @@ -222,31 +222,31 @@ TIntrusivePtr<IMkqlCallableCompiler> CreateKqlCompiler(const TKqlCompileContext& return result; }); - compiler->AddCallable(TKqpWideReadTableRanges::CallableName(), + compiler->AddCallable(TKqpWideReadTableRanges::CallableName(), [&ctx](const TExprNode& node, TMkqlBuildContext& buildCtx) { - TKqpWideReadTableRanges readTableRanges(&node); + TKqpWideReadTableRanges readTableRanges(&node); - const auto& tableMeta = ctx.GetTableMeta(readTableRanges.Table()); + const auto& tableMeta = ctx.GetTableMeta(readTableRanges.Table()); ValidateRangesType(readTableRanges.Ranges().Ref().GetTypeAnn(), tableMeta); - + TKqpKeyRanges ranges = MakeComputedKeyRanges(readTableRanges, ctx, buildCtx); - return ctx.PgmBuilder().KqpWideReadTableRanges( - MakeTableId(readTableRanges.Table()), - ranges, - GetKqpColumns(tableMeta, readTableRanges.Columns(), true), - nullptr - ); - }); - - compiler->AddCallable(TKqpWideReadOlapTableRanges::CallableName(), - [&ctx](const TExprNode& node, TMkqlBuildContext& buildCtx) { - TKqpWideReadOlapTableRanges readTable(&node); - + return ctx.PgmBuilder().KqpWideReadTableRanges( + MakeTableId(readTableRanges.Table()), + ranges, + GetKqpColumns(tableMeta, readTableRanges.Columns(), true), + nullptr + ); + }); + + compiler->AddCallable(TKqpWideReadOlapTableRanges::CallableName(), + [&ctx](const TExprNode& node, TMkqlBuildContext& buildCtx) { + TKqpWideReadOlapTableRanges readTable(&node); + const auto& tableMeta = ctx.GetTableMeta(readTable.Table()); ValidateRangesType(readTable.Ranges().Ref().GetTypeAnn(), tableMeta); - TKqpKeyRanges ranges = MakeComputedKeyRanges(readTable, ctx, buildCtx); + TKqpKeyRanges ranges = MakeComputedKeyRanges(readTable, ctx, buildCtx); // Return type depends on the process program, so it is built explicitly. TStringStream errorStream; @@ -259,12 +259,12 @@ TIntrusivePtr<IMkqlCallableCompiler> CreateKqlCompiler(const TKqlCompileContext& // the process program through callable. // We anyway move to explicit sources as external nodes in KQP program, so all the information // about read settings will be passed in a side channel, not the program. - auto result = ctx.PgmBuilder().KqpWideReadTableRanges( - MakeTableId(readTable.Table()), - ranges, - GetKqpColumns(tableMeta, readTable.Columns(), true), - returnType - ); + auto result = ctx.PgmBuilder().KqpWideReadTableRanges( + MakeTableId(readTable.Table()), + ranges, + GetKqpColumns(tableMeta, readTable.Columns(), true), + returnType + ); return result; }); diff --git a/ydb/core/kqp/compile/kqp_olap_compiler.cpp b/ydb/core/kqp/compile/kqp_olap_compiler.cpp index 8bddf301f9b..8f16b9345cc 100644 --- a/ydb/core/kqp/compile/kqp_olap_compiler.cpp +++ b/ydb/core/kqp/compile/kqp_olap_compiler.cpp @@ -1,7 +1,7 @@ #include "kqp_olap_compiler.h" #include <ydb/core/formats/arrow_helpers.h> - + namespace NKikimr { namespace NKqp { @@ -9,24 +9,24 @@ using namespace NYql; using namespace NYql::NNodes; using namespace NKikimrSSA; -constexpr ui32 OLAP_PROGRAM_VERSION = 1; - +constexpr ui32 OLAP_PROGRAM_VERSION = 1; + namespace { class TKqpOlapCompileContext { public: - TKqpOlapCompileContext(const TCoArgument& row, const TKikimrTableMetadata& tableMeta, - NKqpProto::TKqpPhyOpReadOlapRanges& readProto) + TKqpOlapCompileContext(const TCoArgument& row, const TKikimrTableMetadata& tableMeta, + NKqpProto::TKqpPhyOpReadOlapRanges& readProto) : Row(row) , MaxColumnId(0) - , ReadProto(readProto) + , ReadProto(readProto) { for (const auto& [_, columnMeta] : tableMeta.Columns) { YQL_ENSURE(ReadColumns.emplace(columnMeta.Name, columnMeta.Id).second); MaxColumnId = std::max(MaxColumnId, columnMeta.Id); } - - Program.SetVersion(OLAP_PROGRAM_VERSION); + + Program.SetVersion(OLAP_PROGRAM_VERSION); } ui32 GetColumnId(const TStringBuf& name) const { @@ -44,267 +44,267 @@ public: return Row.Raw(); } - TProgram::TAssignment* CreateAssignCmd() { - auto* cmd = Program.AddCommand(); - auto* assign = cmd->MutableAssign(); - assign->MutableColumn()->SetId(NewColumnId()); - - return assign; - } - - TProgram::TFilter* CreateFilter() { - return Program.AddCommand()->MutableFilter(); - } - - void AddParameterName(const TString& name) { - ReadProto.AddOlapProgramParameterNames(name); - } - - void SerializeToProto() { - TString programBytes; - TStringOutput stream(programBytes); - Program.SerializeToArcadiaStream(&stream); - ReadProto.SetOlapProgram(programBytes); - } - + TProgram::TAssignment* CreateAssignCmd() { + auto* cmd = Program.AddCommand(); + auto* assign = cmd->MutableAssign(); + assign->MutableColumn()->SetId(NewColumnId()); + + return assign; + } + + TProgram::TFilter* CreateFilter() { + return Program.AddCommand()->MutableFilter(); + } + + void AddParameterName(const TString& name) { + ReadProto.AddOlapProgramParameterNames(name); + } + + void SerializeToProto() { + TString programBytes; + TStringOutput stream(programBytes); + Program.SerializeToArcadiaStream(&stream); + ReadProto.SetOlapProgram(programBytes); + } + private: TCoArgument Row; TMap<TString, ui32> ReadColumns; ui32 MaxColumnId; - TProgram Program; - NKqpProto::TKqpPhyOpReadOlapRanges& ReadProto; + TProgram Program; + NKqpProto::TKqpPhyOpReadOlapRanges& ReadProto; }; - -TProgram::TAssignment* CompileCondition(const TExprBase& condition, TKqpOlapCompileContext& ctx); - -ui32 ConvertValueToColumn(const TCoDataCtor& value, TKqpOlapCompileContext& ctx) -{ - TProgram::TAssignment* ssaValue = ctx.CreateAssignCmd(); - - if (value.Maybe<TCoUtf8>()) { - auto nodeValue = value.Cast<TCoUtf8>().Literal().Value(); - ssaValue->MutableConstant()->SetText(TString(nodeValue)); - } else if (value.Maybe<TCoString>()) { - auto nodeValue = value.Cast<TCoString>().Literal().Value(); - ssaValue->MutableConstant()->SetText(TString(nodeValue)); - } else if (value.Maybe<TCoBool>()) { - auto nodeValue = value.Cast<TCoBool>().Literal().Value(); - ssaValue->MutableConstant()->SetBool(FromString<bool>(nodeValue)); - } else if (value.Maybe<TCoFloat>()) { - auto nodeValue = value.Cast<TCoFloat>().Literal().Value(); - ssaValue->MutableConstant()->SetFloat(FromString<float>(nodeValue)); - } else if (value.Maybe<TCoDouble>()) { - auto nodeValue = value.Cast<TCoDouble>().Literal().Value(); - ssaValue->MutableConstant()->SetDouble(FromString<double>(nodeValue)); - } else if (value.Maybe<TCoInt8>()) { - auto nodeValue = value.Cast<TCoInt8>().Literal().Value(); - ssaValue->MutableConstant()->SetInt32(FromString<i32>(nodeValue)); - } else if (value.Maybe<TCoInt16>()) { - auto nodeValue = value.Cast<TCoInt16>().Literal().Value(); - ssaValue->MutableConstant()->SetInt32(FromString<i32>(nodeValue)); - } else if (value.Maybe<TCoInt32>()) { - auto nodeValue = value.Cast<TCoInt32>().Literal().Value(); - ssaValue->MutableConstant()->SetInt32(FromString<i32>(nodeValue)); - } else if (value.Maybe<TCoInt64>()) { - auto nodeValue = value.Cast<TCoInt64>().Literal().Value(); - ssaValue->MutableConstant()->SetInt64(FromString<i64>(nodeValue)); - } else if (value.Maybe<TCoUint8>()) { - auto nodeValue = value.Cast<TCoUint8>().Literal().Value(); - ssaValue->MutableConstant()->SetUint32(FromString<ui32>(nodeValue)); - } else if (value.Maybe<TCoUint16>()) { - auto nodeValue = value.Cast<TCoUint16>().Literal().Value(); - ssaValue->MutableConstant()->SetUint32(FromString<ui32>(nodeValue)); - } else if (value.Maybe<TCoUint32>()) { - auto nodeValue = value.Cast<TCoUint32>().Literal().Value(); - ssaValue->MutableConstant()->SetUint32(FromString<ui32>(nodeValue)); - } else if (value.Maybe<TCoUint64>()) { - auto nodeValue = value.Cast<TCoUint64>().Literal().Value(); - ssaValue->MutableConstant()->SetUint64(FromString<ui64>(nodeValue)); - } else { - YQL_ENSURE(false, "Unsupported content: " << value.Ptr()->Content()); - } - - return ssaValue->GetColumn().GetId(); -} - -ui32 ConvertParameterToColumn(const TCoParameter& parameter, TKqpOlapCompileContext& ctx) -{ - TProgram::TAssignment* ssaValue = ctx.CreateAssignCmd(); - - auto name = TString(parameter.Name().Value()); - auto maybeType = parameter.Type().Maybe<TCoDataType>(); - - YQL_ENSURE(maybeType.IsValid(), "Unknown type content in conversion: " << parameter.Type().Ptr()->Content()); - - auto newParameter = ssaValue->MutableParameter(); - newParameter->SetName(name); - - ctx.AddParameterName(name); - - return ssaValue->GetColumn().GetId(); -} - -ui64 GetOrCreateColumnId(const TExprBase& node, TKqpOlapCompileContext& ctx) { - if (auto maybeData = node.Maybe<TCoDataCtor>()) { - return ConvertValueToColumn(maybeData.Cast(), ctx); - } - - if (auto maybeAtom = node.Maybe<TCoAtom>()) { - return ctx.GetColumnId(maybeAtom.Cast().Value()); - } - - if (auto maybeParameter = node.Maybe<TCoParameter>()) { - return ConvertParameterToColumn(maybeParameter.Cast(), ctx); - } - - YQL_ENSURE(false, "Unknown node in OLAP comparison compiler: " << node.Ptr()->Content()); -} - -TProgram::TAssignment* CompileComparison(const TKqpOlapFilterCompare& comparison, + +TProgram::TAssignment* CompileCondition(const TExprBase& condition, TKqpOlapCompileContext& ctx); + +ui32 ConvertValueToColumn(const TCoDataCtor& value, TKqpOlapCompileContext& ctx) +{ + TProgram::TAssignment* ssaValue = ctx.CreateAssignCmd(); + + if (value.Maybe<TCoUtf8>()) { + auto nodeValue = value.Cast<TCoUtf8>().Literal().Value(); + ssaValue->MutableConstant()->SetText(TString(nodeValue)); + } else if (value.Maybe<TCoString>()) { + auto nodeValue = value.Cast<TCoString>().Literal().Value(); + ssaValue->MutableConstant()->SetText(TString(nodeValue)); + } else if (value.Maybe<TCoBool>()) { + auto nodeValue = value.Cast<TCoBool>().Literal().Value(); + ssaValue->MutableConstant()->SetBool(FromString<bool>(nodeValue)); + } else if (value.Maybe<TCoFloat>()) { + auto nodeValue = value.Cast<TCoFloat>().Literal().Value(); + ssaValue->MutableConstant()->SetFloat(FromString<float>(nodeValue)); + } else if (value.Maybe<TCoDouble>()) { + auto nodeValue = value.Cast<TCoDouble>().Literal().Value(); + ssaValue->MutableConstant()->SetDouble(FromString<double>(nodeValue)); + } else if (value.Maybe<TCoInt8>()) { + auto nodeValue = value.Cast<TCoInt8>().Literal().Value(); + ssaValue->MutableConstant()->SetInt32(FromString<i32>(nodeValue)); + } else if (value.Maybe<TCoInt16>()) { + auto nodeValue = value.Cast<TCoInt16>().Literal().Value(); + ssaValue->MutableConstant()->SetInt32(FromString<i32>(nodeValue)); + } else if (value.Maybe<TCoInt32>()) { + auto nodeValue = value.Cast<TCoInt32>().Literal().Value(); + ssaValue->MutableConstant()->SetInt32(FromString<i32>(nodeValue)); + } else if (value.Maybe<TCoInt64>()) { + auto nodeValue = value.Cast<TCoInt64>().Literal().Value(); + ssaValue->MutableConstant()->SetInt64(FromString<i64>(nodeValue)); + } else if (value.Maybe<TCoUint8>()) { + auto nodeValue = value.Cast<TCoUint8>().Literal().Value(); + ssaValue->MutableConstant()->SetUint32(FromString<ui32>(nodeValue)); + } else if (value.Maybe<TCoUint16>()) { + auto nodeValue = value.Cast<TCoUint16>().Literal().Value(); + ssaValue->MutableConstant()->SetUint32(FromString<ui32>(nodeValue)); + } else if (value.Maybe<TCoUint32>()) { + auto nodeValue = value.Cast<TCoUint32>().Literal().Value(); + ssaValue->MutableConstant()->SetUint32(FromString<ui32>(nodeValue)); + } else if (value.Maybe<TCoUint64>()) { + auto nodeValue = value.Cast<TCoUint64>().Literal().Value(); + ssaValue->MutableConstant()->SetUint64(FromString<ui64>(nodeValue)); + } else { + YQL_ENSURE(false, "Unsupported content: " << value.Ptr()->Content()); + } + + return ssaValue->GetColumn().GetId(); +} + +ui32 ConvertParameterToColumn(const TCoParameter& parameter, TKqpOlapCompileContext& ctx) +{ + TProgram::TAssignment* ssaValue = ctx.CreateAssignCmd(); + + auto name = TString(parameter.Name().Value()); + auto maybeType = parameter.Type().Maybe<TCoDataType>(); + + YQL_ENSURE(maybeType.IsValid(), "Unknown type content in conversion: " << parameter.Type().Ptr()->Content()); + + auto newParameter = ssaValue->MutableParameter(); + newParameter->SetName(name); + + ctx.AddParameterName(name); + + return ssaValue->GetColumn().GetId(); +} + +ui64 GetOrCreateColumnId(const TExprBase& node, TKqpOlapCompileContext& ctx) { + if (auto maybeData = node.Maybe<TCoDataCtor>()) { + return ConvertValueToColumn(maybeData.Cast(), ctx); + } + + if (auto maybeAtom = node.Maybe<TCoAtom>()) { + return ctx.GetColumnId(maybeAtom.Cast().Value()); + } + + if (auto maybeParameter = node.Maybe<TCoParameter>()) { + return ConvertParameterToColumn(maybeParameter.Cast(), ctx); + } + + YQL_ENSURE(false, "Unknown node in OLAP comparison compiler: " << node.Ptr()->Content()); +} + +TProgram::TAssignment* CompileComparison(const TKqpOlapFilterCompare& comparison, + TKqpOlapCompileContext& ctx) +{ + // Columns should be created before comparison, otherwise comparison fail to find columns + ui32 leftColumnId = GetOrCreateColumnId(comparison.Left(), ctx); + ui32 rightColumnId = GetOrCreateColumnId(comparison.Right(), ctx); + + TProgram::TAssignment* command = ctx.CreateAssignCmd(); + auto* cmpFunc = command->MutableFunction(); + + ui32 function = TProgram::TAssignment::FUNC_UNSPECIFIED; + + if (comparison.Maybe<TKqpOlapFilterEqual>()) { + function = TProgram::TAssignment::FUNC_CMP_EQUAL; + } else if (comparison.Maybe<TKqpOlapFilterLess>()) { + function = TProgram::TAssignment::FUNC_CMP_LESS; + } else if (comparison.Maybe<TKqpOlapFilterLessOrEqual>()) { + function = TProgram::TAssignment::FUNC_CMP_LESS_EQUAL; + } else if (comparison.Maybe<TKqpOlapFilterGreater>()) { + function = TProgram::TAssignment::FUNC_CMP_GREATER; + } else if (comparison.Maybe<TKqpOlapFilterGreaterOrEqual>()) { + function = TProgram::TAssignment::FUNC_CMP_GREATER_EQUAL; + } + + cmpFunc->SetId(function); + cmpFunc->AddArguments()->SetId(leftColumnId); + cmpFunc->AddArguments()->SetId(rightColumnId); + + return command; +} + +TProgram::TAssignment* CompileExists(const TKqpOlapFilterExists& exists, TKqpOlapCompileContext& ctx) { - // Columns should be created before comparison, otherwise comparison fail to find columns - ui32 leftColumnId = GetOrCreateColumnId(comparison.Left(), ctx); - ui32 rightColumnId = GetOrCreateColumnId(comparison.Right(), ctx); - - TProgram::TAssignment* command = ctx.CreateAssignCmd(); - auto* cmpFunc = command->MutableFunction(); - - ui32 function = TProgram::TAssignment::FUNC_UNSPECIFIED; - - if (comparison.Maybe<TKqpOlapFilterEqual>()) { - function = TProgram::TAssignment::FUNC_CMP_EQUAL; - } else if (comparison.Maybe<TKqpOlapFilterLess>()) { - function = TProgram::TAssignment::FUNC_CMP_LESS; - } else if (comparison.Maybe<TKqpOlapFilterLessOrEqual>()) { - function = TProgram::TAssignment::FUNC_CMP_LESS_EQUAL; - } else if (comparison.Maybe<TKqpOlapFilterGreater>()) { - function = TProgram::TAssignment::FUNC_CMP_GREATER; - } else if (comparison.Maybe<TKqpOlapFilterGreaterOrEqual>()) { - function = TProgram::TAssignment::FUNC_CMP_GREATER_EQUAL; - } - - cmpFunc->SetId(function); - cmpFunc->AddArguments()->SetId(leftColumnId); - cmpFunc->AddArguments()->SetId(rightColumnId); - - return command; + ui32 columnId = GetOrCreateColumnId(exists.Column(), ctx); + + TProgram::TAssignment* command = ctx.CreateAssignCmd(); + auto* isNullFunc = command->MutableFunction(); + + isNullFunc->SetId(TProgram::TAssignment::FUNC_IS_NULL); + isNullFunc->AddArguments()->SetId(columnId); + + TProgram::TAssignment *notCommand = ctx.CreateAssignCmd(); + auto *notFunc = notCommand->MutableFunction(); + + notFunc->SetId(TProgram::TAssignment::FUNC_BINARY_NOT); + notFunc->AddArguments()->SetId(command->GetColumn().GetId()); + + return notCommand; } -TProgram::TAssignment* CompileExists(const TKqpOlapFilterExists& exists, - TKqpOlapCompileContext& ctx) -{ - ui32 columnId = GetOrCreateColumnId(exists.Column(), ctx); - - TProgram::TAssignment* command = ctx.CreateAssignCmd(); - auto* isNullFunc = command->MutableFunction(); - - isNullFunc->SetId(TProgram::TAssignment::FUNC_IS_NULL); - isNullFunc->AddArguments()->SetId(columnId); - - TProgram::TAssignment *notCommand = ctx.CreateAssignCmd(); - auto *notFunc = notCommand->MutableFunction(); - - notFunc->SetId(TProgram::TAssignment::FUNC_BINARY_NOT); - notFunc->AddArguments()->SetId(command->GetColumn().GetId()); - - return notCommand; -} - -TProgram::TAssignment* BuildLogicalProgram(const TExprNode::TChildrenType& args, ui32 function, +TProgram::TAssignment* BuildLogicalProgram(const TExprNode::TChildrenType& args, ui32 function, TKqpOlapCompileContext& ctx) { - ui32 childrenCount = args.size(); - - if (childrenCount == 1) { - // NOT operation is handled separately, thus only one available situation here: - // this is binary operation with only one node, just build this node and return. - return CompileCondition(TExprBase(args[0]), ctx); - } - - TProgram::TAssignment* left = nullptr; - TProgram::TAssignment* right = nullptr; - - if (childrenCount == 2) { - // Nice, we can build logical operation with two child as expected - left = CompileCondition(TExprBase(args[0]), ctx); - right = CompileCondition(TExprBase(args[1]), ctx); - } else { - // >2 children - split incoming vector in the middle call this function recursively. - auto leftArgs = args.Slice(0, childrenCount / 2); - auto rightArgs = args.Slice(childrenCount / 2); - - left = BuildLogicalProgram(leftArgs, function, ctx); - right = BuildLogicalProgram(rightArgs, function, ctx); - } - - TProgram::TAssignment *logicalOp = ctx.CreateAssignCmd(); - auto *logicalFunc = logicalOp->MutableFunction(); - - logicalFunc->SetId(function); - logicalFunc->AddArguments()->SetId(left->GetColumn().GetId()); - logicalFunc->AddArguments()->SetId(right->GetColumn().GetId()); - - return logicalOp; -} - -TProgram::TAssignment* CompileCondition(const TExprBase& condition, TKqpOlapCompileContext& ctx) { - auto maybeCompare = condition.Maybe<TKqpOlapFilterCompare>(); - - if (maybeCompare.IsValid()) { - return CompileComparison(maybeCompare.Cast(), ctx); - } - - auto maybeExists = condition.Maybe<TKqpOlapFilterExists>(); - - if (maybeExists.IsValid()) { - return CompileExists(maybeExists.Cast(), ctx); - } - - if (auto maybeNot = condition.Maybe<TCoNot>()) { - // Not is a special way in case it has only one child - TProgram::TAssignment *value = CompileCondition(maybeNot.Cast().Value(), ctx); - - TProgram::TAssignment *notOp = ctx.CreateAssignCmd(); - auto *notFunc = notOp->MutableFunction(); - - notFunc->SetId(TProgram::TAssignment::FUNC_BINARY_NOT); - notFunc->AddArguments()->SetId(value->GetColumn().GetId()); - - return notOp; - } - - ui32 function = TProgram::TAssignment::FUNC_UNSPECIFIED; - - if (condition.Maybe<TCoAnd>()) { - function = TProgram::TAssignment::FUNC_BINARY_AND; - } else if (condition.Maybe<TCoOr>()) { - function = TProgram::TAssignment::FUNC_BINARY_OR; - } else if (condition.Maybe<TCoXor>()) { - function = TProgram::TAssignment::FUNC_BINARY_XOR; - } else { - YQL_ENSURE(false, "Unsuppoted logical operation: " << condition.Ptr()->Content()); - } - - return BuildLogicalProgram(condition.Ptr()->Children(), function, ctx); -} - -void CompileFilter(const TKqpOlapFilter& filterNode, TKqpOlapCompileContext& ctx) { - TProgram::TAssignment* condition = CompileCondition(filterNode.Condition(), ctx); - - auto* filter = ctx.CreateFilter(); - filter->MutablePredicate()->SetId(condition->GetColumn().GetId()); -} - -void CompileOlapProgramImpl(TExprBase operation, TKqpOlapCompileContext& ctx) { + ui32 childrenCount = args.size(); + + if (childrenCount == 1) { + // NOT operation is handled separately, thus only one available situation here: + // this is binary operation with only one node, just build this node and return. + return CompileCondition(TExprBase(args[0]), ctx); + } + + TProgram::TAssignment* left = nullptr; + TProgram::TAssignment* right = nullptr; + + if (childrenCount == 2) { + // Nice, we can build logical operation with two child as expected + left = CompileCondition(TExprBase(args[0]), ctx); + right = CompileCondition(TExprBase(args[1]), ctx); + } else { + // >2 children - split incoming vector in the middle call this function recursively. + auto leftArgs = args.Slice(0, childrenCount / 2); + auto rightArgs = args.Slice(childrenCount / 2); + + left = BuildLogicalProgram(leftArgs, function, ctx); + right = BuildLogicalProgram(rightArgs, function, ctx); + } + + TProgram::TAssignment *logicalOp = ctx.CreateAssignCmd(); + auto *logicalFunc = logicalOp->MutableFunction(); + + logicalFunc->SetId(function); + logicalFunc->AddArguments()->SetId(left->GetColumn().GetId()); + logicalFunc->AddArguments()->SetId(right->GetColumn().GetId()); + + return logicalOp; +} + +TProgram::TAssignment* CompileCondition(const TExprBase& condition, TKqpOlapCompileContext& ctx) { + auto maybeCompare = condition.Maybe<TKqpOlapFilterCompare>(); + + if (maybeCompare.IsValid()) { + return CompileComparison(maybeCompare.Cast(), ctx); + } + + auto maybeExists = condition.Maybe<TKqpOlapFilterExists>(); + + if (maybeExists.IsValid()) { + return CompileExists(maybeExists.Cast(), ctx); + } + + if (auto maybeNot = condition.Maybe<TCoNot>()) { + // Not is a special way in case it has only one child + TProgram::TAssignment *value = CompileCondition(maybeNot.Cast().Value(), ctx); + + TProgram::TAssignment *notOp = ctx.CreateAssignCmd(); + auto *notFunc = notOp->MutableFunction(); + + notFunc->SetId(TProgram::TAssignment::FUNC_BINARY_NOT); + notFunc->AddArguments()->SetId(value->GetColumn().GetId()); + + return notOp; + } + + ui32 function = TProgram::TAssignment::FUNC_UNSPECIFIED; + + if (condition.Maybe<TCoAnd>()) { + function = TProgram::TAssignment::FUNC_BINARY_AND; + } else if (condition.Maybe<TCoOr>()) { + function = TProgram::TAssignment::FUNC_BINARY_OR; + } else if (condition.Maybe<TCoXor>()) { + function = TProgram::TAssignment::FUNC_BINARY_XOR; + } else { + YQL_ENSURE(false, "Unsuppoted logical operation: " << condition.Ptr()->Content()); + } + + return BuildLogicalProgram(condition.Ptr()->Children(), function, ctx); +} + +void CompileFilter(const TKqpOlapFilter& filterNode, TKqpOlapCompileContext& ctx) { + TProgram::TAssignment* condition = CompileCondition(filterNode.Condition(), ctx); + + auto* filter = ctx.CreateFilter(); + filter->MutablePredicate()->SetId(condition->GetColumn().GetId()); +} + +void CompileOlapProgramImpl(TExprBase operation, TKqpOlapCompileContext& ctx) { if (operation.Raw() == ctx.GetRowExpr()) { return; } - if (auto maybeFilter = operation.Maybe<TKqpOlapFilter>()) { - CompileOlapProgramImpl(maybeFilter.Cast().Input(), ctx); - CompileFilter(maybeFilter.Cast(), ctx); + if (auto maybeFilter = operation.Maybe<TKqpOlapFilter>()) { + CompileOlapProgramImpl(maybeFilter.Cast().Input(), ctx); + CompileFilter(maybeFilter.Cast(), ctx); return; } @@ -314,15 +314,15 @@ void CompileOlapProgramImpl(TExprBase operation, TKqpOlapCompileContext& ctx) { } // namespace -void CompileOlapProgram(const TCoLambda& lambda, const TKikimrTableMetadata& tableMeta, - NKqpProto::TKqpPhyOpReadOlapRanges& readProto) -{ +void CompileOlapProgram(const TCoLambda& lambda, const TKikimrTableMetadata& tableMeta, + NKqpProto::TKqpPhyOpReadOlapRanges& readProto) +{ YQL_ENSURE(lambda.Args().Size() == 1); - TKqpOlapCompileContext ctx(lambda.Args().Arg(0), tableMeta, readProto); + TKqpOlapCompileContext ctx(lambda.Args().Arg(0), tableMeta, readProto); - CompileOlapProgramImpl(lambda.Body(), ctx); - ctx.SerializeToProto(); + CompileOlapProgramImpl(lambda.Body(), ctx); + ctx.SerializeToProto(); } } // namespace NKqp diff --git a/ydb/core/kqp/compile/kqp_olap_compiler.h b/ydb/core/kqp/compile/kqp_olap_compiler.h index 541875db537..17ca3d0b3ca 100644 --- a/ydb/core/kqp/compile/kqp_olap_compiler.h +++ b/ydb/core/kqp/compile/kqp_olap_compiler.h @@ -8,8 +8,8 @@ namespace NKikimr { namespace NKqp { -void CompileOlapProgram(const NYql::NNodes::TCoLambda& lambda, const NYql::TKikimrTableMetadata& tableMeta, - NKqpProto::TKqpPhyOpReadOlapRanges& readProto); +void CompileOlapProgram(const NYql::NNodes::TCoLambda& lambda, const NYql::TKikimrTableMetadata& tableMeta, + NKqpProto::TKqpPhyOpReadOlapRanges& readProto); } // namespace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp index 6f49f12fa4d..f8e3cd0c772 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp @@ -33,20 +33,20 @@ bool IsDebugLogEnabled(const TActorSystem* actorSystem, NActors::NLog::EComponen return settings && settings->Satisfies(NActors::NLog::EPriority::PRI_DEBUG, component); } -TString DebugPrintRanges(TConstArrayRef<NScheme::TTypeId> types, - const TSmallVec<TSerializedTableRange>& ranges) -{ - auto typeRegistry = AppData()->TypeRegistry; - auto out = TStringBuilder(); - - for (auto& range: ranges) { - out << DebugPrintRange(types, range.ToTableRange(), *typeRegistry); - out << " "; - } - - return out; -} - +TString DebugPrintRanges(TConstArrayRef<NScheme::TTypeId> types, + const TSmallVec<TSerializedTableRange>& ranges) +{ + auto typeRegistry = AppData()->TypeRegistry; + auto out = TStringBuilder(); + + for (auto& range: ranges) { + out << DebugPrintRange(types, range.ToTableRange(), *typeRegistry); + out << " "; + } + + return out; +} + static constexpr TDuration MIN_RETRY_DELAY = TDuration::MilliSeconds(250); static constexpr TDuration MAX_RETRY_DELAY = TDuration::Seconds(2); static constexpr ui64 MAX_SHARD_RETRIES = 5; // retry after: 0, 250, 500, 1000, 2000 @@ -790,8 +790,8 @@ private: state.ActorId = {}; CA_LOG_D("StartTableScan: '" << ScanData->TablePath << "', shardId: " << state.TabletId << ", gen: " << state.Generation - << ", ranges: " << DebugPrintRanges(KeyColumnTypes, GetScanRanges(state))); - + << ", ranges: " << DebugPrintRanges(KeyColumnTypes, GetScanRanges(state))); + SendStartScanRequest(state, state.Generation); } @@ -815,15 +815,15 @@ private: } ev->Record.MutableSkipNullKeys()->CopyFrom(Meta.GetSkipNullKeys()); - auto ranges = GetScanRanges(state); - auto protoRanges = ev->Record.MutableRanges(); - protoRanges->Reserve(ranges.size()); + auto ranges = GetScanRanges(state); + auto protoRanges = ev->Record.MutableRanges(); + protoRanges->Reserve(ranges.size()); + + for (auto& range: ranges) { + auto newRange = protoRanges->Add(); + range.Serialize(*newRange); + } - for (auto& range: ranges) { - auto newRange = protoRanges->Add(); - range.Serialize(*newRange); - } - ev->Record.MutableSnapshot()->CopyFrom(Snapshot); if (RuntimeSettings.Timeout) { ev->Record.SetTimeoutMs(RuntimeSettings.Timeout.Get()->MilliSeconds()); @@ -836,63 +836,63 @@ private: ev->Record.SetGeneration(gen); - ev->Record.SetReverse(Meta.GetReverse()); - ev->Record.SetItemsLimit(Meta.GetItemsLimit()); - - if (Meta.HasOlapProgram()) { - TString programBytes; - TStringOutput stream(programBytes); - Meta.GetOlapProgram().SerializeToArcadiaStream(&stream); - ev->Record.SetOlapProgram(programBytes); - ev->Record.SetOlapProgramType( - NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS - ); - } - + ev->Record.SetReverse(Meta.GetReverse()); + ev->Record.SetItemsLimit(Meta.GetItemsLimit()); + + if (Meta.HasOlapProgram()) { + TString programBytes; + TStringOutput stream(programBytes); + Meta.GetOlapProgram().SerializeToArcadiaStream(&stream); + ev->Record.SetOlapProgram(programBytes); + ev->Record.SetOlapProgramType( + NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS + ); + } + ev->Record.SetDataFormat(Meta.GetDataFormat()); bool subscribed = std::exchange(state.SubscribedOnTablet, true); CA_LOG_D("Send EvKqpScan to shardId: " << state.TabletId << ", tablePath: " << ScanData->TablePath << ", gen: " << gen << ", subscribe: " << (!subscribed) - << ", range: " << DebugPrintRanges(KeyColumnTypes, GetScanRanges(state))); - + << ", range: " << DebugPrintRanges(KeyColumnTypes, GetScanRanges(state))); + Send(MakePipePeNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), state.TabletId, !subscribed), IEventHandle::FlagTrackDelivery); } - const TSmallVec<TSerializedTableRange> GetScanRanges(const TShardState& state) const { - // No any data read previously, return all ranges - if (!LastKey.DataSize()) { - return state.Ranges; + const TSmallVec<TSerializedTableRange> GetScanRanges(const TShardState& state) const { + // No any data read previously, return all ranges + if (!LastKey.DataSize()) { + return state.Ranges; + } + + // Form new vector. Skip ranges already read. + TVector<TSerializedTableRange> ranges; + ranges.reserve(state.Ranges.size()); + + YQL_ENSURE(KeyColumnTypes.size() == LastKey.size(), "Key columns size != last key"); + + for (auto rangeIt = state.Ranges.begin(); rangeIt != state.Ranges.end(); ++rangeIt) { + int cmp = ComparePointAndRange(LastKey, rangeIt->ToTableRange(), KeyColumnTypes, KeyColumnTypes); + + YQL_ENSURE(cmp >= 0, "Missed intersection of LastKey and range."); + + if (cmp > 0) { + continue; + } + + // It is range, where read was interrupted. Restart operation from last read key. + ranges.emplace_back(std::move(TSerializedTableRange( + TSerializedCellVec::Serialize(LastKey), rangeIt->To.GetBuffer(), false, rangeIt->ToInclusive + ))); + + // And push all others + ranges.insert(ranges.end(), ++rangeIt, state.Ranges.end()); + break; } - // Form new vector. Skip ranges already read. - TVector<TSerializedTableRange> ranges; - ranges.reserve(state.Ranges.size()); - - YQL_ENSURE(KeyColumnTypes.size() == LastKey.size(), "Key columns size != last key"); - - for (auto rangeIt = state.Ranges.begin(); rangeIt != state.Ranges.end(); ++rangeIt) { - int cmp = ComparePointAndRange(LastKey, rangeIt->ToTableRange(), KeyColumnTypes, KeyColumnTypes); - - YQL_ENSURE(cmp >= 0, "Missed intersection of LastKey and range."); - - if (cmp > 0) { - continue; - } - - // It is range, where read was interrupted. Restart operation from last read key. - ranges.emplace_back(std::move(TSerializedTableRange( - TSerializedCellVec::Serialize(LastKey), rangeIt->To.GetBuffer(), false, rangeIt->ToInclusive - ))); - - // And push all others - ranges.insert(ranges.end(), ++rangeIt, state.Ranges.end()); - break; - } - - return ranges; + return ranges; } TString PrintLastKey() const { @@ -1117,7 +1117,7 @@ private: sb << ", "; } } - sb << "], " + sb << "], " << ", RetryAttempt: " << RetryAttempt << ", TotalRetries: " << TotalRetries << ", ResolveAttempt: " << ResolveAttempt << ", ActorId: " << ActorId << " }"; return sb; diff --git a/ydb/core/kqp/executer/kqp_data_executer.cpp b/ydb/core/kqp/executer/kqp_data_executer.cpp index 3d3d659ec8a..003d4ffa206 100644 --- a/ydb/core/kqp/executer/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer/kqp_data_executer.cpp @@ -930,19 +930,19 @@ private: } private: - void FillGeneralReadInfo(TTaskMeta& taskMeta, ui64 itemsLimit, bool reverse) - { - if (taskMeta.Reads && !taskMeta.Reads.GetRef().empty()) { - // Validate parameters - YQL_ENSURE(taskMeta.ReadInfo.ItemsLimit == itemsLimit); - YQL_ENSURE(taskMeta.ReadInfo.Reverse == reverse); - return; - } - - taskMeta.ReadInfo.ItemsLimit = itemsLimit; - taskMeta.ReadInfo.Reverse = reverse; - }; - + void FillGeneralReadInfo(TTaskMeta& taskMeta, ui64 itemsLimit, bool reverse) + { + if (taskMeta.Reads && !taskMeta.Reads.GetRef().empty()) { + // Validate parameters + YQL_ENSURE(taskMeta.ReadInfo.ItemsLimit == itemsLimit); + YQL_ENSURE(taskMeta.ReadInfo.Reverse == reverse); + return; + } + + taskMeta.ReadInfo.ItemsLimit = itemsLimit; + taskMeta.ReadInfo.Reverse = reverse; + }; + void BuildDatashardTasks(TStageInfo& stageInfo, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) { @@ -971,14 +971,14 @@ private: THashMap<ui64, TShardInfo> partitions; switch (op.GetTypeCase()) { - case NKqpProto::TKqpPhyTableOperation::kReadRanges: + case NKqpProto::TKqpPhyTableOperation::kReadRanges: case NKqpProto::TKqpPhyTableOperation::kReadRange: case NKqpProto::TKqpPhyTableOperation::kLookup: { bool reverse = false; ui64 itemsLimit = 0; TString itemsLimitParamName; NDqProto::TData itemsLimitBytes; - NKikimr::NMiniKQL::TType* itemsLimitType = nullptr; + NKikimr::NMiniKQL::TType* itemsLimitType = nullptr; if (op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadRanges) { partitions = PrunePartitions(TableKeys, op.GetReadRanges(), stageInfo, holderFactory, typeEnv); @@ -988,7 +988,7 @@ private: } else if (op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadRange) { partitions = PrunePartitions(TableKeys, op.GetReadRange(), stageInfo, holderFactory, typeEnv); ExtractItemsLimit(stageInfo, op.GetReadRange().GetItemsLimit(), holderFactory, typeEnv, - itemsLimit, itemsLimitParamName, itemsLimitBytes, itemsLimitType); + itemsLimit, itemsLimitParamName, itemsLimitBytes, itemsLimitType); reverse = op.GetReadRange().GetReverse(); } else if (op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kLookup) { partitions = PrunePartitions(TableKeys, op.GetLookup(), stageInfo, holderFactory, typeEnv); @@ -1000,21 +1000,21 @@ private: auto& task = getShardTask(shardId); for (auto& [name, value] : shardInfo.Params) { task.Meta.Params.emplace(name, std::move(value)); - auto typeIterator = shardInfo.ParamTypes.find(name); - YQL_ENSURE(typeIterator != shardInfo.ParamTypes.end()); - auto retType = task.Meta.ParamTypes.emplace(name, typeIterator->second); - YQL_ENSURE(retType.second); + auto typeIterator = shardInfo.ParamTypes.find(name); + YQL_ENSURE(typeIterator != shardInfo.ParamTypes.end()); + auto retType = task.Meta.ParamTypes.emplace(name, typeIterator->second); + YQL_ENSURE(retType.second); } - FillGeneralReadInfo(task.Meta, itemsLimit, reverse); - - TTaskMeta::TShardReadInfo readInfo; + FillGeneralReadInfo(task.Meta, itemsLimit, reverse); + + TTaskMeta::TShardReadInfo readInfo; readInfo.Ranges = std::move(*shardInfo.KeyReadRanges); readInfo.Columns = columns; if (itemsLimit) { task.Meta.Params.emplace(itemsLimitParamName, itemsLimitBytes); - task.Meta.ParamTypes.emplace(itemsLimitParamName, itemsLimitType); + task.Meta.ParamTypes.emplace(itemsLimitParamName, itemsLimitType); } if (!task.Meta.Reads) { @@ -1044,7 +1044,7 @@ private: task.Meta.Writes.ConstructInPlace(); task.Meta.Writes->Ranges = read.Ranges; } else { - task.Meta.Writes->Ranges.MergeWritePoints(TShardKeyRanges(read.Ranges), keyTypes); + task.Meta.Writes->Ranges.MergeWritePoints(TShardKeyRanges(read.Ranges), keyTypes); } } } @@ -1064,7 +1064,7 @@ private: task.Meta.Writes.ConstructInPlace(); task.Meta.Writes->Ranges = std::move(*shardInfo.KeyWriteRanges); } else { - task.Meta.Writes->Ranges.MergeWritePoints(std::move(*shardInfo.KeyWriteRanges), keyTypes); + task.Meta.Writes->Ranges.MergeWritePoints(std::move(*shardInfo.KeyWriteRanges), keyTypes); } } } @@ -1347,8 +1347,8 @@ private: protoColumn->SetType(column.Type); protoColumn->SetName(column.Name); } - protoReadMeta->SetItemsLimit(task.Meta.ReadInfo.ItemsLimit); - protoReadMeta->SetReverse(task.Meta.ReadInfo.Reverse); + protoReadMeta->SetItemsLimit(task.Meta.ReadInfo.ItemsLimit); + protoReadMeta->SetReverse(task.Meta.ReadInfo.Reverse); } } if (task.Meta.Writes) { diff --git a/ydb/core/kqp/executer/kqp_executer_impl.cpp b/ydb/core/kqp/executer/kqp_executer_impl.cpp index 920b53e6056..0931e5d2c7b 100644 --- a/ydb/core/kqp/executer/kqp_executer_impl.cpp +++ b/ydb/core/kqp/executer/kqp_executer_impl.cpp @@ -52,58 +52,58 @@ void PrepareKqpTaskParameters(const NKqpProto::TKqpPhyStage& stage, const TStage } } -std::pair<TString, TString> SerializeKqpTasksParametersForOlap(const NKqpProto::TKqpPhyStage& stage, - const TStageInfo& stageInfo, const TTask& task, const NMiniKQL::THolderFactory& holderFactory, - const NMiniKQL::TTypeEnvironment& typeEnv) -{ - std::vector<std::shared_ptr<arrow::Field>> columns; - std::vector<std::shared_ptr<arrow::Array>> data; - auto& parameterNames = task.Meta.ReadInfo.OlapProgram.ParameterNames; - - columns.reserve(parameterNames.size()); - data.reserve(parameterNames.size()); - - for (auto& name : stage.GetProgramParameters()) { - if (!parameterNames.contains(name)) { - continue; - } - - if (auto* taskParam = task.Meta.Params.FindPtr(name)) { - // This parameter is the list, holding type from task.Meta.ParamTypes - // Those parameters can't be used in Olap programs now - YQL_ENSURE(false, "OLAP program contains task parameter, not supported yet."); - continue; - } - - const NYql::NDq::TMkqlValueRef* mkqlValue = stageInfo.Meta.Tx.Params.Values.FindPtr(name); - +std::pair<TString, TString> SerializeKqpTasksParametersForOlap(const NKqpProto::TKqpPhyStage& stage, + const TStageInfo& stageInfo, const TTask& task, const NMiniKQL::THolderFactory& holderFactory, + const NMiniKQL::TTypeEnvironment& typeEnv) +{ + std::vector<std::shared_ptr<arrow::Field>> columns; + std::vector<std::shared_ptr<arrow::Array>> data; + auto& parameterNames = task.Meta.ReadInfo.OlapProgram.ParameterNames; + + columns.reserve(parameterNames.size()); + data.reserve(parameterNames.size()); + + for (auto& name : stage.GetProgramParameters()) { + if (!parameterNames.contains(name)) { + continue; + } + + if (auto* taskParam = task.Meta.Params.FindPtr(name)) { + // This parameter is the list, holding type from task.Meta.ParamTypes + // Those parameters can't be used in Olap programs now + YQL_ENSURE(false, "OLAP program contains task parameter, not supported yet."); + continue; + } + + const NYql::NDq::TMkqlValueRef* mkqlValue = stageInfo.Meta.Tx.Params.Values.FindPtr(name); + auto [type, value] = ImportValueFromProto(mkqlValue->GetType(), mkqlValue->GetValue(), typeEnv, holderFactory); - - YQL_ENSURE(NYql::NArrow::IsArrowCompatible(type), "Incompatible parameter type. Can't convert to arrow"); - - std::unique_ptr<arrow::ArrayBuilder> builder = NYql::NArrow::MakeArrowBuilder(type); - NYql::NArrow::AppendElement(value, builder.get(), type); - - std::shared_ptr<arrow::Array> array; - auto status = builder->Finish(&array); - - YQL_ENSURE(status.ok(), "Failed to build arrow array of variables."); - - auto field = std::make_shared<arrow::Field>(name, array->type()); - - columns.emplace_back(std::move(field)); - data.emplace_back(std::move(array)); - } - - auto schema = std::make_shared<arrow::Schema>(columns); - auto recordBatch = arrow::RecordBatch::Make(schema, 1, data); - - return std::make_pair<TString, TString>( - NArrow::SerializeSchema(*schema), - NArrow::SerializeBatchNoCompression(recordBatch) - ); -} - + + YQL_ENSURE(NYql::NArrow::IsArrowCompatible(type), "Incompatible parameter type. Can't convert to arrow"); + + std::unique_ptr<arrow::ArrayBuilder> builder = NYql::NArrow::MakeArrowBuilder(type); + NYql::NArrow::AppendElement(value, builder.get(), type); + + std::shared_ptr<arrow::Array> array; + auto status = builder->Finish(&array); + + YQL_ENSURE(status.ok(), "Failed to build arrow array of variables."); + + auto field = std::make_shared<arrow::Field>(name, array->type()); + + columns.emplace_back(std::move(field)); + data.emplace_back(std::move(array)); + } + + auto schema = std::make_shared<arrow::Schema>(columns); + auto recordBatch = arrow::RecordBatch::Make(schema, 1, data); + + return std::make_pair<TString, TString>( + NArrow::SerializeSchema(*schema), + NArrow::SerializeBatchNoCompression(recordBatch) + ); +} + TActorId ReportToRl(ui64 ru, const TString& database, const TString& userToken, const NKikimrKqp::TRlPath& path) { diff --git a/ydb/core/kqp/executer/kqp_executer_impl.h b/ydb/core/kqp/executer/kqp_executer_impl.h index cb2fb81992c..6ecf78ab059 100644 --- a/ydb/core/kqp/executer/kqp_executer_impl.h +++ b/ydb/core/kqp/executer/kqp_executer_impl.h @@ -62,10 +62,10 @@ void BuildKqpExecuterResults(const NKqpProto::TKqpPhyTx& tx, TVector<TKqpExecute void PrepareKqpTaskParameters(const NKqpProto::TKqpPhyStage& stage, const TStageInfo& stageInfo, const TTask& task, NYql::NDqProto::TDqTask& dqTask, const NMiniKQL::TTypeEnvironment& typeEnv, const NMiniKQL::THolderFactory& holderFactory); -std::pair<TString, TString> SerializeKqpTasksParametersForOlap(const NKqpProto::TKqpPhyStage& stage, - const TStageInfo& stageInfo, const TTask& task, const NMiniKQL::THolderFactory& holderFactory, - const NMiniKQL::TTypeEnvironment& typeEnv); - +std::pair<TString, TString> SerializeKqpTasksParametersForOlap(const NKqpProto::TKqpPhyStage& stage, + const TStageInfo& stageInfo, const TTask& task, const NMiniKQL::THolderFactory& holderFactory, + const NMiniKQL::TTypeEnvironment& typeEnv); + inline bool IsDebugLogEnabled() { return TlsActivationContext->LoggerSettings() && TlsActivationContext->LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::KQP_EXECUTER); @@ -318,34 +318,34 @@ protected: Y_VERIFY_DEBUG(stageInfo.Meta.TablePath == op.GetTable().GetPath()); auto& task = TasksGraph.AddTask(stageInfo); - TShardKeyRanges keyRanges; - - switch (op.GetTypeCase()) { - case NKqpProto::TKqpPhyTableOperation::kReadRange: - stageInfo.Meta.SkipNullKeys.assign( - op.GetReadRange().GetSkipNullKeys().begin(), - op.GetReadRange().GetSkipNullKeys().end() - ); - keyRanges.Add(MakeKeyRange( - keyTypes, op.GetReadRange().GetKeyRange(), - stageInfo, holderFactory, typeEnv) - ); - break; - case NKqpProto::TKqpPhyTableOperation::kReadRanges: - keyRanges.CopyFrom(FillReadRanges(keyTypes, op.GetReadRanges(), stageInfo, holderFactory, typeEnv)); - break; - default: - YQL_ENSURE(false, "Unexpected table scan operation: " << (ui32) op.GetTypeCase()); - } - - TTaskMeta::TShardReadInfo readInfo = { - .Ranges = std::move(keyRanges), - .Columns = BuildKqpColumns(op, table), - }; + TShardKeyRanges keyRanges; + + switch (op.GetTypeCase()) { + case NKqpProto::TKqpPhyTableOperation::kReadRange: + stageInfo.Meta.SkipNullKeys.assign( + op.GetReadRange().GetSkipNullKeys().begin(), + op.GetReadRange().GetSkipNullKeys().end() + ); + keyRanges.Add(MakeKeyRange( + keyTypes, op.GetReadRange().GetKeyRange(), + stageInfo, holderFactory, typeEnv) + ); + break; + case NKqpProto::TKqpPhyTableOperation::kReadRanges: + keyRanges.CopyFrom(FillReadRanges(keyTypes, op.GetReadRanges(), stageInfo, holderFactory, typeEnv)); + break; + default: + YQL_ENSURE(false, "Unexpected table scan operation: " << (ui32) op.GetTypeCase()); + } + + TTaskMeta::TShardReadInfo readInfo = { + .Ranges = std::move(keyRanges), + .Columns = BuildKqpColumns(op, table), + }; task.Meta.Reads.ConstructInPlace(); task.Meta.Reads->emplace_back(std::move(readInfo)); - task.Meta.ReadInfo.Reverse = op.GetReadRange().GetReverse(); + task.Meta.ReadInfo.Reverse = op.GetReadRange().GetReverse(); LOG_D("Stage " << stageInfo.Id << " create sysview scan task: " << task.Id); } @@ -450,26 +450,26 @@ protected: void ExtractItemsLimit(const TStageInfo& stageInfo, const NKqpProto::TKqpPhyParamValue& paramValue, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv, - ui64& itemsLimit, TString& itemsLimitParamName, NYql::NDqProto::TData& itemsLimitBytes, - NKikimr::NMiniKQL::TType*& itemsLimitType) + ui64& itemsLimit, TString& itemsLimitParamName, NYql::NDqProto::TData& itemsLimitBytes, + NKikimr::NMiniKQL::TType*& itemsLimitType) { itemsLimitParamName = paramValue.GetParamName(); - if (!itemsLimitParamName) { - return; - } + if (!itemsLimitParamName) { + return; + } - auto* itemsLimitParam = stageInfo.Meta.Tx.Params.Values.FindPtr(itemsLimitParamName); - YQL_ENSURE(itemsLimitParam); + auto* itemsLimitParam = stageInfo.Meta.Tx.Params.Values.FindPtr(itemsLimitParamName); + YQL_ENSURE(itemsLimitParam); auto [type, value] = NMiniKQL::ImportValueFromProto(itemsLimitParam->GetType(), itemsLimitParam->GetValue(), typeEnv, holderFactory); - - YQL_ENSURE(type->GetKind() == NMiniKQL::TType::EKind::Data); - itemsLimit = value.Get<ui64>(); - - NYql::NDq::TDqDataSerializer dataSerializer(typeEnv, holderFactory, NYql::NDqProto::DATA_TRANSPORT_UV_PICKLE_1_0); - itemsLimitBytes = dataSerializer.Serialize(value, type); - itemsLimitType = type; + + YQL_ENSURE(type->GetKind() == NMiniKQL::TType::EKind::Data); + itemsLimit = value.Get<ui64>(); + + NYql::NDq::TDqDataSerializer dataSerializer(typeEnv, holderFactory, NYql::NDqProto::DATA_TRANSPORT_UV_PICKLE_1_0); + itemsLimitBytes = dataSerializer.Serialize(value, type); + itemsLimitType = type; } protected: diff --git a/ydb/core/kqp/executer/kqp_partition_helper.cpp b/ydb/core/kqp/executer/kqp_partition_helper.cpp index 860298ec680..ac786f6c24d 100644 --- a/ydb/core/kqp/executer/kqp_partition_helper.cpp +++ b/ydb/core/kqp/executer/kqp_partition_helper.cpp @@ -17,7 +17,7 @@ using namespace NYql; struct TShardParamValuesAndRanges { NDqProto::TData ParamValues; - NKikimr::NMiniKQL::TType* ParamType; + NKikimr::NMiniKQL::TType* ParamType; // either FullRange or Ranges are set TVector<TSerializedPointOrRange> Ranges; std::optional<TSerializedTableRange> FullRange; @@ -72,7 +72,7 @@ THashMap<ui64, TShardParamValuesAndRanges> PartitionParamByKey(const NDq::TMkqlV } else { shardData.Ranges.emplace_back(std::move(point)); } - shardData.ParamType = itemType; + shardData.ParamType = itemType; } NDq::TDqDataSerializer dataSerializer{typeEnv, holderFactory, NDqProto::EDataTransportVersion::DATA_TRANSPORT_UV_PICKLE_1_0}; @@ -155,7 +155,7 @@ THashMap<ui64, TShardParamValuesAndRanges> PartitionParamByKeyPrefix(const NDq:: } else if (!shardData.FullRange) { shardData.Ranges.emplace_back(std::move(partitionWithRange.PointOrRange)); } - shardData.ParamType = itemType; + shardData.ParamType = itemType; } } @@ -214,84 +214,84 @@ TVector<TCell> FillKeyValues(const TVector<NUdf::TDataTypeId>& keyColumnTypes, c } TSerializedPointOrRange FillOneRange(NUdf::TUnboxedValue& begin, NUdf::TUnboxedValue& end, - const TVector<NUdf::TDataTypeId>& keyColumnTypes, const NMiniKQL::TTypeEnvironment& typeEnv) -{ + const TVector<NUdf::TDataTypeId>& keyColumnTypes, const NMiniKQL::TTypeEnvironment& typeEnv) +{ const ui32 keyColumnsSize = keyColumnTypes.size(); - - // Range tuple contains ranges over all key colums + inclusive sign - YQL_ENSURE((keyColumnsSize + 1) == begin.GetListLength()); - - auto fillKeyValues = [keyColumnsSize, &keyColumnTypes, &typeEnv](NUdf::TUnboxedValue& value) { - TVector<TCell> keyValues; - keyValues.reserve(keyColumnsSize); - - for (ui32 i = 0; i < keyColumnsSize; i++) { - auto element = value.GetElement(i); - - if (!element) { - return keyValues; - } - - element = element.GetOptionalValue(); - - if (!element) { - keyValues.emplace_back(TCell()); - continue; - } - - auto cell = NMiniKQL::MakeCell(keyColumnTypes[i], element, typeEnv, /* copy */ true); + + // Range tuple contains ranges over all key colums + inclusive sign + YQL_ENSURE((keyColumnsSize + 1) == begin.GetListLength()); + + auto fillKeyValues = [keyColumnsSize, &keyColumnTypes, &typeEnv](NUdf::TUnboxedValue& value) { + TVector<TCell> keyValues; + keyValues.reserve(keyColumnsSize); + + for (ui32 i = 0; i < keyColumnsSize; i++) { + auto element = value.GetElement(i); + + if (!element) { + return keyValues; + } + + element = element.GetOptionalValue(); + + if (!element) { + keyValues.emplace_back(TCell()); + continue; + } + + auto cell = NMiniKQL::MakeCell(keyColumnTypes[i], element, typeEnv, /* copy */ true); keyValues.emplace_back(std::move(cell)); - } - - return keyValues; - }; - - TVector<TCell> fromKeyValues = fillKeyValues(begin); - TVector<TCell> toKeyValues = fillKeyValues(end); - - bool fromInclusive = !!begin.GetElement(keyColumnsSize).Get<int>(); - - /* - * Range rules: - * - no value - +inf - * - any other value means itself, please note that NULL is value and it is the minimum value in column - * - * `From` should be padded with NULL values to the count of key columns if left border is inclusive. - * For example table with Key1, Key2, X, Y, Z with predicate WHERE Key1 >= 10 will lead to - * left border [ (10, NULL), i.e. first element will be located at 10, NULL. If it is not padded, then - * first element will be located at 10, +inf which definitely is first element after this border in case - * we do not support +inf values in keys. - * - * `From` should not be padded if border is exclusive. - * For example table with Key1, Key2, X, Y, Z with predicate WHERE Key1 > 10 will lead to - * next left border ( (10,). I.e. the item will be located at 10, +inf, which definitely is first - * element after this border in case we do not support +inf values in keys. - * - * `To` should not be padded with NULLs when right border is not inclusive. - * For example table with Key1, Key2, X, Y, Z with predicate WHERE Key1 < 10 will lead to - * right border (10, NULL) ). I.e. the range ends at element before 10, NULL - * - * Note: -inf is an array full of NULLs with inclusive flag set, i.e. minimum value in table. - * Note: For `To` border +infinity is an empty array - */ - if (fromKeyValues.empty()) { - fromInclusive = true; - } - - if (fromInclusive) { - while (fromKeyValues.size() != keyColumnsSize) { - fromKeyValues.emplace_back(TCell()); - } - } - - bool toInclusive = !!end.GetElement(keyColumnsSize).Get<int>(); - - if (!toInclusive && !toKeyValues.empty()) { - while (toKeyValues.size() != keyColumnsSize) { - toKeyValues.emplace_back(TCell()); - } - } - + } + + return keyValues; + }; + + TVector<TCell> fromKeyValues = fillKeyValues(begin); + TVector<TCell> toKeyValues = fillKeyValues(end); + + bool fromInclusive = !!begin.GetElement(keyColumnsSize).Get<int>(); + + /* + * Range rules: + * - no value - +inf + * - any other value means itself, please note that NULL is value and it is the minimum value in column + * + * `From` should be padded with NULL values to the count of key columns if left border is inclusive. + * For example table with Key1, Key2, X, Y, Z with predicate WHERE Key1 >= 10 will lead to + * left border [ (10, NULL), i.e. first element will be located at 10, NULL. If it is not padded, then + * first element will be located at 10, +inf which definitely is first element after this border in case + * we do not support +inf values in keys. + * + * `From` should not be padded if border is exclusive. + * For example table with Key1, Key2, X, Y, Z with predicate WHERE Key1 > 10 will lead to + * next left border ( (10,). I.e. the item will be located at 10, +inf, which definitely is first + * element after this border in case we do not support +inf values in keys. + * + * `To` should not be padded with NULLs when right border is not inclusive. + * For example table with Key1, Key2, X, Y, Z with predicate WHERE Key1 < 10 will lead to + * right border (10, NULL) ). I.e. the range ends at element before 10, NULL + * + * Note: -inf is an array full of NULLs with inclusive flag set, i.e. minimum value in table. + * Note: For `To` border +infinity is an empty array + */ + if (fromKeyValues.empty()) { + fromInclusive = true; + } + + if (fromInclusive) { + while (fromKeyValues.size() != keyColumnsSize) { + fromKeyValues.emplace_back(TCell()); + } + } + + bool toInclusive = !!end.GetElement(keyColumnsSize).Get<int>(); + + if (!toInclusive && !toKeyValues.empty()) { + while (toKeyValues.size() != keyColumnsSize) { + toKeyValues.emplace_back(TCell()); + } + } + bool point = false; if (fromInclusive && toInclusive && fromKeyValues.size() == keyColumnsSize) { if (toKeyValues.empty()) { @@ -308,95 +308,95 @@ TSerializedPointOrRange FillOneRange(NUdf::TUnboxedValue& begin, NUdf::TUnboxedV return TSerializedCellVec(TSerializedCellVec::Serialize(fromKeyValues)); } - auto range = TSerializedTableRange(fromKeyValues, fromInclusive, toKeyValues, toInclusive); - - YQL_CLOG(DEBUG, ProviderKqp) << "Formed range [extract predicate]: " - << DebugPrintRange(keyColumnTypes, range.ToTableRange(), *AppData()->TypeRegistry); - - return range; -} - + auto range = TSerializedTableRange(fromKeyValues, fromInclusive, toKeyValues, toInclusive); + + YQL_CLOG(DEBUG, ProviderKqp) << "Formed range [extract predicate]: " + << DebugPrintRange(keyColumnTypes, range.ToTableRange(), *AppData()->TypeRegistry); + + return range; +} + TVector<TSerializedPointOrRange> BuildFullRange(const TVector<NUdf::TDataTypeId>& keyColumnTypes) { - // Build range from NULL, NULL ... NULL to +inf, +inf ... +inf - TVector<TCell> fromKeyValues(keyColumnTypes.size()); - - auto range = TSerializedTableRange(fromKeyValues, true, TVector<TCell>(), false); - - YQL_CLOG(DEBUG, ProviderKqp) << "Formed full range [extract predicate]: " - << DebugPrintRange(keyColumnTypes, range.ToTableRange(), *AppData()->TypeRegistry); - + // Build range from NULL, NULL ... NULL to +inf, +inf ... +inf + TVector<TCell> fromKeyValues(keyColumnTypes.size()); + + auto range = TSerializedTableRange(fromKeyValues, true, TVector<TCell>(), false); + + YQL_CLOG(DEBUG, ProviderKqp) << "Formed full range [extract predicate]: " + << DebugPrintRange(keyColumnTypes, range.ToTableRange(), *AppData()->TypeRegistry); + return {std::move(range)}; -} - +} + TVector<TSerializedPointOrRange> FillRangesFromParameter(const TVector<NUdf::TDataTypeId>& keyColumnTypes, - const NKqpProto::TKqpPhyParamValue& rangesParam, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) -{ - TString paramName = rangesParam.GetParamName(); - + const NKqpProto::TKqpPhyParamValue& rangesParam, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) +{ + TString paramName = rangesParam.GetParamName(); + auto param = stageInfo.Meta.Tx.Params.Values.FindPtr(paramName); - YQL_ENSURE(param, "Param not found: " << paramName); - - const auto* protoType = ¶m->GetType(); - const auto* protoValue = ¶m->GetValue(); - + YQL_ENSURE(param, "Param not found: " << paramName); + + const auto* protoType = ¶m->GetType(); + const auto* protoValue = ¶m->GetValue(); + auto [type, value] = ImportValueFromProto(*protoType, *protoValue, typeEnv, holderFactory); - - // First element is Flow wrapping Ranges List - YQL_ENSURE(value.IsBoxed()); - YQL_ENSURE(value.GetListLength() == 1); - - auto rangesList = value.GetElement(0); - YQL_ENSURE(rangesList.IsBoxed()); - + + // First element is Flow wrapping Ranges List + YQL_ENSURE(value.IsBoxed()); + YQL_ENSURE(value.GetListLength() == 1); + + auto rangesList = value.GetElement(0); + YQL_ENSURE(rangesList.IsBoxed()); + TVector<TSerializedPointOrRange> out; - out.reserve(rangesList.GetListLength()); - - const auto it = rangesList.GetListIterator(); - for (NUdf::TUnboxedValue range; it.Next(range);) { - YQL_ENSURE(range.IsBoxed()); - // Range consists of two tuples: begin and end - YQL_ENSURE(range.GetListLength() == 2); - - auto begin = range.GetElement(0); - auto end = range.GetElement(1); - - out.emplace_back(FillOneRange(begin, end, keyColumnTypes, typeEnv)); - } - - return out; -} - -template <typename PhyOpReadRanges> + out.reserve(rangesList.GetListLength()); + + const auto it = rangesList.GetListIterator(); + for (NUdf::TUnboxedValue range; it.Next(range);) { + YQL_ENSURE(range.IsBoxed()); + // Range consists of two tuples: begin and end + YQL_ENSURE(range.GetListLength() == 2); + + auto begin = range.GetElement(0); + auto end = range.GetElement(1); + + out.emplace_back(FillOneRange(begin, end, keyColumnTypes, typeEnv)); + } + + return out; +} + +template <typename PhyOpReadRanges> TVector<TSerializedPointOrRange> FillReadRangesInternal(const TVector<NUdf::TDataTypeId>& keyColumnTypes, - const PhyOpReadRanges& readRanges, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) -{ - if (readRanges.HasKeyRanges()) { - return FillRangesFromParameter( - keyColumnTypes, readRanges.GetKeyRanges(), stageInfo, holderFactory, typeEnv - ); - } - - return BuildFullRange(keyColumnTypes); -} - -} // anonymous namespace - + const PhyOpReadRanges& readRanges, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) +{ + if (readRanges.HasKeyRanges()) { + return FillRangesFromParameter( + keyColumnTypes, readRanges.GetKeyRanges(), stageInfo, holderFactory, typeEnv + ); + } + + return BuildFullRange(keyColumnTypes); +} + +} // anonymous namespace + TVector<TSerializedPointOrRange> FillReadRanges(const TVector<NUdf::TDataTypeId>& keyColumnTypes, - const NKqpProto::TKqpPhyOpReadOlapRanges& readRange, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) -{ - return FillReadRangesInternal(keyColumnTypes, readRange, stageInfo, holderFactory, typeEnv); -} - + const NKqpProto::TKqpPhyOpReadOlapRanges& readRange, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) +{ + return FillReadRangesInternal(keyColumnTypes, readRange, stageInfo, holderFactory, typeEnv); +} + TVector<TSerializedPointOrRange> FillReadRanges(const TVector<NUdf::TDataTypeId>& keyColumnTypes, - const NKqpProto::TKqpPhyOpReadRanges& readRange, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) -{ - return FillReadRangesInternal(keyColumnTypes, readRange, stageInfo, holderFactory, typeEnv); -} - + const NKqpProto::TKqpPhyOpReadRanges& readRange, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) +{ + return FillReadRangesInternal(keyColumnTypes, readRange, stageInfo, holderFactory, typeEnv); +} + TSerializedTableRange MakeKeyRange(const TVector<NUdf::TDataTypeId>& keyColumnTypes, const NKqpProto::TKqpPhyKeyRange& range, const TStageInfo& stageInfo, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) @@ -418,11 +418,11 @@ TSerializedTableRange MakeKeyRange(const TVector<NUdf::TDataTypeId>& keyColumnTy } } - auto serialized = TSerializedTableRange(fromValues, range.GetFrom().GetIsInclusive(), toValues, range.GetTo().GetIsInclusive()); - YQL_CLOG(DEBUG, ProviderKqp) << "Formed range: " - << DebugPrintRange(keyColumnTypes, serialized.ToTableRange(), *AppData()->TypeRegistry); - - return serialized; + auto serialized = TSerializedTableRange(fromValues, range.GetFrom().GetIsInclusive(), toValues, range.GetTo().GetIsInclusive()); + YQL_CLOG(DEBUG, ProviderKqp) << "Formed range: " + << DebugPrintRange(keyColumnTypes, serialized.ToTableRange(), *AppData()->TypeRegistry); + + return serialized; } namespace { @@ -508,46 +508,46 @@ THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, return shardInfoMap; } - -THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, - const NKqpProto::TKqpPhyOpReadRanges& readRanges, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) -{ - const auto* table = tableKeys.FindTablePtr(stageInfo.Meta.TableId); - YQL_ENSURE(table); - + +THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, + const NKqpProto::TKqpPhyOpReadRanges& readRanges, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) +{ + const auto* table = tableKeys.FindTablePtr(stageInfo.Meta.TableId); + YQL_ENSURE(table); + const auto& keyColumnTypes = table->KeyColumnTypes; - auto ranges = FillReadRangesInternal(keyColumnTypes, readRanges, stageInfo, holderFactory, typeEnv); - - THashMap<ui64, TShardInfo> shardInfoMap; - - // KeyReadRanges must be sorted & non-intersecting, they came in such condition from predicate extraction. - for (auto& range: ranges) { + auto ranges = FillReadRangesInternal(keyColumnTypes, readRanges, stageInfo, holderFactory, typeEnv); + + THashMap<ui64, TShardInfo> shardInfoMap; + + // KeyReadRanges must be sorted & non-intersecting, they came in such condition from predicate extraction. + for (auto& range: ranges) { TTableRange tableRange = std::holds_alternative<TSerializedCellVec>(range) ? TTableRange(std::get<TSerializedCellVec>(range).GetCells(), true, std::get<TSerializedCellVec>(range).GetCells(), true, true) : TTableRange(std::get<TSerializedTableRange>(range).ToTableRange()); - + auto readPartitions = GetKeyRangePartitions(tableRange, stageInfo.Meta.ShardKey->Partitions, keyColumnTypes); - for (TPartitionWithRange& partitionWithRange : readPartitions) { - auto& shardInfo = shardInfoMap[partitionWithRange.PartitionInfo->ShardId]; - - if (!shardInfo.KeyReadRanges) { - shardInfo.KeyReadRanges.ConstructInPlace(); - } - - if (partitionWithRange.FullRange) { + for (TPartitionWithRange& partitionWithRange : readPartitions) { + auto& shardInfo = shardInfoMap[partitionWithRange.PartitionInfo->ShardId]; + + if (!shardInfo.KeyReadRanges) { + shardInfo.KeyReadRanges.ConstructInPlace(); + } + + if (partitionWithRange.FullRange) { shardInfo.KeyReadRanges->MakeFullRange(std::move(*partitionWithRange.FullRange)); - continue; - } - + continue; + } + shardInfo.KeyReadRanges->Add(std::move(partitionWithRange.PointOrRange)); - } - } - - return shardInfoMap; -} - + } + } + + return shardInfoMap; +} + namespace { using namespace NMiniKQL; @@ -765,8 +765,8 @@ THashMap<ui64, TShardInfo> PruneEffectPartitionsImpl(const TKqpTableKeys& tableK auto ret = shardInfo.Params.emplace(name, std::move(shardData.ParamValues)); YQL_ENSURE(ret.second); - auto retType = shardInfo.ParamTypes.emplace(name, std::move(shardData.ParamType)); - YQL_ENSURE(retType.second); + auto retType = shardInfo.ParamTypes.emplace(name, std::move(shardData.ParamType)); + YQL_ENSURE(retType.second); if (!shardInfo.KeyWriteRanges) { shardInfo.KeyWriteRanges.ConstructInPlace(); diff --git a/ydb/core/kqp/executer/kqp_partition_helper.h b/ydb/core/kqp/executer/kqp_partition_helper.h index 312b0dbadec..f01f25ab4a5 100644 --- a/ydb/core/kqp/executer/kqp_partition_helper.h +++ b/ydb/core/kqp/executer/kqp_partition_helper.h @@ -14,7 +14,7 @@ namespace NKikimr::NKqp { struct TShardInfo { TMap<TString, NYql::NDqProto::TData> Params; - TMap<TString, NKikimr::NMiniKQL::TType*> ParamTypes; + TMap<TString, NKikimr::NMiniKQL::TType*> ParamTypes; TMaybe<TShardKeyRanges> KeyReadRanges; // empty -> no reads TMaybe<TShardKeyRanges> KeyWriteRanges; // empty -> no writes @@ -27,22 +27,22 @@ TSerializedTableRange MakeKeyRange(const TVector<NUdf::TDataTypeId>& keyColumnTy const NMiniKQL::TTypeEnvironment& typeEnv); TVector<TSerializedPointOrRange> FillReadRanges(const TVector<NUdf::TDataTypeId>& keyColumnTypes, - const NKqpProto::TKqpPhyOpReadOlapRanges& readRange, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); - + const NKqpProto::TKqpPhyOpReadOlapRanges& readRange, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); + TVector<TSerializedPointOrRange> FillReadRanges(const TVector<NUdf::TDataTypeId>& keyColumnTypes, - const NKqpProto::TKqpPhyOpReadRanges& readRange, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); - + const NKqpProto::TKqpPhyOpReadRanges& readRange, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); + THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, const NKqpProto::TKqpPhyOpReadRange& readRange, const TStageInfo& stageInfo, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, - const NKqpProto::TKqpPhyOpReadRanges& readRanges, const TStageInfo& stageInfo, - const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); - -THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, + const NKqpProto::TKqpPhyOpReadRanges& readRanges, const TStageInfo& stageInfo, + const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); + +THashMap<ui64, TShardInfo> PrunePartitions(const TKqpTableKeys& tableKeys, const NKqpProto::TKqpPhyOpLookup& lookup, const TStageInfo& stageInfo, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv); diff --git a/ydb/core/kqp/executer/kqp_planner.cpp b/ydb/core/kqp/executer/kqp_planner.cpp index 23c55766e3c..6bcfa85c66f 100644 --- a/ydb/core/kqp/executer/kqp_planner.cpp +++ b/ydb/core/kqp/executer/kqp_planner.cpp @@ -24,7 +24,7 @@ constexpr ui32 MEMORY_ESTIMATION_OVERFLOW = 2; TKqpPlanner::TKqpPlanner(ui64 txId, const TActorId& executer, TVector<NDqProto::TDqTask>&& tasks, THashMap<ui64, TVector<NDqProto::TDqTask>>&& scanTasks, const IKqpGateway::TKqpSnapshot& snapshot, const TString& database, const TMaybe<TString>& userToken, TInstant deadline, const NYql::NDqProto::EDqStatsMode& statsMode, - bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath) + bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath) : TxId(txId) , ExecuterId(executer) , Tasks(std::move(tasks)) @@ -35,7 +35,7 @@ TKqpPlanner::TKqpPlanner(ui64 txId, const TActorId& executer, TVector<NDqProto:: , Deadline(deadline) , StatsMode(statsMode) , DisableLlvmForUdfStages(disableLlvmForUdfStages) - , EnableLlvm(enableLlvm) + , EnableLlvm(enableLlvm) , WithSpilling(withSpilling) , RlPath(rlPath) { @@ -234,7 +234,7 @@ THolder<TEvKqpNode::TEvStartKqpTasksRequest> TKqpPlanner::PrepareKqpNodeRequest( ev->Record.SetTxId(TxId); ActorIdToProto(ExecuterId, ev->Record.MutableExecuterActorId()); - bool withLLVM = EnableLlvm; + bool withLLVM = EnableLlvm; if (taskIds.empty()) { for (auto& taskDesc : Tasks) { @@ -319,10 +319,10 @@ ui32 TKqpPlanner::CalcSendMessageFlagsForNode(ui32 nodeId) { IActor* CreateKqpPlanner(ui64 txId, const TActorId& executer, TVector<NDqProto::TDqTask>&& tasks, THashMap<ui64, TVector<NDqProto::TDqTask>>&& scanTasks, const IKqpGateway::TKqpSnapshot& snapshot, const TString& database, const TMaybe<TString>& token, TInstant deadline, const NYql::NDqProto::EDqStatsMode& statsMode, - bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath) + bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath) { return new TKqpPlanner(txId, executer, std::move(tasks), std::move(scanTasks), snapshot, - database, token, deadline, statsMode, disableLlvmForUdfStages, enableLlvm, withSpilling, rlPath); + database, token, deadline, statsMode, disableLlvmForUdfStages, enableLlvm, withSpilling, rlPath); } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/executer/kqp_planner.h b/ydb/core/kqp/executer/kqp_planner.h index 66bcf75a427..dc5e993a74d 100644 --- a/ydb/core/kqp/executer/kqp_planner.h +++ b/ydb/core/kqp/executer/kqp_planner.h @@ -34,7 +34,7 @@ public: TKqpPlanner(ui64 txId, const TActorId& executer, TVector<NYql::NDqProto::TDqTask>&& tasks, THashMap<ui64, TVector<NYql::NDqProto::TDqTask>>&& scanTasks, const IKqpGateway::TKqpSnapshot& snapshot, const TString& database, const TMaybe<TString>& userToken, TInstant deadline, const NYql::NDqProto::EDqStatsMode& statsMode, - bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath); + bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath); void Bootstrap(const TActorContext& ctx); @@ -65,7 +65,7 @@ private: const TInstant Deadline; const NYql::NDqProto::EDqStatsMode StatsMode; const bool DisableLlvmForUdfStages; - const bool EnableLlvm; + const bool EnableLlvm; const bool WithSpilling; const TMaybe<NKikimrKqp::TRlPath> RlPath; THashSet<ui32> TrackingNodes; @@ -74,6 +74,6 @@ private: IActor* CreateKqpPlanner(ui64 txId, const TActorId& executer, TVector<NYql::NDqProto::TDqTask>&& tasks, THashMap<ui64, TVector<NYql::NDqProto::TDqTask>>&& scanTasks, const IKqpGateway::TKqpSnapshot& snapshot, const TString& database, const TMaybe<TString>& userToken, TInstant deadline, const NYql::NDqProto::EDqStatsMode& statsMode, - bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath); + bool disableLlvmForUdfStages, bool enableLlvm, bool withSpilling, const TMaybe<NKikimrKqp::TRlPath>& rlPath); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/executer/kqp_result_channel.cpp b/ydb/core/kqp/executer/kqp_result_channel.cpp index 125f4b53d77..0fe235323e3 100644 --- a/ydb/core/kqp/executer/kqp_result_channel.cpp +++ b/ydb/core/kqp/executer/kqp_result_channel.cpp @@ -1,7 +1,7 @@ #include "kqp_result_channel.h" #include "kqp_executer.h" -#include "kqp_executer_impl.h" +#include "kqp_executer_impl.h" #include "kqp_executer_stats.h" #include <ydb/core/base/appdata.h> @@ -12,24 +12,24 @@ namespace NKikimr { namespace NKqp { -namespace { +namespace { -class TResultCommonChannelProxy : public NActors::TActor<TResultCommonChannelProxy> { +class TResultCommonChannelProxy : public NActors::TActor<TResultCommonChannelProxy> { public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::KQP_RESULT_CHANNEL_PROXY; } TResultCommonChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, TActorId executer) - : TActor(&TResultCommonChannelProxy::WorkState) + : TActor(&TResultCommonChannelProxy::WorkState) , TxId(txId) , ChannelId(channelId) , Stats(stats) - , Executer(executer) {} + , Executer(executer) {} + +protected: + virtual void SendResults(NYql::NDqProto::TEvComputeChannelData& computeData, TActorId sender) = 0; -protected: - virtual void SendResults(NYql::NDqProto::TEvComputeChannelData& computeData, TActorId sender) = 0; - private: STATEFN(WorkState) { switch (ev->GetTypeRewrite()) { @@ -43,30 +43,30 @@ private: } } - void UpdateStatistics(const ::NYql::NDqProto::TData& data) { - if (!Stats) { - return; - } - - Stats->ResultBytes += data.GetRaw().size(); - Stats->ResultRows += data.GetRows(); - } - - void HandleWork(NYql::NDq::TEvDqCompute::TEvChannelData::TPtr& ev) { - NYql::NDqProto::TEvComputeChannelData& record = ev->Get()->Record; - auto& channelData = record.GetChannelData(); - - ComputeActor = ev->Sender; - - LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, "TxId: " << TxId << ", got result" - << ", channelId: " << channelData.GetChannelId() - << ", seqNo: " << record.GetSeqNo() - << ", from: " << ev->Sender); - - UpdateStatistics(channelData.GetData()); - SendResults(record, ev->Sender); - } - + void UpdateStatistics(const ::NYql::NDqProto::TData& data) { + if (!Stats) { + return; + } + + Stats->ResultBytes += data.GetRaw().size(); + Stats->ResultRows += data.GetRows(); + } + + void HandleWork(NYql::NDq::TEvDqCompute::TEvChannelData::TPtr& ev) { + NYql::NDqProto::TEvComputeChannelData& record = ev->Get()->Record; + auto& channelData = record.GetChannelData(); + + ComputeActor = ev->Sender; + + LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, "TxId: " << TxId << ", got result" + << ", channelId: " << channelData.GetChannelId() + << ", seqNo: " << record.GetSeqNo() + << ", from: " << ev->Sender); + + UpdateStatistics(channelData.GetData()); + SendResults(record, ev->Sender); + } + void HandleWork(TEvKqpExecuter::TEvStreamDataAck::TPtr& ev) { ui64 seqNo = ev->Get()->Record.GetSeqNo(); ui64 freeSpace = ev->Get()->Record.GetFreeSpace(); @@ -92,7 +92,7 @@ private: auto evAbort = MakeHolder<TEvKqp::TEvAbortExecution>(Ydb::StatusIds::INTERNAL_ERROR, msg); Send(Executer, evAbort.Release()); - Become(&TResultCommonChannelProxy::DeadState); + Become(&TResultCommonChannelProxy::DeadState); } private: @@ -117,95 +117,95 @@ private: NActors::TActorId ComputeActor; }; -class TResultStreamChannelProxy : public TResultCommonChannelProxy { -public: - TResultStreamChannelProxy(ui64 txId, ui64 channelId, const NKikimrMiniKQL::TType& itemType, - const NKikimrMiniKQL::TType* resultItemType, TActorId target, TQueryExecutionStats* stats, +class TResultStreamChannelProxy : public TResultCommonChannelProxy { +public: + TResultStreamChannelProxy(ui64 txId, ui64 channelId, const NKikimrMiniKQL::TType& itemType, + const NKikimrMiniKQL::TType* resultItemType, TActorId target, TQueryExecutionStats* stats, TActorId executer) - : TResultCommonChannelProxy(txId, channelId, stats, executer) - , ResultItemType(resultItemType) - , ItemType(itemType) - , Target(target) {} - -private: - virtual void SendResults(NYql::NDqProto::TEvComputeChannelData& computeData, TActorId sender) { - Y_UNUSED(sender); - - auto& channelData = computeData.GetChannelData(); - - TKqpProtoBuilder protoBuilder{*AppData()->FunctionRegistry}; - auto resultSet = protoBuilder.BuildYdbResultSet({channelData.GetData()}, ItemType, ResultItemType); - - auto streamEv = MakeHolder<TEvKqpExecuter::TEvStreamData>(); - streamEv->Record.SetSeqNo(computeData.GetSeqNo()); - streamEv->Record.MutableResultSet()->Swap(&resultSet); - - LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, - "Send TEvStreamData to " << Target << ", seqNo: " << streamEv->Record.GetSeqNo() - << ", nRows: " << channelData.GetData().GetRows() ); - - Send(Target, streamEv.Release()); - } -private: - const NKikimrMiniKQL::TType* ResultItemType; - const NKikimrMiniKQL::TType& ItemType; - const NActors::TActorId Target; -}; - -class TResultDataChannelProxy : public TResultCommonChannelProxy { -public: + : TResultCommonChannelProxy(txId, channelId, stats, executer) + , ResultItemType(resultItemType) + , ItemType(itemType) + , Target(target) {} + +private: + virtual void SendResults(NYql::NDqProto::TEvComputeChannelData& computeData, TActorId sender) { + Y_UNUSED(sender); + + auto& channelData = computeData.GetChannelData(); + + TKqpProtoBuilder protoBuilder{*AppData()->FunctionRegistry}; + auto resultSet = protoBuilder.BuildYdbResultSet({channelData.GetData()}, ItemType, ResultItemType); + + auto streamEv = MakeHolder<TEvKqpExecuter::TEvStreamData>(); + streamEv->Record.SetSeqNo(computeData.GetSeqNo()); + streamEv->Record.MutableResultSet()->Swap(&resultSet); + + LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, + "Send TEvStreamData to " << Target << ", seqNo: " << streamEv->Record.GetSeqNo() + << ", nRows: " << channelData.GetData().GetRows() ); + + Send(Target, streamEv.Release()); + } +private: + const NKikimrMiniKQL::TType* ResultItemType; + const NKikimrMiniKQL::TType& ItemType; + const NActors::TActorId Target; +}; + +class TResultDataChannelProxy : public TResultCommonChannelProxy { +public: TResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, TActorId executer, TVector<NYql::NDqProto::TData>* resultReceiver) - : TResultCommonChannelProxy(txId, channelId, stats, executer) - , ResultReceiver(resultReceiver) {} - -private: - virtual void SendResults(NYql::NDqProto::TEvComputeChannelData& computeData, TActorId sender) { - auto& channelData = computeData.GetChannelData(); - auto channelId = channelData.GetChannelId(); - - if (channelData.GetData().GetRows()) { - ResultReceiver->emplace_back( - std::move(*computeData.MutableChannelData()->MutableData()) - ); - } - - auto ackEv = MakeHolder<NYql::NDq::TEvDqCompute::TEvChannelDataAck>(); - - ackEv->Record.SetSeqNo(computeData.GetSeqNo()); - ackEv->Record.SetChannelId(channelId); - ackEv->Record.SetFreeSpace(1_MB); - - Send(sender, ackEv.Release(), /* TODO: undelivery */ 0, /* cookie */ channelId); - } - -private: - TVector<NYql::NDqProto::TData>* ResultReceiver = nullptr; -}; - -} // anonymous namespace end - -NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, const NKikimrMiniKQL::TType& itemType, - const NKikimrMiniKQL::TType* resultItemType, TActorId target, TQueryExecutionStats* stats, TActorId executer) + : TResultCommonChannelProxy(txId, channelId, stats, executer) + , ResultReceiver(resultReceiver) {} + +private: + virtual void SendResults(NYql::NDqProto::TEvComputeChannelData& computeData, TActorId sender) { + auto& channelData = computeData.GetChannelData(); + auto channelId = channelData.GetChannelId(); + + if (channelData.GetData().GetRows()) { + ResultReceiver->emplace_back( + std::move(*computeData.MutableChannelData()->MutableData()) + ); + } + + auto ackEv = MakeHolder<NYql::NDq::TEvDqCompute::TEvChannelDataAck>(); + + ackEv->Record.SetSeqNo(computeData.GetSeqNo()); + ackEv->Record.SetChannelId(channelId); + ackEv->Record.SetFreeSpace(1_MB); + + Send(sender, ackEv.Release(), /* TODO: undelivery */ 0, /* cookie */ channelId); + } + +private: + TVector<NYql::NDqProto::TData>* ResultReceiver = nullptr; +}; + +} // anonymous namespace end + +NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, const NKikimrMiniKQL::TType& itemType, + const NKikimrMiniKQL::TType* resultItemType, TActorId target, TQueryExecutionStats* stats, TActorId executer) { - LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, - "CreateResultStreamChannelProxy: TxId: " << txId << - ", channelId: " << channelId - ); - - return new TResultStreamChannelProxy(txId, channelId, itemType, resultItemType, target, stats, executer); -} - -NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, - TQueryExecutionStats* stats, TActorId executer, - TVector<NYql::NDqProto::TData>* resultsReceiver) -{ - LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, - "CreateResultDataChannelProxy: TxId: " << txId << - ", channelId: " << channelId - ); - - return new TResultDataChannelProxy(txId, channelId, stats, executer, resultsReceiver); + LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, + "CreateResultStreamChannelProxy: TxId: " << txId << + ", channelId: " << channelId + ); + + return new TResultStreamChannelProxy(txId, channelId, itemType, resultItemType, target, stats, executer); +} + +NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, + TQueryExecutionStats* stats, TActorId executer, + TVector<NYql::NDqProto::TData>* resultsReceiver) +{ + LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, + "CreateResultDataChannelProxy: TxId: " << txId << + ", channelId: " << channelId + ); + + return new TResultDataChannelProxy(txId, channelId, stats, executer, resultsReceiver); } } // namespace NKqp diff --git a/ydb/core/kqp/executer/kqp_result_channel.h b/ydb/core/kqp/executer/kqp_result_channel.h index 554834b39b0..299b0f686bf 100644 --- a/ydb/core/kqp/executer/kqp_result_channel.h +++ b/ydb/core/kqp/executer/kqp_result_channel.h @@ -1,20 +1,20 @@ #pragma once -#include "kqp_tasks_graph.h" - +#include "kqp_tasks_graph.h" + #include <library/cpp/actors/core/actor.h> namespace NYql { - + class TTypeAnnotationNode; - -namespace NDqProto { - -class TData; - -} // namespace NDqProto -} // namespace NYql - + +namespace NDqProto { + +class TData; + +} // namespace NDqProto +} // namespace NYql + namespace NKikimrMiniKQL { class TType; } // namespace NKikimrMiniKQL @@ -22,13 +22,13 @@ class TType; namespace NKikimr::NKqp { struct TQueryExecutionStats; -struct TKqpExecuterTxResult; +struct TKqpExecuterTxResult; -NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, const NKikimrMiniKQL::TType& itemType, +NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, const NKikimrMiniKQL::TType& itemType, const NKikimrMiniKQL::TType* resultItemType, NActors::TActorId target, TQueryExecutionStats* stats, - NActors::TActorId executer); + NActors::TActorId executer); + +NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, + NActors::TActorId executer, TVector<NYql::NDqProto::TData>* resultReceiver); -NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, - NActors::TActorId executer, TVector<NYql::NDqProto::TData>* resultReceiver); - } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/executer/kqp_scan_executer.cpp b/ydb/core/kqp/executer/kqp_scan_executer.cpp index 63508d7d689..07c29f64e21 100644 --- a/ydb/core/kqp/executer/kqp_scan_executer.cpp +++ b/ydb/core/kqp/executer/kqp_scan_executer.cpp @@ -52,19 +52,19 @@ public: YQL_ENSURE(!Request.EraseLocks); YQL_ENSURE(Request.IsolationLevel == NKikimrKqp::ISOLATION_LEVEL_UNDEFINED); YQL_ENSURE(Request.Snapshot.IsValid()); - - size_t resultsSize = Request.Transactions[0].Body.ResultsSize(); - YQL_ENSURE(resultsSize != 0); - - bool streamResult = Request.Transactions[0].Body.GetResults(0).GetIsStream(); - - if (streamResult) { - YQL_ENSURE(resultsSize == 1); - } else { - for (size_t i = 1; i < resultsSize; ++i) { - YQL_ENSURE(Request.Transactions[0].Body.GetResults(i).GetIsStream() == streamResult); - } - } + + size_t resultsSize = Request.Transactions[0].Body.ResultsSize(); + YQL_ENSURE(resultsSize != 0); + + bool streamResult = Request.Transactions[0].Body.GetResults(0).GetIsStream(); + + if (streamResult) { + YQL_ENSURE(resultsSize == 1); + } else { + for (size_t i = 1; i < resultsSize; ++i) { + YQL_ENSURE(Request.Transactions[0].Body.GetResults(i).GetIsStream() == streamResult); + } + } } public: @@ -258,16 +258,16 @@ private: << ", enough: " << ev->Get()->Record.GetEnough() << ", from: " << ev->Sender); - if (ResultChannelProxies.empty()) { - return; + if (ResultChannelProxies.empty()) { + return; } - - // Forward only for stream results, data results acks event theirselves. - YQL_ENSURE(!Results.empty() && Results[0].IsStream); - - auto channelIt = ResultChannelProxies.begin(); - auto handle = ev->Forward(channelIt->second->SelfId()); - channelIt->second->Receive(handle, TlsActivationContext->AsActorContext()); + + // Forward only for stream results, data results acks event theirselves. + YQL_ENSURE(!Results.empty() && Results[0].IsStream); + + auto channelIt = ResultChannelProxies.begin(); + auto handle = ev->Forward(channelIt->second->SelfId()); + channelIt->second->Receive(handle, TlsActivationContext->AsActorContext()); } void HandleExecute(TEvKqpNode::TEvStartKqpTasksResponse::TPtr& ev) { @@ -370,37 +370,37 @@ private: } private: - void FillReadInfo(TTaskMeta& taskMeta, ui64 itemsLimit, bool reverse, - const TMaybe<::NKqpProto::TKqpPhyOpReadOlapRanges>& readOlapRange) - { - if (taskMeta.Reads && !taskMeta.Reads.GetRef().empty()) { - // Validate parameters - YQL_ENSURE(taskMeta.ReadInfo.ItemsLimit == itemsLimit); - YQL_ENSURE(taskMeta.ReadInfo.Reverse == reverse); - - if (!readOlapRange || readOlapRange->GetOlapProgram().empty()) { - YQL_ENSURE(taskMeta.ReadInfo.OlapProgram.Program.empty()); - return; - } - - YQL_ENSURE(taskMeta.ReadInfo.OlapProgram.Program == readOlapRange->GetOlapProgram()); - return; - } - - taskMeta.ReadInfo.ItemsLimit = itemsLimit; - taskMeta.ReadInfo.Reverse = reverse; - - if (!readOlapRange || readOlapRange->GetOlapProgram().empty()) { - return; - } - - taskMeta.ReadInfo.OlapProgram.Program = readOlapRange->GetOlapProgram(); - - for (auto& name: readOlapRange->GetOlapProgramParameterNames()) { - taskMeta.ReadInfo.OlapProgram.ParameterNames.insert(name); - } - }; - + void FillReadInfo(TTaskMeta& taskMeta, ui64 itemsLimit, bool reverse, + const TMaybe<::NKqpProto::TKqpPhyOpReadOlapRanges>& readOlapRange) + { + if (taskMeta.Reads && !taskMeta.Reads.GetRef().empty()) { + // Validate parameters + YQL_ENSURE(taskMeta.ReadInfo.ItemsLimit == itemsLimit); + YQL_ENSURE(taskMeta.ReadInfo.Reverse == reverse); + + if (!readOlapRange || readOlapRange->GetOlapProgram().empty()) { + YQL_ENSURE(taskMeta.ReadInfo.OlapProgram.Program.empty()); + return; + } + + YQL_ENSURE(taskMeta.ReadInfo.OlapProgram.Program == readOlapRange->GetOlapProgram()); + return; + } + + taskMeta.ReadInfo.ItemsLimit = itemsLimit; + taskMeta.ReadInfo.Reverse = reverse; + + if (!readOlapRange || readOlapRange->GetOlapProgram().empty()) { + return; + } + + taskMeta.ReadInfo.OlapProgram.Program = readOlapRange->GetOlapProgram(); + + for (auto& name: readOlapRange->GetOlapProgramParameterNames()) { + taskMeta.ReadInfo.OlapProgram.ParameterNames.insert(name); + } + }; + void BuildDatashardScanTasks(TStageInfo& stageInfo, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) { @@ -431,81 +431,81 @@ private: Y_VERIFY_DEBUG(stageInfo.Meta.TablePath == op.GetTable().GetPath()); auto columns = BuildKqpColumns(op, table); - THashMap<ui64, TShardInfo> partitions; + THashMap<ui64, TShardInfo> partitions; switch (op.GetTypeCase()) { - case NKqpProto::TKqpPhyTableOperation::kReadRanges: - partitions = PrunePartitions(TableKeys, op.GetReadRanges(), stageInfo, holderFactory, typeEnv); - break; + case NKqpProto::TKqpPhyTableOperation::kReadRanges: + partitions = PrunePartitions(TableKeys, op.GetReadRanges(), stageInfo, holderFactory, typeEnv); + break; case NKqpProto::TKqpPhyTableOperation::kReadRange: - partitions = PrunePartitions(TableKeys, op.GetReadRange(), stageInfo, holderFactory, typeEnv); - break; - case NKqpProto::TKqpPhyTableOperation::kLookup: - partitions = PrunePartitions(TableKeys, op.GetLookup(), stageInfo, holderFactory, typeEnv); - break; - default: - YQL_ENSURE(false, "Unexpected table scan operation: " << (ui32) op.GetTypeCase()); - break; - } - - bool reverse = false; - ui64 itemsLimit = 0; - TString itemsLimitParamName; - NDqProto::TData itemsLimitBytes; - NKikimr::NMiniKQL::TType* itemsLimitType = nullptr; - - // TODO: Support reverse, skipnull and limit for kReadRanges - if (op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadRange) { - ExtractItemsLimit(stageInfo, op.GetReadRange().GetItemsLimit(), holderFactory, - typeEnv, itemsLimit, itemsLimitParamName, itemsLimitBytes, itemsLimitType); - reverse = op.GetReadRange().GetReverse(); - - YQL_ENSURE(!reverse); // TODO: not supported yet - - stageInfo.Meta.SkipNullKeys.assign(op.GetReadRange().GetSkipNullKeys().begin(), - op.GetReadRange().GetSkipNullKeys().end()); - } - - // TODO: take into account number of active scans on node - bool heavyProgram = stage.GetProgram().GetSettings().GetHasSort() || - stage.GetProgram().GetSettings().GetHasMapJoin(); - const ui32 maxScansPerNode = heavyProgram ? 4 : 16; - THashMap<ui64, ui32> nTasksOnNodes; // nodeId -> tasks count - - for (auto& [shardId, shardInfo] : partitions) { - YQL_ENSURE(!shardInfo.KeyWriteRanges); - - ui64 nodeId = ShardIdToNodeId.at(shardId); - ui32 nTasksOnNode = nTasksOnNodes[nodeId]++; - auto& task = getNodeTask(nodeId, nTasksOnNode % maxScansPerNode); - - for (auto& [name, value] : shardInfo.Params) { - auto ret = task.Meta.Params.emplace(name, std::move(value)); - YQL_ENSURE(ret.second); - auto typeIterator = shardInfo.ParamTypes.find(name); - YQL_ENSURE(typeIterator != shardInfo.ParamTypes.end()); - auto retType = task.Meta.ParamTypes.emplace(name, typeIterator->second); - YQL_ENSURE(retType.second); - } - - TTaskMeta::TShardReadInfo readInfo = { - .Ranges = std::move(*shardInfo.KeyReadRanges), // sorted & non-intersecting - .Columns = columns, - .ShardId = shardId, - }; - - if (itemsLimit && !task.Meta.Params.contains(itemsLimitParamName)) { - task.Meta.Params.emplace(itemsLimitParamName, itemsLimitBytes); - task.Meta.ParamTypes.emplace(itemsLimitParamName, itemsLimitType); + partitions = PrunePartitions(TableKeys, op.GetReadRange(), stageInfo, holderFactory, typeEnv); + break; + case NKqpProto::TKqpPhyTableOperation::kLookup: + partitions = PrunePartitions(TableKeys, op.GetLookup(), stageInfo, holderFactory, typeEnv); + break; + default: + YQL_ENSURE(false, "Unexpected table scan operation: " << (ui32) op.GetTypeCase()); + break; + } + + bool reverse = false; + ui64 itemsLimit = 0; + TString itemsLimitParamName; + NDqProto::TData itemsLimitBytes; + NKikimr::NMiniKQL::TType* itemsLimitType = nullptr; + + // TODO: Support reverse, skipnull and limit for kReadRanges + if (op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadRange) { + ExtractItemsLimit(stageInfo, op.GetReadRange().GetItemsLimit(), holderFactory, + typeEnv, itemsLimit, itemsLimitParamName, itemsLimitBytes, itemsLimitType); + reverse = op.GetReadRange().GetReverse(); + + YQL_ENSURE(!reverse); // TODO: not supported yet + + stageInfo.Meta.SkipNullKeys.assign(op.GetReadRange().GetSkipNullKeys().begin(), + op.GetReadRange().GetSkipNullKeys().end()); + } + + // TODO: take into account number of active scans on node + bool heavyProgram = stage.GetProgram().GetSettings().GetHasSort() || + stage.GetProgram().GetSettings().GetHasMapJoin(); + const ui32 maxScansPerNode = heavyProgram ? 4 : 16; + THashMap<ui64, ui32> nTasksOnNodes; // nodeId -> tasks count + + for (auto& [shardId, shardInfo] : partitions) { + YQL_ENSURE(!shardInfo.KeyWriteRanges); + + ui64 nodeId = ShardIdToNodeId.at(shardId); + ui32 nTasksOnNode = nTasksOnNodes[nodeId]++; + auto& task = getNodeTask(nodeId, nTasksOnNode % maxScansPerNode); + + for (auto& [name, value] : shardInfo.Params) { + auto ret = task.Meta.Params.emplace(name, std::move(value)); + YQL_ENSURE(ret.second); + auto typeIterator = shardInfo.ParamTypes.find(name); + YQL_ENSURE(typeIterator != shardInfo.ParamTypes.end()); + auto retType = task.Meta.ParamTypes.emplace(name, typeIterator->second); + YQL_ENSURE(retType.second); + } + + TTaskMeta::TShardReadInfo readInfo = { + .Ranges = std::move(*shardInfo.KeyReadRanges), // sorted & non-intersecting + .Columns = columns, + .ShardId = shardId, + }; + + if (itemsLimit && !task.Meta.Params.contains(itemsLimitParamName)) { + task.Meta.Params.emplace(itemsLimitParamName, itemsLimitBytes); + task.Meta.ParamTypes.emplace(itemsLimitParamName, itemsLimitType); } - FillReadInfo(task.Meta, itemsLimit, reverse, TMaybe<::NKqpProto::TKqpPhyOpReadOlapRanges>()); - - if (!task.Meta.Reads) { - task.Meta.Reads.ConstructInPlace(); + FillReadInfo(task.Meta, itemsLimit, reverse, TMaybe<::NKqpProto::TKqpPhyOpReadOlapRanges>()); + + if (!task.Meta.Reads) { + task.Meta.Reads.ConstructInPlace(); } - - task.Meta.Reads->emplace_back(std::move(readInfo)); + + task.Meta.Reads->emplace_back(std::move(readInfo)); } } @@ -550,7 +550,7 @@ private: // NOTE: Unlike OLTP tables that store data in DataShards, data in OLAP tables is not range // partitioned and multiple ColumnShards store data from the same key range THashMap<ui64, TShardInfo> ListColumnshadsForRange(const TKqpTableKeys& tableKeys, - const NKqpProto::TKqpPhyOpReadOlapRanges& readRanges, const TStageInfo& stageInfo, + const NKqpProto::TKqpPhyOpReadOlapRanges& readRanges, const TStageInfo& stageInfo, const NMiniKQL::THolderFactory& holderFactory, const NMiniKQL::TTypeEnvironment& typeEnv) { const auto* table = tableKeys.FindTablePtr(stageInfo.Meta.TableId); @@ -559,7 +559,7 @@ private: YQL_ENSURE(stageInfo.Meta.TableKind == ETableKind::Olap); const auto& keyColumnTypes = table->KeyColumnTypes; - auto ranges = FillReadRanges(keyColumnTypes, readRanges, stageInfo, holderFactory, typeEnv); + auto ranges = FillReadRanges(keyColumnTypes, readRanges, stageInfo, holderFactory, typeEnv); THashMap<ui64, TShardInfo> shardInfoMap; for (const auto& partition : stageInfo.Meta.ShardKey->Partitions) { @@ -567,7 +567,7 @@ private: YQL_ENSURE(!shardInfo.KeyReadRanges); shardInfo.KeyReadRanges.ConstructInPlace(); - shardInfo.KeyReadRanges->CopyFrom(ranges); + shardInfo.KeyReadRanges->CopyFrom(ranges); } return shardInfoMap; @@ -589,69 +589,69 @@ private: } ui64 taskCount = 0; - + for (auto& op : stage.GetTableOps()) { Y_VERIFY_DEBUG(stageInfo.Meta.TablePath == op.GetTable().GetPath()); auto columns = BuildKqpColumns(op, table); - YQL_ENSURE( - op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadOlapRange, - "Unexpected OLAP table scan operation: " << (ui32) op.GetTypeCase() - ); - - const auto& readRange = op.GetReadOlapRange(); - - auto allShards = ListColumnshadsForRange(TableKeys, readRange, stageInfo, holderFactory, typeEnv); - - bool reverse = readRange.GetReverse(); - ui64 itemsLimit = 0; - TString itemsLimitParamName; - NDqProto::TData itemsLimitBytes; - NKikimr::NMiniKQL::TType* itemsLimitType; - - ExtractItemsLimit(stageInfo, op.GetReadOlapRange().GetItemsLimit(), holderFactory, typeEnv, - itemsLimit, itemsLimitParamName, itemsLimitBytes, itemsLimitType); - - for (auto& [shardId, shardInfo] : allShards) { - YQL_ENSURE(!shardInfo.KeyWriteRanges); - - if (shardInfo.KeyReadRanges->GetRanges().empty()) { - continue; - } - - ui64 nodeId = ShardIdToNodeId.at(shardId); - auto& task = TasksGraph.AddTask(stageInfo); - task.Meta.NodeId = nodeId; - ++taskCount; - - for (auto& [name, value] : shardInfo.Params) { - auto ret = task.Meta.Params.emplace(name, std::move(value)); - YQL_ENSURE(ret.second); - auto typeIterator = shardInfo.ParamTypes.find(name); - YQL_ENSURE(typeIterator != shardInfo.ParamTypes.end()); - auto retType = task.Meta.ParamTypes.emplace(name, typeIterator->second); - YQL_ENSURE(retType.second); - } - - TTaskMeta::TShardReadInfo readInfo = { - .Ranges = std::move(*shardInfo.KeyReadRanges), - .Columns = columns, - .ShardId = shardId, - }; - - FillReadInfo(task.Meta, itemsLimit, reverse, readRange); - - if (itemsLimit) { - task.Meta.Params.emplace(itemsLimitParamName, itemsLimitBytes); - task.Meta.ParamTypes.emplace(itemsLimitParamName, itemsLimitType); + YQL_ENSURE( + op.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadOlapRange, + "Unexpected OLAP table scan operation: " << (ui32) op.GetTypeCase() + ); + + const auto& readRange = op.GetReadOlapRange(); + + auto allShards = ListColumnshadsForRange(TableKeys, readRange, stageInfo, holderFactory, typeEnv); + + bool reverse = readRange.GetReverse(); + ui64 itemsLimit = 0; + TString itemsLimitParamName; + NDqProto::TData itemsLimitBytes; + NKikimr::NMiniKQL::TType* itemsLimitType; + + ExtractItemsLimit(stageInfo, op.GetReadOlapRange().GetItemsLimit(), holderFactory, typeEnv, + itemsLimit, itemsLimitParamName, itemsLimitBytes, itemsLimitType); + + for (auto& [shardId, shardInfo] : allShards) { + YQL_ENSURE(!shardInfo.KeyWriteRanges); + + if (shardInfo.KeyReadRanges->GetRanges().empty()) { + continue; } - task.Meta.Reads.ConstructInPlace(); - task.Meta.Reads->emplace_back(std::move(readInfo)); - - LOG_D("Stage " << stageInfo.Id << " create columnshard scan task at node: " << nodeId - << ", meta: " << task.Meta.ToString(keyTypes, *AppData()->TypeRegistry)); + ui64 nodeId = ShardIdToNodeId.at(shardId); + auto& task = TasksGraph.AddTask(stageInfo); + task.Meta.NodeId = nodeId; + ++taskCount; + + for (auto& [name, value] : shardInfo.Params) { + auto ret = task.Meta.Params.emplace(name, std::move(value)); + YQL_ENSURE(ret.second); + auto typeIterator = shardInfo.ParamTypes.find(name); + YQL_ENSURE(typeIterator != shardInfo.ParamTypes.end()); + auto retType = task.Meta.ParamTypes.emplace(name, typeIterator->second); + YQL_ENSURE(retType.second); + } + + TTaskMeta::TShardReadInfo readInfo = { + .Ranges = std::move(*shardInfo.KeyReadRanges), + .Columns = columns, + .ShardId = shardId, + }; + + FillReadInfo(task.Meta, itemsLimit, reverse, readRange); + + if (itemsLimit) { + task.Meta.Params.emplace(itemsLimitParamName, itemsLimitBytes); + task.Meta.ParamTypes.emplace(itemsLimitParamName, itemsLimitType); + } + + task.Meta.Reads.ConstructInPlace(); + task.Meta.Reads->emplace_back(std::move(readInfo)); + + LOG_D("Stage " << stageInfo.Id << " create columnshard scan task at node: " << nodeId + << ", meta: " << task.Meta.ToString(keyTypes, *AppData()->TypeRegistry)); } } @@ -746,7 +746,7 @@ private: BuildKqpStageChannels(TasksGraph, TableKeys, stageInfo, TxId, AppData()->EnableKqpSpilling); } - BuildKqpExecuterResults(tx.Body, Results); + BuildKqpExecuterResults(tx.Body, Results); BuildKqpTaskGraphResultChannels(TasksGraph, tx.Body, 0); TIssue validateIssue; @@ -828,17 +828,17 @@ private: protoColumn->SetName(column.Name); } - if (!task.Meta.Reads->empty()) { - protoTaskMeta.SetReverse(task.Meta.ReadInfo.Reverse); - protoTaskMeta.SetItemsLimit(task.Meta.ReadInfo.ItemsLimit); + if (!task.Meta.Reads->empty()) { + protoTaskMeta.SetReverse(task.Meta.ReadInfo.Reverse); + protoTaskMeta.SetItemsLimit(task.Meta.ReadInfo.ItemsLimit); if (tableInfo.TableKind == ETableKind::Olap) { auto* olapProgram = protoTaskMeta.MutableOlapProgram(); olapProgram->SetProgram(task.Meta.ReadInfo.OlapProgram.Program); - + auto [schema, parameters] = SerializeKqpTasksParametersForOlap(stage, stageInfo, task, holderFactory, typeEnv); - + olapProgram->SetParametersSchema(schema); olapProgram->SetParameters(parameters); } else { @@ -863,11 +863,11 @@ private: } } - LOG_D( - "task: " << task.Id << - ", node: " << task.Meta.NodeId << - ", meta: " << protoTaskMeta.ShortDebugString() - ); + LOG_D( + "task: " << task.Id << + ", node: " << task.Meta.NodeId << + ", meta: " << protoTaskMeta.ShortDebugString() + ); taskDesc.MutableMeta()->PackFrom(protoTaskMeta); @@ -921,7 +921,7 @@ private: auto planner = CreateKqpPlanner(TxId, SelfId(), std::move(computeTasks), std::move(scanTasks), Request.Snapshot, Database, UserToken, Deadline.GetOrElse(TInstant::Zero()), Request.StatsMode, - Request.DisableLlvmForUdfStages, Request.LlvmEnabled, AppData()->EnableKqpSpilling, Request.RlPath); + Request.DisableLlvmForUdfStages, Request.LlvmEnabled, AppData()->EnableKqpSpilling, Request.RlPath); RegisterWithSameMailbox(planner); } @@ -930,19 +930,19 @@ private: response.SetStatus(Ydb::StatusIds::SUCCESS); - TKqpProtoBuilder protoBuilder(*AppData()->FunctionRegistry); - - for (auto& result : Results) { + TKqpProtoBuilder protoBuilder(*AppData()->FunctionRegistry); + + for (auto& result : Results) { auto* protoResult = response.MutableResult()->AddResults(); - - if (result.IsStream) { - // There is no support for multiple streaming results currently - YQL_ENSURE(Results.size() == 1); - protoBuilder.BuildStream(result.Data, result.ItemType, result.ResultItemType.Get(), protoResult); - continue; + + if (result.IsStream) { + // There is no support for multiple streaming results currently + YQL_ENSURE(Results.size() == 1); + protoBuilder.BuildStream(result.Data, result.ItemType, result.ResultItemType.Get(), protoResult); + continue; } - - protoBuilder.BuildValue(result.Data, result.ItemType, protoResult); + + protoBuilder.BuildValue(result.Data, result.ItemType, protoResult); } if (Stats) { @@ -981,13 +981,13 @@ private: } void PassAway() override { - for (auto channelPair: ResultChannelProxies) { - LOG_D("terminate result channel " << channelPair.first << " proxy at " << channelPair.second->SelfId()); + for (auto channelPair: ResultChannelProxies) { + LOG_D("terminate result channel " << channelPair.first << " proxy at " << channelPair.second->SelfId()); - TAutoPtr<IEventHandle> ev = new IEventHandle( - channelPair.second->SelfId(), SelfId(), new TEvents::TEvPoison - ); - channelPair.second->Receive(ev, TActivationContext::AsActorContext()); + TAutoPtr<IEventHandle> ev = new IEventHandle( + channelPair.second->SelfId(), SelfId(), new TEvents::TEvPoison + ); + channelPair.second->Receive(ev, TActivationContext::AsActorContext()); } if (KqpShardsResolverId) { @@ -1036,35 +1036,35 @@ public: } } - IActor* GetOrCreateChannelProxy(const TChannel& channel) { - IActor* proxy; - - if (Results[0].IsStream) { - if (!ResultChannelProxies.empty()) { - return ResultChannelProxies.begin()->second; - } - - proxy = CreateResultStreamChannelProxy(TxId, channel.Id, Results[0].ItemType, + IActor* GetOrCreateChannelProxy(const TChannel& channel) { + IActor* proxy; + + if (Results[0].IsStream) { + if (!ResultChannelProxies.empty()) { + return ResultChannelProxies.begin()->second; + } + + proxy = CreateResultStreamChannelProxy(TxId, channel.Id, Results[0].ItemType, Results[0].ResultItemType.Get(), Target, Stats.get(), SelfId()); - } else { - YQL_ENSURE(channel.DstInputIndex < Results.size()); - - auto channelIt = ResultChannelProxies.find(channel.Id); - - if (channelIt != ResultChannelProxies.end()) { - return channelIt->second; - } - + } else { + YQL_ENSURE(channel.DstInputIndex < Results.size()); + + auto channelIt = ResultChannelProxies.find(channel.Id); + + if (channelIt != ResultChannelProxies.end()) { + return channelIt->second; + } + proxy = CreateResultDataChannelProxy(TxId, channel.Id, Stats.get(), SelfId(), - &Results[channel.DstInputIndex].Data); - } - - RegisterWithSameMailbox(proxy); - ResultChannelProxies.emplace(std::make_pair(channel.Id, proxy)); - - return proxy; - } - + &Results[channel.DstInputIndex].Data); + } + + RegisterWithSameMailbox(proxy); + ResultChannelProxies.emplace(std::make_pair(channel.Id, proxy)); + + return proxy; + } + void FillChannelDesc(NYql::NDqProto::TChannel& channelDesc, const TChannel& channel) { channelDesc.SetId(channel.Id); channelDesc.SetSrcTaskId(channel.SrcTask); @@ -1076,8 +1076,8 @@ public: if (channel.DstTask) { FillEndpointDesc(*channelDesc.MutableDstEndpoint(), TasksGraph.GetTask(channel.DstTask)); } else { - auto proxy = GetOrCreateChannelProxy(channel); - ActorIdToProto(proxy->SelfId(), channelDesc.MutableDstEndpoint()->MutableActorId()); + auto proxy = GetOrCreateChannelProxy(channel); + ActorIdToProto(proxy->SelfId(), channelDesc.MutableDstEndpoint()->MutableActorId()); } channelDesc.SetIsPersistent(IsCrossShardChannel(TasksGraph, channel)); @@ -1085,8 +1085,8 @@ public: } private: - TVector<TKqpExecuterTxResult> Results; - std::unordered_map<ui64, IActor*> ResultChannelProxies; + TVector<TKqpExecuterTxResult> Results; + std::unordered_map<ui64, IActor*> ResultChannelProxies; THashSet<ui64> PendingComputeTasks; // Not started yet, waiting resources TMap<ui64, ui64> ShardIdToNodeId; TMap<ui64, TVector<ui64>> ShardsOnNode; diff --git a/ydb/core/kqp/executer/kqp_tasks_graph.cpp b/ydb/core/kqp/executer/kqp_tasks_graph.cpp index ca777d67501..1ccfa260471 100644 --- a/ydb/core/kqp/executer/kqp_tasks_graph.cpp +++ b/ydb/core/kqp/executer/kqp_tasks_graph.cpp @@ -66,7 +66,7 @@ void FillKqpTasksGraphStages(TKqpTasksGraph& tasksGraph, const TVector<IKqpGatew switch (op.GetTypeCase()) { case NKqpProto::TKqpPhyTableOperation::kReadRange: - case NKqpProto::TKqpPhyTableOperation::kReadRanges: + case NKqpProto::TKqpPhyTableOperation::kReadRanges: case NKqpProto::TKqpPhyTableOperation::kReadOlapRange: case NKqpProto::TKqpPhyTableOperation::kLookup: stageInfo.Meta.ShardOperations.insert(TKeyDesc::ERowOperation::Read); @@ -314,7 +314,7 @@ void TShardKeyRanges::AddRange(TSerializedTableRange&& range) { } void TShardKeyRanges::Add(TSerializedPointOrRange&& pointOrRange) { - if (!IsFullRange()) { + if (!IsFullRange()) { Ranges.emplace_back(std::move(pointOrRange)); if (std::holds_alternative<TSerializedTableRange>(Ranges.back())) { Y_VERIFY_DEBUG(!std::get<TSerializedTableRange>(Ranges.back()).Point); @@ -324,15 +324,15 @@ void TShardKeyRanges::Add(TSerializedPointOrRange&& pointOrRange) { void TShardKeyRanges::CopyFrom(const TVector<TSerializedPointOrRange>& ranges) { if (!IsFullRange()) { - Ranges = ranges; + Ranges = ranges; for (auto& x : Ranges) { if (std::holds_alternative<TSerializedTableRange>(x)) { Y_VERIFY_DEBUG(!std::get<TSerializedTableRange>(x).Point); } } - } -}; - + } +}; + void TShardKeyRanges::MakeFullRange(TSerializedTableRange&& range) { Ranges.clear(); FullRange.emplace(std::move(range)); @@ -353,7 +353,7 @@ void TShardKeyRanges::MakeFull(TSerializedPointOrRange&& pointOrRange) { } -void TShardKeyRanges::MergeWritePoints(TShardKeyRanges&& other, const TVector<NScheme::TTypeId>& keyTypes) { +void TShardKeyRanges::MergeWritePoints(TShardKeyRanges&& other, const TVector<NScheme::TTypeId>& keyTypes) { #ifdef DBG_TRACE Cerr << (TStringBuilder() << "-- merge " << ToString(keyTypes, *AppData()->TypeRegistry) << " with " << other.ToString(keyTypes, *AppData()->TypeRegistry) << Endl); @@ -396,19 +396,19 @@ void TShardKeyRanges::MergeWritePoints(TShardKeyRanges&& other, const TVector<NS YQL_ENSURE(std::holds_alternative<TSerializedCellVec>(x)); YQL_ENSURE(std::holds_alternative<TSerializedCellVec>(y)); -#if 1 - // common case for multi-effects transactions +#if 1 + // common case for multi-effects transactions cmp = CompareTypedCellVectors( std::get<TSerializedCellVec>(x).GetCells().data(), std::get<TSerializedCellVec>(y).GetCells().data(), keyTypes.data(), keyTypes.size()); -#else - if (x.IsPoint() && y.IsPoint()) { +#else + if (x.IsPoint() && y.IsPoint()) { // common case for multi-effects transactions cmp = CompareTypedCellVectors(x.From.GetCells().data(), y.From.GetCells().data(), keyTypes.data(), keyTypes.size()); - } else if (x.IsPoint()) { + } else if (x.IsPoint()) { cmp = ComparePointAndRange(x.From.GetCells(), y.ToTableRange(), keyTypes, keyTypes); - } else if (y.IsPoint()) { + } else if (y.IsPoint()) { cmp = -ComparePointAndRange(y.From.GetCells(), x.ToTableRange(), keyTypes, keyTypes); } else { cmp = CompareRanges(x.ToTableRange(), y.ToTableRange(), keyTypes); @@ -546,7 +546,7 @@ void TShardKeyRanges::SerializeTo(NKikimrTxDataShard::TKqpTransaction_TScanTaskM FullRange->Serialize(protoRange); } else { for (auto& range : Ranges) { - auto& keyRange = *proto->AddKeyRanges(); + auto& keyRange = *proto->AddKeyRanges(); if (std::holds_alternative<TSerializedTableRange>(range)) { auto& x = std::get<TSerializedTableRange>(range); Y_VERIFY_DEBUG(!x.Point); @@ -587,7 +587,7 @@ TString TTaskMeta::ToString(const TVector<NScheme::TTypeId>& keyTypes, const NSc } sb << "], Reads: { "; - + if (Reads) { for (ui64 i = 0; i < Reads->size(); ++i) { auto& read = (*Reads)[i]; @@ -601,19 +601,19 @@ TString TTaskMeta::ToString(const TVector<NScheme::TTypeId>& keyTypes, const NSc } } } else { - sb << "none"; + sb << "none"; } - sb << " }, Writes: { "; - + sb << " }, Writes: { "; + if (Writes) { - sb << "ranges: " << Writes->Ranges.ToString(keyTypes, typeRegistry); + sb << "ranges: " << Writes->Ranges.ToString(keyTypes, typeRegistry); } else { - sb << "none"; + sb << "none"; } - sb << " } }"; - + sb << " } }"; + return sb; } diff --git a/ydb/core/kqp/executer/kqp_tasks_graph.h b/ydb/core/kqp/executer/kqp_tasks_graph.h index 4684b13673c..7c356ab623f 100644 --- a/ydb/core/kqp/executer/kqp_tasks_graph.h +++ b/ydb/core/kqp/executer/kqp_tasks_graph.h @@ -86,7 +86,7 @@ struct TShardKeyRanges { bool IsFullRange() const { return FullRange.has_value(); } TVector<TSerializedPointOrRange>& GetRanges() { return Ranges; } - void MergeWritePoints(TShardKeyRanges&& other, const TVector<NScheme::TTypeId>& keyTypes); + void MergeWritePoints(TShardKeyRanges&& other, const TVector<NScheme::TTypeId>& keyTypes); TString ToString(const TVector<NScheme::TTypeId>& keyTypes, const NScheme::TTypeRegistry& typeRegistry) const; void SerializeTo(NKikimrTxDataShard::TKqpTransaction_TDataTaskMeta_TKeyRange* proto) const; @@ -101,7 +101,7 @@ struct TTaskMeta { ui64 NodeId = 0; // only in case of scans over persistent snapshots TMap<TString, NYql::NDqProto::TData> Params; - TMap<TString, NKikimr::NMiniKQL::TType*> ParamTypes; + TMap<TString, NKikimr::NMiniKQL::TType*> ParamTypes; struct TColumn { ui32 Id = 0; @@ -109,29 +109,29 @@ struct TTaskMeta { TString Name; }; - struct TShardReadInfo { + struct TShardReadInfo { TShardKeyRanges Ranges; TVector<TColumn> Columns; - ui64 ShardId = 0; // in case of persistent scans - }; - - struct TKqpOlapProgram { - TString Program; // For OLAP scans with process pushdown - std::set<TString> ParameterNames; - }; - - struct TReadInfo { + ui64 ShardId = 0; // in case of persistent scans + }; + + struct TKqpOlapProgram { + TString Program; // For OLAP scans with process pushdown + std::set<TString> ParameterNames; + }; + + struct TReadInfo { ui64 ItemsLimit = 0; bool Reverse = false; - TKqpOlapProgram OlapProgram; + TKqpOlapProgram OlapProgram; }; struct TWriteInfo { TShardKeyRanges Ranges; }; - TReadInfo ReadInfo; - TMaybe<TVector<TShardReadInfo>> Reads; // if not set -> no reads + TReadInfo ReadInfo; + TMaybe<TVector<TShardReadInfo>> Reads; // if not set -> no reads TMaybe<TWriteInfo> Writes; // if not set -> no writes TString ToString(const TVector<NScheme::TTypeId>& keyTypes, const NScheme::TTypeRegistry& typeRegistry) const; diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index 6a9ae3ba782..2faa33d5e1d 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -76,51 +76,51 @@ "Match": {"Type": "Callable", "Name": "KqpWideReadTable"} }, { - "Name": "TKqlReadTableRangesBase", - "Base": "TCallable", - "Match": {"Type": "CallableBase"}, - "Children": [ - {"Index": 0, "Name": "Table", "Type": "TKqpTable"}, - {"Index": 1, "Name": "Ranges", "Type": "TExprBase"}, - {"Index": 2, "Name": "Columns", "Type": "TCoAtomList"}, - {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList"}, - {"Index": 4, "Name": "ExplainPrompt", "Type": "TCoNameValueTupleList"} - ] - }, - { - "Name": "TKqlReadTableRanges", - "Base": "TKqlReadTableRangesBase", - "Match": {"Type": "Callable", "Name": "KqlReadTableRanges"} - }, - { - "Name": "TKqpReadTableRanges", - "Base": "TKqlReadTableRangesBase", - "Match": {"Type": "Callable", "Name": "KqpReadTableRanges"} - }, - { - "Name": "TKqpWideReadTableRanges", - "Base": "TKqlReadTableRangesBase", - "Match": {"Type": "Callable", "Name": "KqpWideReadTableRanges"} - }, - { - "Name": "TKqpReadOlapTableRangesBase", - "Base": "TKqlReadTableRangesBase", - "Match": {"Type": "CallableBase"}, - "Children": [ - {"Index": 5, "Name": "Process", "Type": "TCoLambda"} - ] - }, - { - "Name": "TKqpReadOlapTableRanges", - "Base": "TKqpReadOlapTableRangesBase", - "Match": {"Type": "Callable", "Name": "KqpReadOlapTableRanges"} - }, - { - "Name": "TKqpWideReadOlapTableRanges", - "Base": "TKqpReadOlapTableRangesBase", - "Match": {"Type": "Callable", "Name": "KqpWideReadOlapTableRanges"} - }, - { + "Name": "TKqlReadTableRangesBase", + "Base": "TCallable", + "Match": {"Type": "CallableBase"}, + "Children": [ + {"Index": 0, "Name": "Table", "Type": "TKqpTable"}, + {"Index": 1, "Name": "Ranges", "Type": "TExprBase"}, + {"Index": 2, "Name": "Columns", "Type": "TCoAtomList"}, + {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 4, "Name": "ExplainPrompt", "Type": "TCoNameValueTupleList"} + ] + }, + { + "Name": "TKqlReadTableRanges", + "Base": "TKqlReadTableRangesBase", + "Match": {"Type": "Callable", "Name": "KqlReadTableRanges"} + }, + { + "Name": "TKqpReadTableRanges", + "Base": "TKqlReadTableRangesBase", + "Match": {"Type": "Callable", "Name": "KqpReadTableRanges"} + }, + { + "Name": "TKqpWideReadTableRanges", + "Base": "TKqlReadTableRangesBase", + "Match": {"Type": "Callable", "Name": "KqpWideReadTableRanges"} + }, + { + "Name": "TKqpReadOlapTableRangesBase", + "Base": "TKqlReadTableRangesBase", + "Match": {"Type": "CallableBase"}, + "Children": [ + {"Index": 5, "Name": "Process", "Type": "TCoLambda"} + ] + }, + { + "Name": "TKqpReadOlapTableRanges", + "Base": "TKqpReadOlapTableRangesBase", + "Match": {"Type": "Callable", "Name": "KqpReadOlapTableRanges"} + }, + { + "Name": "TKqpWideReadOlapTableRanges", + "Base": "TKqpReadOlapTableRangesBase", + "Match": {"Type": "Callable", "Name": "KqpWideReadOlapTableRanges"} + }, + { "Name": "TKqlLookupTableBase", "Base": "TCallable", "Match": {"Type": "CallableBase"}, @@ -367,54 +367,54 @@ ] }, { - "Name": "TKqpOlapFilter", + "Name": "TKqpOlapFilter", + "Base": "TKqpOlapOperationBase", + "Match": {"Type": "Callable", "Name": "KqpOlapFilter"}, + "Children": [ + {"Index": 1, "Name": "Condition", "Type": "TExprBase"} + ] + }, + { + "Name": "TKqpOlapFilterCompare", + "Base": "TKqpOlapOperationBase", + "Match": {"Type": "CallableBase"}, + "Children": [ + {"Index": 1, "Name": "Left", "Type": "TExprBase"}, + {"Index": 2, "Name": "Right", "Type": "TExprBase"} + ] + }, + { + "Name": "TKqpOlapFilterEqual", + "Base": "TKqpOlapFilterCompare", + "Match": {"Type": "Callable", "Name": "KqpOlapFilterEqual"} + }, + { + "Name": "TKqpOlapFilterLess", + "Base": "TKqpOlapFilterCompare", + "Match": {"Type": "Callable", "Name": "KqpOlapFilterLess"} + }, + { + "Name": "TKqpOlapFilterLessOrEqual", + "Base": "TKqpOlapFilterCompare", + "Match": {"Type": "Callable", "Name": "KqpOlapFilterLessOrEqual"} + }, + { + "Name": "TKqpOlapFilterGreater", + "Base": "TKqpOlapFilterCompare", + "Match": {"Type": "Callable", "Name": "KqpOlapFilterGreater"} + }, + { + "Name": "TKqpOlapFilterGreaterOrEqual", + "Base": "TKqpOlapFilterCompare", + "Match": {"Type": "Callable", "Name": "KqpOlapFilterGreaterOrEqual"} + }, + { + "Name": "TKqpOlapFilterExists", "Base": "TKqpOlapOperationBase", - "Match": {"Type": "Callable", "Name": "KqpOlapFilter"}, + "Match": {"Type": "Callable", "Name": "TKqpOlapFilterExists"}, "Children": [ - {"Index": 1, "Name": "Condition", "Type": "TExprBase"} - ] - }, - { - "Name": "TKqpOlapFilterCompare", - "Base": "TKqpOlapOperationBase", - "Match": {"Type": "CallableBase"}, - "Children": [ - {"Index": 1, "Name": "Left", "Type": "TExprBase"}, - {"Index": 2, "Name": "Right", "Type": "TExprBase"} + {"Index": 1, "Name": "Column", "Type": "TExprBase"} ] - }, - { - "Name": "TKqpOlapFilterEqual", - "Base": "TKqpOlapFilterCompare", - "Match": {"Type": "Callable", "Name": "KqpOlapFilterEqual"} - }, - { - "Name": "TKqpOlapFilterLess", - "Base": "TKqpOlapFilterCompare", - "Match": {"Type": "Callable", "Name": "KqpOlapFilterLess"} - }, - { - "Name": "TKqpOlapFilterLessOrEqual", - "Base": "TKqpOlapFilterCompare", - "Match": {"Type": "Callable", "Name": "KqpOlapFilterLessOrEqual"} - }, - { - "Name": "TKqpOlapFilterGreater", - "Base": "TKqpOlapFilterCompare", - "Match": {"Type": "Callable", "Name": "KqpOlapFilterGreater"} - }, - { - "Name": "TKqpOlapFilterGreaterOrEqual", - "Base": "TKqpOlapFilterCompare", - "Match": {"Type": "Callable", "Name": "KqpOlapFilterGreaterOrEqual"} - }, - { - "Name": "TKqpOlapFilterExists", - "Base": "TKqpOlapOperationBase", - "Match": {"Type": "Callable", "Name": "TKqpOlapFilterExists"}, - "Children": [ - {"Index": 1, "Name": "Column", "Type": "TExprBase"} - ] }, { "Name": "TKqpEnsure", diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index 77de0592c5a..44af6d11682 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -372,32 +372,32 @@ public: output = input; auto evaluateNode = FindNode(input, [](const TExprNode::TPtr& node) { - return node->IsCallable({"EvaluateIf!", "EvaluateFor!", "EvaluateAtom"}); + return node->IsCallable({"EvaluateIf!", "EvaluateFor!", "EvaluateAtom"}); }); - if (!evaluateNode) - return TStatus::Ok; + if (!evaluateNode) + return TStatus::Ok; - TStringBuilder builder; + TStringBuilder builder; if (evaluateNode->Content() == "EvaluateAtom"sv) - builder << "ATOM evaluation"; + builder << "ATOM evaluation"; else if (evaluateNode->Content() == "EvaluateIf!"sv) - builder << "EVALUATE IF"; - else - builder << "EVALUATE"; - - builder << " is not supported in YDB queries."; - - ctx.AddError( - YqlIssue( - ctx.GetPosition(evaluateNode->Pos()), - TIssuesIds::KIKIMR_UNSUPPORTED, - builder - ) - ); - - return TStatus::Error; + builder << "EVALUATE IF"; + else + builder << "EVALUATE"; + + builder << " is not supported in YDB queries."; + + ctx.AddError( + YqlIssue( + ctx.GetPosition(evaluateNode->Pos()), + TIssuesIds::KIKIMR_UNSUPPORTED, + builder + ) + ); + + return TStatus::Error; } }; diff --git a/ydb/core/kqp/host/kqp_run_prepared.cpp b/ydb/core/kqp/host/kqp_run_prepared.cpp index cf54565a625..e71a3bacee2 100644 --- a/ydb/core/kqp/host/kqp_run_prepared.cpp +++ b/ydb/core/kqp/host/kqp_run_prepared.cpp @@ -141,7 +141,7 @@ private: } future = Gateway->ExecuteMkqlPrepared(Cluster, mkql.GetProgram(), std::move(execParams), - TransformCtx->GetMkqlSettings(false, Gateway->GetCurrentTime()), + TransformCtx->GetMkqlSettings(false, Gateway->GetCurrentTime()), TxState->Tx().GetSnapshot()); return true; } diff --git a/ydb/core/kqp/host/kqp_run_scan.cpp b/ydb/core/kqp/host/kqp_run_scan.cpp index 8f697cc006e..7c91ae82003 100644 --- a/ydb/core/kqp/host/kqp_run_scan.cpp +++ b/ydb/core/kqp/host/kqp_run_scan.cpp @@ -34,7 +34,7 @@ protected: request.MaxComputeActors = TransformCtx->Config->_KqpMaxComputeActors.Get().GetRef(); request.StatsMode = GetStatsMode(TransformCtx->QueryCtx->StatsMode); request.DisableLlvmForUdfStages = TransformCtx->Config->DisableLlvmForUdfStages(); - request.LlvmEnabled = TransformCtx->Config->GetEnableLlvm() != EOptionalFlag::Disabled; + request.LlvmEnabled = TransformCtx->Config->GetEnableLlvm() != EOptionalFlag::Disabled; request.Snapshot = TxState->Tx().GetSnapshot(); switch (tx->GetType()) { diff --git a/ydb/core/kqp/host/kqp_runner.cpp b/ydb/core/kqp/host/kqp_runner.cpp index c6d0f898f3f..42b4eca7c66 100644 --- a/ydb/core/kqp/host/kqp_runner.cpp +++ b/ydb/core/kqp/host/kqp_runner.cpp @@ -200,8 +200,8 @@ public: .Add(CreateKqpCheckQueryTransformer(), "CheckKqlQuery") .AddPostTypeAnnotation() .AddCommonOptimization() - .Add(CreateKqpLogOptTransformer(OptimizeCtx, *typesCtx, Config), "LogicalOptimize") - .Add(CreateKqpPhyOptTransformer(OptimizeCtx, *typesCtx), "PhysicalOptimize") + .Add(CreateKqpLogOptTransformer(OptimizeCtx, *typesCtx, Config), "LogicalOptimize") + .Add(CreateKqpPhyOptTransformer(OptimizeCtx, *typesCtx), "PhysicalOptimize") .Add(CreateKqpFinalizingOptTransformer(), "FinalizingOptimize") .Add(CreateKqpQueryPhasesTransformer(), "QueryPhases") .Add(CreateKqpQueryEffectsTransformer(OptimizeCtx), "QueryEffects") diff --git a/ydb/core/kqp/kqp_default_settings.txt b/ydb/core/kqp/kqp_default_settings.txt index 6ca2565977d..ff501a2db65 100644 --- a/ydb/core/kqp/kqp_default_settings.txt +++ b/ydb/core/kqp/kqp_default_settings.txt @@ -130,5 +130,5 @@ DefaultSettings { DefaultSettings { Name: "_KqpDisableLlvmForUdfStages" - Value: "false" + Value: "false" } diff --git a/ydb/core/kqp/opt/kqp_opt.cpp b/ydb/core/kqp/opt/kqp_opt.cpp index 36249506759..1bd771ece1f 100644 --- a/ydb/core/kqp/opt/kqp_opt.cpp +++ b/ydb/core/kqp/opt/kqp_opt.cpp @@ -46,10 +46,10 @@ bool IsKqpPureLambda(const TCoLambda& lambda) { return true; } - if (TMaybeNode<TKqlReadTableRangesBase>(node)) { - return true; - } - + if (TMaybeNode<TKqlReadTableRangesBase>(node)) { + return true; + } + if (TMaybeNode<TKqlLookupTableBase>(node)) { return true; } diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h index 5c0fbf7ab25..a86561d0c4c 100644 --- a/ydb/core/kqp/opt/kqp_opt.h +++ b/ydb/core/kqp/opt/kqp_opt.h @@ -46,7 +46,7 @@ struct TKqpBuildQueryContext : TThrRefBase { bool IsKqpEffectsStage(const NYql::NNodes::TDqStageBase& stage); TMaybe<NYql::NNodes::TKqlQuery> BuildKqlQuery(NYql::NNodes::TKiDataQuery query, const NYql::TKikimrTablesData& tablesData, - NYql::TExprContext& ctx, bool withSystemColumns, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx); + NYql::TExprContext& ctx, bool withSystemColumns, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx); TAutoPtr<NYql::IGraphTransformer> CreateKqpFinalizingOptTransformer(); TAutoPtr<NYql::IGraphTransformer> CreateKqpQueryPhasesTransformer(); diff --git a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp index e68aea82fa5..f35ba23f666 100644 --- a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp +++ b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp @@ -55,22 +55,22 @@ public: } private: - EPhysicalTxType GetPhyTxType(bool allStagesArePure) { - if (QueryType == EKikimrQueryType::Scan) { - if (IsPrecompute && allStagesArePure) { - return EPhysicalTxType::Compute; - } - - return EPhysicalTxType::Scan; - } - - if (allStagesArePure) { - return EPhysicalTxType::Compute; - } - - return EPhysicalTxType::Data; - } - + EPhysicalTxType GetPhyTxType(bool allStagesArePure) { + if (QueryType == EKikimrQueryType::Scan) { + if (IsPrecompute && allStagesArePure) { + return EPhysicalTxType::Compute; + } + + return EPhysicalTxType::Scan; + } + + if (allStagesArePure) { + return EPhysicalTxType::Compute; + } + + return EPhysicalTxType::Data; + } + TStatus DoBuildTxResults(TExprNode::TPtr inputExpr, TExprNode::TPtr& outputExpr, TExprContext& ctx) { auto stages = CollectStages(inputExpr, ctx); Y_VERIFY_DEBUG(!stages.empty()); @@ -82,7 +82,7 @@ private: } TKqpPhyTxSettings txSettings; - txSettings.Type = GetPhyTxType(AreAllStagesKqpPure(stages)); + txSettings.Type = GetPhyTxType(AreAllStagesKqpPure(stages)); txSettings.WithEffects = false; auto tx = Build<TKqpPhysicalTx>(ctx, inputExpr->Pos()) @@ -135,22 +135,22 @@ private: private: static TVector<TDqPhyStage> CollectStages(const TExprNode::TPtr& node, TExprContext& /* ctx */) { TVector<TDqPhyStage> stages; - - auto filter = [](const TExprNode::TPtr& exprNode) { + + auto filter = [](const TExprNode::TPtr& exprNode) { return !exprNode->IsLambda(); - }; - - auto collector = [&stages](const TExprNode::TPtr& exprNode) { - if (TDqPhyStage::Match(exprNode.Get())) { - stages.emplace_back(TDqPhyStage(exprNode)); + }; + + auto collector = [&stages](const TExprNode::TPtr& exprNode) { + if (TDqPhyStage::Match(exprNode.Get())) { + stages.emplace_back(TDqPhyStage(exprNode)); } else { YQL_ENSURE(!TDqStage::Match(exprNode.Get())); - } - return true; - }; - - VisitExpr(node, filter, collector); - + } + return true; + }; + + VisitExpr(node, filter, collector); + return stages; } @@ -195,7 +195,7 @@ private: collectStage = resultStage; } else if (resultStage.Inputs().Size() == 1 && resultStage.Inputs().Item(0).Maybe<TDqCnMerge>()) { collectStage = resultStage; - } else if (resultStage.Inputs().Size() == 1 && resultStage.Inputs().Item(0).Maybe<TDqPhyPrecompute>()) { + } else if (resultStage.Inputs().Size() == 1 && resultStage.Inputs().Item(0).Maybe<TDqPhyPrecompute>()) { collectStage = resultStage; } else if (resultStage.Inputs().Empty() && IsKqpPureLambda(resultStage.Program())) { collectStage = resultStage; @@ -251,66 +251,66 @@ private: static TMaybeNode<TDqPhyStage> ExtractParamsFromStage(const TDqPhyStage& stage, const TNodeOnNodeOwnedMap& stagesMap, TMap<TString, TKqpParamBinding>& bindingsMap, TExprContext& ctx) { - auto bindingsBuilder = [&bindingsMap, &ctx] (const TExprNode::TPtr& node) { - auto maybeParam = TMaybeNode<TCoParameter>(node); - - if (!maybeParam.IsValid()) { - return true; + auto bindingsBuilder = [&bindingsMap, &ctx] (const TExprNode::TPtr& node) { + auto maybeParam = TMaybeNode<TCoParameter>(node); + + if (!maybeParam.IsValid()) { + return true; } - - auto param = maybeParam.Cast(); - - TString paramName(param.Name()); - - if (bindingsMap.contains(paramName)) { - return true; - } - - auto paramBinding = Build<TKqpParamBinding>(ctx, param.Pos()) - .Name().Build(paramName) - .Done(); - - bindingsMap.emplace(std::move(paramName), std::move(paramBinding)); - + + auto param = maybeParam.Cast(); + + TString paramName(param.Name()); + + if (bindingsMap.contains(paramName)) { + return true; + } + + auto paramBinding = Build<TKqpParamBinding>(ctx, param.Pos()) + .Name().Build(paramName) + .Done(); + + bindingsMap.emplace(std::move(paramName), std::move(paramBinding)); + return true; - }; + }; + + VisitExpr(stage.Program().Body().Ptr(), bindingsBuilder); - VisitExpr(stage.Program().Body().Ptr(), bindingsBuilder); - TVector<TExprBase> newInputs; TVector<TCoArgument> newArgs; TNodeOnNodeOwnedMap argsMap; - + for (ui32 i = 0; i < stage.Inputs().Size(); ++i) { const auto& input = stage.Inputs().Item(i); const auto& inputArg = stage.Program().Args().Arg(i); - auto maybeBinding = input.Maybe<TKqpTxResultBinding>(); + auto maybeBinding = input.Maybe<TKqpTxResultBinding>(); - if (!maybeBinding.IsValid()) { - auto newArg = ctx.NewArgument(inputArg.Pos(), inputArg.Name()); - newInputs.push_back(input); - newArgs.emplace_back(TCoArgument(newArg)); - argsMap.emplace(inputArg.Raw(), std::move(newArg)); - continue; - } + if (!maybeBinding.IsValid()) { + auto newArg = ctx.NewArgument(inputArg.Pos(), inputArg.Name()); + newInputs.push_back(input); + newArgs.emplace_back(TCoArgument(newArg)); + argsMap.emplace(inputArg.Raw(), std::move(newArg)); + continue; + } + + auto binding = maybeBinding.Cast(); - auto binding = maybeBinding.Cast(); + TString paramName = TStringBuilder() << ParamNamePrefix + << "tx_result_binding_" << binding.TxIndex().Value() << "_" << binding.ResultIndex().Value(); - TString paramName = TStringBuilder() << ParamNamePrefix - << "tx_result_binding_" << binding.TxIndex().Value() << "_" << binding.ResultIndex().Value(); + auto type = binding.Type().Ref().GetTypeAnn(); + YQL_ENSURE(type); + YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Type); + type = type->Cast<TTypeExprType>()->GetType(); + YQL_ENSURE(type); - auto type = binding.Type().Ref().GetTypeAnn(); - YQL_ENSURE(type); - YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Type); - type = type->Cast<TTypeExprType>()->GetType(); - YQL_ENSURE(type); + TExprBase parameter = Build<TCoParameter>(ctx, input.Pos()) + .Name().Build(paramName) + .Type(ExpandType(input.Pos(), *type, ctx)) + .Done(); - TExprBase parameter = Build<TCoParameter>(ctx, input.Pos()) - .Name().Build(paramName) - .Type(ExpandType(input.Pos(), *type, ctx)) - .Done(); - // TODO: (Iterator|ToStream (Parameter ...)) -> (ToFlow (Parameter ...)) // if (type->GetKind() == ETypeAnnotationKind::List) { // parameter = Build<TCoToFlow>(ctx, input.Pos()) // TODO: TDqInputReader? @@ -318,19 +318,19 @@ private: // .Done(); // } - auto paramBinding = Build<TKqpParamBinding>(ctx, input.Pos()) - .Name().Build(paramName) - .Binding(binding) - .Done(); - - auto inserted = bindingsMap.emplace(paramName, paramBinding); - if (!inserted.second) { - YQL_ENSURE(inserted.first->second.Binding().Raw() == binding.Raw(), - "duplicated parameter " << paramName - << ", first: " << KqpExprToPrettyString(inserted.first->second.Binding().Ref(), ctx) - << ", second: " << KqpExprToPrettyString(binding, ctx)); + auto paramBinding = Build<TKqpParamBinding>(ctx, input.Pos()) + .Name().Build(paramName) + .Binding(binding) + .Done(); + + auto inserted = bindingsMap.emplace(paramName, paramBinding); + if (!inserted.second) { + YQL_ENSURE(inserted.first->second.Binding().Raw() == binding.Raw(), + "duplicated parameter " << paramName + << ", first: " << KqpExprToPrettyString(inserted.first->second.Binding().Ref(), ctx) + << ", second: " << KqpExprToPrettyString(binding, ctx)); } - argsMap.emplace(inputArg.Raw(), parameter.Ptr()); + argsMap.emplace(inputArg.Raw(), parameter.Ptr()); } auto inputs = Build<TExprList>(ctx, stage.Pos()) @@ -537,72 +537,72 @@ private: } std::pair<TNodeOnNodeOwnedMap, TNodeOnNodeOwnedMap> GatherPrecomputeDependencies(const TKqlQuery& query) { - TNodeOnNodeOwnedMap precomputes; - TNodeOnNodeOwnedMap dependencies; + TNodeOnNodeOwnedMap precomputes; + TNodeOnNodeOwnedMap dependencies; - auto filter = [](const TExprNode::TPtr& exprNode) { - return !exprNode->IsLambda(); - }; + auto filter = [](const TExprNode::TPtr& exprNode) { + return !exprNode->IsLambda(); + }; auto gather = [&precomputes, &dependencies](const TExprNode::TPtr& exprNode) { - TExprBase node(exprNode); + TExprBase node(exprNode); - auto maybeStage = node.Maybe<TDqStage>(); + auto maybeStage = node.Maybe<TDqStage>(); - if (!maybeStage.IsValid()) { + if (!maybeStage.IsValid()) { return true; } - auto stage = maybeStage.Cast(); - - for (const auto& input : stage.Inputs()) { - const TExprNode* inputStage; - - if (auto maybePrecompute = input.Maybe<TDqPhyPrecompute>()) { - auto precomputeStage = maybePrecompute.Cast().Connection().Output().Stage(); - precomputes.emplace(precomputeStage.Raw(), precomputeStage.Ptr()); - dependencies.emplace(stage.Raw(), stage.Ptr()); - inputStage = precomputeStage.Raw(); - } else if (auto maybeConnection = input.Maybe<TDqConnection>()) { - inputStage = maybeConnection.Cast().Output().Stage().Raw(); - } else if (input.Maybe<TKqpTxResultBinding>()) { - // ok - continue; - } else { - YQL_ENSURE(false, "Unexpected stage input: " << input.Ref().Content()); - } - - if (dependencies.contains(inputStage)) { - dependencies.emplace(stage.Raw(), stage.Ptr()); - } - } - - return true; - }; - - VisitExpr(query.Ptr(), filter, gather); - + auto stage = maybeStage.Cast(); + + for (const auto& input : stage.Inputs()) { + const TExprNode* inputStage; + + if (auto maybePrecompute = input.Maybe<TDqPhyPrecompute>()) { + auto precomputeStage = maybePrecompute.Cast().Connection().Output().Stage(); + precomputes.emplace(precomputeStage.Raw(), precomputeStage.Ptr()); + dependencies.emplace(stage.Raw(), stage.Ptr()); + inputStage = precomputeStage.Raw(); + } else if (auto maybeConnection = input.Maybe<TDqConnection>()) { + inputStage = maybeConnection.Cast().Output().Stage().Raw(); + } else if (input.Maybe<TKqpTxResultBinding>()) { + // ok + continue; + } else { + YQL_ENSURE(false, "Unexpected stage input: " << input.Ref().Content()); + } + + if (dependencies.contains(inputStage)) { + dependencies.emplace(stage.Raw(), stage.Ptr()); + } + } + + return true; + }; + + VisitExpr(query.Ptr(), filter, gather); + return std::make_pair(std::move(precomputes), std::move(dependencies)); - } - - TMaybe<TStatus> TryBuildPrecomputeTx(const TKqlQuery& query, TExprNode::TPtr& output, TExprContext& ctx) { + } + + TMaybe<TStatus> TryBuildPrecomputeTx(const TKqlQuery& query, TExprNode::TPtr& output, TExprContext& ctx) { auto [precomputeStagesMap, dependantStagesMap] = GatherPrecomputeDependencies(query); - + if (precomputeStagesMap.empty()) { return {}; } TNodeOnNodeOwnedMap phaseStagesMap; TVector<TKqlQueryResult> phaseResults; - TVector<TDqPhyPrecompute> computedInputs; + TVector<TDqPhyPrecompute> computedInputs; TNodeSet computedInputsSet; - + // Gather all Precompute stages, that are independent of any other stage and form phase of execution for (auto [raw, ptr] : precomputeStagesMap) { if (dependantStagesMap.contains(raw)) { - continue; - } - + continue; + } + // precompute stage _NOT_IN_ dependant stages YQL_ENSURE(!IsKqpEffectsStage(TDqStage(ptr))); phaseStagesMap.emplace(raw, ptr); @@ -618,33 +618,33 @@ private: TDqStage stage(stagePtr); for (const auto& input : stage.Inputs()) { - auto maybePrecompute = input.Maybe<TDqPhyPrecompute>(); + auto maybePrecompute = input.Maybe<TDqPhyPrecompute>(); + + if (!maybePrecompute.IsValid()) { + continue; + } - if (!maybePrecompute.IsValid()) { - continue; - } + auto precompute = maybePrecompute.Cast(); + auto precomputeConnection = precompute.Connection(); + auto precomputeStage = precomputeConnection.Output().Stage(); - auto precompute = maybePrecompute.Cast(); - auto precomputeConnection = precompute.Connection(); - auto precomputeStage = precomputeConnection.Output().Stage(); + if (!phaseStagesMap.contains(precomputeStage.Raw())) { + continue; + } - if (!phaseStagesMap.contains(precomputeStage.Raw())) { - continue; + if (computedInputsSet.contains(precompute.Raw())) { + continue; } - - if (computedInputsSet.contains(precompute.Raw())) { - continue; - } - - auto result = Build<TKqlQueryResult>(ctx, precompute.Pos()) - .Value(precomputeConnection) + + auto result = Build<TKqlQueryResult>(ctx, precompute.Pos()) + .Value(precomputeConnection) .ColumnHints() // no column hints on intermediate phases - .Build() - .Done(); - - phaseResults.emplace_back(result); + .Build() + .Done(); + + phaseResults.emplace_back(result); computedInputs.emplace_back(precompute); - computedInputsSet.insert(precompute.Raw()); + computedInputsSet.insert(precompute.Raw()); } } Y_VERIFY_DEBUG(phaseResults.size() == computedInputs.size()); @@ -654,8 +654,8 @@ private: .Done(); auto tx = BuildTx(phaseResultsNode.Ptr(), ctx, /* isPrecompute */ true); - - if (!tx.IsValid()) { + + if (!tx.IsValid()) { return TStatus::Error; } @@ -675,7 +675,7 @@ private: } output = ctx.ReplaceNodes(query.Ptr(), replaceMap); - + return TStatus(TStatus::Repeat, true); } @@ -687,7 +687,7 @@ private: transformer.Rewind(); BuildTxTransformer->Init(KqpCtx->QueryCtx->Type, isPrecompute); auto expr = result; - + while (true) { auto status = InstantTransform(transformer, expr, ctx); if (status == TStatus::Error) { diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index 505d7f4152f..f5f2401ce9f 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -55,7 +55,7 @@ bool IsMapWrite(const TKikimrTableDescription& table, TExprBase input) { return true; } -TDqPhyPrecompute BuildPrecomputeStage(TExprBase expr, TExprContext& ctx) { +TDqPhyPrecompute BuildPrecomputeStage(TExprBase expr, TExprContext& ctx) { Y_VERIFY_DEBUG(IsDqPureExpr(expr)); auto pureStage = Build<TDqStage>(ctx, expr.Pos()) @@ -79,7 +79,7 @@ TDqPhyPrecompute BuildPrecomputeStage(TExprBase expr, TExprContext& ctx) { .Build() .Done(); - return Build<TDqPhyPrecompute>(ctx, expr.Pos()) + return Build<TDqPhyPrecompute>(ctx, expr.Pos()) .Connection(dqValue) .Done(); } @@ -130,7 +130,7 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const .Settings(settings.BuildNode(ctx, node.Pos())) .Done(); } else { - stageInput = Build<TDqPhyPrecompute>(ctx, node.Pos()) + stageInput = Build<TDqPhyPrecompute>(ctx, node.Pos()) .Connection(dqUnion) .Done(); @@ -186,7 +186,7 @@ bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const .Build() .Done(); } else { - stageInput = Build<TDqPhyPrecompute>(ctx, node.Pos()) + stageInput = Build<TDqPhyPrecompute>(ctx, node.Pos()) .Connection(dqUnion) .Done(); diff --git a/ydb/core/kqp/opt/kqp_opt_impl.h b/ydb/core/kqp/opt/kqp_opt_impl.h index 9b24fe46f4e..927aa910e86 100644 --- a/ydb/core/kqp/opt/kqp_opt_impl.h +++ b/ydb/core/kqp/opt/kqp_opt_impl.h @@ -9,11 +9,11 @@ namespace NKikimr::NKqp::NOpt { -static inline void DumpAppliedRule(const TString& name, const NYql::TExprNode::TPtr& input, - const NYql::TExprNode::TPtr& output, NYql::TExprContext& ctx) -{ -//#define KQP_ENABLE_DUMP_APPLIED_RULE -#ifdef KQP_ENABLE_DUMP_APPLIED_RULE +static inline void DumpAppliedRule(const TString& name, const NYql::TExprNode::TPtr& input, + const NYql::TExprNode::TPtr& output, NYql::TExprContext& ctx) +{ +//#define KQP_ENABLE_DUMP_APPLIED_RULE +#ifdef KQP_ENABLE_DUMP_APPLIED_RULE if (input != output) { auto builder = TStringBuilder() << "Rule applied: " << name << Endl; builder << "Expression before rule application: " << Endl; @@ -22,14 +22,14 @@ static inline void DumpAppliedRule(const TString& name, const NYql::TExprNode::T builder << KqpExprToPrettyString(*output, ctx); YQL_CLOG(INFO, ProviderKqp) << builder; } -#else - Y_UNUSED(ctx); +#else + Y_UNUSED(ctx); if (input != output) { YQL_CLOG(INFO, ProviderKqp) << name; } -#endif -} - +#endif +} + bool IsKqpPureLambda(const NYql::NNodes::TCoLambda& lambda); const NYql::TKikimrTableDescription& GetTableData(const NYql::TKikimrTablesData& tablesData, @@ -39,12 +39,12 @@ NYql::NNodes::TExprBase ProjectColumns(const NYql::NNodes::TExprBase& input, con NYql::TExprContext& ctx); NYql::NNodes::TExprBase ProjectColumns(const NYql::NNodes::TExprBase& input, const THashSet<TStringBuf>& columnNames, NYql::TExprContext& ctx); - + NYql::NNodes::TKqpTable BuildTableMeta(const NYql::TKikimrTableDescription& tableDesc, const NYql::TPositionHandle& pos, NYql::TExprContext& ctx); NYql::NNodes::TKqpTable BuildTableMeta(const NYql::TKikimrTableMetadata& tableMeta, const NYql::TPositionHandle& pos, NYql::TExprContext& ctx); - + NYql::NNodes::TExprBase KqpBuildJoin(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, NYql::IOptimizationContext& optCtx, const NYql::TParentsMap& parentsMap, bool allowStageMultiUsage); diff --git a/ydb/core/kqp/opt/kqp_opt_join.cpp b/ydb/core/kqp/opt/kqp_opt_join.cpp index 017ccb4a66e..650dc64104c 100644 --- a/ydb/core/kqp/opt/kqp_opt_join.cpp +++ b/ydb/core/kqp/opt/kqp_opt_join.cpp @@ -101,12 +101,12 @@ TExprBase KqpBuildJoin(const TExprBase& node, TExprContext& ctx, const TKqpOptim return node; } - auto joinType = join.JoinType().Value(); - + auto joinType = join.JoinType().Value(); + if (joinType == "Full"sv || joinType == "Exclusion"sv) { - return DqBuildJoinDict(join, ctx); - } - + return DqBuildJoinDict(join, ctx); + } + // NOTE: We don't want to broadcast table data via readsets for data queries, so we need to create a // separate stage to receive data from both sides of join. // TODO: We can push MapJoin to existing stage for data query, if it doesn't have table reads. This diff --git a/ydb/core/kqp/opt/kqp_opt_kql.cpp b/ydb/core/kqp/opt/kqp_opt_kql.cpp index 519a16bebc8..1eeb95daee4 100644 --- a/ydb/core/kqp/opt/kqp_opt_kql.cpp +++ b/ydb/core/kqp/opt/kqp_opt_kql.cpp @@ -12,36 +12,36 @@ using namespace NYql::NNodes; namespace { -bool UseReadTableRanges(const TKikimrTableDescription& tableData, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) { - /* - * OLAP tables can not work with ordinary ReadTable in case there is no support in physical - * optimizers for them. - */ - if (tableData.Metadata->Kind == EKikimrTableKind::Olap) { - return true; - } - - auto predicateExtractSetting = kqpCtx->Config->GetOptPredicateExtract(); - - if (predicateExtractSetting != EOptionalFlag::Auto) { - return predicateExtractSetting == EOptionalFlag::Enabled; - } - - /* - * SysView tables can't work with ReadRanges, because they do not support multiple ranges. - * KIKIMR-12434 - */ - if (tableData.Metadata->Kind == EKikimrTableKind::SysView) { - return false; - } - +bool UseReadTableRanges(const TKikimrTableDescription& tableData, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) { + /* + * OLAP tables can not work with ordinary ReadTable in case there is no support in physical + * optimizers for them. + */ + if (tableData.Metadata->Kind == EKikimrTableKind::Olap) { + return true; + } + + auto predicateExtractSetting = kqpCtx->Config->GetOptPredicateExtract(); + + if (predicateExtractSetting != EOptionalFlag::Auto) { + return predicateExtractSetting == EOptionalFlag::Enabled; + } + + /* + * SysView tables can't work with ReadRanges, because they do not support multiple ranges. + * KIKIMR-12434 + */ + if (tableData.Metadata->Kind == EKikimrTableKind::SysView) { + return false; + } + if (kqpCtx->IsScanQuery() && kqpCtx->Config->FeatureFlags.GetEnablePredicateExtractForScanQueries()) { - return true; - } - - return false; -} - + return true; + } + + return false; +} + bool HasIndexesToWrite(const TKikimrTableDescription& tableData) { bool hasIndexesToWrite = false; YQL_ENSURE(tableData.Metadata->Indexes.size() == tableData.Metadata->SecondaryGlobalIndexMetadata.size()); @@ -56,45 +56,45 @@ bool HasIndexesToWrite(const TKikimrTableDescription& tableData) { } TExprBase BuildReadTable(const TKiReadTable& read, const TKikimrTableDescription& tableData, - bool withSystemColumns, TExprContext& ctx, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) + bool withSystemColumns, TExprContext& ctx, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) { bool unwrapValues = HasSetting(read.Settings().Ref(), "unwrap_values"); - TExprNode::TPtr readTable; - const auto& columns = read.GetSelectColumns(ctx, tableData, withSystemColumns); - const auto& tableMeta = BuildTableMeta(tableData, read.Pos(), ctx); - - if (UseReadTableRanges(tableData, kqpCtx)) { - readTable = Build<TKqlReadTableRanges>(ctx, read.Pos()) - .Table(tableMeta) - .Ranges<TCoVoid>() - .Build() - .Columns(read.GetSelectColumns(ctx, tableData, withSystemColumns)) - .Settings() - .Build() - .ExplainPrompt() - .Build() - .Done().Ptr(); - } else { - readTable = Build<TKqlReadTable>(ctx, read.Pos()) - .Table(tableMeta) - .Range() - .From<TKqlKeyInc>() - .Build() - .To<TKqlKeyInc>() - .Build() + TExprNode::TPtr readTable; + const auto& columns = read.GetSelectColumns(ctx, tableData, withSystemColumns); + const auto& tableMeta = BuildTableMeta(tableData, read.Pos(), ctx); + + if (UseReadTableRanges(tableData, kqpCtx)) { + readTable = Build<TKqlReadTableRanges>(ctx, read.Pos()) + .Table(tableMeta) + .Ranges<TCoVoid>() + .Build() + .Columns(read.GetSelectColumns(ctx, tableData, withSystemColumns)) + .Settings() + .Build() + .ExplainPrompt() + .Build() + .Done().Ptr(); + } else { + readTable = Build<TKqlReadTable>(ctx, read.Pos()) + .Table(tableMeta) + .Range() + .From<TKqlKeyInc>() + .Build() + .To<TKqlKeyInc>() + .Build() .Build() - .Columns(columns) - .Settings() + .Columns(columns) + .Settings() .Build() - .Done().Ptr(); - } + .Done().Ptr(); + } + + auto readNode = TExprBase(readTable); - auto readNode = TExprBase(readTable); - return unwrapValues - ? UnwrapKiReadTableValues(readNode, tableData, columns, ctx) - : readNode; + ? UnwrapKiReadTableValues(readNode, tableData, columns, ctx) + : readNode; } TExprBase BuildReadTableIndex(const TKiReadTable& read, const TKikimrTableDescription& tableData, @@ -523,7 +523,7 @@ TVector<TExprBase> BuildUpdateTableWithIndex(const TKiUpdateTable& update, const } TExprNode::TPtr HandleReadTable(const TKiReadTable& read, TExprContext& ctx, const TKikimrTablesData& tablesData, - bool withSystemColumns, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) + bool withSystemColumns, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) { TKikimrKey key(ctx); YQL_ENSURE(key.Extract(read.TableKey().Ref())); @@ -558,7 +558,7 @@ TExprNode::TPtr HandleReadTable(const TKiReadTable& read, TExprContext& ctx, con return BuildReadTableIndex(read, tableData, indexName, withSystemColumns, ctx).Ptr(); } - return BuildReadTable(read, tableData, withSystemColumns, ctx, kqpCtx).Ptr(); + return BuildReadTable(read, tableData, withSystemColumns, ctx, kqpCtx).Ptr(); } TExprBase WriteTableSimple(const TKiWriteTable& write, const TCoAtomList& inputColumns, @@ -664,7 +664,7 @@ TIntrusivePtr<TKikimrTableMetadata> GetIndexMetadata(const TKqlReadTableIndex& r } TMaybe<TKqlQuery> BuildKqlQuery(TKiDataQuery query, const TKikimrTablesData& tablesData, TExprContext& ctx, - bool withSystemColumns, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) + bool withSystemColumns, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) { TVector<TExprBase> kqlEffects; for (const auto& effect : query.Effects()) { @@ -707,12 +707,12 @@ TMaybe<TKqlQuery> BuildKqlQuery(TKiDataQuery query, const TKikimrTablesData& tab TOptimizeExprSettings optSettings(nullptr); optSettings.VisitChanges = true; auto status = OptimizeExpr(kqlQuery.Ptr(), optResult, - [&tablesData, withSystemColumns, &kqpCtx](const TExprNode::TPtr& input, TExprContext& ctx) { + [&tablesData, withSystemColumns, &kqpCtx](const TExprNode::TPtr& input, TExprContext& ctx) { auto node = TExprBase(input); TExprNode::TPtr effect; if (auto maybeRead = node.Maybe<TCoRight>().Input().Maybe<TKiReadTable>()) { - return HandleReadTable(maybeRead.Cast(), ctx, tablesData, withSystemColumns, kqpCtx); + return HandleReadTable(maybeRead.Cast(), ctx, tablesData, withSystemColumns, kqpCtx); } return input; diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 21dce4d743c..d951c5ce30e 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -1,20 +1,20 @@ -#include "kqp_opt_log_rules.h" - +#include "kqp_opt_log_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/provider/yql_kikimr_provider_impl.h> - + #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/dq/opt/dq_opt_log.h> #include <ydb/library/yql/providers/common/transform/yql_optimize.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NCommon; -using namespace NYql::NDq; -using namespace NYql::NNodes; - + +using namespace NYql; +using namespace NYql::NCommon; +using namespace NYql::NDq; +using namespace NYql::NNodes; + class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { public: TKqpLogicalOptTransformer(TTypeAnnotationContext& typesCtx, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, @@ -33,7 +33,7 @@ public: AddHandler(0, &TCoExtractMembers::Match, HNDL(ApplyExtractMembersToReadOlapTable)); AddHandler(0, &TCoTake::Match, HNDL(RewriteTakeSortToTopSort)); AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInToEquiJoin)); - AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInCompactToJoin)); + AddHandler(0, &TCoFlatMap::Match, HNDL(RewriteSqlInCompactToJoin)); AddHandler(0, &TCoEquiJoin::Match, HNDL(RewriteEquiJoin)); AddHandler(0, &TDqJoin::Match, HNDL(JoinToIndexLookup)); AddHandler(0, &TCoCalcOverWindowBase::Match, HNDL(ExpandWindowFunctions)); @@ -43,50 +43,50 @@ public: AddHandler(0, &TCoFlatMapBase::Match, HNDL(RewriteFlatMapOverExtend)); AddHandler(0, &TKqlDeleteRows::Match, HNDL(DeleteOverLookup)); AddHandler(0, &TKqlUpsertRowsBase::Match, HNDL(ExcessUpsertInputColumns)); - + AddHandler(1, &TKqlReadTableIndex::Match, HNDL(RewriteIndexRead)); AddHandler(1, &TKqlLookupIndex::Match, HNDL(RewriteLookupIndex)); #undef HNDL } - + protected: - + TMaybeNode<TExprBase> PushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx) { TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx); DumpAppliedRule("PushExtractedPredicateToReadTable", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> PushPredicateToReadTable(TExprBase node, TExprContext& ctx) { TExprBase output = KqpPushPredicateToReadTable(node, ctx, KqpCtx); DumpAppliedRule("PushPredicateToReadTable", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteAggregate(TExprBase node, TExprContext& ctx) { TExprBase output = DqRewriteAggregate(node, ctx); DumpAppliedRule("RewriteAggregate", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> ApplyExtractMembersToReadTable(TExprBase node, TExprContext& ctx) { TExprBase output = KqpApplyExtractMembersToReadTable(node, ctx); DumpAppliedRule("ApplyExtractMembersToReadTable", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> ApplyExtractMembersToReadTableRanges(TExprBase node, TExprContext& ctx) { TExprBase output = KqpApplyExtractMembersToReadTableRanges(node, ctx); DumpAppliedRule("ApplyExtractMembersToReadTableRanges", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> ApplyExtractMembersToReadOlapTable(TExprBase node, TExprContext& ctx) { TExprBase output = KqpApplyExtractMembersToReadOlapTable(node, ctx); DumpAppliedRule("ApplyExtractMembersToReadOlapTable", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteTakeSortToTopSort(TExprBase node, TExprContext& ctx, const TGetParents& getParents) { TExprBase output = DqRewriteTakeSortToTopSort(node, ctx, *getParents()); DumpAppliedRule("RewriteTakeSortToTopSort", node.Ptr(), output.Ptr(), ctx); @@ -99,30 +99,30 @@ protected: return output; } - TMaybeNode<TExprBase> RewriteSqlInCompactToJoin(TExprBase node, TExprContext& ctx) { - TExprBase output = KqpRewriteSqlInCompactToJoin(node, ctx); - DumpAppliedRule("KqpRewriteSqlInCompactToJoin", node.Ptr(), output.Ptr(), ctx); - return output; - } - + TMaybeNode<TExprBase> RewriteSqlInCompactToJoin(TExprBase node, TExprContext& ctx) { + TExprBase output = KqpRewriteSqlInCompactToJoin(node, ctx); + DumpAppliedRule("KqpRewriteSqlInCompactToJoin", node.Ptr(), output.Ptr(), ctx); + return output; + } + TMaybeNode<TExprBase> RewriteEquiJoin(TExprBase node, TExprContext& ctx) { TExprBase output = DqRewriteEquiJoin(node, ctx); DumpAppliedRule("RewriteEquiJoin", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> JoinToIndexLookup(TExprBase node, TExprContext& ctx) { TExprBase output = KqpJoinToIndexLookup(node, ctx, KqpCtx, Config); DumpAppliedRule("JoinToIndexLookup", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> ExpandWindowFunctions(TExprBase node, TExprContext& ctx) { TExprBase output = DqExpandWindowFunctions(node, ctx, true); DumpAppliedRule("ExpandWindowFunctions", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteTopSortOverIndexRead(TExprBase node, TExprContext& ctx) { TExprBase output = KqpRewriteTopSortOverIndexRead(node, ctx, KqpCtx); DumpAppliedRule("RewriteTopSortOverIndexRead", node.Ptr(), output.Ptr(), ctx); @@ -152,7 +152,7 @@ protected: DumpAppliedRule("RewriteLookupIndex", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> DeleteOverLookup(TExprBase node, TExprContext& ctx) { TExprBase output = KqpDeleteOverLookup(node, ctx, KqpCtx); DumpAppliedRule("DeleteOverLookup", node.Ptr(), output.Ptr(), ctx); @@ -170,11 +170,11 @@ private: const TKqpOptimizeContext& KqpCtx; const TKikimrConfiguration::TPtr& Config; }; - + TAutoPtr<IGraphTransformer> CreateKqpLogOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config) { return THolder<IGraphTransformer>(new TKqpLogicalOptTransformer(typesCtx, kqpCtx, config)); -} - +} + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.h b/ydb/core/kqp/opt/logical/kqp_opt_log.h index 0df6bd3af8f..2739a638057 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.h @@ -1,13 +1,13 @@ -#pragma once - +#pragma once + #include <ydb/core/kqp/common/kqp_transform.h> #include <ydb/core/kqp/opt/kqp_opt.h> - + namespace NKikimr::NKqp::NOpt { - -struct TKqpOptimizeContext; - -TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, - NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config); - + +struct TKqpOptimizeContext; + +TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, + NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config); + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_extract.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_extract.cpp index 79e5059f345..96642998db4 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_extract.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_extract.cpp @@ -1,39 +1,39 @@ -#include "kqp_opt_log_rules.h" - +#include "kqp_opt_log_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/provider/yql_kikimr_provider_impl.h> - + #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/dq/opt/dq_opt_log.h> #include <ydb/library/yql/providers/common/provider/yql_table_lookup.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NCommon; -using namespace NYql::NDq; -using namespace NYql::NNodes; - - -TExprBase KqpApplyExtractMembersToReadTable(TExprBase node, TExprContext& ctx) { - if (!node.Maybe<TCoExtractMembers>()) { - return node; - } - - auto extract = node.Cast<TCoExtractMembers>(); - auto input = extract.Input(); - - if (!input.Maybe<TKqlReadTableBase>()) { - return node; - } - - auto read = extract.Input().Cast<TKqlReadTableBase>(); - + +using namespace NYql; +using namespace NYql::NCommon; +using namespace NYql::NDq; +using namespace NYql::NNodes; + + +TExprBase KqpApplyExtractMembersToReadTable(TExprBase node, TExprContext& ctx) { + if (!node.Maybe<TCoExtractMembers>()) { + return node; + } + + auto extract = node.Cast<TCoExtractMembers>(); + auto input = extract.Input(); + + if (!input.Maybe<TKqlReadTableBase>()) { + return node; + } + + auto read = extract.Input().Cast<TKqlReadTableBase>(); + if (auto maybeIndexRead = read.Maybe<TKqlReadTableIndex>()) { auto indexRead = maybeIndexRead.Cast(); - return Build<TKqlReadTableIndex>(ctx, extract.Pos()) + return Build<TKqlReadTableIndex>(ctx, extract.Pos()) .Table(indexRead.Table()) .Range(indexRead.Range()) .Columns(extract.Members()) @@ -42,74 +42,74 @@ TExprBase KqpApplyExtractMembersToReadTable(TExprBase node, TExprContext& ctx) { .Done(); } - return Build<TKqlReadTableBase>(ctx, extract.Pos()) - .CallableName(read.CallableName()) - .Table(read.Table()) - .Range(read.Range()) - .Columns(extract.Members()) - .Settings(read.Settings()) - .Done(); -} - -TExprBase KqpApplyExtractMembersToReadTableRanges(TExprBase node, TExprContext& ctx) { - if (!node.Maybe<TCoExtractMembers>()) { - return node; - } - - auto extract = node.Cast<TCoExtractMembers>(); - auto input = extract.Input(); - - if (!input.Maybe<TKqlReadTableRangesBase>()) { - return node; - } - - // TKqpReadOlapTableRangesBase is derived from TKqlReadTableRangesBase, but should be handled separately - if (input.Maybe<TKqpReadOlapTableRangesBase>()) { - return node; - } - - auto read = extract.Input().Cast<TKqlReadTableRangesBase>(); - - return Build<TKqlReadTableRangesBase>(ctx, extract.Pos()) - .CallableName(read.CallableName()) - .Table(read.Table()) - .Ranges(read.Ranges()) - .Columns(extract.Members()) - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Done(); -} - -TExprBase KqpApplyExtractMembersToReadOlapTable(TExprBase node, TExprContext& ctx) { - if (!node.Maybe<TCoExtractMembers>()) { - return node; - } - - auto extract = node.Cast<TCoExtractMembers>(); - auto input = extract.Input(); - - if (!input.Maybe<TKqlReadTableRangesBase>()) { - return node; - } - - auto read = extract.Input().Cast<TKqpReadOlapTableRangesBase>(); - - // When process is set it may use columns in read.Columns() but those columns may not be present - // in the results. Thus do not apply extract members if process is not empty lambda - if (read.Process().Body().Raw() != read.Process().Args().Arg(0).Raw()) { - return node; - } - - return Build<TKqpReadOlapTableRangesBase>(ctx, extract.Pos()) - .CallableName(read.CallableName()) - .Table(read.Table()) - .Ranges(read.Ranges()) - .Columns(extract.Members()) - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Process(read.Process()) - .Done(); -} - + return Build<TKqlReadTableBase>(ctx, extract.Pos()) + .CallableName(read.CallableName()) + .Table(read.Table()) + .Range(read.Range()) + .Columns(extract.Members()) + .Settings(read.Settings()) + .Done(); +} + +TExprBase KqpApplyExtractMembersToReadTableRanges(TExprBase node, TExprContext& ctx) { + if (!node.Maybe<TCoExtractMembers>()) { + return node; + } + + auto extract = node.Cast<TCoExtractMembers>(); + auto input = extract.Input(); + + if (!input.Maybe<TKqlReadTableRangesBase>()) { + return node; + } + + // TKqpReadOlapTableRangesBase is derived from TKqlReadTableRangesBase, but should be handled separately + if (input.Maybe<TKqpReadOlapTableRangesBase>()) { + return node; + } + + auto read = extract.Input().Cast<TKqlReadTableRangesBase>(); + + return Build<TKqlReadTableRangesBase>(ctx, extract.Pos()) + .CallableName(read.CallableName()) + .Table(read.Table()) + .Ranges(read.Ranges()) + .Columns(extract.Members()) + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Done(); +} + +TExprBase KqpApplyExtractMembersToReadOlapTable(TExprBase node, TExprContext& ctx) { + if (!node.Maybe<TCoExtractMembers>()) { + return node; + } + + auto extract = node.Cast<TCoExtractMembers>(); + auto input = extract.Input(); + + if (!input.Maybe<TKqlReadTableRangesBase>()) { + return node; + } + + auto read = extract.Input().Cast<TKqpReadOlapTableRangesBase>(); + + // When process is set it may use columns in read.Columns() but those columns may not be present + // in the results. Thus do not apply extract members if process is not empty lambda + if (read.Process().Body().Raw() != read.Process().Args().Arg(0).Raw()) { + return node; + } + + return Build<TKqpReadOlapTableRangesBase>(ctx, extract.Pos()) + .CallableName(read.CallableName()) + .Table(read.Table()) + .Ranges(read.Ranges()) + .Columns(extract.Members()) + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Process(read.Process()) + .Done(); +} + } // namespace NKikimr::NKqp::NOpt - + diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp index d284cd9adf5..ac0fe0bc39c 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp @@ -304,7 +304,7 @@ TMaybeNode<TExprBase> KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext return {}; } - auto leftData = Build<TDqPrecompute>(ctx, join.Pos()) + auto leftData = Build<TDqPrecompute>(ctx, join.Pos()) .Input(join.LeftInput()) .Done(); auto leftDataDeduplicated = DeduplicateByMembers(leftData, leftJoinKeys, ctx, join.Pos()); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges.cpp index bded7479a1c..38933b19fba 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges.cpp @@ -1,213 +1,213 @@ -#include "kqp_opt_log_rules.h" - +#include "kqp_opt_log_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/provider/yql_kikimr_provider_impl.h> - + #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/dq/opt/dq_opt_log.h> #include <ydb/library/yql/providers/common/provider/yql_table_lookup.h> - + namespace NKikimr::NKqp::NOpt { -namespace { - -using namespace NYql; -using namespace NYql::NCommon; -using namespace NYql::NDq; -using namespace NYql::NNodes; - -TExprBase BuildEquiRangeLookup(const TKeyRange& keyRange, const TKikimrTableDescription& tableDesc, - TPositionHandle pos, TExprContext& ctx) -{ - YQL_ENSURE(keyRange.IsEquiRange()); - - TVector<TExprBase> structMembers; - TVector<TCoAtom> skipNullColumns; - for (ui32 i = 0; i < keyRange.GetNumDefined(); ++i) { - const auto& columnName = tableDesc.Metadata->KeyColumnNames[i]; - TCoAtom columnNameAtom(ctx.NewAtom(pos, columnName)); - auto value = keyRange.GetFromTuple().GetValue(i).Cast(); - - if (TCoNull::Match(value.Raw())) { - value = Build<TCoNothing>(ctx, pos) - .OptionalType(NCommon::BuildTypeExpr(pos, *tableDesc.GetColumnType(columnName), ctx)) - .Done(); - } else { - skipNullColumns.push_back(columnNameAtom); - } - - auto member = Build<TExprList>(ctx, pos) - .Add(columnNameAtom) - .Add(value) - .Done(); - - structMembers.push_back(member); - } - - auto keysToLookup = Build<TCoAsList>(ctx, pos) - .Add<TCoAsStruct>() - .Add(structMembers) - .Build() - .Done(); - - // Actually residual predicate for the key range already has a check for NULL keys, - // but it's better to skip redundant lookup. Consider removing check from residual - // predicate in this case. - return Build<TCoSkipNullMembers>(ctx, pos) - .Input(keysToLookup) - .Members() - .Add(skipNullColumns) - .Build() - .Done(); -} - -TKqlKeyRange BuildKeyRangeExpr(const TKeyRange& keyRange, const TKikimrTableDescription& tableDesc, - TPositionHandle pos, TExprContext& ctx) -{ - bool fromInclusive = true; - bool toInclusive = true; - TVector<TExprBase> fromValues; - TVector<TExprBase> toValues; - - for (size_t i = 0; i < keyRange.GetColumnRangesCount(); ++i) { - const auto& columnName = tableDesc.Metadata->KeyColumnNames[i]; - const auto& range = keyRange.GetColumnRange(i); - - if (range.GetFrom().IsDefined()) { - fromInclusive = range.GetFrom().IsInclusive(); - if (TCoNull::Match(range.GetFrom().GetValue().Raw())) { - fromValues.emplace_back( - Build<TCoNothing>(ctx, pos) - .OptionalType(NCommon::BuildTypeExpr(pos, *tableDesc.GetColumnType(columnName), ctx)) - .Done()); - } else { - fromValues.emplace_back(range.GetFrom().GetValue()); - } - } - - if (range.GetTo().IsDefined()) { - toInclusive = range.GetTo().IsInclusive(); - if (TCoNull::Match(range.GetTo().GetValue().Raw())) { - toValues.emplace_back( - Build<TCoNothing>(ctx, pos) - .OptionalType(NCommon::BuildTypeExpr(pos, *tableDesc.GetColumnType(columnName), ctx)) - .Done()); - } else { - toValues.emplace_back(range.GetTo().GetValue()); - } - } - } - - auto fromExpr = fromInclusive - ? Build<TKqlKeyInc>(ctx, pos).Add(fromValues).Done().Cast<TKqlKeyTuple>() - : Build<TKqlKeyExc>(ctx, pos).Add(fromValues).Done().Cast<TKqlKeyTuple>(); - - auto toExpr = toInclusive - ? Build<TKqlKeyInc>(ctx, pos).Add(toValues).Done().Cast<TKqlKeyTuple>() - : Build<TKqlKeyExc>(ctx, pos).Add(toValues).Done().Cast<TKqlKeyTuple>(); - - return Build<TKqlKeyRange>(ctx, pos) - .From(fromExpr) - .To(toExpr) - .Done(); -} - +namespace { + +using namespace NYql; +using namespace NYql::NCommon; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +TExprBase BuildEquiRangeLookup(const TKeyRange& keyRange, const TKikimrTableDescription& tableDesc, + TPositionHandle pos, TExprContext& ctx) +{ + YQL_ENSURE(keyRange.IsEquiRange()); + + TVector<TExprBase> structMembers; + TVector<TCoAtom> skipNullColumns; + for (ui32 i = 0; i < keyRange.GetNumDefined(); ++i) { + const auto& columnName = tableDesc.Metadata->KeyColumnNames[i]; + TCoAtom columnNameAtom(ctx.NewAtom(pos, columnName)); + auto value = keyRange.GetFromTuple().GetValue(i).Cast(); + + if (TCoNull::Match(value.Raw())) { + value = Build<TCoNothing>(ctx, pos) + .OptionalType(NCommon::BuildTypeExpr(pos, *tableDesc.GetColumnType(columnName), ctx)) + .Done(); + } else { + skipNullColumns.push_back(columnNameAtom); + } + + auto member = Build<TExprList>(ctx, pos) + .Add(columnNameAtom) + .Add(value) + .Done(); + + structMembers.push_back(member); + } + + auto keysToLookup = Build<TCoAsList>(ctx, pos) + .Add<TCoAsStruct>() + .Add(structMembers) + .Build() + .Done(); + + // Actually residual predicate for the key range already has a check for NULL keys, + // but it's better to skip redundant lookup. Consider removing check from residual + // predicate in this case. + return Build<TCoSkipNullMembers>(ctx, pos) + .Input(keysToLookup) + .Members() + .Add(skipNullColumns) + .Build() + .Done(); +} + +TKqlKeyRange BuildKeyRangeExpr(const TKeyRange& keyRange, const TKikimrTableDescription& tableDesc, + TPositionHandle pos, TExprContext& ctx) +{ + bool fromInclusive = true; + bool toInclusive = true; + TVector<TExprBase> fromValues; + TVector<TExprBase> toValues; + + for (size_t i = 0; i < keyRange.GetColumnRangesCount(); ++i) { + const auto& columnName = tableDesc.Metadata->KeyColumnNames[i]; + const auto& range = keyRange.GetColumnRange(i); + + if (range.GetFrom().IsDefined()) { + fromInclusive = range.GetFrom().IsInclusive(); + if (TCoNull::Match(range.GetFrom().GetValue().Raw())) { + fromValues.emplace_back( + Build<TCoNothing>(ctx, pos) + .OptionalType(NCommon::BuildTypeExpr(pos, *tableDesc.GetColumnType(columnName), ctx)) + .Done()); + } else { + fromValues.emplace_back(range.GetFrom().GetValue()); + } + } + + if (range.GetTo().IsDefined()) { + toInclusive = range.GetTo().IsInclusive(); + if (TCoNull::Match(range.GetTo().GetValue().Raw())) { + toValues.emplace_back( + Build<TCoNothing>(ctx, pos) + .OptionalType(NCommon::BuildTypeExpr(pos, *tableDesc.GetColumnType(columnName), ctx)) + .Done()); + } else { + toValues.emplace_back(range.GetTo().GetValue()); + } + } + } + + auto fromExpr = fromInclusive + ? Build<TKqlKeyInc>(ctx, pos).Add(fromValues).Done().Cast<TKqlKeyTuple>() + : Build<TKqlKeyExc>(ctx, pos).Add(fromValues).Done().Cast<TKqlKeyTuple>(); + + auto toExpr = toInclusive + ? Build<TKqlKeyInc>(ctx, pos).Add(toValues).Done().Cast<TKqlKeyTuple>() + : Build<TKqlKeyExc>(ctx, pos).Add(toValues).Done().Cast<TKqlKeyTuple>(); + + return Build<TKqlKeyRange>(ctx, pos) + .From(fromExpr) + .To(toExpr) + .Done(); +} + } // namespace - -TExprBase KqpPushPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { - if (!node.Maybe<TCoFlatMap>()) { - return node; - } - auto flatmap = node.Cast<TCoFlatMap>(); - - if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { - return node; - } - + +TExprBase KqpPushPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { + if (!node.Maybe<TCoFlatMap>()) { + return node; + } + auto flatmap = node.Cast<TCoFlatMap>(); + + if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { + return node; + } + TMaybeNode<TKqlReadTableBase> readTable; - TMaybeNode<TCoFilterNullMembers> filterNull; - TMaybeNode<TCoSkipNullMembers> skipNull; - + TMaybeNode<TCoFilterNullMembers> filterNull; + TMaybeNode<TCoSkipNullMembers> skipNull; + TMaybeNode<TCoAtom> indexName; - if (auto maybeRead = flatmap.Input().Maybe<TKqlReadTable>()) { - readTable = maybeRead.Cast(); - } - + if (auto maybeRead = flatmap.Input().Maybe<TKqlReadTable>()) { + readTable = maybeRead.Cast(); + } + if (auto maybeRead = flatmap.Input().Maybe<TKqlReadTableIndex>()) { readTable = maybeRead.Cast(); indexName = maybeRead.Cast().Index(); } - if (auto maybeRead = flatmap.Input().Maybe<TCoFilterNullMembers>().Input().Maybe<TKqlReadTable>()) { - readTable = maybeRead.Cast(); - filterNull = flatmap.Input().Cast<TCoFilterNullMembers>(); - } - + if (auto maybeRead = flatmap.Input().Maybe<TCoFilterNullMembers>().Input().Maybe<TKqlReadTable>()) { + readTable = maybeRead.Cast(); + filterNull = flatmap.Input().Cast<TCoFilterNullMembers>(); + } + if (auto maybeRead = flatmap.Input().Maybe<TCoFilterNullMembers>().Input().Maybe<TKqlReadTableIndex>()) { readTable = maybeRead.Cast(); filterNull = flatmap.Input().Cast<TCoFilterNullMembers>(); indexName = maybeRead.Cast().Index(); } - if (auto maybeRead = flatmap.Input().Maybe<TCoSkipNullMembers>().Input().Maybe<TKqlReadTable>()) { - readTable = maybeRead.Cast(); - skipNull = flatmap.Input().Cast<TCoSkipNullMembers>(); - } - + if (auto maybeRead = flatmap.Input().Maybe<TCoSkipNullMembers>().Input().Maybe<TKqlReadTable>()) { + readTable = maybeRead.Cast(); + skipNull = flatmap.Input().Cast<TCoSkipNullMembers>(); + } + if (auto maybeRead = flatmap.Input().Maybe<TCoSkipNullMembers>().Input().Maybe<TKqlReadTableIndex>()) { readTable = maybeRead.Cast(); skipNull = flatmap.Input().Cast<TCoSkipNullMembers>(); indexName = maybeRead.Cast().Index(); } - if (!readTable) { - return node; - } - - auto read = readTable.Cast(); - - if (read.Range().From().ArgCount() > 0 || read.Range().To().ArgCount() > 0) { - return node; - } - + if (!readTable) { + return node; + } + + auto read = readTable.Cast(); + + if (read.Range().From().ArgCount() > 0 || read.Range().To().ArgCount() > 0) { + return node; + } + auto& mainTableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); - + auto& tableDesc = indexName ? kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(indexName.Cast())).first->Name) : mainTableDesc; - YQL_ENSURE(tableDesc.Metadata->Kind != EKikimrTableKind::Olap); - - auto row = flatmap.Lambda().Args().Arg(0); - auto predicate = TExprBase(flatmap.Lambda().Body().Ref().ChildPtr(0)); - TTableLookup lookup = ExtractTableLookup(row, predicate, tableDesc.Metadata->KeyColumnNames, - &KiTableLookupGetValue, &KiTableLookupCanCompare, &KiTableLookupCompare, ctx, - kqpCtx.Config->HasAllowNullCompareInIndex()); - - if (lookup.IsFullScan()) { - return node; - } - - auto readSettings = TKqpReadTableSettings::Parse(read); - - TVector<TExprBase> fetches; - fetches.reserve(lookup.GetKeyRanges().size()); - - for (auto& keyRange : lookup.GetKeyRanges()) { - bool useLookup = false; - if (keyRange.IsEquiRange()) { - bool isFullKey = keyRange.GetNumDefined() == tableDesc.Metadata->KeyColumnNames.size(); - - // NOTE: Use more efficient full key lookup implementation in datashard. - // Consider using lookup for partial keys as well once better constant folding - // is available, currently it can introduce redundant compute stage. - useLookup = kqpCtx.IsDataQuery() && isFullKey; - } - - TMaybeNode<TExprBase> readInput; - if (useLookup) { - auto lookupKeys = BuildEquiRangeLookup(keyRange, tableDesc, read.Pos(), ctx); - + YQL_ENSURE(tableDesc.Metadata->Kind != EKikimrTableKind::Olap); + + auto row = flatmap.Lambda().Args().Arg(0); + auto predicate = TExprBase(flatmap.Lambda().Body().Ref().ChildPtr(0)); + TTableLookup lookup = ExtractTableLookup(row, predicate, tableDesc.Metadata->KeyColumnNames, + &KiTableLookupGetValue, &KiTableLookupCanCompare, &KiTableLookupCompare, ctx, + kqpCtx.Config->HasAllowNullCompareInIndex()); + + if (lookup.IsFullScan()) { + return node; + } + + auto readSettings = TKqpReadTableSettings::Parse(read); + + TVector<TExprBase> fetches; + fetches.reserve(lookup.GetKeyRanges().size()); + + for (auto& keyRange : lookup.GetKeyRanges()) { + bool useLookup = false; + if (keyRange.IsEquiRange()) { + bool isFullKey = keyRange.GetNumDefined() == tableDesc.Metadata->KeyColumnNames.size(); + + // NOTE: Use more efficient full key lookup implementation in datashard. + // Consider using lookup for partial keys as well once better constant folding + // is available, currently it can introduce redundant compute stage. + useLookup = kqpCtx.IsDataQuery() && isFullKey; + } + + TMaybeNode<TExprBase> readInput; + if (useLookup) { + auto lookupKeys = BuildEquiRangeLookup(keyRange, tableDesc, read.Pos(), ctx); + if (indexName) { readInput = Build<TKqlLookupIndex>(ctx, read.Pos()) .Table(read.Table()) @@ -222,18 +222,18 @@ TExprBase KqpPushPredicateToReadTable(TExprBase node, TExprContext& ctx, const T .Columns(read.Columns()) .Done(); } - } else { - auto keyRangeExpr = BuildKeyRangeExpr(keyRange, tableDesc, node.Pos(), ctx); - - TKqpReadTableSettings settings = readSettings; - for (size_t i = 0; i < keyRange.GetColumnRangesCount(); ++i) { - const auto& column = tableDesc.Metadata->KeyColumnNames[i]; - auto& range = keyRange.GetColumnRange(i); - if (range.IsDefined() && !range.IsNull()) { - settings.AddSkipNullKey(column); - } - } - + } else { + auto keyRangeExpr = BuildKeyRangeExpr(keyRange, tableDesc, node.Pos(), ctx); + + TKqpReadTableSettings settings = readSettings; + for (size_t i = 0; i < keyRange.GetColumnRangesCount(); ++i) { + const auto& column = tableDesc.Metadata->KeyColumnNames[i]; + auto& range = keyRange.GetColumnRange(i); + if (range.IsDefined() && !range.IsNull()) { + settings.AddSkipNullKey(column); + } + } + if (indexName) { readInput = Build<TKqlReadTableIndex>(ctx, read.Pos()) .Table(read.Table()) @@ -250,48 +250,48 @@ TExprBase KqpPushPredicateToReadTable(TExprBase node, TExprContext& ctx, const T .Settings(settings.BuildNode(ctx, read.Pos())) .Done(); } - } - - auto input = readInput.Cast(); - - auto residualPredicate = keyRange.GetResidualPredicate() - ? keyRange.GetResidualPredicate().Cast().Ptr() - : MakeBool<true>(node.Pos(), ctx); - - auto newBody = ctx.ChangeChild(flatmap.Lambda().Body().Ref(), 0, std::move(residualPredicate)); - - if (filterNull) { - input = Build<TCoFilterNullMembers>(ctx, node.Pos()) - .Input(input) - .Members(filterNull.Cast().Members()) - .Done(); - } - - if (skipNull) { - input = Build<TCoSkipNullMembers>(ctx, node.Pos()) - .Input(input) - .Members(skipNull.Cast().Members()) - .Done(); - } - - auto fetch = Build<TCoFlatMap>(ctx, node.Pos()) - .Input(input) - .Lambda() - .Args({"item"}) - .Body<TExprApplier>() - .Apply(TExprBase(newBody)) - .With(flatmap.Lambda().Args().Arg(0), "item") - .Build() - .Build() - .Done(); - - fetches.push_back(fetch); - } - - return Build<TCoExtend>(ctx, node.Pos()) - .Add(fetches) - .Done(); -} - + } + + auto input = readInput.Cast(); + + auto residualPredicate = keyRange.GetResidualPredicate() + ? keyRange.GetResidualPredicate().Cast().Ptr() + : MakeBool<true>(node.Pos(), ctx); + + auto newBody = ctx.ChangeChild(flatmap.Lambda().Body().Ref(), 0, std::move(residualPredicate)); + + if (filterNull) { + input = Build<TCoFilterNullMembers>(ctx, node.Pos()) + .Input(input) + .Members(filterNull.Cast().Members()) + .Done(); + } + + if (skipNull) { + input = Build<TCoSkipNullMembers>(ctx, node.Pos()) + .Input(input) + .Members(skipNull.Cast().Members()) + .Done(); + } + + auto fetch = Build<TCoFlatMap>(ctx, node.Pos()) + .Input(input) + .Lambda() + .Args({"item"}) + .Body<TExprApplier>() + .Apply(TExprBase(newBody)) + .With(flatmap.Lambda().Args().Arg(0), "item") + .Build() + .Build() + .Done(); + + fetches.push_back(fetch); + } + + return Build<TCoExtend>(ctx, node.Pos()) + .Add(fetches) + .Done(); +} + } // namespace NKikimr::NKqp::NOpt - + diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index 8698f75f60d..a4a4a0b3e0b 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -1,137 +1,137 @@ -#include "kqp_opt_log_rules.h" - +#include "kqp_opt_log_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/provider/yql_kikimr_provider_impl.h> - + #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/dq/opt/dq_opt_log.h> #include <ydb/library/yql/providers/common/provider/yql_table_lookup.h> #include <ydb/library/yql/core/extract_predicate/extract_predicate.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NCommon; -using namespace NYql::NDq; -using namespace NYql::NNodes; - -TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, - TTypeAnnotationContext& typesCtx) -{ - if (!node.Maybe<TCoFlatMap>()) { - return node; - } - - auto flatmap = node.Cast<TCoFlatMap>(); - - if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { - return node; - } - - TMaybeNode<TKqlReadTableRanges> readTable; - TMaybeNode<TCoFilterNullMembers> filterNull; - TMaybeNode<TCoSkipNullMembers> skipNull; - - if (auto maybeRead = flatmap.Input().Maybe<TKqlReadTableRanges>()) { - readTable = maybeRead.Cast(); - } - - if (auto maybeRead = flatmap.Input().Maybe<TCoFilterNullMembers>().Input().Maybe<TKqlReadTableRanges>()) { - readTable = maybeRead.Cast(); - filterNull = flatmap.Input().Cast<TCoFilterNullMembers>(); - } - - if (auto maybeRead = flatmap.Input().Maybe<TCoSkipNullMembers>().Input().Maybe<TKqlReadTableRanges>()) { - readTable = maybeRead.Cast(); - skipNull = flatmap.Input().Cast<TCoSkipNullMembers>(); - } - - if (!readTable) { - return node; - } - - /* - * ReadTableRanges supported predicate extraction, but it may be disabled via flag. For example to force - * pushdown predicates to OLAP SSA program. - */ - auto predicateExtractSetting = kqpCtx.Config->GetOptPredicateExtract(); - - if (predicateExtractSetting == EOptionalFlag::Disabled) { - return node; - } - - auto read = readTable.Cast(); - - if (!read.Ranges().Maybe<TCoVoid>()) { - return node; - } - - auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); - - THashSet<TString> possibleKeys; + +using namespace NYql; +using namespace NYql::NCommon; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, + TTypeAnnotationContext& typesCtx) +{ + if (!node.Maybe<TCoFlatMap>()) { + return node; + } + + auto flatmap = node.Cast<TCoFlatMap>(); + + if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { + return node; + } + + TMaybeNode<TKqlReadTableRanges> readTable; + TMaybeNode<TCoFilterNullMembers> filterNull; + TMaybeNode<TCoSkipNullMembers> skipNull; + + if (auto maybeRead = flatmap.Input().Maybe<TKqlReadTableRanges>()) { + readTable = maybeRead.Cast(); + } + + if (auto maybeRead = flatmap.Input().Maybe<TCoFilterNullMembers>().Input().Maybe<TKqlReadTableRanges>()) { + readTable = maybeRead.Cast(); + filterNull = flatmap.Input().Cast<TCoFilterNullMembers>(); + } + + if (auto maybeRead = flatmap.Input().Maybe<TCoSkipNullMembers>().Input().Maybe<TKqlReadTableRanges>()) { + readTable = maybeRead.Cast(); + skipNull = flatmap.Input().Cast<TCoSkipNullMembers>(); + } + + if (!readTable) { + return node; + } + + /* + * ReadTableRanges supported predicate extraction, but it may be disabled via flag. For example to force + * pushdown predicates to OLAP SSA program. + */ + auto predicateExtractSetting = kqpCtx.Config->GetOptPredicateExtract(); + + if (predicateExtractSetting == EOptionalFlag::Disabled) { + return node; + } + + auto read = readTable.Cast(); + + if (!read.Ranges().Maybe<TCoVoid>()) { + return node; + } + + auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); + + THashSet<TString> possibleKeys; TPredicateExtractorSettings settings; - settings.MergeAdjacentPointRanges = true; + settings.MergeAdjacentPointRanges = true; auto extractor = MakePredicateRangeExtractor(settings); YQL_ENSURE(tableDesc.SchemeNode); - + bool prepareSuccess = extractor->Prepare(flatmap.Lambda().Ptr(), *tableDesc.SchemeNode, possibleKeys, ctx, typesCtx); - YQL_ENSURE(prepareSuccess); - + YQL_ENSURE(prepareSuccess); + auto buildResult = extractor->BuildComputeNode(tableDesc.Metadata->KeyColumnNames, ctx); TExprNode::TPtr ranges = buildResult.ComputeNode; - - if (!ranges) { - return node; - } - - TExprNode::TPtr residualLambda = buildResult.PrunedLambda; - - TVector<TString> usedColumns; - usedColumns.reserve(buildResult.UsedPrefixLen); - - for (size_t i = 0; i < buildResult.UsedPrefixLen; ++i) { - usedColumns.emplace_back(tableDesc.Metadata->KeyColumnNames[i]); - } - - TKqpReadTableExplainPrompt prompt; - prompt.SetUsedKeyColumns(usedColumns); - if (buildResult.ExpectedMaxRanges.Defined()) { - prompt.SetExpectedMaxRanges(buildResult.ExpectedMaxRanges.GetRef()); - } - - YQL_CLOG(DEBUG, ProviderKqp) << "Ranges extracted: " << KqpExprToPrettyString(*ranges, ctx); - YQL_CLOG(DEBUG, ProviderKqp) << "Residual lambda: " << KqpExprToPrettyString(*residualLambda, ctx); - - TMaybeNode<TExprBase> readInput = Build<TKqlReadTableRanges>(ctx, read.Pos()) - .Table(read.Table()) - .Ranges(ranges) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(prompt.BuildNode(ctx, read.Pos())) - .Done(); - - auto input = readInput.Cast(); - - if (filterNull) { - input = Build<TCoFilterNullMembers>(ctx, node.Pos()) - .Input(input) - .Members(filterNull.Cast().Members()) - .Done(); - } - - if (skipNull) { - input = Build<TCoSkipNullMembers>(ctx, node.Pos()) - .Input(input) - .Members(skipNull.Cast().Members()) - .Done(); - } - - return Build<TCoFlatMap>(ctx, node.Pos()) - .Input(input) - .Lambda(residualLambda) - .Done(); -} - + + if (!ranges) { + return node; + } + + TExprNode::TPtr residualLambda = buildResult.PrunedLambda; + + TVector<TString> usedColumns; + usedColumns.reserve(buildResult.UsedPrefixLen); + + for (size_t i = 0; i < buildResult.UsedPrefixLen; ++i) { + usedColumns.emplace_back(tableDesc.Metadata->KeyColumnNames[i]); + } + + TKqpReadTableExplainPrompt prompt; + prompt.SetUsedKeyColumns(usedColumns); + if (buildResult.ExpectedMaxRanges.Defined()) { + prompt.SetExpectedMaxRanges(buildResult.ExpectedMaxRanges.GetRef()); + } + + YQL_CLOG(DEBUG, ProviderKqp) << "Ranges extracted: " << KqpExprToPrettyString(*ranges, ctx); + YQL_CLOG(DEBUG, ProviderKqp) << "Residual lambda: " << KqpExprToPrettyString(*residualLambda, ctx); + + TMaybeNode<TExprBase> readInput = Build<TKqlReadTableRanges>(ctx, read.Pos()) + .Table(read.Table()) + .Ranges(ranges) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(prompt.BuildNode(ctx, read.Pos())) + .Done(); + + auto input = readInput.Cast(); + + if (filterNull) { + input = Build<TCoFilterNullMembers>(ctx, node.Pos()) + .Input(input) + .Members(filterNull.Cast().Members()) + .Done(); + } + + if (skipNull) { + input = Build<TCoSkipNullMembers>(ctx, node.Pos()) + .Input(input) + .Members(skipNull.Cast().Members()) + .Done(); + } + + return Build<TCoFlatMap>(ctx, node.Pos()) + .Input(input) + .Lambda(residualLambda) + .Done(); +} + } // namespace NKikimr::NKqp::NOpt - + diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_rules.h b/ydb/core/kqp/opt/logical/kqp_opt_log_rules.h index c5067e62f5a..9e84d83d824 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_rules.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_rules.h @@ -1,36 +1,36 @@ -#pragma once - +#pragma once + #include <ydb/core/kqp/opt/kqp_opt.h> #include <ydb/core/kqp/provider/yql_kikimr_expr_nodes.h> - + #include <ydb/library/yql/ast/yql_expr.h> - -/* - * This file contains declaration of all rule functions for logical optimizer - */ - + +/* + * This file contains declaration of all rule functions for logical optimizer + */ + namespace NKikimr::NKqp::NOpt { - + NYql::NNodes::TExprBase KqpPushPredicateToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext &ctx, const TKqpOptimizeContext &kqpCtx); -NYql::NNodes::TExprBase KqpPushExtractedPredicateToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx, NYql::TTypeAnnotationContext& typesCtx); - -NYql::NNodes::TExprBase KqpApplyExtractMembersToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); - -NYql::NNodes::TExprBase KqpApplyExtractMembersToReadOlapTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); - -NYql::NNodes::TExprBase KqpApplyExtractMembersToReadTableRanges(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); - +NYql::NNodes::TExprBase KqpPushExtractedPredicateToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, + const TKqpOptimizeContext& kqpCtx, NYql::TTypeAnnotationContext& typesCtx); + +NYql::NNodes::TExprBase KqpApplyExtractMembersToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); + +NYql::NNodes::TExprBase KqpApplyExtractMembersToReadOlapTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); + +NYql::NNodes::TExprBase KqpApplyExtractMembersToReadTableRanges(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); + NYql::NNodes::TExprBase KqpJoinToIndexLookup(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, const NYql::TKikimrConfiguration::TPtr& config); NYql::NNodes::TExprBase KqpRewriteSqlInToEquiJoin(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, const NYql::TKikimrConfiguration::TPtr& config); -NYql::NNodes::TExprBase KqpRewriteSqlInCompactToJoin(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx); - +NYql::NNodes::TExprBase KqpRewriteSqlInCompactToJoin(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx); + NYql::NNodes::TExprBase KqpRewriteIndexRead(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx, const TKqpOptimizeContext& kqpCtx); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp index 4f00ba7459d..fd5d2130011 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin.cpp @@ -27,7 +27,7 @@ TExprBase KqpRewriteSqlInToEquiJoin(const TExprBase& node, TExprContext& ctx, co if (!node.Maybe<TCoFlatMap>()) { return node; } - + const auto flatMap = node.Cast<TCoFlatMap>(); const auto lambdaBody = flatMap.Lambda().Body(); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin_compact.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin_compact.cpp index 0f0e0841000..f3ffaa1ec0f 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin_compact.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_sqlin_compact.cpp @@ -1,232 +1,232 @@ -#include "kqp_opt_log_rules.h" - +#include "kqp_opt_log_rules.h" + #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/provider/yql_kikimr_provider_impl.h> #include <ydb/core/kqp/provider/yql_kikimr_opt_utils.h> - + #include <ydb/library/yql/core/common_opt/yql_co_sqlin.h> #include <ydb/library/yql/core/yql_opt_utils.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NNodes; -using namespace NYql::NDq; - -TExprBase KqpRewriteSqlInCompactToJoin(const TExprBase& node, TExprContext& ctx) { - if (!node.Maybe<TCoFlatMap>()) { - return node; - } - - const auto flatMap = node.Cast<TCoFlatMap>(); - const auto lambdaBody = flatMap.Lambda().Body(); - - if (!lambdaBody.Maybe<TCoOptionalIf>()) { - return node; - } - - auto optionalIf = lambdaBody.Cast<TCoOptionalIf>(); - - auto sqlInPtr = FindNode( - lambdaBody.Ptr(), - [](const TExprNode::TPtr &x) { - return TCoSqlIn::Match(x.Get()); - } - ); - - if (!sqlInPtr) { - return node; - } - - auto sqlIn = TCoSqlIn(sqlInPtr); - - if (!HasSetting(sqlIn.Options().Ref(), "isCompact")) { - return node; - } - - // Check both inputs (Flatmap and SqlIn) are union all (rewrite to shuffle?) - auto leftInput = flatMap.Input(); - auto rightInput = sqlIn.Collection(); - - if (!leftInput.Maybe<TDqCnUnionAll>()) { - return node; - } - - if (!rightInput.Maybe<TDqCnUnionAll>()) { - return node; - } - - auto rightUnion = rightInput.Cast<TDqCnUnionAll>(); - auto leftUnion = leftInput.Cast<TDqCnUnionAll>(); - - auto extractColumns = [&ctx, &node](const TDqOutput& output) { - TVector<TCoAtom> renames; - - TExprBase connection = output.Stage().Program().Body(); - - if (connection.Maybe<TDqReplicate>()) { - auto replicate = connection.Cast<TDqReplicate>(); - auto index = FromString<uint32_t>(output.Index().Value()); - // Index 0 is replicate input, others - are lambdas. - connection = replicate.Args().Get(index + 1); - } - - const auto itemType = GetSeqItemType(connection.Ptr()->GetTypeAnn()); - YQL_ENSURE(itemType->GetKind() == ETypeAnnotationKind::Struct, - "Expected Struct, got " << itemType->GetKind() - ); - renames.reserve(itemType->Cast<TStructExprType>()->GetSize()); - - for (const auto& column : itemType->Cast<TStructExprType>()->GetItems()) { - renames.emplace_back( - Build<TCoAtom>(ctx, node.Pos()) - .Value(column->GetName()) - .Done() - ); - } - return renames; - }; - - auto rightColumns = extractColumns(rightUnion.Output()); - auto leftColumns = extractColumns(leftUnion.Output()); - - if (rightColumns.empty() || leftColumns.empty()) { - return node; - } - - // Extract right column for join - if (rightColumns.size() != 1) { - return node; - } - - auto rightColumn = rightColumns[0]; - - // Extract left column to join - if (!sqlIn.Lookup().Maybe<TCoMember>()) { - return node; - } - - auto leftMember = sqlIn.Lookup().Cast<TCoMember>(); - auto leftColumn = leftMember.Name(); - - TString rightLabelStr = "rightLabel"; - TString leftLabelStr = "leftLabel"; - - // Join (left semi) both incoming tables by SqlIn parameter - auto rightLabel = Build<TCoAtom>(ctx, node.Pos()) - .Value(rightLabelStr) - .Done().Ptr(); - auto leftLabel = Build<TCoAtom>(ctx, node.Pos()) - .Value(leftLabelStr) - .Done().Ptr(); - - auto joinKeys = Build<TDqJoinKeyTupleList>(ctx, node.Pos()) - .Add<TDqJoinKeyTuple>() - .LeftLabel(leftLabel) - .LeftColumn(leftColumn) - .RightLabel(rightLabel) - .RightColumn(rightColumn) - .Build() - .Done(); - - TCoArgument leftArg = Build<TCoArgument>(ctx, node.Pos()) - .Name("leftArg") - .Done(); - - TCoArgument rightArg = Build<TCoArgument>(ctx, node.Pos()) - .Name("rightArg") - .Done(); - - TVector<TCoArgument> args = {leftArg, rightArg}; - TVector<TExprBase> inputs = {leftInput, rightInput}; - - auto join = Build<TDqPhyMapJoin>(ctx, node.Pos()) - .LeftInput(leftArg) - .LeftLabel(leftLabel) - .RightInput(rightArg) - .RightLabel(rightLabel) - .JoinType<TCoAtom>() - .Value("LeftSemi") - .Build() - .JoinKeys(joinKeys) - .Done(); - - // Convert column names back, i.e. leftLabel.ColumnName -> ColumnName - TVector<TExprBase> requiredMembers; - TCoArgument convertArg = Build<TCoArgument>(ctx, node.Pos()) - .Name("convertArg") - .Done(); - - // If there is AsStruct in OptionalIf - get column names from it, otherwise get them from incoming stream - if (optionalIf.Value().Maybe<TCoAsStruct>()) { - for (const auto& item: optionalIf.Value().Cast<TCoAsStruct>().Ptr()->Children()) { - auto tuple = TCoNameValueTuple(item); - - if (!tuple.Value().Maybe<TCoMember>()) { - return node; - } - - auto columnName = TString(tuple.Value().Cast<TCoMember>().Name()); - auto newTuple = Build<TCoNameValueTuple>(ctx, node.Pos()) - .Name(tuple.Name()) - .Value<TCoMember>() - .Struct(convertArg) - .Name<TCoAtom>() - .Value(Join(".", leftLabelStr, columnName)) - .Build() - .Build() - .Done(); - - requiredMembers.emplace_back(std::move(newTuple)); - } - } else { - for (const auto& columnName: leftColumns) { - auto newTuple = Build<TCoNameValueTuple>(ctx, node.Pos()) - .Name(columnName) - .Value<TCoMember>() - .Struct(convertArg) - .Name<TCoAtom>() - .Value(Join(".", leftLabelStr, columnName.Value())) - .Build() - .Build() - .Done(); - - requiredMembers.emplace_back(std::move(newTuple)); - } - } - - auto convert = Build<TCoFlatMap>(ctx, node.Pos()) - .Input<TCoToStream>() - .Input(join) - .Build() - .Lambda() - .Args({convertArg}) - .Body<TCoJust>() - .Input<TCoAsStruct>() - .Add(requiredMembers) - .Build() - .Build() - .Build() - .Done(); - - return Build<TDqCnUnionAll>(ctx, node.Pos()) - .Output() - .Stage<TDqStage>() - .Inputs() - .Add(inputs) - .Build() - .Program() - .Args(args) - .Body(convert) - .Build() - .Settings() - .Build() - .Build() - .Index().Build("0") - .Build() - .Done(); -} - + +using namespace NYql; +using namespace NYql::NNodes; +using namespace NYql::NDq; + +TExprBase KqpRewriteSqlInCompactToJoin(const TExprBase& node, TExprContext& ctx) { + if (!node.Maybe<TCoFlatMap>()) { + return node; + } + + const auto flatMap = node.Cast<TCoFlatMap>(); + const auto lambdaBody = flatMap.Lambda().Body(); + + if (!lambdaBody.Maybe<TCoOptionalIf>()) { + return node; + } + + auto optionalIf = lambdaBody.Cast<TCoOptionalIf>(); + + auto sqlInPtr = FindNode( + lambdaBody.Ptr(), + [](const TExprNode::TPtr &x) { + return TCoSqlIn::Match(x.Get()); + } + ); + + if (!sqlInPtr) { + return node; + } + + auto sqlIn = TCoSqlIn(sqlInPtr); + + if (!HasSetting(sqlIn.Options().Ref(), "isCompact")) { + return node; + } + + // Check both inputs (Flatmap and SqlIn) are union all (rewrite to shuffle?) + auto leftInput = flatMap.Input(); + auto rightInput = sqlIn.Collection(); + + if (!leftInput.Maybe<TDqCnUnionAll>()) { + return node; + } + + if (!rightInput.Maybe<TDqCnUnionAll>()) { + return node; + } + + auto rightUnion = rightInput.Cast<TDqCnUnionAll>(); + auto leftUnion = leftInput.Cast<TDqCnUnionAll>(); + + auto extractColumns = [&ctx, &node](const TDqOutput& output) { + TVector<TCoAtom> renames; + + TExprBase connection = output.Stage().Program().Body(); + + if (connection.Maybe<TDqReplicate>()) { + auto replicate = connection.Cast<TDqReplicate>(); + auto index = FromString<uint32_t>(output.Index().Value()); + // Index 0 is replicate input, others - are lambdas. + connection = replicate.Args().Get(index + 1); + } + + const auto itemType = GetSeqItemType(connection.Ptr()->GetTypeAnn()); + YQL_ENSURE(itemType->GetKind() == ETypeAnnotationKind::Struct, + "Expected Struct, got " << itemType->GetKind() + ); + renames.reserve(itemType->Cast<TStructExprType>()->GetSize()); + + for (const auto& column : itemType->Cast<TStructExprType>()->GetItems()) { + renames.emplace_back( + Build<TCoAtom>(ctx, node.Pos()) + .Value(column->GetName()) + .Done() + ); + } + return renames; + }; + + auto rightColumns = extractColumns(rightUnion.Output()); + auto leftColumns = extractColumns(leftUnion.Output()); + + if (rightColumns.empty() || leftColumns.empty()) { + return node; + } + + // Extract right column for join + if (rightColumns.size() != 1) { + return node; + } + + auto rightColumn = rightColumns[0]; + + // Extract left column to join + if (!sqlIn.Lookup().Maybe<TCoMember>()) { + return node; + } + + auto leftMember = sqlIn.Lookup().Cast<TCoMember>(); + auto leftColumn = leftMember.Name(); + + TString rightLabelStr = "rightLabel"; + TString leftLabelStr = "leftLabel"; + + // Join (left semi) both incoming tables by SqlIn parameter + auto rightLabel = Build<TCoAtom>(ctx, node.Pos()) + .Value(rightLabelStr) + .Done().Ptr(); + auto leftLabel = Build<TCoAtom>(ctx, node.Pos()) + .Value(leftLabelStr) + .Done().Ptr(); + + auto joinKeys = Build<TDqJoinKeyTupleList>(ctx, node.Pos()) + .Add<TDqJoinKeyTuple>() + .LeftLabel(leftLabel) + .LeftColumn(leftColumn) + .RightLabel(rightLabel) + .RightColumn(rightColumn) + .Build() + .Done(); + + TCoArgument leftArg = Build<TCoArgument>(ctx, node.Pos()) + .Name("leftArg") + .Done(); + + TCoArgument rightArg = Build<TCoArgument>(ctx, node.Pos()) + .Name("rightArg") + .Done(); + + TVector<TCoArgument> args = {leftArg, rightArg}; + TVector<TExprBase> inputs = {leftInput, rightInput}; + + auto join = Build<TDqPhyMapJoin>(ctx, node.Pos()) + .LeftInput(leftArg) + .LeftLabel(leftLabel) + .RightInput(rightArg) + .RightLabel(rightLabel) + .JoinType<TCoAtom>() + .Value("LeftSemi") + .Build() + .JoinKeys(joinKeys) + .Done(); + + // Convert column names back, i.e. leftLabel.ColumnName -> ColumnName + TVector<TExprBase> requiredMembers; + TCoArgument convertArg = Build<TCoArgument>(ctx, node.Pos()) + .Name("convertArg") + .Done(); + + // If there is AsStruct in OptionalIf - get column names from it, otherwise get them from incoming stream + if (optionalIf.Value().Maybe<TCoAsStruct>()) { + for (const auto& item: optionalIf.Value().Cast<TCoAsStruct>().Ptr()->Children()) { + auto tuple = TCoNameValueTuple(item); + + if (!tuple.Value().Maybe<TCoMember>()) { + return node; + } + + auto columnName = TString(tuple.Value().Cast<TCoMember>().Name()); + auto newTuple = Build<TCoNameValueTuple>(ctx, node.Pos()) + .Name(tuple.Name()) + .Value<TCoMember>() + .Struct(convertArg) + .Name<TCoAtom>() + .Value(Join(".", leftLabelStr, columnName)) + .Build() + .Build() + .Done(); + + requiredMembers.emplace_back(std::move(newTuple)); + } + } else { + for (const auto& columnName: leftColumns) { + auto newTuple = Build<TCoNameValueTuple>(ctx, node.Pos()) + .Name(columnName) + .Value<TCoMember>() + .Struct(convertArg) + .Name<TCoAtom>() + .Value(Join(".", leftLabelStr, columnName.Value())) + .Build() + .Build() + .Done(); + + requiredMembers.emplace_back(std::move(newTuple)); + } + } + + auto convert = Build<TCoFlatMap>(ctx, node.Pos()) + .Input<TCoToStream>() + .Input(join) + .Build() + .Lambda() + .Args({convertArg}) + .Body<TCoJust>() + .Input<TCoAsStruct>() + .Add(requiredMembers) + .Build() + .Build() + .Build() + .Done(); + + return Build<TDqCnUnionAll>(ctx, node.Pos()) + .Output() + .Stage<TDqStage>() + .Inputs() + .Add(inputs) + .Build() + .Program() + .Args(args) + .Body(convert) + .Build() + .Settings() + .Build() + .Build() + .Index().Build("0") + .Build() + .Done(); +} + } // namespace NKikimr::NKqp::NOpt - + diff --git a/ydb/core/kqp/opt/logical/ya.make b/ydb/core/kqp/opt/logical/ya.make index 60ff9dcb9f6..b9811606c15 100644 --- a/ydb/core/kqp/opt/logical/ya.make +++ b/ydb/core/kqp/opt/logical/ya.make @@ -1,29 +1,29 @@ -LIBRARY() - -OWNER( - spuchin - g:kikimr -) - -SRCS( +LIBRARY() + +OWNER( + spuchin + g:kikimr +) + +SRCS( kqp_opt_log_effects.cpp - kqp_opt_log_extract.cpp + kqp_opt_log_extract.cpp kqp_opt_log_join.cpp kqp_opt_log_indexes.cpp - kqp_opt_log_ranges.cpp - kqp_opt_log_ranges_predext.cpp + kqp_opt_log_ranges.cpp + kqp_opt_log_ranges_predext.cpp kqp_opt_log_sqlin.cpp - kqp_opt_log_sqlin_compact.cpp + kqp_opt_log_sqlin_compact.cpp kqp_opt_log.cpp -) - -PEERDIR( +) + +PEERDIR( ydb/core/kqp/common ydb/library/yql/core/extract_predicate ydb/library/yql/dq/common ydb/library/yql/dq/opt -) - -YQL_LAST_ABI_VERSION() - -END() +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp b/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp index fddf388dbfd..55aca64df87 100644 --- a/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp +++ b/ydb/core/kqp/opt/peephole/kqp_opt_peephole.cpp @@ -1,9 +1,9 @@ -#include "kqp_opt_peephole_rules.h" - +#include "kqp_opt_peephole_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/library/naming_conventions/naming_conventions.h> - + #include <ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.h> #include <ydb/library/yql/core/yql_expr_optimize.h> #include <ydb/library/yql/core/yql_join.h> @@ -11,20 +11,20 @@ #include <ydb/library/yql/dq/opt/dq_opt_peephole.h> #include <ydb/library/yql/core/services/yql_transform_pipeline.h> #include <ydb/library/yql/providers/common/transform/yql_optimize.h> - -#include <util/generic/size_literals.h> -#include <util/string/cast.h> - + +#include <util/generic/size_literals.h> +#include <util/string/cast.h> + namespace NKikimr::NKqp::NOpt { - -namespace { - -using namespace NYql; -using namespace NYql::NDq; -using namespace NYql::NNodes; - -using TStatus = IGraphTransformer::TStatus; - + +namespace { + +using namespace NYql; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +using TStatus = IGraphTransformer::TStatus; + class TKqpPeepholeTransformer : public TOptimizeTransformerBase { public: TKqpPeepholeTransformer() @@ -39,126 +39,126 @@ public: AddHandler(0, TOptimizeTransformerBase::Any(), HNDL(BuildWideReadTable)); #undef HNDL } - + protected: TMaybeNode<TExprBase> RewriteReplicate(TExprBase node, TExprContext& ctx) { TExprBase output = DqPeepholeRewriteReplicate(node, ctx); DumpAppliedRule("RewriteReplicate", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteMapJoin(TExprBase node, TExprContext& ctx) { TExprBase output = DqPeepholeRewriteMapJoin(node, ctx); DumpAppliedRule("RewriteMapJoin", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteCrossJoin(TExprBase node, TExprContext& ctx) { TExprBase output = DqPeepholeRewriteCrossJoin(node, ctx); DumpAppliedRule("RewriteCrossJoin", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteDictJoin(TExprBase node, TExprContext& ctx) { TExprBase output = DqPeepholeRewriteJoinDict(node, ctx); DumpAppliedRule("RewriteDictJoin", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewritePureJoin(TExprBase node, TExprContext& ctx) { TExprBase output = DqPeepholeRewritePureJoin(node, ctx); DumpAppliedRule("RewritePureJoin", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildWideReadTable(TExprBase node, TExprContext& ctx) { TExprBase output = KqpBuildWideReadTable(node, ctx); DumpAppliedRule("BuildWideReadTable", node.Ptr(), output.Ptr(), ctx); return output; } }; - -struct TKqpPeepholePipelineConfigurator : IPipelineConfigurator { - TKqpPeepholePipelineConfigurator(TKikimrConfiguration::TPtr config) - : Config(config) - {} - - void AfterCreate(TTransformationPipeline*) const override { - } - - void AfterTypeAnnotation(TTransformationPipeline*) const override { - } - - void AfterOptimize(TTransformationPipeline* pipeline) const override { + +struct TKqpPeepholePipelineConfigurator : IPipelineConfigurator { + TKqpPeepholePipelineConfigurator(TKikimrConfiguration::TPtr config) + : Config(config) + {} + + void AfterCreate(TTransformationPipeline*) const override { + } + + void AfterTypeAnnotation(TTransformationPipeline*) const override { + } + + void AfterOptimize(TTransformationPipeline* pipeline) const override { pipeline->Add(new TKqpPeepholeTransformer(), "KqpPeephole"); - } - -private: - TKikimrConfiguration::TPtr Config; -}; - -TStatus PeepHoleOptimize(const TExprBase& program, TExprNode::TPtr& newProgram, TExprContext& ctx, + } + +private: + TKikimrConfiguration::TPtr Config; +}; + +TStatus PeepHoleOptimize(const TExprBase& program, TExprNode::TPtr& newProgram, TExprContext& ctx, IGraphTransformer& typeAnnTransformer, TTypeAnnotationContext& typesCtx, TKikimrConfiguration::TPtr config, bool allowNonDeterministicFunctions, bool withFinalStageRules) -{ - TKqpPeepholePipelineConfigurator kqpPeephole(config); +{ + TKqpPeepholePipelineConfigurator kqpPeephole(config); TPeepholeSettings peepholeSettings; peepholeSettings.CommonConfig = &kqpPeephole; peepholeSettings.WithFinalStageRules = withFinalStageRules; peepholeSettings.WithNonDeterministicRules = false; - - bool hasNonDeterministicFunctions; - auto status = PeepHoleOptimizeNode<true>(program.Ptr(), newProgram, ctx, typesCtx, &typeAnnTransformer, + + bool hasNonDeterministicFunctions; + auto status = PeepHoleOptimizeNode<true>(program.Ptr(), newProgram, ctx, typesCtx, &typeAnnTransformer, hasNonDeterministicFunctions, peepholeSettings); - if (status == TStatus::Error) { - return status; - } - + if (status == TStatus::Error) { + return status; + } + if (!allowNonDeterministicFunctions && hasNonDeterministicFunctions) { ctx.AddError(TIssue(ctx.GetPosition(program.Pos()), "Unexpected non-deterministic functions in KQP program")); - return TStatus::Error; - } - - return status; -} - -TMaybeNode<TKqpPhysicalTx> PeepholeOptimize(const TKqpPhysicalTx& tx, TExprContext& ctx, - IGraphTransformer& typeAnnTransformer, TTypeAnnotationContext& typesCtx, THashSet<ui64>& optimizedStages, + return TStatus::Error; + } + + return status; +} + +TMaybeNode<TKqpPhysicalTx> PeepholeOptimize(const TKqpPhysicalTx& tx, TExprContext& ctx, + IGraphTransformer& typeAnnTransformer, TTypeAnnotationContext& typesCtx, THashSet<ui64>& optimizedStages, TKikimrConfiguration::TPtr config, bool withFinalStageRules) -{ - TVector<TDqPhyStage> stages; - stages.reserve(tx.Stages().Size()); - TNodeOnNodeOwnedMap stagesMap; +{ + TVector<TDqPhyStage> stages; + stages.reserve(tx.Stages().Size()); + TNodeOnNodeOwnedMap stagesMap; TVector<TKqpParamBinding> bindings(tx.ParamBindings().begin(), tx.ParamBindings().end()); - - for (const auto& stage : tx.Stages()) { - YQL_ENSURE(!optimizedStages.contains(stage.Ref().UniqueId())); - - TVector<const TTypeAnnotationNode*> argTypes; - for (const auto& arg : stage.Program().Args()) { - YQL_ENSURE(arg.Ref().GetTypeAnn()); - argTypes.push_back(arg.Ref().GetTypeAnn()); - } - - // TODO: get rid of TKqpProgram-callable (https://st.yandex-team.ru/YQL-10078) - TNodeOnNodeOwnedMap tmp; - auto program = Build<TKqpProgram>(ctx, stage.Pos()) - //.Lambda(ctx.DeepCopy(stage.Program().Ref(), ctx, tmp, true /* internStrings */, false /* copyTypes */)) - .Lambda(stage.Program()) - .ArgsType(ExpandType(stage.Pos(), *ctx.MakeType<TTupleExprType>(argTypes), ctx)) - .Done(); - + + for (const auto& stage : tx.Stages()) { + YQL_ENSURE(!optimizedStages.contains(stage.Ref().UniqueId())); + + TVector<const TTypeAnnotationNode*> argTypes; + for (const auto& arg : stage.Program().Args()) { + YQL_ENSURE(arg.Ref().GetTypeAnn()); + argTypes.push_back(arg.Ref().GetTypeAnn()); + } + + // TODO: get rid of TKqpProgram-callable (https://st.yandex-team.ru/YQL-10078) + TNodeOnNodeOwnedMap tmp; + auto program = Build<TKqpProgram>(ctx, stage.Pos()) + //.Lambda(ctx.DeepCopy(stage.Program().Ref(), ctx, tmp, true /* internStrings */, false /* copyTypes */)) + .Lambda(stage.Program()) + .ArgsType(ExpandType(stage.Pos(), *ctx.MakeType<TTupleExprType>(argTypes), ctx)) + .Done(); + bool allowNonDeterministicFunctions = !stage.Program().Body().Maybe<TKqpEffects>(); - TExprNode::TPtr newProgram; + TExprNode::TPtr newProgram; auto status = PeepHoleOptimize(program, newProgram, ctx, typeAnnTransformer, typesCtx, config, allowNonDeterministicFunctions, withFinalStageRules); - if (status != TStatus::Ok) { - ctx.AddError(TIssue(ctx.GetPosition(stage.Pos()), "Peephole optimization failed for KQP transaction")); - return {}; - } - + if (status != TStatus::Ok) { + ctx.AddError(TIssue(ctx.GetPosition(stage.Pos()), "Peephole optimization failed for KQP transaction")); + return {}; + } + if (allowNonDeterministicFunctions) { status = ReplaceNonDetFunctionsWithParams(newProgram, ctx, &bindings); @@ -169,77 +169,77 @@ TMaybeNode<TKqpPhysicalTx> PeepholeOptimize(const TKqpPhysicalTx& tx, TExprConte } } - auto newStage = Build<TDqPhyStage>(ctx, stage.Pos()) - .Inputs(ctx.ReplaceNodes(stage.Inputs().Ptr(), stagesMap)) - .Program(ctx.DeepCopyLambda(TKqpProgram(newProgram).Lambda().Ref())) - .Settings(stage.Settings()) - .Done(); - - stages.emplace_back(newStage); - stagesMap.emplace(stage.Raw(), newStage.Ptr()); - - optimizedStages.emplace(stage.Ref().UniqueId()); - } - - return Build<TKqpPhysicalTx>(ctx, tx.Pos()) - .Stages() - .Add(stages) - .Build() - .Results(ctx.ReplaceNodes(tx.Results().Ptr(), stagesMap)) + auto newStage = Build<TDqPhyStage>(ctx, stage.Pos()) + .Inputs(ctx.ReplaceNodes(stage.Inputs().Ptr(), stagesMap)) + .Program(ctx.DeepCopyLambda(TKqpProgram(newProgram).Lambda().Ref())) + .Settings(stage.Settings()) + .Done(); + + stages.emplace_back(newStage); + stagesMap.emplace(stage.Raw(), newStage.Ptr()); + + optimizedStages.emplace(stage.Ref().UniqueId()); + } + + return Build<TKqpPhysicalTx>(ctx, tx.Pos()) + .Stages() + .Add(stages) + .Build() + .Results(ctx.ReplaceNodes(tx.Results().Ptr(), stagesMap)) .ParamBindings().Add(bindings).Build() - .Settings(tx.Settings()) - .Done(); -} - -class TKqpTxPeepholeTransformer : public TSyncTransformerBase { -public: - TKqpTxPeepholeTransformer(IGraphTransformer* typeAnnTransformer, + .Settings(tx.Settings()) + .Done(); +} + +class TKqpTxPeepholeTransformer : public TSyncTransformerBase { +public: + TKqpTxPeepholeTransformer(IGraphTransformer* typeAnnTransformer, TTypeAnnotationContext& typesCtx, TKikimrConfiguration::TPtr config, bool withFinalStageRules) - : TypeAnnTransformer(typeAnnTransformer) - , TypesCtx(typesCtx) - , Config(config) + : TypeAnnTransformer(typeAnnTransformer) + , TypesCtx(typesCtx) + , Config(config) , WithFinalStageRules(withFinalStageRules) - {} - - TStatus DoTransform(TExprNode::TPtr inputExpr, TExprNode::TPtr& outputExpr, TExprContext& ctx) final { - if (Optimized) { - YQL_CLOG(DEBUG, ProviderKqp) << ">>> TKqpTxPeepholeTransformer[skip]: " << KqpExprToPrettyString(*inputExpr, ctx); - outputExpr = inputExpr; - return TStatus::Ok; - } - - YQL_CLOG(DEBUG, ProviderKqp) << ">>> TKqpTxPeepholeTransformer: " << KqpExprToPrettyString(*inputExpr, ctx); - - TExprBase input(inputExpr); - YQL_ENSURE(input.Maybe<TKqpPhysicalTx>()); - - auto tx = input.Cast<TKqpPhysicalTx>(); - - THashSet<ui64> optimizedStages; + {} + + TStatus DoTransform(TExprNode::TPtr inputExpr, TExprNode::TPtr& outputExpr, TExprContext& ctx) final { + if (Optimized) { + YQL_CLOG(DEBUG, ProviderKqp) << ">>> TKqpTxPeepholeTransformer[skip]: " << KqpExprToPrettyString(*inputExpr, ctx); + outputExpr = inputExpr; + return TStatus::Ok; + } + + YQL_CLOG(DEBUG, ProviderKqp) << ">>> TKqpTxPeepholeTransformer: " << KqpExprToPrettyString(*inputExpr, ctx); + + TExprBase input(inputExpr); + YQL_ENSURE(input.Maybe<TKqpPhysicalTx>()); + + auto tx = input.Cast<TKqpPhysicalTx>(); + + THashSet<ui64> optimizedStages; auto optimizedTx = PeepholeOptimize(tx, ctx, *TypeAnnTransformer, TypesCtx, optimizedStages, Config, WithFinalStageRules); - - if (!optimizedTx) { - return TStatus::Error; - } - - outputExpr = optimizedTx.Cast().Ptr(); - Optimized = true; - - return TStatus(TStatus::Repeat, true); - } - - void Rewind() final { - Optimized = false; - } - -private: - IGraphTransformer* TypeAnnTransformer; - TTypeAnnotationContext& TypesCtx; - TKikimrConfiguration::TPtr Config; - bool Optimized = false; + + if (!optimizedTx) { + return TStatus::Error; + } + + outputExpr = optimizedTx.Cast().Ptr(); + Optimized = true; + + return TStatus(TStatus::Repeat, true); + } + + void Rewind() final { + Optimized = false; + } + +private: + IGraphTransformer* TypeAnnTransformer; + TTypeAnnotationContext& TypesCtx; + TKikimrConfiguration::TPtr Config; + bool Optimized = false; bool WithFinalStageRules = true; -}; - +}; + class TKqpTxsPeepholeTransformer : public TSyncTransformerBase { public: TKqpTxsPeepholeTransformer(TAutoPtr<NYql::IGraphTransformer> typeAnnTransformer, @@ -311,8 +311,8 @@ private: TAutoPtr<NYql::IGraphTransformer> TypeAnnTransformer; }; -} // anonymous namespace - +} // anonymous namespace + TAutoPtr<IGraphTransformer> CreateKqpTxPeepholeTransformer(NYql::IGraphTransformer* typeAnnTransformer, TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config, bool withFinalStageRules) { @@ -320,11 +320,11 @@ TAutoPtr<IGraphTransformer> CreateKqpTxPeepholeTransformer(NYql::IGraphTransform } TAutoPtr<IGraphTransformer> CreateKqpTxsPeepholeTransformer(TAutoPtr<NYql::IGraphTransformer> typeAnnTransformer, - TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config) -{ + TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config) +{ return new TKqpTxsPeepholeTransformer(std::move(typeAnnTransformer), typesCtx, config); -} - +} + TStatus ReplaceNonDetFunctionsWithParams(TExprNode::TPtr& input, TExprContext& ctx, TVector<TKqpParamBinding>* paramBindings) { static const std::unordered_set<std::string_view> nonDeterministicFunctions = { "RandomNumber", diff --git a/ydb/core/kqp/opt/peephole/kqp_opt_peephole.h b/ydb/core/kqp/opt/peephole/kqp_opt_peephole.h index 46f341db630..a0602bb07da 100644 --- a/ydb/core/kqp/opt/peephole/kqp_opt_peephole.h +++ b/ydb/core/kqp/opt/peephole/kqp_opt_peephole.h @@ -1,16 +1,16 @@ -#pragma once - +#pragma once + #include <ydb/core/kqp/common/kqp_transform.h> #include <ydb/core/kqp/opt/kqp_opt.h> - + namespace NKikimr::NKqp::NOpt { - -TAutoPtr<NYql::IGraphTransformer> CreateKqpTxPeepholeTransformer(NYql::IGraphTransformer* typeAnnTransformer, + +TAutoPtr<NYql::IGraphTransformer> CreateKqpTxPeepholeTransformer(NYql::IGraphTransformer* typeAnnTransformer, NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config, bool withFinalStageRules = true); TAutoPtr<NYql::IGraphTransformer> CreateKqpTxsPeepholeTransformer(TAutoPtr<NYql::IGraphTransformer> typeAnnTransformer, - NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config); - + NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config); + NYql::IGraphTransformer::TStatus ReplaceNonDetFunctionsWithParams(NYql::TExprNode::TPtr& input, NYql::TExprContext& ctx, TVector<NYql::NNodes::TKqpParamBinding>* paramBindings = nullptr); diff --git a/ydb/core/kqp/opt/peephole/kqp_opt_peephole_rules.h b/ydb/core/kqp/opt/peephole/kqp_opt_peephole_rules.h index b53086f0d08..489a2afe1e4 100644 --- a/ydb/core/kqp/opt/peephole/kqp_opt_peephole_rules.h +++ b/ydb/core/kqp/opt/peephole/kqp_opt_peephole_rules.h @@ -1,16 +1,16 @@ -#pragma once - +#pragma once + #include <ydb/core/kqp/opt/kqp_opt.h> #include <ydb/core/kqp/provider/yql_kikimr_expr_nodes.h> - + #include <ydb/library/yql/ast/yql_expr.h> - -/* - * This file contains declaration of all rule functions for peephole optimizer - */ - + +/* + * This file contains declaration of all rule functions for peephole optimizer + */ + namespace NKikimr::NKqp::NOpt { - -NYql::NNodes::TExprBase KqpBuildWideReadTable(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx); - + +NYql::NNodes::TExprBase KqpBuildWideReadTable(const NYql::NNodes::TExprBase& node, NYql::TExprContext& ctx); + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp b/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp index 711f726a232..75d76930bdd 100644 --- a/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp +++ b/ydb/core/kqp/opt/peephole/kqp_opt_peephole_wide_read.cpp @@ -1,84 +1,84 @@ -#include "kqp_opt_peephole_rules.h" - +#include "kqp_opt_peephole_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NDq; -using namespace NYql::NNodes; - -TExprBase KqpBuildWideReadTable(const TExprBase& node, TExprContext& ctx) { - if (!node.Maybe<TKqpReadTable>() && - !node.Maybe<TKqpReadOlapTableRanges>() && - !node.Maybe<TKqpReadTableRanges>()) - { - return node; - } - - auto rowType = node.Ref().GetTypeAnn()->Cast<TFlowExprType>()->GetItemType()->Cast<TStructExprType>(); - - TVector<TCoArgument> args; - args.reserve(rowType->GetSize()); - for (ui32 i = 0; i < rowType->GetSize(); ++i) { - args.push_back(TCoArgument(ctx.NewArgument(node.Pos(), "arg"))); - } - - TVector<TExprBase> structItems; - structItems.reserve(args.size()); - for (ui32 i = 0; i < args.size(); ++i) { - structItems.emplace_back( - Build<TCoNameValueTuple>(ctx, node.Pos()) - .Name().Build(rowType->GetItems()[i]->GetName()) - .Value(args[i]) - .Done()); - } - - TMaybeNode<TExprBase> wideRead; - if (auto maybeRead = node.Maybe<TKqpReadTable>()) { - auto read = maybeRead.Cast(); - - wideRead = Build<TKqpWideReadTable>(ctx, node.Pos()) - .Table(read.Table()) - .Range(read.Range()) - .Columns(read.Columns()) - .Settings(read.Settings()) - .Done(); - } else if (auto maybeRead = node.Maybe<TKqpReadTableRanges>()) { - auto read = maybeRead.Cast(); - - wideRead = Build<TKqpWideReadTableRanges>(ctx, node.Pos()) - .Table(read.Table()) - .Ranges(read.Ranges()) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Done(); - } else if (auto maybeRead = node.Maybe<TKqpReadOlapTableRanges>()) { - auto read = maybeRead.Cast(); - - wideRead = Build<TKqpWideReadOlapTableRanges>(ctx, node.Pos()) - .Table(read.Table()) - .Ranges(read.Ranges()) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Process(read.Process()) - .Done(); - } else { - YQL_ENSURE(false, "Unknown read table operation: " << node.Ptr()->Content()); - } - - return Build<TCoNarrowMap>(ctx, node.Pos()) - .Input(wideRead.Cast()) - .Lambda() - .Args(args) - .Body<TCoAsStruct>() - .Add(structItems) - .Build() - .Build() - .Done(); -} - + +using namespace NYql; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +TExprBase KqpBuildWideReadTable(const TExprBase& node, TExprContext& ctx) { + if (!node.Maybe<TKqpReadTable>() && + !node.Maybe<TKqpReadOlapTableRanges>() && + !node.Maybe<TKqpReadTableRanges>()) + { + return node; + } + + auto rowType = node.Ref().GetTypeAnn()->Cast<TFlowExprType>()->GetItemType()->Cast<TStructExprType>(); + + TVector<TCoArgument> args; + args.reserve(rowType->GetSize()); + for (ui32 i = 0; i < rowType->GetSize(); ++i) { + args.push_back(TCoArgument(ctx.NewArgument(node.Pos(), "arg"))); + } + + TVector<TExprBase> structItems; + structItems.reserve(args.size()); + for (ui32 i = 0; i < args.size(); ++i) { + structItems.emplace_back( + Build<TCoNameValueTuple>(ctx, node.Pos()) + .Name().Build(rowType->GetItems()[i]->GetName()) + .Value(args[i]) + .Done()); + } + + TMaybeNode<TExprBase> wideRead; + if (auto maybeRead = node.Maybe<TKqpReadTable>()) { + auto read = maybeRead.Cast(); + + wideRead = Build<TKqpWideReadTable>(ctx, node.Pos()) + .Table(read.Table()) + .Range(read.Range()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .Done(); + } else if (auto maybeRead = node.Maybe<TKqpReadTableRanges>()) { + auto read = maybeRead.Cast(); + + wideRead = Build<TKqpWideReadTableRanges>(ctx, node.Pos()) + .Table(read.Table()) + .Ranges(read.Ranges()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Done(); + } else if (auto maybeRead = node.Maybe<TKqpReadOlapTableRanges>()) { + auto read = maybeRead.Cast(); + + wideRead = Build<TKqpWideReadOlapTableRanges>(ctx, node.Pos()) + .Table(read.Table()) + .Ranges(read.Ranges()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Process(read.Process()) + .Done(); + } else { + YQL_ENSURE(false, "Unknown read table operation: " << node.Ptr()->Content()); + } + + return Build<TCoNarrowMap>(ctx, node.Pos()) + .Input(wideRead.Cast()) + .Lambda() + .Args(args) + .Body<TCoAsStruct>() + .Add(structItems) + .Build() + .Build() + .Done(); +} + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/peephole/ya.make b/ydb/core/kqp/opt/peephole/ya.make index 12957bd2c4b..b6608c6462a 100644 --- a/ydb/core/kqp/opt/peephole/ya.make +++ b/ydb/core/kqp/opt/peephole/ya.make @@ -1,21 +1,21 @@ -LIBRARY() - -OWNER( - spuchin - g:kikimr -) - -SRCS( - kqp_opt_peephole_wide_read.cpp +LIBRARY() + +OWNER( + spuchin + g:kikimr +) + +SRCS( + kqp_opt_peephole_wide_read.cpp kqp_opt_peephole.cpp -) - -PEERDIR( +) + +PEERDIR( ydb/core/kqp/common ydb/library/naming_conventions ydb/library/yql/dq/opt -) - -YQL_LAST_ABI_VERSION() - -END() +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp index 9b6445cbda7..11e1af7ae87 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp @@ -1,23 +1,23 @@ -#include "kqp_opt_phy_rules.h" - +#include "kqp_opt_phy_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/opt/physical/effects/kqp_opt_phy_effects_rules.h> - + #include <ydb/library/yql/core/yql_expr_optimize.h> #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/dq/opt/dq_opt.h> #include <ydb/library/yql/dq/opt/dq_opt_phy.h> #include <ydb/library/yql/providers/common/transform/yql_optimize.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NDq; -using namespace NYql::NNodes; - -using TStatus = IGraphTransformer::TStatus; - + +using namespace NYql; +using namespace NYql::NDq; +using namespace NYql::NNodes; + +using TStatus = IGraphTransformer::TStatus; + class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { public: TKqpPhysicalOptTransformer(TTypeAnnotationContext& typesCtx, const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx) @@ -58,9 +58,9 @@ public: AddHandler(0, &TKqlInsertRowsIndex::Match, HNDL(BuildInsertIndexStages)); AddHandler(0, &TKqlDeleteRowsIndex::Match, HNDL(BuildDeleteIndexStages)); AddHandler(0, &TDqStage::Match, HNDL(FloatUpStage)); - AddHandler(0, &TCoHasItems::Match, HNDL(BuildHasItems)); - AddHandler(0, &TCoToOptional::Match, HNDL(BuildScalarPrecompute)); - + AddHandler(0, &TCoHasItems::Match, HNDL(BuildHasItems)); + AddHandler(0, &TCoToOptional::Match, HNDL(BuildScalarPrecompute)); + AddHandler(1, &TCoSkipNullMembers::Match, HNDL(PushSkipNullMembersToStage<true>)); AddHandler(1, &TCoExtractMembers::Match, HNDL(PushExtractMembersToStage<true>)); AddHandler(1, &TCoFlatMapBase::Match, HNDL(BuildFlatmapStage<true>)); @@ -76,44 +76,44 @@ public: #undef HNDL SetGlobal(1u); } - + protected: TMaybeNode<TExprBase> BuildReadTableStage(TExprBase node, TExprContext& ctx) { TExprBase output = KqpBuildReadTableStage(node, ctx, KqpCtx); DumpAppliedRule("BuildReadTableStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildReadTableRangesStage(TExprBase node, TExprContext& ctx) { TExprBase output = KqpBuildReadTableRangesStage(node, ctx, KqpCtx); DumpAppliedRule("BuildReadTableRangesStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildLookupTableStage(TExprBase node, TExprContext& ctx) { TExprBase output = KqpBuildLookupTableStage(node, ctx); DumpAppliedRule("BuildLookupTableStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RemoveRedundantSortByPk(TExprBase node, TExprContext& ctx) { TExprBase output = KqpRemoveRedundantSortByPk(node, ctx, KqpCtx); DumpAppliedRule("RemoveRedundantSortByPk", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> ApplyLimitToReadTable(TExprBase node, TExprContext& ctx) { TExprBase output = KqpApplyLimitToReadTable(node, ctx, KqpCtx); DumpAppliedRule("ApplyLimitToReadTable", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> PushOlapFilter(TExprBase node, TExprContext& ctx) { TExprBase output = KqpPushOlapFilter(node, ctx, KqpCtx, TypesCtx); DumpAppliedRule("PushOlapFilter", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> PushSkipNullMembersToStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -122,7 +122,7 @@ protected: DumpAppliedRule("PushSkipNullMembersToStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> PushExtractMembersToStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -131,7 +131,7 @@ protected: DumpAppliedRule("PushExtractMembersToStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> BuildFlatmapStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -140,7 +140,7 @@ protected: DumpAppliedRule("BuildFlatmapStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> PushCombineToStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -149,19 +149,19 @@ protected: DumpAppliedRule("PushCombineToStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildPartitionsStage(TExprBase node, TExprContext& ctx, const TGetParents& getParents) { TExprBase output = DqBuildPartitionsStage(node, ctx, *getParents()); DumpAppliedRule("BuildPartitionsStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildPartitionStage(TExprBase node, TExprContext& ctx, const TGetParents& getParents) { TExprBase output = DqBuildPartitionStage(node, ctx, *getParents()); DumpAppliedRule("BuildPartitionStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> BuildTopSortStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -170,7 +170,7 @@ protected: DumpAppliedRule("BuildTopSortStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> BuildTakeSkipStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -179,7 +179,7 @@ protected: DumpAppliedRule("BuildTakeSkipStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> BuildSortStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -188,7 +188,7 @@ protected: DumpAppliedRule("BuildSortStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> BuildTakeStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -203,13 +203,13 @@ protected: DumpAppliedRule("RewriteLengthOfStageOutput", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildExtendStage(TExprBase node, TExprContext& ctx) { TExprBase output = DqBuildExtendStage(node, ctx); DumpAppliedRule("BuildExtendStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> RewriteRightJoinToLeft(TExprBase node, TExprContext& ctx) { TExprBase output = DqRewriteRightJoinToLeft(node, ctx); DumpAppliedRule("RewriteRightJoinToLeft", node.Ptr(), output.Ptr(), ctx); @@ -224,7 +224,7 @@ protected: DumpAppliedRule("PushJoinToStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> BuildJoin(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -248,7 +248,7 @@ protected: DumpAppliedRule("PushLMapToStage", node.Ptr(), output.Ptr(), ctx); return output; } - + template <bool IsGlobal> TMaybeNode<TExprBase> PushOrderedLMapToStage(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TGetParents& getParents) @@ -263,13 +263,13 @@ protected: DumpAppliedRule("BuildInsertStages", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildUpdateStages(TExprBase node, TExprContext& ctx) { TExprBase output = KqpBuildUpdateStages(node, ctx, KqpCtx); DumpAppliedRule("BuildUpdateStages", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildUpdateIndexStages(TExprBase node, TExprContext& ctx) { TExprBase output = KqpBuildUpdateIndexStages(node, ctx, KqpCtx); DumpAppliedRule("BuildUpdateIndexStages", node.Ptr(), output.Ptr(), ctx); @@ -299,28 +299,28 @@ protected: DumpAppliedRule("FloatUpStage", node.Ptr(), output.Ptr(), ctx); return output; } - + TMaybeNode<TExprBase> BuildHasItems(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx) { TExprBase output = DqBuildHasItems(node, ctx, optCtx); - DumpAppliedRule("DqBuildHasItems", node.Ptr(), output.Ptr(), ctx); - return output; - } - + DumpAppliedRule("DqBuildHasItems", node.Ptr(), output.Ptr(), ctx); + return output; + } + TMaybeNode<TExprBase> BuildScalarPrecompute(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx) { TExprBase output = DqBuildScalarPrecompute(node, ctx, optCtx); - DumpAppliedRule("BuildScalarPrecompute", node.Ptr(), output.Ptr(), ctx); - return output; - } - + DumpAppliedRule("BuildScalarPrecompute", node.Ptr(), output.Ptr(), ctx); + return output; + } + private: TTypeAnnotationContext& TypesCtx; const TKqpOptimizeContext& KqpCtx; }; - + TAutoPtr<IGraphTransformer> CreateKqpPhyOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, NYql::TTypeAnnotationContext& typesCtx) { return THolder<IGraphTransformer>(new TKqpPhysicalOptTransformer(typesCtx, kqpCtx)); -} - +} + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.h b/ydb/core/kqp/opt/physical/kqp_opt_phy.h index be6ceb969d2..06d4d188267 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.h @@ -1,13 +1,13 @@ -#pragma once - +#pragma once + #include <ydb/core/kqp/common/kqp_transform.h> #include <ydb/core/kqp/opt/kqp_opt.h> - + namespace NKikimr::NKqp::NOpt { - -struct TKqpOptimizeContext; - -TAutoPtr<NYql::IGraphTransformer> CreateKqpPhyOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, - NYql::TTypeAnnotationContext& typesCtx); - + +struct TKqpOptimizeContext; + +TAutoPtr<NYql::IGraphTransformer> CreateKqpPhyOptTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, + NYql::TTypeAnnotationContext& typesCtx); + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp index d5160059fb1..f49aff031f1 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp @@ -1,22 +1,22 @@ -#include "kqp_opt_phy_rules.h" - +#include "kqp_opt_phy_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> #include <ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h> #include <ydb/core/kqp/provider/kqp_opt_helpers.h> #include <ydb/core/tx/schemeshard/schemeshard_utils.h> - + #include <ydb/public/lib/scheme_types/scheme_type_id.h> #include <ydb/library/yql/dq/opt/dq_opt.h> #include <ydb/library/yql/core/yql_opt_utils.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NDq; -using namespace NYql::NNodes; - + +using namespace NYql; +using namespace NYql::NDq; +using namespace NYql::NNodes; + TMaybeNode<TDqPhyPrecompute> BuildLookupKeysPrecompute(const TExprBase& input, TExprContext& ctx) { TMaybeNode<TDqConnection> precomputeInput; @@ -60,224 +60,224 @@ TExprBase KqpBuildReadTableStage(TExprBase node, TExprContext& ctx, const TKqpOp return node; } const TKqlReadTable& read = node.Cast<TKqlReadTable>(); - - TVector<TExprBase> values; - TNodeOnNodeOwnedMap replaceMap; - + + TVector<TExprBase> values; + TNodeOnNodeOwnedMap replaceMap; + auto checkRange = [&values](const TVarArgCallable<TExprBase>& tuple) { - for (const auto& value : tuple) { - if (!IsDqPureExpr(value)) { - return false; - } - - if (!value.Maybe<TCoParameter>()) { - values.push_back(value); - } - } - - return true; - }; - - if (!checkRange(read.Range().From())) { - return read; - } - - if (!checkRange(read.Range().To())) { - return read; - } - - TVector<TExprBase> inputs; - TVector<TCoArgument> programArgs; - TNodeOnNodeOwnedMap rangeReplaces; - if (!values.empty()) { - auto computeStage = Build<TDqStage>(ctx, read.Pos()) - .Inputs() - .Build() - .Program() - .Args({}) + for (const auto& value : tuple) { + if (!IsDqPureExpr(value)) { + return false; + } + + if (!value.Maybe<TCoParameter>()) { + values.push_back(value); + } + } + + return true; + }; + + if (!checkRange(read.Range().From())) { + return read; + } + + if (!checkRange(read.Range().To())) { + return read; + } + + TVector<TExprBase> inputs; + TVector<TCoArgument> programArgs; + TNodeOnNodeOwnedMap rangeReplaces; + if (!values.empty()) { + auto computeStage = Build<TDqStage>(ctx, read.Pos()) + .Inputs() + .Build() + .Program() + .Args({}) .Body<TCoToStream>() - .Input<TCoJust>() - .Input<TExprList>() - .Add(values) - .Build() - .Build() - .Build() - .Build() - .Settings().Build() - .Done(); - - auto precompute = Build<TDqPhyPrecompute>(ctx, read.Pos()) - .Connection<TDqCnValue>() - .Output() - .Stage(computeStage) - .Index().Build("0") - .Build() - .Build() - .Done(); - - TCoArgument arg{ctx.NewArgument(read.Pos(), TStringBuilder() << "_kqp_pc_arg_0")}; - programArgs.push_back(arg); - inputs.push_back(precompute); - - for (size_t i = 0; i < values.size(); ++i) { - auto replace = Build<TCoNth>(ctx, read.Pos()) - .Tuple(arg) - .Index().Build(ToString(i)) - .Done() - .Ptr(); - - rangeReplaces[values[i].Raw()] = replace; - } - } - - auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); - - TMaybeNode<TExprBase> phyRead; - switch (tableDesc.Metadata->Kind) { - case EKikimrTableKind::Datashard: - case EKikimrTableKind::SysView: - phyRead = Build<TKqpReadTable>(ctx, read.Pos()) - .Table(read.Table()) - .Range(ctx.ReplaceNodes(read.Range().Ptr(), rangeReplaces)) - .Columns(read.Columns()) - .Settings(read.Settings()) - .Done(); - break; - - default: - YQL_ENSURE(false, "Unexpected table kind: " << (ui32)tableDesc.Metadata->Kind); - break; - } - - auto stage = Build<TDqStage>(ctx, read.Pos()) - .Inputs() - .Add(inputs) - .Build() - .Program() - .Args(programArgs) - .Body(phyRead.Cast()) - .Build() - .Settings().Build() - .Done(); - - return Build<TDqCnUnionAll>(ctx, read.Pos()) - .Output() - .Stage(stage) - .Index().Build("0") - .Build() - .Done(); -} - + .Input<TCoJust>() + .Input<TExprList>() + .Add(values) + .Build() + .Build() + .Build() + .Build() + .Settings().Build() + .Done(); + + auto precompute = Build<TDqPhyPrecompute>(ctx, read.Pos()) + .Connection<TDqCnValue>() + .Output() + .Stage(computeStage) + .Index().Build("0") + .Build() + .Build() + .Done(); + + TCoArgument arg{ctx.NewArgument(read.Pos(), TStringBuilder() << "_kqp_pc_arg_0")}; + programArgs.push_back(arg); + inputs.push_back(precompute); + + for (size_t i = 0; i < values.size(); ++i) { + auto replace = Build<TCoNth>(ctx, read.Pos()) + .Tuple(arg) + .Index().Build(ToString(i)) + .Done() + .Ptr(); + + rangeReplaces[values[i].Raw()] = replace; + } + } + + auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); + + TMaybeNode<TExprBase> phyRead; + switch (tableDesc.Metadata->Kind) { + case EKikimrTableKind::Datashard: + case EKikimrTableKind::SysView: + phyRead = Build<TKqpReadTable>(ctx, read.Pos()) + .Table(read.Table()) + .Range(ctx.ReplaceNodes(read.Range().Ptr(), rangeReplaces)) + .Columns(read.Columns()) + .Settings(read.Settings()) + .Done(); + break; + + default: + YQL_ENSURE(false, "Unexpected table kind: " << (ui32)tableDesc.Metadata->Kind); + break; + } + + auto stage = Build<TDqStage>(ctx, read.Pos()) + .Inputs() + .Add(inputs) + .Build() + .Program() + .Args(programArgs) + .Body(phyRead.Cast()) + .Build() + .Settings().Build() + .Done(); + + return Build<TDqCnUnionAll>(ctx, read.Pos()) + .Output() + .Stage(stage) + .Index().Build("0") + .Build() + .Done(); +} + TExprBase KqpBuildReadTableRangesStage(TExprBase node, TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx) -{ + const TKqpOptimizeContext& kqpCtx) +{ if (!node.Maybe<TKqlReadTableRanges>()) { return node; } const TKqlReadTableRanges& read = node.Cast<TKqlReadTableRanges>(); - auto ranges = read.Ranges(); - auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); - - if (!IsDqPureExpr(ranges)) { - return read; - } - - bool fullScan = TCoVoid::Match(ranges.Raw()); - - TVector<TExprBase> input; - TMaybeNode<TExprBase> argument; - TVector<TCoArgument> programArgs; - - if (!fullScan) { - auto computeStage = Build<TDqStage>(ctx, read.Pos()) - .Inputs() - .Build() - .Program() - .Args({}) + auto ranges = read.Ranges(); + auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, read.Table().Path()); + + if (!IsDqPureExpr(ranges)) { + return read; + } + + bool fullScan = TCoVoid::Match(ranges.Raw()); + + TVector<TExprBase> input; + TMaybeNode<TExprBase> argument; + TVector<TCoArgument> programArgs; + + if (!fullScan) { + auto computeStage = Build<TDqStage>(ctx, read.Pos()) + .Inputs() + .Build() + .Program() + .Args({}) .Body<TCoToStream>() - .Input<TCoJust>() - .Input<TExprList>() - .Add(ranges) - .Build() - .Build() - .Build() - .Build() - .Settings() - .Build() - .Done(); - - auto precompute = Build<TDqPhyPrecompute>(ctx, read.Pos()) - .Connection<TDqCnValue>() - .Output() - .Stage(computeStage) - .Index().Build("0") - .Build() - .Build() - .Done(); - - argument = Build<TCoArgument>(ctx, read.Pos()) - .Name("_kqp_pc_ranges_arg_0") - .Done(); - - input.push_back(precompute); - programArgs.push_back(argument.Cast<TCoArgument>()); - } else { - argument = read.Ranges(); - } - - TMaybeNode<TExprBase> phyRead; - - switch (tableDesc.Metadata->Kind) { - case EKikimrTableKind::Datashard: - case EKikimrTableKind::SysView: - phyRead = Build<TKqpReadTableRanges>(ctx, read.Pos()) - .Table(read.Table()) - .Ranges(argument.Cast()) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Done(); - break; - - case EKikimrTableKind::Olap: - phyRead = Build<TKqpReadOlapTableRanges>(ctx, read.Pos()) - .Table(read.Table()) - .Ranges(argument.Cast()) - .Columns(read.Columns()) - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Process() - .Args({"row"}) - .Body("row") - .Build() - .Done(); - break; - - default: - YQL_ENSURE(false, "Unexpected table kind: " << (ui32)tableDesc.Metadata->Kind); - break; - } - - auto stage = Build<TDqStage>(ctx, read.Pos()) - .Inputs() - .Add(input) - .Build() - .Program() - .Args(programArgs) - .Body(phyRead.Cast()) - .Build() - .Settings().Build() - .Done(); - - return Build<TDqCnUnionAll>(ctx, read.Pos()) - .Output() - .Stage(stage) - .Index().Build("0") - .Build() - .Done(); -} - + .Input<TCoJust>() + .Input<TExprList>() + .Add(ranges) + .Build() + .Build() + .Build() + .Build() + .Settings() + .Build() + .Done(); + + auto precompute = Build<TDqPhyPrecompute>(ctx, read.Pos()) + .Connection<TDqCnValue>() + .Output() + .Stage(computeStage) + .Index().Build("0") + .Build() + .Build() + .Done(); + + argument = Build<TCoArgument>(ctx, read.Pos()) + .Name("_kqp_pc_ranges_arg_0") + .Done(); + + input.push_back(precompute); + programArgs.push_back(argument.Cast<TCoArgument>()); + } else { + argument = read.Ranges(); + } + + TMaybeNode<TExprBase> phyRead; + + switch (tableDesc.Metadata->Kind) { + case EKikimrTableKind::Datashard: + case EKikimrTableKind::SysView: + phyRead = Build<TKqpReadTableRanges>(ctx, read.Pos()) + .Table(read.Table()) + .Ranges(argument.Cast()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Done(); + break; + + case EKikimrTableKind::Olap: + phyRead = Build<TKqpReadOlapTableRanges>(ctx, read.Pos()) + .Table(read.Table()) + .Ranges(argument.Cast()) + .Columns(read.Columns()) + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Process() + .Args({"row"}) + .Body("row") + .Build() + .Done(); + break; + + default: + YQL_ENSURE(false, "Unexpected table kind: " << (ui32)tableDesc.Metadata->Kind); + break; + } + + auto stage = Build<TDqStage>(ctx, read.Pos()) + .Inputs() + .Add(input) + .Build() + .Program() + .Args(programArgs) + .Body(phyRead.Cast()) + .Build() + .Settings().Build() + .Done(); + + return Build<TDqCnUnionAll>(ctx, read.Pos()) + .Output() + .Stage(stage) + .Index().Build("0") + .Build() + .Done(); +} + bool RequireLookupPrecomputeStage(const TKqlLookupTable& lookup) { if (!lookup.LookupKeys().Maybe<TCoAsList>()) { return true; @@ -334,7 +334,7 @@ TExprBase KqpBuildLookupTableStage(TExprBase node, TExprContext& ctx) { .Build() .Columns(lookup.Columns()) .Build() - .Build() + .Build() .Settings().Build() .Done(); } else { @@ -342,7 +342,7 @@ TExprBase KqpBuildLookupTableStage(TExprBase node, TExprContext& ctx) { if (!precompute) { return node; } - + stage = Build<TDqStage>(ctx, lookup.Pos()) .Inputs() .Add(precompute.Cast()) @@ -362,11 +362,11 @@ TExprBase KqpBuildLookupTableStage(TExprBase node, TExprContext& ctx) { } return Build<TDqCnUnionAll>(ctx, lookup.Pos()) - .Output() + .Output() .Stage(stage.Cast()) - .Index().Build("0") - .Build() - .Done(); -} + .Index().Build("0") + .Build() + .Done(); +} } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp index 296f0353db8..bdb7029c64b 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp @@ -1,10 +1,10 @@ #include <ydb/core/kqp/common/kqp_yql.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NNodes; - + +using namespace NYql; +using namespace NYql::NNodes; + namespace { template <typename TContainer> @@ -22,60 +22,60 @@ TCoAtomList BuildColumnsListImpl(const TContainer& columns, TPositionHandle pos, } // namespace -TExprBase BuildReadNode(TPositionHandle pos, TExprContext& ctx, TExprBase input, TKqpReadTableSettings& settings) { - TCoNameValueTupleList settingsNode = settings.BuildNode(ctx, pos); - - if (input.Maybe<TKqpReadTable>().IsValid()) { - auto dataReadTable = input.Cast<TKqpReadTable>(); - - return Build<TKqpReadTable>(ctx, pos) - .Table(dataReadTable.Table()) - .Range(dataReadTable.Range()) - .Columns(dataReadTable.Columns()) - .Settings(settingsNode) - .Done(); - } else if (input.Maybe<TKqpReadTableRanges>().IsValid()) { - auto readTableRanges = input.Cast<TKqpReadTableRanges>(); - - return Build<TKqpReadTableRanges>(ctx, pos) - .Table(readTableRanges.Table()) - .Ranges(readTableRanges.Ranges()) - .Columns(readTableRanges.Columns()) - .Settings(settingsNode) - .ExplainPrompt(readTableRanges.ExplainPrompt()) - .Done(); - } else if (input.Maybe<TKqpReadOlapTableRanges>().IsValid()) { - auto olapReadTable = input.Cast<TKqpReadOlapTableRanges>(); - - return Build<TKqpReadOlapTableRanges>(ctx, pos) - .Table(olapReadTable.Table()) - .Ranges(olapReadTable.Ranges()) - .Columns(olapReadTable.Columns()) - .Settings(settingsNode) - .ExplainPrompt(olapReadTable.ExplainPrompt()) - .Process(olapReadTable.Process()) - .Done(); - } - - YQL_ENSURE(false, "Unknown read table operation: " << input.Ptr()->Content()); -} - -TCoAtom GetReadTablePath(TExprBase input, bool isReadRanges) { - if (isReadRanges) { - return input.Cast<TKqlReadTableRangesBase>().Table().Path(); - } - - return input.Cast<TKqpReadTable>().Table().Path(); -} - -TKqpReadTableSettings GetReadTableSettings(TExprBase input, bool isReadRanges) { - if (isReadRanges) { - return TKqpReadTableSettings::Parse(input.Cast<TKqlReadTableRangesBase>()); - } - - return TKqpReadTableSettings::Parse(input.Cast<TKqpReadTable>()); -}; - +TExprBase BuildReadNode(TPositionHandle pos, TExprContext& ctx, TExprBase input, TKqpReadTableSettings& settings) { + TCoNameValueTupleList settingsNode = settings.BuildNode(ctx, pos); + + if (input.Maybe<TKqpReadTable>().IsValid()) { + auto dataReadTable = input.Cast<TKqpReadTable>(); + + return Build<TKqpReadTable>(ctx, pos) + .Table(dataReadTable.Table()) + .Range(dataReadTable.Range()) + .Columns(dataReadTable.Columns()) + .Settings(settingsNode) + .Done(); + } else if (input.Maybe<TKqpReadTableRanges>().IsValid()) { + auto readTableRanges = input.Cast<TKqpReadTableRanges>(); + + return Build<TKqpReadTableRanges>(ctx, pos) + .Table(readTableRanges.Table()) + .Ranges(readTableRanges.Ranges()) + .Columns(readTableRanges.Columns()) + .Settings(settingsNode) + .ExplainPrompt(readTableRanges.ExplainPrompt()) + .Done(); + } else if (input.Maybe<TKqpReadOlapTableRanges>().IsValid()) { + auto olapReadTable = input.Cast<TKqpReadOlapTableRanges>(); + + return Build<TKqpReadOlapTableRanges>(ctx, pos) + .Table(olapReadTable.Table()) + .Ranges(olapReadTable.Ranges()) + .Columns(olapReadTable.Columns()) + .Settings(settingsNode) + .ExplainPrompt(olapReadTable.ExplainPrompt()) + .Process(olapReadTable.Process()) + .Done(); + } + + YQL_ENSURE(false, "Unknown read table operation: " << input.Ptr()->Content()); +} + +TCoAtom GetReadTablePath(TExprBase input, bool isReadRanges) { + if (isReadRanges) { + return input.Cast<TKqlReadTableRangesBase>().Table().Path(); + } + + return input.Cast<TKqpReadTable>().Table().Path(); +} + +TKqpReadTableSettings GetReadTableSettings(TExprBase input, bool isReadRanges) { + if (isReadRanges) { + return TKqpReadTableSettings::Parse(input.Cast<TKqlReadTableRangesBase>()); + } + + return TKqpReadTableSettings::Parse(input.Cast<TKqpReadTable>()); +}; + TCoAtomList BuildColumnsList(const THashSet<TStringBuf>& columns, TPositionHandle pos, TExprContext& ctx) { return BuildColumnsListImpl(columns, pos, ctx); } diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_limit.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_limit.cpp index 6f5ddc67a58..e09357ea692 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_limit.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_limit.cpp @@ -1,92 +1,92 @@ -#include "kqp_opt_phy_rules.h" +#include "kqp_opt_phy_rules.h" #include "kqp_opt_phy_impl.h" - + #include <ydb/core/kqp/common/kqp_yql.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NNodes; - -TExprBase KqpApplyLimitToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { - if (!node.Maybe<TCoTake>()) { - return node; - } - auto take = node.Cast<TCoTake>(); - - auto maybeSkip = take.Input().Maybe<TCoSkip>(); - auto input = maybeSkip ? maybeSkip.Cast().Input() : take.Input(); - - bool isReadTable = input.Maybe<TKqpReadTable>().IsValid(); - bool isReadTableRanges = input.Maybe<TKqlReadTableRangesBase>().IsValid(); - - if (!isReadTable && !isReadTableRanges) { - return node; - } - - if (kqpCtx.IsScanQuery()) { - auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, GetReadTablePath(input, isReadTableRanges)); - - if (tableDesc.Metadata->Kind != EKikimrTableKind::Olap) { - return node; - } - } - - auto settings = GetReadTableSettings(input, isReadTableRanges); - if (settings.ItemsLimit) { - return node; // already set? - } - - TMaybeNode<TExprBase> limitValue; - if (auto maybeTakeCount = take.Count().Maybe<TCoUint64>()) { - ui64 totalLimit; - ui64 takeValue = FromString<ui64>(maybeTakeCount.Cast().Literal().Value()); - - if (maybeSkip) { - if (auto maybeSkipCount = maybeSkip.Count().Maybe<TCoUint64>()) { - auto skipValue = FromString<ui64>(maybeSkipCount.Cast().Literal().Value()); - totalLimit = takeValue + skipValue; - } else { - return node; // ??? - } - } else { - totalLimit = takeValue; - } - - limitValue = Build<TCoUint64>(ctx, node.Pos()) - .Literal<TCoAtom>() - .Value(ToString(totalLimit)).Build() - .Done(); - } else { - limitValue = take.Count(); - if (maybeSkip) { - limitValue = Build<TCoPlus>(ctx, node.Pos()) - .Left(limitValue.Cast()) - .Right(maybeSkip.Cast().Count()) - .Done(); - } - } - - YQL_CLOG(TRACE, ProviderKqp) << "-- set limit items value to " << limitValue.Cast().Ref().Dump(); - - settings.SetItemsLimit(Build<TDqPrecompute>(ctx, node.Pos()) - .Input(limitValue.Cast()) - .Done().Ptr()); - - input = BuildReadNode(node.Pos(), ctx, input, settings); - - if (maybeSkip) { - input = Build<TCoSkip>(ctx, node.Pos()) - .Input(input) - .Count(maybeSkip.Cast().Count()) - .Done(); - } - - return Build<TCoTake>(ctx, take.Pos()) - .Input(input) - .Count(take.Count()) - .Done(); -} - + +using namespace NYql; +using namespace NYql::NNodes; + +TExprBase KqpApplyLimitToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { + if (!node.Maybe<TCoTake>()) { + return node; + } + auto take = node.Cast<TCoTake>(); + + auto maybeSkip = take.Input().Maybe<TCoSkip>(); + auto input = maybeSkip ? maybeSkip.Cast().Input() : take.Input(); + + bool isReadTable = input.Maybe<TKqpReadTable>().IsValid(); + bool isReadTableRanges = input.Maybe<TKqlReadTableRangesBase>().IsValid(); + + if (!isReadTable && !isReadTableRanges) { + return node; + } + + if (kqpCtx.IsScanQuery()) { + auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, GetReadTablePath(input, isReadTableRanges)); + + if (tableDesc.Metadata->Kind != EKikimrTableKind::Olap) { + return node; + } + } + + auto settings = GetReadTableSettings(input, isReadTableRanges); + if (settings.ItemsLimit) { + return node; // already set? + } + + TMaybeNode<TExprBase> limitValue; + if (auto maybeTakeCount = take.Count().Maybe<TCoUint64>()) { + ui64 totalLimit; + ui64 takeValue = FromString<ui64>(maybeTakeCount.Cast().Literal().Value()); + + if (maybeSkip) { + if (auto maybeSkipCount = maybeSkip.Count().Maybe<TCoUint64>()) { + auto skipValue = FromString<ui64>(maybeSkipCount.Cast().Literal().Value()); + totalLimit = takeValue + skipValue; + } else { + return node; // ??? + } + } else { + totalLimit = takeValue; + } + + limitValue = Build<TCoUint64>(ctx, node.Pos()) + .Literal<TCoAtom>() + .Value(ToString(totalLimit)).Build() + .Done(); + } else { + limitValue = take.Count(); + if (maybeSkip) { + limitValue = Build<TCoPlus>(ctx, node.Pos()) + .Left(limitValue.Cast()) + .Right(maybeSkip.Cast().Count()) + .Done(); + } + } + + YQL_CLOG(TRACE, ProviderKqp) << "-- set limit items value to " << limitValue.Cast().Ref().Dump(); + + settings.SetItemsLimit(Build<TDqPrecompute>(ctx, node.Pos()) + .Input(limitValue.Cast()) + .Done().Ptr()); + + input = BuildReadNode(node.Pos(), ctx, input, settings); + + if (maybeSkip) { + input = Build<TCoSkip>(ctx, node.Pos()) + .Input(input) + .Count(maybeSkip.Cast().Count()) + .Done(); + } + + return Build<TCoTake>(ctx, take.Pos()) + .Input(input) + .Count(take.Count()) + .Done(); +} + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp index 600f5961db7..fa0bb9cc808 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp @@ -1,721 +1,721 @@ -#include "kqp_opt_phy_rules.h" - +#include "kqp_opt_phy_rules.h" + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/library/yql/core/extract_predicate/extract_predicate.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NNodes; - -namespace { - -static TMaybeNode<TExprBase> NullNode = TMaybeNode<TExprBase>(); - -bool IsSupportedPredicate(const TCoCompare& predicate) { - if (predicate.Maybe<TCoCmpEqual>()) { - return true; - } - - if (predicate.Maybe<TCoCmpLess>()) { - return true; - } - - if (predicate.Maybe<TCoCmpGreater>()) { - return true; - } - - if (predicate.Maybe<TCoCmpNotEqual>()) { - return true; - } - - if (predicate.Maybe<TCoCmpGreaterOrEqual>()) { - return true; - } - - if (predicate.Maybe<TCoCmpLessOrEqual>()) { - return true; - } - - return false; -} - -bool ValidateIfArgument(const TCoOptionalIf& optionalIf, const TExprNode* rawLambdaArg) { - // Check it is SELECT * or SELECT `field1`, `field2`... - if (optionalIf.Value().Raw() == rawLambdaArg) { - return true; - } - - // Ok, maybe it is SELECT `field` ? - auto maybeAsStruct = optionalIf.Value().Maybe<TCoAsStruct>(); - - if (!maybeAsStruct) { - return false; - } - - auto asStruct = maybeAsStruct.Cast(); - - // SELECT `field` has only one item - if (asStruct.ArgCount() != 1) { - return false; - } - - // Check that second tuple element is Member(lambda arg) - auto tuple = asStruct.Arg(0).Maybe<TExprList>().Cast(); - - if (tuple.Size() != 2) { - return false; - } - - auto maybeMember = tuple.Item(1).Maybe<TCoMember>(); - - if (!maybeMember) { - return false; - } - - auto member = maybeMember.Cast(); - - if (member.Struct().Raw() != rawLambdaArg) { - return false; - } - - return true; -} - -bool IsSupportedDataType(const TCoDataCtor& node) { - if (node.Maybe<TCoUtf8>() || - node.Maybe<TCoString>() || - node.Maybe<TCoBool>() || - node.Maybe<TCoFloat>() || - node.Maybe<TCoDouble>() || - node.Maybe<TCoInt8>() || - node.Maybe<TCoInt16>() || - node.Maybe<TCoInt32>() || - node.Maybe<TCoInt64>() || - node.Maybe<TCoUint8>() || - node.Maybe<TCoUint16>() || - node.Maybe<TCoUint32>() || - node.Maybe<TCoUint64>()) - { - return true; - } - - return false; -} - -bool IsComparableTypes(const TExprBase& leftNode, const TExprBase& rightNode, bool equality, - const TTypeAnnotationNode* inputType) -{ - const TExprNode::TPtr leftPtr = leftNode.Ptr(); - const TExprNode::TPtr rightPtr = rightNode.Ptr(); - - auto getDataType = [inputType](const TExprNode::TPtr& node) { - auto type = node->GetTypeAnn(); - - if (type->GetKind() == ETypeAnnotationKind::Unit) { - auto rowType = inputType->Cast<TStructExprType>(); - type = rowType->FindItemType(node->Content()); - } - - if (type->GetKind() == ETypeAnnotationKind::Optional) { - type = type->Cast<TOptionalExprType>()->GetItemType(); - } - - return type; - }; - - auto defaultCompare = [equality](const TTypeAnnotationNode* left, const TTypeAnnotationNode* right) { - if (equality) { - return CanCompare<true>(left, right); - } - - return CanCompare<false>(left, right); - }; - - auto canCompare = [&defaultCompare](const TTypeAnnotationNode* left, const TTypeAnnotationNode* right) { - if (left->GetKind() != ETypeAnnotationKind::Data || - right->GetKind() != ETypeAnnotationKind::Data) - { - return defaultCompare(left, right); - } - - auto leftTypeId = GetDataTypeInfo(left->Cast<TDataExprType>()->GetSlot()).TypeId; - auto rightTypeId = GetDataTypeInfo(right->Cast<TDataExprType>()->GetSlot()).TypeId; - - if (leftTypeId == rightTypeId) { - return ECompareOptions::Comparable; - } - - /* - * Check special case UInt32 <-> Datetime in case i can't put it inside switch without lot of copypaste - */ - if (leftTypeId == NYql::NProto::Uint32 && rightTypeId == NYql::NProto::Date) { - return ECompareOptions::Comparable; - } - - /* - * SSA program requires strict equality of some types, otherwise columnshard fails to execute comparison - */ - switch (leftTypeId) { - case NYql::NProto::Int8: - case NYql::NProto::Int16: - case NYql::NProto::Int32: - // SSA program cast those values to Int32 - if (rightTypeId == NYql::NProto::Int8 || - rightTypeId == NYql::NProto::Int16 || - rightTypeId == NYql::NProto::Int32) - { - return ECompareOptions::Comparable; - } - break; - case NYql::NProto::Uint16: - if (rightTypeId == NYql::NProto::Date) { - return ECompareOptions::Comparable; - } - [[fallthrough]]; - case NYql::NProto::Uint8: - case NYql::NProto::Uint32: - // SSA program cast those values to Uint32 - if (rightTypeId == NYql::NProto::Uint8 || - rightTypeId == NYql::NProto::Uint16 || - rightTypeId == NYql::NProto::Uint32) - { - return ECompareOptions::Comparable; - } - break; - case NYql::NProto::Date: + +using namespace NYql; +using namespace NYql::NNodes; + +namespace { + +static TMaybeNode<TExprBase> NullNode = TMaybeNode<TExprBase>(); + +bool IsSupportedPredicate(const TCoCompare& predicate) { + if (predicate.Maybe<TCoCmpEqual>()) { + return true; + } + + if (predicate.Maybe<TCoCmpLess>()) { + return true; + } + + if (predicate.Maybe<TCoCmpGreater>()) { + return true; + } + + if (predicate.Maybe<TCoCmpNotEqual>()) { + return true; + } + + if (predicate.Maybe<TCoCmpGreaterOrEqual>()) { + return true; + } + + if (predicate.Maybe<TCoCmpLessOrEqual>()) { + return true; + } + + return false; +} + +bool ValidateIfArgument(const TCoOptionalIf& optionalIf, const TExprNode* rawLambdaArg) { + // Check it is SELECT * or SELECT `field1`, `field2`... + if (optionalIf.Value().Raw() == rawLambdaArg) { + return true; + } + + // Ok, maybe it is SELECT `field` ? + auto maybeAsStruct = optionalIf.Value().Maybe<TCoAsStruct>(); + + if (!maybeAsStruct) { + return false; + } + + auto asStruct = maybeAsStruct.Cast(); + + // SELECT `field` has only one item + if (asStruct.ArgCount() != 1) { + return false; + } + + // Check that second tuple element is Member(lambda arg) + auto tuple = asStruct.Arg(0).Maybe<TExprList>().Cast(); + + if (tuple.Size() != 2) { + return false; + } + + auto maybeMember = tuple.Item(1).Maybe<TCoMember>(); + + if (!maybeMember) { + return false; + } + + auto member = maybeMember.Cast(); + + if (member.Struct().Raw() != rawLambdaArg) { + return false; + } + + return true; +} + +bool IsSupportedDataType(const TCoDataCtor& node) { + if (node.Maybe<TCoUtf8>() || + node.Maybe<TCoString>() || + node.Maybe<TCoBool>() || + node.Maybe<TCoFloat>() || + node.Maybe<TCoDouble>() || + node.Maybe<TCoInt8>() || + node.Maybe<TCoInt16>() || + node.Maybe<TCoInt32>() || + node.Maybe<TCoInt64>() || + node.Maybe<TCoUint8>() || + node.Maybe<TCoUint16>() || + node.Maybe<TCoUint32>() || + node.Maybe<TCoUint64>()) + { + return true; + } + + return false; +} + +bool IsComparableTypes(const TExprBase& leftNode, const TExprBase& rightNode, bool equality, + const TTypeAnnotationNode* inputType) +{ + const TExprNode::TPtr leftPtr = leftNode.Ptr(); + const TExprNode::TPtr rightPtr = rightNode.Ptr(); + + auto getDataType = [inputType](const TExprNode::TPtr& node) { + auto type = node->GetTypeAnn(); + + if (type->GetKind() == ETypeAnnotationKind::Unit) { + auto rowType = inputType->Cast<TStructExprType>(); + type = rowType->FindItemType(node->Content()); + } + + if (type->GetKind() == ETypeAnnotationKind::Optional) { + type = type->Cast<TOptionalExprType>()->GetItemType(); + } + + return type; + }; + + auto defaultCompare = [equality](const TTypeAnnotationNode* left, const TTypeAnnotationNode* right) { + if (equality) { + return CanCompare<true>(left, right); + } + + return CanCompare<false>(left, right); + }; + + auto canCompare = [&defaultCompare](const TTypeAnnotationNode* left, const TTypeAnnotationNode* right) { + if (left->GetKind() != ETypeAnnotationKind::Data || + right->GetKind() != ETypeAnnotationKind::Data) + { + return defaultCompare(left, right); + } + + auto leftTypeId = GetDataTypeInfo(left->Cast<TDataExprType>()->GetSlot()).TypeId; + auto rightTypeId = GetDataTypeInfo(right->Cast<TDataExprType>()->GetSlot()).TypeId; + + if (leftTypeId == rightTypeId) { + return ECompareOptions::Comparable; + } + + /* + * Check special case UInt32 <-> Datetime in case i can't put it inside switch without lot of copypaste + */ + if (leftTypeId == NYql::NProto::Uint32 && rightTypeId == NYql::NProto::Date) { + return ECompareOptions::Comparable; + } + + /* + * SSA program requires strict equality of some types, otherwise columnshard fails to execute comparison + */ + switch (leftTypeId) { + case NYql::NProto::Int8: + case NYql::NProto::Int16: + case NYql::NProto::Int32: + // SSA program cast those values to Int32 + if (rightTypeId == NYql::NProto::Int8 || + rightTypeId == NYql::NProto::Int16 || + rightTypeId == NYql::NProto::Int32) + { + return ECompareOptions::Comparable; + } + break; + case NYql::NProto::Uint16: + if (rightTypeId == NYql::NProto::Date) { + return ECompareOptions::Comparable; + } + [[fallthrough]]; + case NYql::NProto::Uint8: + case NYql::NProto::Uint32: + // SSA program cast those values to Uint32 + if (rightTypeId == NYql::NProto::Uint8 || + rightTypeId == NYql::NProto::Uint16 || + rightTypeId == NYql::NProto::Uint32) + { + return ECompareOptions::Comparable; + } + break; + case NYql::NProto::Date: // See arcadia/ydb/library/yql/dq/runtime/dq_arrow_helpers.cpp SwitchMiniKQLDataTypeToArrowType - if (rightTypeId == NYql::NProto::Uint16) { - return ECompareOptions::Comparable; - } - break; - case NYql::NProto::Datetime: + if (rightTypeId == NYql::NProto::Uint16) { + return ECompareOptions::Comparable; + } + break; + case NYql::NProto::Datetime: // See arcadia/ydb/library/yql/dq/runtime/dq_arrow_helpers.cpp SwitchMiniKQLDataTypeToArrowType - if (rightTypeId == NYql::NProto::Uint32) { - return ECompareOptions::Comparable; - } - break; - case NYql::NProto::Bool: - case NYql::NProto::Int64: - case NYql::NProto::Uint64: - case NYql::NProto::Float: - case NYql::NProto::Double: - case NYql::NProto::Decimal: - case NYql::NProto::Timestamp: - case NYql::NProto::Interval: - // Obviosly here right node has not same type as left one - break; - default: - return defaultCompare(left, right); - } - - return ECompareOptions::Uncomparable; - }; - - auto leftType = getDataType(leftPtr); - auto rightType = getDataType(rightPtr); - - if (canCompare(leftType, rightType) == ECompareOptions::Uncomparable) { - YQL_CLOG(DEBUG, ProviderKqp) << "OLAP Pushdown: " - << "Uncompatible types in compare of nodes: " - << leftPtr->Content() << " of type " << FormatType(leftType) - << " and " - << rightPtr->Content() << " of type " << FormatType(rightType); - - return false; - } - - return true; -} - - -TVector<std::pair<TExprBase, TExprBase>> ExtractComparisonParameters(const TCoCompare& predicate, + if (rightTypeId == NYql::NProto::Uint32) { + return ECompareOptions::Comparable; + } + break; + case NYql::NProto::Bool: + case NYql::NProto::Int64: + case NYql::NProto::Uint64: + case NYql::NProto::Float: + case NYql::NProto::Double: + case NYql::NProto::Decimal: + case NYql::NProto::Timestamp: + case NYql::NProto::Interval: + // Obviosly here right node has not same type as left one + break; + default: + return defaultCompare(left, right); + } + + return ECompareOptions::Uncomparable; + }; + + auto leftType = getDataType(leftPtr); + auto rightType = getDataType(rightPtr); + + if (canCompare(leftType, rightType) == ECompareOptions::Uncomparable) { + YQL_CLOG(DEBUG, ProviderKqp) << "OLAP Pushdown: " + << "Uncompatible types in compare of nodes: " + << leftPtr->Content() << " of type " << FormatType(leftType) + << " and " + << rightPtr->Content() << " of type " << FormatType(rightType); + + return false; + } + + return true; +} + + +TVector<std::pair<TExprBase, TExprBase>> ExtractComparisonParameters(const TCoCompare& predicate, const TExprNode* rawLambdaArg, const TExprBase& input) -{ - TVector<std::pair<TExprBase, TExprBase>> out; - +{ + TVector<std::pair<TExprBase, TExprBase>> out; + auto convertNode = [rawLambdaArg](const TExprBase& node) -> TMaybeNode<TExprBase> { - if (node.Maybe<TCoNull>()) { - return node; - } - - if (auto maybeParameter = node.Maybe<TCoParameter>()) { - return maybeParameter.Cast(); - } - - if (auto maybeData = node.Maybe<TCoDataCtor>()) { - if (IsSupportedDataType(maybeData.Cast())) { - return node; - } - - return NullNode; - } - - if (auto maybeMember = node.Maybe<TCoMember>()) { - if (maybeMember.Cast().Struct().Raw() == rawLambdaArg) { - return maybeMember.Cast().Name(); - } - - return NullNode; - } - - return NullNode; - }; - - // Columns & values may be single element - TMaybeNode<TExprBase> left = convertNode(predicate.Left()); - TMaybeNode<TExprBase> right = convertNode(predicate.Right()); - - TMaybeNode<TCoCmpEqual> maybeEqual = predicate.Maybe<TCoCmpEqual>(); - TMaybeNode<TCoCmpNotEqual> maybeNotEqual = predicate.Maybe<TCoCmpNotEqual>(); - - bool equality = maybeEqual.IsValid() || maybeNotEqual.IsValid(); - const TTypeAnnotationNode* inputType = input.Ptr()->GetTypeAnn(); - - switch (inputType->GetKind()) { - case ETypeAnnotationKind::Flow: - inputType = inputType->Cast<TFlowExprType>()->GetItemType(); - break; - case ETypeAnnotationKind::Stream: - inputType = inputType->Cast<TStreamExprType>()->GetItemType(); - break; - default: - YQL_ENSURE(false, "Unsupported type of incoming data: " << (ui32)inputType->GetKind()); - // We do not know how process input that is not a sequence of elements - return out; - } - - YQL_ENSURE(inputType->GetKind() == ETypeAnnotationKind::Struct); - - if (inputType->GetKind() != ETypeAnnotationKind::Struct) { - // We do not know how process input that is not a sequence of elements - return out; - } - - if (left.IsValid() && right.IsValid()) { - if (!IsComparableTypes(left.Cast(), right.Cast(), equality, inputType)) { - return out; - } - - out.emplace_back(std::move(std::make_pair(left.Cast(), right.Cast()))); - return out; - } - - // Or columns and values can be Tuple - if (!predicate.Left().Maybe<TExprList>() || !predicate.Right().Maybe<TExprList>()) { - // something unusual found, return empty vector - return out; - } - - auto tupleLeft = predicate.Left().Cast<TExprList>(); - auto tupleRight = predicate.Right().Cast<TExprList>(); - - if (tupleLeft.Size() != tupleRight.Size()) { - return out; - } - - out.reserve(tupleLeft.Size()); - - for (ui32 i = 0; i < tupleLeft.Size(); ++i) { - TMaybeNode<TExprBase> left = convertNode(tupleLeft.Item(i)); - TMaybeNode<TExprBase> right = convertNode(tupleRight.Item(i)); - - if (!left.IsValid() || !right.IsValid()) { - // Return empty vector - return TVector<std::pair<TExprBase, TExprBase>>(); - } - - if (!IsComparableTypes(left.Cast(), right.Cast(), equality, inputType)) { - // Return empty vector - return TVector<std::pair<TExprBase, TExprBase>>(); - } - - out.emplace_back(std::move(std::make_pair(left.Cast(), right.Cast()))); - } - - return out; -} - -TExprBase BuildOneElementComparison(const std::pair<TExprBase, TExprBase>& parameter, const TCoCompare& predicate, - TExprContext& ctx, TPositionHandle pos, const TExprBase& input) -{ - auto isNull = [](const TExprBase& node) { - if (node.Maybe<TCoNull>()) { - return true; - } - - if (node.Maybe<TCoNothing>()) { - return true; - } - - return false; - }; - - // Any comparison with NULL should return false even if NULL is uncomparable - // See postgres documentation https://www.postgresql.org/docs/13/functions-comparisons.html - // 9.24.5. Row Constructor Comparison - if (isNull(parameter.first) || isNull(parameter.second)) { - return Build<TCoBool>(ctx, pos) - .Literal().Build("false") - .Done(); - } - - if (predicate.Maybe<TCoCmpEqual>()) { - return Build<TKqpOlapFilterEqual>(ctx, pos) - .Input(input) - .Left(parameter.first) - .Right(parameter.second) - .Done(); - } - - if (predicate.Maybe<TCoCmpLess>()) { - return Build<TKqpOlapFilterLess>(ctx, pos) - .Input(input) - .Left(parameter.first) - .Right(parameter.second) - .Done(); - } - - if (predicate.Maybe<TCoCmpLessOrEqual>()) { - return Build<TKqpOlapFilterLessOrEqual>(ctx, pos) - .Input(input) - .Left(parameter.first) - .Right(parameter.second) - .Done(); - } - - if (predicate.Maybe<TCoCmpGreater>()) { - return Build<TKqpOlapFilterGreater>(ctx, pos) - .Input(input) - .Left(parameter.first) - .Right(parameter.second) - .Done(); - } - - if (predicate.Maybe<TCoCmpGreaterOrEqual>()) { - return Build<TKqpOlapFilterGreaterOrEqual>(ctx, pos) - .Input(input) - .Left(parameter.first) - .Right(parameter.second) - .Done(); - } - - YQL_ENSURE(predicate.Maybe<TCoCmpNotEqual>(), "Unsupported comparison node: " << predicate.Ptr()->Content()); - - return Build<TCoNot>(ctx, pos) - .Value<TKqpOlapFilterEqual>() - .Input(input) - .Left(parameter.first) - .Right(parameter.second) - .Build() - .Done(); -} - -TExprBase ComparisonPushdown(const TVector<std::pair<TExprBase, TExprBase>>& parameters, const TCoCompare& predicate, - TExprContext& ctx, TPositionHandle pos, const TExprBase& input) -{ - ui32 conditionsCount = parameters.size(); - - if (conditionsCount == 1) { - return BuildOneElementComparison(parameters[0], predicate, ctx, pos, input); - } - - if (predicate.Maybe<TCoCmpEqual>() || predicate.Maybe<TCoCmpNotEqual>()) { - TVector<TExprBase> conditions; - conditions.reserve(conditionsCount); - - for (ui32 i = 0; i < conditionsCount; ++i) { - conditions.emplace_back(BuildOneElementComparison(parameters[i], predicate, ctx, pos, input)); - } - - if (predicate.Maybe<TCoCmpEqual>()) { - return Build<TCoAnd>(ctx, pos) - .Add(conditions) - .Done(); - } - - return Build<TCoOr>(ctx, pos) - .Add(conditions) - .Done(); - } - - TVector<TExprBase> orConditions; - orConditions.reserve(conditionsCount); - - // Here we can be only whe comparing tuples lexicographically - for (ui32 i = 0; i < conditionsCount; ++i) { - TVector<TExprBase> andConditions; - andConditions.reserve(conditionsCount); - - andConditions.emplace_back(BuildOneElementComparison(parameters[i], predicate, ctx, pos, input)); - - for (ui32 j = 0; j < i; ++j) { - andConditions.emplace_back(Build<TKqpOlapFilterEqual>(ctx, pos) - .Input(input) - .Left(parameters[j].first) - .Right(parameters[j].second) - .Done()); - } - - orConditions.emplace_back(Build<TCoAnd>(ctx, pos) - .Add(std::move(andConditions)) - .Done()); - } - - return Build<TCoOr>(ctx, pos) - .Add(std::move(orConditions)) - .Done(); -} - -// TODO: Check how to reduce columns if they are not needed. Unfortunately columnshard need columns list -// for every column present in program even if it is not used in result set. -//#define ENABLE_COLUMNS_PRUNING -#ifdef ENABLE_COLUMNS_PRUNING -TMaybeNode<TCoAtomList> BuildColumnsFromLambda(const TCoLambda& lambda, TExprContext& ctx, TPositionHandle pos) -{ - auto exprType = lambda.Ptr()->GetTypeAnn(); - - if (exprType->GetKind() == ETypeAnnotationKind::Optional) { - exprType = exprType->Cast<TOptionalExprType>()->GetItemType(); - } - - if (exprType->GetKind() != ETypeAnnotationKind::Struct) { - return nullptr; - } - - auto items = exprType->Cast<TStructExprType>()->GetItems(); - - auto columnsList = Build<TCoAtomList>(ctx, pos); - - for (auto& item: items) { - columnsList.Add(ctx.NewAtom(pos, item->GetName())); - } - - return columnsList.Done(); -} -#endif - -TMaybeNode<TExprBase> ExistsPushdown(const TCoExists& exists, TExprContext& ctx, TPositionHandle pos, - const TExprNode* lambdaArg, const TExprBase& input) -{ - auto maybeMember = exists.Optional().Maybe<TCoMember>(); - - if (!maybeMember.IsValid()) { - return NullNode; - } - - if (maybeMember.Cast().Struct().Raw() != lambdaArg) { - return NullNode; - } - - auto columnName = maybeMember.Cast().Name(); - - return Build<TKqpOlapFilterExists>(ctx, pos) - .Input(input) - .Column(columnName) - .Done(); -} - -TMaybeNode<TExprBase> CoalescePushdown(const TCoCoalesce& coalesce, TExprContext& ctx, TPositionHandle pos, - const TExprNode* lambdaArg, const TExprBase& input) -{ - auto maybePredicate = coalesce.Predicate().Maybe<TCoCompare>(); - - if (!maybePredicate.IsValid()) { - return NullNode; - } - - auto predicate = maybePredicate.Cast(); - - if (!IsSupportedPredicate(predicate)) { - return NullNode; - } - - if (!coalesce.Value().Maybe<TCoBool>()) { - return NullNode; - } - - if (coalesce.Value().Cast<TCoBool>().Literal().Value() != "false") { - return NullNode; - } - + if (node.Maybe<TCoNull>()) { + return node; + } + + if (auto maybeParameter = node.Maybe<TCoParameter>()) { + return maybeParameter.Cast(); + } + + if (auto maybeData = node.Maybe<TCoDataCtor>()) { + if (IsSupportedDataType(maybeData.Cast())) { + return node; + } + + return NullNode; + } + + if (auto maybeMember = node.Maybe<TCoMember>()) { + if (maybeMember.Cast().Struct().Raw() == rawLambdaArg) { + return maybeMember.Cast().Name(); + } + + return NullNode; + } + + return NullNode; + }; + + // Columns & values may be single element + TMaybeNode<TExprBase> left = convertNode(predicate.Left()); + TMaybeNode<TExprBase> right = convertNode(predicate.Right()); + + TMaybeNode<TCoCmpEqual> maybeEqual = predicate.Maybe<TCoCmpEqual>(); + TMaybeNode<TCoCmpNotEqual> maybeNotEqual = predicate.Maybe<TCoCmpNotEqual>(); + + bool equality = maybeEqual.IsValid() || maybeNotEqual.IsValid(); + const TTypeAnnotationNode* inputType = input.Ptr()->GetTypeAnn(); + + switch (inputType->GetKind()) { + case ETypeAnnotationKind::Flow: + inputType = inputType->Cast<TFlowExprType>()->GetItemType(); + break; + case ETypeAnnotationKind::Stream: + inputType = inputType->Cast<TStreamExprType>()->GetItemType(); + break; + default: + YQL_ENSURE(false, "Unsupported type of incoming data: " << (ui32)inputType->GetKind()); + // We do not know how process input that is not a sequence of elements + return out; + } + + YQL_ENSURE(inputType->GetKind() == ETypeAnnotationKind::Struct); + + if (inputType->GetKind() != ETypeAnnotationKind::Struct) { + // We do not know how process input that is not a sequence of elements + return out; + } + + if (left.IsValid() && right.IsValid()) { + if (!IsComparableTypes(left.Cast(), right.Cast(), equality, inputType)) { + return out; + } + + out.emplace_back(std::move(std::make_pair(left.Cast(), right.Cast()))); + return out; + } + + // Or columns and values can be Tuple + if (!predicate.Left().Maybe<TExprList>() || !predicate.Right().Maybe<TExprList>()) { + // something unusual found, return empty vector + return out; + } + + auto tupleLeft = predicate.Left().Cast<TExprList>(); + auto tupleRight = predicate.Right().Cast<TExprList>(); + + if (tupleLeft.Size() != tupleRight.Size()) { + return out; + } + + out.reserve(tupleLeft.Size()); + + for (ui32 i = 0; i < tupleLeft.Size(); ++i) { + TMaybeNode<TExprBase> left = convertNode(tupleLeft.Item(i)); + TMaybeNode<TExprBase> right = convertNode(tupleRight.Item(i)); + + if (!left.IsValid() || !right.IsValid()) { + // Return empty vector + return TVector<std::pair<TExprBase, TExprBase>>(); + } + + if (!IsComparableTypes(left.Cast(), right.Cast(), equality, inputType)) { + // Return empty vector + return TVector<std::pair<TExprBase, TExprBase>>(); + } + + out.emplace_back(std::move(std::make_pair(left.Cast(), right.Cast()))); + } + + return out; +} + +TExprBase BuildOneElementComparison(const std::pair<TExprBase, TExprBase>& parameter, const TCoCompare& predicate, + TExprContext& ctx, TPositionHandle pos, const TExprBase& input) +{ + auto isNull = [](const TExprBase& node) { + if (node.Maybe<TCoNull>()) { + return true; + } + + if (node.Maybe<TCoNothing>()) { + return true; + } + + return false; + }; + + // Any comparison with NULL should return false even if NULL is uncomparable + // See postgres documentation https://www.postgresql.org/docs/13/functions-comparisons.html + // 9.24.5. Row Constructor Comparison + if (isNull(parameter.first) || isNull(parameter.second)) { + return Build<TCoBool>(ctx, pos) + .Literal().Build("false") + .Done(); + } + + if (predicate.Maybe<TCoCmpEqual>()) { + return Build<TKqpOlapFilterEqual>(ctx, pos) + .Input(input) + .Left(parameter.first) + .Right(parameter.second) + .Done(); + } + + if (predicate.Maybe<TCoCmpLess>()) { + return Build<TKqpOlapFilterLess>(ctx, pos) + .Input(input) + .Left(parameter.first) + .Right(parameter.second) + .Done(); + } + + if (predicate.Maybe<TCoCmpLessOrEqual>()) { + return Build<TKqpOlapFilterLessOrEqual>(ctx, pos) + .Input(input) + .Left(parameter.first) + .Right(parameter.second) + .Done(); + } + + if (predicate.Maybe<TCoCmpGreater>()) { + return Build<TKqpOlapFilterGreater>(ctx, pos) + .Input(input) + .Left(parameter.first) + .Right(parameter.second) + .Done(); + } + + if (predicate.Maybe<TCoCmpGreaterOrEqual>()) { + return Build<TKqpOlapFilterGreaterOrEqual>(ctx, pos) + .Input(input) + .Left(parameter.first) + .Right(parameter.second) + .Done(); + } + + YQL_ENSURE(predicate.Maybe<TCoCmpNotEqual>(), "Unsupported comparison node: " << predicate.Ptr()->Content()); + + return Build<TCoNot>(ctx, pos) + .Value<TKqpOlapFilterEqual>() + .Input(input) + .Left(parameter.first) + .Right(parameter.second) + .Build() + .Done(); +} + +TExprBase ComparisonPushdown(const TVector<std::pair<TExprBase, TExprBase>>& parameters, const TCoCompare& predicate, + TExprContext& ctx, TPositionHandle pos, const TExprBase& input) +{ + ui32 conditionsCount = parameters.size(); + + if (conditionsCount == 1) { + return BuildOneElementComparison(parameters[0], predicate, ctx, pos, input); + } + + if (predicate.Maybe<TCoCmpEqual>() || predicate.Maybe<TCoCmpNotEqual>()) { + TVector<TExprBase> conditions; + conditions.reserve(conditionsCount); + + for (ui32 i = 0; i < conditionsCount; ++i) { + conditions.emplace_back(BuildOneElementComparison(parameters[i], predicate, ctx, pos, input)); + } + + if (predicate.Maybe<TCoCmpEqual>()) { + return Build<TCoAnd>(ctx, pos) + .Add(conditions) + .Done(); + } + + return Build<TCoOr>(ctx, pos) + .Add(conditions) + .Done(); + } + + TVector<TExprBase> orConditions; + orConditions.reserve(conditionsCount); + + // Here we can be only whe comparing tuples lexicographically + for (ui32 i = 0; i < conditionsCount; ++i) { + TVector<TExprBase> andConditions; + andConditions.reserve(conditionsCount); + + andConditions.emplace_back(BuildOneElementComparison(parameters[i], predicate, ctx, pos, input)); + + for (ui32 j = 0; j < i; ++j) { + andConditions.emplace_back(Build<TKqpOlapFilterEqual>(ctx, pos) + .Input(input) + .Left(parameters[j].first) + .Right(parameters[j].second) + .Done()); + } + + orConditions.emplace_back(Build<TCoAnd>(ctx, pos) + .Add(std::move(andConditions)) + .Done()); + } + + return Build<TCoOr>(ctx, pos) + .Add(std::move(orConditions)) + .Done(); +} + +// TODO: Check how to reduce columns if they are not needed. Unfortunately columnshard need columns list +// for every column present in program even if it is not used in result set. +//#define ENABLE_COLUMNS_PRUNING +#ifdef ENABLE_COLUMNS_PRUNING +TMaybeNode<TCoAtomList> BuildColumnsFromLambda(const TCoLambda& lambda, TExprContext& ctx, TPositionHandle pos) +{ + auto exprType = lambda.Ptr()->GetTypeAnn(); + + if (exprType->GetKind() == ETypeAnnotationKind::Optional) { + exprType = exprType->Cast<TOptionalExprType>()->GetItemType(); + } + + if (exprType->GetKind() != ETypeAnnotationKind::Struct) { + return nullptr; + } + + auto items = exprType->Cast<TStructExprType>()->GetItems(); + + auto columnsList = Build<TCoAtomList>(ctx, pos); + + for (auto& item: items) { + columnsList.Add(ctx.NewAtom(pos, item->GetName())); + } + + return columnsList.Done(); +} +#endif + +TMaybeNode<TExprBase> ExistsPushdown(const TCoExists& exists, TExprContext& ctx, TPositionHandle pos, + const TExprNode* lambdaArg, const TExprBase& input) +{ + auto maybeMember = exists.Optional().Maybe<TCoMember>(); + + if (!maybeMember.IsValid()) { + return NullNode; + } + + if (maybeMember.Cast().Struct().Raw() != lambdaArg) { + return NullNode; + } + + auto columnName = maybeMember.Cast().Name(); + + return Build<TKqpOlapFilterExists>(ctx, pos) + .Input(input) + .Column(columnName) + .Done(); +} + +TMaybeNode<TExprBase> CoalescePushdown(const TCoCoalesce& coalesce, TExprContext& ctx, TPositionHandle pos, + const TExprNode* lambdaArg, const TExprBase& input) +{ + auto maybePredicate = coalesce.Predicate().Maybe<TCoCompare>(); + + if (!maybePredicate.IsValid()) { + return NullNode; + } + + auto predicate = maybePredicate.Cast(); + + if (!IsSupportedPredicate(predicate)) { + return NullNode; + } + + if (!coalesce.Value().Maybe<TCoBool>()) { + return NullNode; + } + + if (coalesce.Value().Cast<TCoBool>().Literal().Value() != "false") { + return NullNode; + } + auto parameters = ExtractComparisonParameters(predicate, lambdaArg, input); - - if (parameters.empty()) { - return NullNode; - } - - return ComparisonPushdown(parameters, predicate, ctx, pos, input); -} - -TMaybeNode<TExprBase> PredicatePushdown(const TExprBase& predicate, TExprContext& ctx, TPositionHandle pos, - const TExprNode* lambdaArg, const TExprBase& input) -{ - auto maybeCoalesce = predicate.Maybe<TCoCoalesce>(); - - if (maybeCoalesce.IsValid()) { - return CoalescePushdown(maybeCoalesce.Cast(), ctx, pos, lambdaArg, input); - } - - auto maybeExists = predicate.Maybe<TCoExists>(); - - if (maybeExists.IsValid()) { - return ExistsPushdown(maybeExists.Cast(), ctx, pos, lambdaArg, input); - } - - if (predicate.Maybe<TCoNot>()) { - auto notNode = predicate.Cast<TCoNot>(); - auto pushedNot = PredicatePushdown(notNode.Value(), ctx, pos, lambdaArg, input); - - if (!pushedNot.IsValid()) { - return NullNode; - } - - return Build<TCoNot>(ctx, pos) - .Value(pushedNot.Cast()) - .Done(); - } - - if (!predicate.Maybe<TCoAnd>() && !predicate.Maybe<TCoOr>() && !predicate.Maybe<TCoXor>()) { - return NullNode; - } - - TVector<TExprBase> pushedOps; - pushedOps.reserve(predicate.Ptr()->ChildrenSize()); - - for (auto& child: predicate.Ptr()->Children()) { - auto pushedChild = PredicatePushdown(TExprBase(child), ctx, pos, lambdaArg, input); - - if (!pushedChild.IsValid()) { - return NullNode; - } - - pushedOps.emplace_back(pushedChild.Cast()); - } - - if (predicate.Maybe<TCoAnd>()) { - return Build<TCoAnd>(ctx, pos) - .Add(pushedOps) - .Done(); - } - - if (predicate.Maybe<TCoOr>()) { - return Build<TCoOr>(ctx, pos) - .Add(pushedOps) - .Done(); - } - - Y_VERIFY_DEBUG(predicate.Maybe<TCoXor>()); - - return Build<TCoXor>(ctx, pos) - .Add(pushedOps) - .Done(); -} - -} // annymous namespace end - -TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, - TTypeAnnotationContext& typesCtx) -{ - Y_UNUSED(typesCtx); - - if (!kqpCtx.Config->PushOlapProcess()) { - return node; - } - - if (!node.Maybe<TCoFlatMap>().Input().Maybe<TKqpReadOlapTableRanges>()) { - return node; - } - - auto flatmap = node.Cast<TCoFlatMap>(); - auto read = flatmap.Input().Cast<TKqpReadOlapTableRanges>(); - - if (read.Process().Body().Raw() != read.Process().Args().Arg(0).Raw()) { - return node; - } - - const auto& lambda = flatmap.Lambda(); - auto lambdaArg = lambda.Args().Arg(0).Raw(); - - YQL_CLOG(INFO, ProviderKqp) << "Initial OLAP lambda: " << KqpExprToPrettyString(lambda, ctx); - - auto maybeOptionalIf = lambda.Body().Maybe<TCoOptionalIf>(); - - if (!maybeOptionalIf.IsValid()) { - return node; - } - - auto optionalIf = maybeOptionalIf.Cast(); - - if (!ValidateIfArgument(optionalIf, lambdaArg)) { - return node; - } - - auto pushedPredicate = PredicatePushdown( - optionalIf.Predicate(), ctx, node.Pos(), lambdaArg, read.Process().Body() - ); - - if (!pushedPredicate.IsValid()) { - return node; - } - - auto olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos()) - .Input(read.Process().Body()) - .Condition(pushedPredicate.Cast()) - .Done(); - - auto newProcessLambda = Build<TCoLambda>(ctx, node.Pos()) - .Args({"row"}) - .Body<TExprApplier>() - .Apply(olapFilter) - .With(read.Process().Args().Arg(0), "row") - .Build() - .Done(); - - YQL_CLOG(INFO, ProviderKqp) << "Pushed OLAP lambda: " << KqpExprToPrettyString(newProcessLambda, ctx); - -#ifdef ENABLE_COLUMNS_PRUNING - TMaybeNode<TCoAtomList> readColumns = BuildColumnsFromLambda(lambda, ctx, node.Pos()); - - if (!readColumns.IsValid()) { - readColumns = read.Columns(); - } -#endif - - auto newRead = Build<TKqpReadOlapTableRanges>(ctx, node.Pos()) - .Table(read.Table()) - .Ranges(read.Ranges()) -#ifdef ENABLE_COLUMNS_PRUNING - .Columns(readColumns.Cast()) -#else - .Columns(read.Columns()) -#endif - .Settings(read.Settings()) - .ExplainPrompt(read.ExplainPrompt()) - .Process(newProcessLambda) - .Done(); - -#ifdef ENABLE_COLUMNS_PRUNING - return newRead; -#else - auto newFlatmap = Build<TCoFlatMap>(ctx, node.Pos()) - .Input(newRead) - .Lambda<TCoLambda>() - .Args({"new_arg"}) - .Body<TCoOptionalIf>() - .Predicate<TCoBool>() - .Literal().Build("true") - .Build() - .Value<TExprApplier>() - .Apply(optionalIf.Value()) - .With(lambda.Args().Arg(0), "new_arg") - .Build() - .Build() - .Build() - .Done(); - - return newFlatmap; -#endif -} - + + if (parameters.empty()) { + return NullNode; + } + + return ComparisonPushdown(parameters, predicate, ctx, pos, input); +} + +TMaybeNode<TExprBase> PredicatePushdown(const TExprBase& predicate, TExprContext& ctx, TPositionHandle pos, + const TExprNode* lambdaArg, const TExprBase& input) +{ + auto maybeCoalesce = predicate.Maybe<TCoCoalesce>(); + + if (maybeCoalesce.IsValid()) { + return CoalescePushdown(maybeCoalesce.Cast(), ctx, pos, lambdaArg, input); + } + + auto maybeExists = predicate.Maybe<TCoExists>(); + + if (maybeExists.IsValid()) { + return ExistsPushdown(maybeExists.Cast(), ctx, pos, lambdaArg, input); + } + + if (predicate.Maybe<TCoNot>()) { + auto notNode = predicate.Cast<TCoNot>(); + auto pushedNot = PredicatePushdown(notNode.Value(), ctx, pos, lambdaArg, input); + + if (!pushedNot.IsValid()) { + return NullNode; + } + + return Build<TCoNot>(ctx, pos) + .Value(pushedNot.Cast()) + .Done(); + } + + if (!predicate.Maybe<TCoAnd>() && !predicate.Maybe<TCoOr>() && !predicate.Maybe<TCoXor>()) { + return NullNode; + } + + TVector<TExprBase> pushedOps; + pushedOps.reserve(predicate.Ptr()->ChildrenSize()); + + for (auto& child: predicate.Ptr()->Children()) { + auto pushedChild = PredicatePushdown(TExprBase(child), ctx, pos, lambdaArg, input); + + if (!pushedChild.IsValid()) { + return NullNode; + } + + pushedOps.emplace_back(pushedChild.Cast()); + } + + if (predicate.Maybe<TCoAnd>()) { + return Build<TCoAnd>(ctx, pos) + .Add(pushedOps) + .Done(); + } + + if (predicate.Maybe<TCoOr>()) { + return Build<TCoOr>(ctx, pos) + .Add(pushedOps) + .Done(); + } + + Y_VERIFY_DEBUG(predicate.Maybe<TCoXor>()); + + return Build<TCoXor>(ctx, pos) + .Add(pushedOps) + .Done(); +} + +} // annymous namespace end + +TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, + TTypeAnnotationContext& typesCtx) +{ + Y_UNUSED(typesCtx); + + if (!kqpCtx.Config->PushOlapProcess()) { + return node; + } + + if (!node.Maybe<TCoFlatMap>().Input().Maybe<TKqpReadOlapTableRanges>()) { + return node; + } + + auto flatmap = node.Cast<TCoFlatMap>(); + auto read = flatmap.Input().Cast<TKqpReadOlapTableRanges>(); + + if (read.Process().Body().Raw() != read.Process().Args().Arg(0).Raw()) { + return node; + } + + const auto& lambda = flatmap.Lambda(); + auto lambdaArg = lambda.Args().Arg(0).Raw(); + + YQL_CLOG(INFO, ProviderKqp) << "Initial OLAP lambda: " << KqpExprToPrettyString(lambda, ctx); + + auto maybeOptionalIf = lambda.Body().Maybe<TCoOptionalIf>(); + + if (!maybeOptionalIf.IsValid()) { + return node; + } + + auto optionalIf = maybeOptionalIf.Cast(); + + if (!ValidateIfArgument(optionalIf, lambdaArg)) { + return node; + } + + auto pushedPredicate = PredicatePushdown( + optionalIf.Predicate(), ctx, node.Pos(), lambdaArg, read.Process().Body() + ); + + if (!pushedPredicate.IsValid()) { + return node; + } + + auto olapFilter = Build<TKqpOlapFilter>(ctx, node.Pos()) + .Input(read.Process().Body()) + .Condition(pushedPredicate.Cast()) + .Done(); + + auto newProcessLambda = Build<TCoLambda>(ctx, node.Pos()) + .Args({"row"}) + .Body<TExprApplier>() + .Apply(olapFilter) + .With(read.Process().Args().Arg(0), "row") + .Build() + .Done(); + + YQL_CLOG(INFO, ProviderKqp) << "Pushed OLAP lambda: " << KqpExprToPrettyString(newProcessLambda, ctx); + +#ifdef ENABLE_COLUMNS_PRUNING + TMaybeNode<TCoAtomList> readColumns = BuildColumnsFromLambda(lambda, ctx, node.Pos()); + + if (!readColumns.IsValid()) { + readColumns = read.Columns(); + } +#endif + + auto newRead = Build<TKqpReadOlapTableRanges>(ctx, node.Pos()) + .Table(read.Table()) + .Ranges(read.Ranges()) +#ifdef ENABLE_COLUMNS_PRUNING + .Columns(readColumns.Cast()) +#else + .Columns(read.Columns()) +#endif + .Settings(read.Settings()) + .ExplainPrompt(read.ExplainPrompt()) + .Process(newProcessLambda) + .Done(); + +#ifdef ENABLE_COLUMNS_PRUNING + return newRead; +#else + auto newFlatmap = Build<TCoFlatMap>(ctx, node.Pos()) + .Input(newRead) + .Lambda<TCoLambda>() + .Args({"new_arg"}) + .Body<TCoOptionalIf>() + .Predicate<TCoBool>() + .Literal().Build("true") + .Build() + .Value<TExprApplier>() + .Apply(optionalIf.Value()) + .With(lambda.Args().Arg(0), "new_arg") + .Build() + .Build() + .Build() + .Done(); + + return newFlatmap; +#endif +} + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h index d444de3ef7a..83e1114b177 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h @@ -1,33 +1,33 @@ -#pragma once - +#pragma once + #include <ydb/core/kqp/opt/kqp_opt.h> #include <ydb/core/kqp/provider/yql_kikimr_expr_nodes.h> - + #include <ydb/library/yql/ast/yql_expr.h> - -/* - * This file contains declaration of all rule functions for physical optimizer - */ - + +/* + * This file contains declaration of all rule functions for physical optimizer + */ + namespace NKikimr::NKqp::NOpt { - + NYql::NNodes::TExprBase KqpBuildReadTableStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx); - + const TKqpOptimizeContext& kqpCtx); + NYql::NNodes::TExprBase KqpBuildReadTableRangesStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, const TKqpOptimizeContext& kqpCtx); NYql::NNodes::TExprBase KqpBuildLookupTableStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); -NYql::NNodes::TExprBase KqpRemoveRedundantSortByPk(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx); - -NYql::NNodes::TExprBase KqpApplyLimitToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx); - -NYql::NNodes::TExprBase KqpPushOlapFilter(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx, NYql::TTypeAnnotationContext& typesCtx); - -NYql::NNodes::TExprBase KqpFloatUpStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); - +NYql::NNodes::TExprBase KqpRemoveRedundantSortByPk(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, + const TKqpOptimizeContext& kqpCtx); + +NYql::NNodes::TExprBase KqpApplyLimitToReadTable(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, + const TKqpOptimizeContext& kqpCtx); + +NYql::NNodes::TExprBase KqpPushOlapFilter(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, + const TKqpOptimizeContext& kqpCtx, NYql::TTypeAnnotationContext& typesCtx); + +NYql::NNodes::TExprBase KqpFloatUpStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); + } // NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_sort.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_sort.cpp index 2802153b0fd..97b416f7a8f 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_sort.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_sort.cpp @@ -1,160 +1,160 @@ -#include "kqp_opt_phy_rules.h" +#include "kqp_opt_phy_rules.h" #include "kqp_opt_phy_impl.h" - + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> - + #include <ydb/library/yql/core/yql_opt_utils.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NNodes; - -// Temporary solution, should be replaced with constraints -// copy-past from old engine algo: https://a.yandex-team.ru/arc_vcs/yql/providers/kikimr/yql_kikimr_opt.cpp?rev=e592a5a9509952f1c29f1ec02343dd4c05fe426d#L122 -TExprBase KqpRemoveRedundantSortByPk(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { - auto maybeSort = node.Maybe<TCoSort>(); - auto maybeTopSort = node.Maybe<TCoTopSort>(); - - if (!maybeSort && !maybeTopSort) { - return node; - } - - auto input = maybeSort ? maybeSort.Cast().Input() : maybeTopSort.Cast().Input(); - auto sortDirections = maybeSort ? maybeSort.Cast().SortDirections() : maybeTopSort.Cast().SortDirections(); - auto keySelector = maybeSort ? maybeSort.Cast().KeySelectorLambda() : maybeTopSort.Cast().KeySelectorLambda(); - - auto maybeFlatmap = input.Maybe<TCoFlatMap>(); - - TMaybe<THashSet<TStringBuf>> passthroughFields; - if (maybeFlatmap) { - auto flatmap = input.Cast<TCoFlatMap>(); - - if (!IsPassthroughFlatMap(flatmap, &passthroughFields)) { - return node; - } - - input = flatmap.Input(); - } - - bool isReadTable = input.Maybe<TKqpReadTable>().IsValid(); - bool isReadTableRanges = input.Maybe<TKqlReadTableRangesBase>().IsValid(); - - if (!isReadTable && !isReadTableRanges) { - return node; - } - - enum : ui32 { - SortDirectionNone = 0, - SortDirectionForward = 1, - SortDirectionReverse = 2, - SortDirectionUnknown = 4, - }; - - auto getDirection = [] (TExprBase expr) -> ui32 { - if (!expr.Maybe<TCoBool>()) { - return SortDirectionUnknown; - } - - if (!FromString<bool>(expr.Cast<TCoBool>().Literal().Value())) { - return SortDirectionReverse; - } - - return SortDirectionForward; - }; - - ui32 direction = SortDirectionNone; - - if (auto maybeList = sortDirections.Maybe<TExprList>()) { - for (const auto& expr : maybeList.Cast()) { - direction |= getDirection(expr); - if (direction != SortDirectionForward && direction != SortDirectionReverse) { - return node; - } - } - } else { - direction |= getDirection(sortDirections); - if (direction != SortDirectionForward && direction != SortDirectionReverse) { - return node; - } - } - - auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, GetReadTablePath(input, isReadTableRanges)); - - auto checkKey = [keySelector, &tableDesc, &passthroughFields] (TExprBase key, ui32 index) { - if (!key.Maybe<TCoMember>()) { - return false; - } - - auto member = key.Cast<TCoMember>(); - if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { - return false; - } - - auto column = TString(member.Name().Value()); - auto columnIndex = tableDesc.GetKeyColumnIndex(column); - if (!columnIndex || *columnIndex != index) { - return false; - } - - if (passthroughFields && !passthroughFields->contains(column)) { - return false; - } - - return true; - }; - - auto lambdaBody = keySelector.Body(); - if (auto maybeTuple = lambdaBody.Maybe<TExprList>()) { - auto tuple = maybeTuple.Cast(); - for (size_t i = 0; i < tuple.Size(); ++i) { - if (!checkKey(tuple.Item(i), i)) { - return node; - } - } - } else { - if (!checkKey(lambdaBody, 0)) { - return node; - } - } - - if (direction == SortDirectionReverse) { -// if (!config.AllowReverseRange()) { -// return node; -// } - bool olapTable = tableDesc.Metadata->Kind == EKikimrTableKind::Olap; - if (!olapTable && kqpCtx.IsScanQuery()) { - return node; - } - - auto settings = GetReadTableSettings(input, isReadTableRanges); - - if (settings.Reverse) { - return node; - } - - settings.SetReverse(); - - input = BuildReadNode(input.Pos(), ctx, input, settings); - } - - if (maybeFlatmap) { - input = Build<TCoFlatMap>(ctx, node.Pos()) - .Input(input) - .Lambda(maybeFlatmap.Cast().Lambda()) - .Done(); - } - - if (maybeTopSort) { - return Build<TCoTake>(ctx, node.Pos()) - .Input(input) - .Count(maybeTopSort.Cast().Count()) - .Done(); - } else { - return input; - } -} - + +using namespace NYql; +using namespace NYql::NNodes; + +// Temporary solution, should be replaced with constraints +// copy-past from old engine algo: https://a.yandex-team.ru/arc_vcs/yql/providers/kikimr/yql_kikimr_opt.cpp?rev=e592a5a9509952f1c29f1ec02343dd4c05fe426d#L122 +TExprBase KqpRemoveRedundantSortByPk(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { + auto maybeSort = node.Maybe<TCoSort>(); + auto maybeTopSort = node.Maybe<TCoTopSort>(); + + if (!maybeSort && !maybeTopSort) { + return node; + } + + auto input = maybeSort ? maybeSort.Cast().Input() : maybeTopSort.Cast().Input(); + auto sortDirections = maybeSort ? maybeSort.Cast().SortDirections() : maybeTopSort.Cast().SortDirections(); + auto keySelector = maybeSort ? maybeSort.Cast().KeySelectorLambda() : maybeTopSort.Cast().KeySelectorLambda(); + + auto maybeFlatmap = input.Maybe<TCoFlatMap>(); + + TMaybe<THashSet<TStringBuf>> passthroughFields; + if (maybeFlatmap) { + auto flatmap = input.Cast<TCoFlatMap>(); + + if (!IsPassthroughFlatMap(flatmap, &passthroughFields)) { + return node; + } + + input = flatmap.Input(); + } + + bool isReadTable = input.Maybe<TKqpReadTable>().IsValid(); + bool isReadTableRanges = input.Maybe<TKqlReadTableRangesBase>().IsValid(); + + if (!isReadTable && !isReadTableRanges) { + return node; + } + + enum : ui32 { + SortDirectionNone = 0, + SortDirectionForward = 1, + SortDirectionReverse = 2, + SortDirectionUnknown = 4, + }; + + auto getDirection = [] (TExprBase expr) -> ui32 { + if (!expr.Maybe<TCoBool>()) { + return SortDirectionUnknown; + } + + if (!FromString<bool>(expr.Cast<TCoBool>().Literal().Value())) { + return SortDirectionReverse; + } + + return SortDirectionForward; + }; + + ui32 direction = SortDirectionNone; + + if (auto maybeList = sortDirections.Maybe<TExprList>()) { + for (const auto& expr : maybeList.Cast()) { + direction |= getDirection(expr); + if (direction != SortDirectionForward && direction != SortDirectionReverse) { + return node; + } + } + } else { + direction |= getDirection(sortDirections); + if (direction != SortDirectionForward && direction != SortDirectionReverse) { + return node; + } + } + + auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, GetReadTablePath(input, isReadTableRanges)); + + auto checkKey = [keySelector, &tableDesc, &passthroughFields] (TExprBase key, ui32 index) { + if (!key.Maybe<TCoMember>()) { + return false; + } + + auto member = key.Cast<TCoMember>(); + if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { + return false; + } + + auto column = TString(member.Name().Value()); + auto columnIndex = tableDesc.GetKeyColumnIndex(column); + if (!columnIndex || *columnIndex != index) { + return false; + } + + if (passthroughFields && !passthroughFields->contains(column)) { + return false; + } + + return true; + }; + + auto lambdaBody = keySelector.Body(); + if (auto maybeTuple = lambdaBody.Maybe<TExprList>()) { + auto tuple = maybeTuple.Cast(); + for (size_t i = 0; i < tuple.Size(); ++i) { + if (!checkKey(tuple.Item(i), i)) { + return node; + } + } + } else { + if (!checkKey(lambdaBody, 0)) { + return node; + } + } + + if (direction == SortDirectionReverse) { +// if (!config.AllowReverseRange()) { +// return node; +// } + bool olapTable = tableDesc.Metadata->Kind == EKikimrTableKind::Olap; + if (!olapTable && kqpCtx.IsScanQuery()) { + return node; + } + + auto settings = GetReadTableSettings(input, isReadTableRanges); + + if (settings.Reverse) { + return node; + } + + settings.SetReverse(); + + input = BuildReadNode(input.Pos(), ctx, input, settings); + } + + if (maybeFlatmap) { + input = Build<TCoFlatMap>(ctx, node.Pos()) + .Input(input) + .Lambda(maybeFlatmap.Cast().Lambda()) + .Done(); + } + + if (maybeTopSort) { + return Build<TCoTake>(ctx, node.Pos()) + .Input(input) + .Count(maybeTopSort.Cast().Count()) + .Done(); + } else { + return input; + } +} + } // namespace NKikimr::NKqp::NOpt - + diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_stage_float_up.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_stage_float_up.cpp index 43a8bda3826..020a2f9572a 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_stage_float_up.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_stage_float_up.cpp @@ -1,207 +1,207 @@ -#include "kqp_opt_phy_rules.h" +#include "kqp_opt_phy_rules.h" #include "kqp_opt_phy_impl.h" - + #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/core/kqp/opt/kqp_opt_impl.h> - + #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/core/yql_expr_optimize.h> - + namespace NKikimr::NKqp::NOpt { - -using namespace NYql; -using namespace NYql::NNodes; -using TStatus = IGraphTransformer::TStatus; - -namespace { -TExprNode::TListType FindPrecomputes(const TExprBase& node) { - auto filter = [](const TExprNode::TPtr& node) { - return !TMaybeNode<TDqPhyPrecompute>(node).IsValid(); - }; - - auto predicate = [](const TExprNode::TPtr& node) { - auto maybePrecompute = TMaybeNode<TDqPhyPrecompute>(node); - - if (!maybePrecompute.IsValid()) { - return false; - } - - return true; - }; - - return FindNodes(node.Ptr(), filter, predicate); -} - -bool IsArgumentUsed(const TExprNode::TPtr node, const TExprNode* argument) { - auto filter = [](const TExprNode::TPtr& node) { - return !TMaybeNode<TDqStage>(node).IsValid(); - }; - - auto predicate = [argument](const TExprNode::TPtr& node) { - return node.Get() == argument; - }; - - return !!FindNode(node, filter, predicate); -} - -} // anonymous namespace end - -TExprBase KqpFloatUpStage(TExprBase node, TExprContext& ctx) { - auto maybeStage = node.Maybe<TDqStage>(); - - if (!maybeStage.IsValid()) { - return node; - } - - TExprNode::TListType innerPrecomputePtrs = FindPrecomputes(maybeStage.Cast().Program()); - - if (innerPrecomputePtrs.empty()) { - return node; - } - - /* - * There is "stage inside stage", i.e. outer stage program refers to inner stage not listed in outer stage inputs, - * thus need to move inner stage output to outer stage input - * I.e. you have following code, it is wrong in case stage can't be inside stage - * Stage1(a1, .., TDqPhyPrecompute(aX), TDqPhyPrecompute(aY), ..., an) (args) - * - let CN = Connection(Stage2) - * -- Stage2(b1, .., bn) (args) - * --- Operation (aX) - * - Operation (TDqPhyPrecompute(CN)) - * - Operation (aY) - * - * Convert to - * - * Stage2(b1, .., bn, TDqPhyPrecompute(aX)) (args) - * - Operation (aX) - * let CN = Connection(Stage2) - * Stage1(a1, .., TDqPhyPrecompute(aY), .., an, TDqPhyPrecompute(CN)) (args, X) - * - Operation (X) - * - Operation (aY) - * - * Also this function additionaly may move precomputes to stage inputs in case it is same logic. I.e. - * DqStage( () () ... Operation(TDqPhyPrecompute(X)) ... ) - * will be converted to - * DqStage( (TDqPhyPrecompute(X)) (arg1) ... Operation(arg1) ... ) - * - * Sometimes there is situation when both stages share part of program. Thus need to deep copy both of them - * 1. Outer stage copy converts TDqPhyPrecompute node to argument - * 2. Inner stage should copy whole program - * - * Both of copy functions need to receive replacement in deepClones, i.e. which arguments to replace. - */ - struct TStageData { - TVector<TExprNode::TPtr> Inputs; - TVector<TExprNode::TPtr> Args; - TNodeOnNodeOwnedMap Replaces; - - void Add(TExprNode::TPtr input, const TExprNode* arg, TExprContext& ctx, const TString& argName) { - auto newArg = ctx.NewArgument(input->Pos(), argName); - Inputs.emplace_back(input); - Args.emplace_back(newArg); - Replaces.emplace(arg, newArg); - } - - bool Empty() { - return Inputs.empty(); - } - }; - - struct TStageData outer; - auto outerStage = maybeStage.Cast(); - - for (ui64 i = 0; i < outerStage.Inputs().Size(); ++i) { - auto input = outerStage.Inputs().Item(i); - auto arg = outerStage.Program().Args().Ptr()->Child(i); - - if (!IsArgumentUsed(outerStage.Program().Body().Ptr(), arg)) { - continue; - } - - outer.Add(input.Ptr(), arg, ctx, TStringBuilder() << "_kqp_outer_stage_arg_" << i); - } - - std::vector<std::pair<TDqPhyPrecompute, TStageData>> innerPrecomputes; - innerPrecomputes.reserve(innerPrecomputePtrs.size()); - - for (auto& precomputePtr: innerPrecomputePtrs) { - innerPrecomputes.emplace_back( - std::make_pair<TDqPhyPrecompute, struct TStageData>(std::move(TDqPhyPrecompute(precomputePtr)), TStageData()) - ); - } - - ui64 precomputeIndex = 0; - for (auto& item: innerPrecomputes) { - auto innerStage = item.first.Connection().Output().Stage(); - - for (ui64 i = 0; i < outerStage.Inputs().Size(); ++i) { - auto input = outerStage.Inputs().Item(i); - auto arg = outerStage.Program().Args().Ptr()->Child(i); - - if (!IsArgumentUsed(innerStage.Program().Body().Ptr(), arg)) { - continue; - } - - item.second.Add(input.Ptr(), arg, ctx, TStringBuilder() << "_kqp_inner_stage_arg_" << i); - } - - TExprNode::TPtr newConnection; - - if (!item.second.Empty()) { - // Deep copy inner stage - auto newInnerBody = ctx.DeepCopy( - innerStage.Program().Body().Ref(), ctx, item.second.Replaces, - true /* internStrings */, true /* copyTypes */, false, nullptr - ); - - auto newInnerStage = Build<TDqStage>(ctx, innerStage.Pos()) - .Inputs() - .Add(item.second.Inputs) - .Build() - .Program() - .Args(item.second.Args) - .Body(newInnerBody) - .Build() - .Settings() - .Build() - .Done(); - - newConnection = ctx.ReplaceNode( - std::move(item.first.Connection().Ptr()), innerStage.Ref(), newInnerStage.Ptr() - ); - } else { - newConnection = item.first.Connection().Ptr(); - } - - outer.Inputs.emplace_back( - Build<TDqPhyPrecompute>(ctx, outerStage.Pos()) - .Connection(newConnection) - .Done().Ptr() - ); - - auto argument = ctx.NewArgument( - outerStage.Pos(), - TStringBuilder() << "_kqp_inner_to_outer_arg_" << ++precomputeIndex - ); - outer.Args.emplace_back(argument); - outer.Replaces.emplace(item.first.Ptr().Get(), argument); - } - - // Change TDqPhyPrecompute to arguments - auto newOuterBody = ctx.ReplaceNodes(outerStage.Program().Body().Ptr(), outer.Replaces); - - return Build<TDqStage>(ctx, outerStage.Pos()) - .Inputs() - .Add(outer.Inputs) - .Build() - .Program() - .Args(outer.Args) - .Body(newOuterBody) - .Build() - .Settings() - .Build() - .Done(); -} - + +using namespace NYql; +using namespace NYql::NNodes; +using TStatus = IGraphTransformer::TStatus; + +namespace { +TExprNode::TListType FindPrecomputes(const TExprBase& node) { + auto filter = [](const TExprNode::TPtr& node) { + return !TMaybeNode<TDqPhyPrecompute>(node).IsValid(); + }; + + auto predicate = [](const TExprNode::TPtr& node) { + auto maybePrecompute = TMaybeNode<TDqPhyPrecompute>(node); + + if (!maybePrecompute.IsValid()) { + return false; + } + + return true; + }; + + return FindNodes(node.Ptr(), filter, predicate); +} + +bool IsArgumentUsed(const TExprNode::TPtr node, const TExprNode* argument) { + auto filter = [](const TExprNode::TPtr& node) { + return !TMaybeNode<TDqStage>(node).IsValid(); + }; + + auto predicate = [argument](const TExprNode::TPtr& node) { + return node.Get() == argument; + }; + + return !!FindNode(node, filter, predicate); +} + +} // anonymous namespace end + +TExprBase KqpFloatUpStage(TExprBase node, TExprContext& ctx) { + auto maybeStage = node.Maybe<TDqStage>(); + + if (!maybeStage.IsValid()) { + return node; + } + + TExprNode::TListType innerPrecomputePtrs = FindPrecomputes(maybeStage.Cast().Program()); + + if (innerPrecomputePtrs.empty()) { + return node; + } + + /* + * There is "stage inside stage", i.e. outer stage program refers to inner stage not listed in outer stage inputs, + * thus need to move inner stage output to outer stage input + * I.e. you have following code, it is wrong in case stage can't be inside stage + * Stage1(a1, .., TDqPhyPrecompute(aX), TDqPhyPrecompute(aY), ..., an) (args) + * - let CN = Connection(Stage2) + * -- Stage2(b1, .., bn) (args) + * --- Operation (aX) + * - Operation (TDqPhyPrecompute(CN)) + * - Operation (aY) + * + * Convert to + * + * Stage2(b1, .., bn, TDqPhyPrecompute(aX)) (args) + * - Operation (aX) + * let CN = Connection(Stage2) + * Stage1(a1, .., TDqPhyPrecompute(aY), .., an, TDqPhyPrecompute(CN)) (args, X) + * - Operation (X) + * - Operation (aY) + * + * Also this function additionaly may move precomputes to stage inputs in case it is same logic. I.e. + * DqStage( () () ... Operation(TDqPhyPrecompute(X)) ... ) + * will be converted to + * DqStage( (TDqPhyPrecompute(X)) (arg1) ... Operation(arg1) ... ) + * + * Sometimes there is situation when both stages share part of program. Thus need to deep copy both of them + * 1. Outer stage copy converts TDqPhyPrecompute node to argument + * 2. Inner stage should copy whole program + * + * Both of copy functions need to receive replacement in deepClones, i.e. which arguments to replace. + */ + struct TStageData { + TVector<TExprNode::TPtr> Inputs; + TVector<TExprNode::TPtr> Args; + TNodeOnNodeOwnedMap Replaces; + + void Add(TExprNode::TPtr input, const TExprNode* arg, TExprContext& ctx, const TString& argName) { + auto newArg = ctx.NewArgument(input->Pos(), argName); + Inputs.emplace_back(input); + Args.emplace_back(newArg); + Replaces.emplace(arg, newArg); + } + + bool Empty() { + return Inputs.empty(); + } + }; + + struct TStageData outer; + auto outerStage = maybeStage.Cast(); + + for (ui64 i = 0; i < outerStage.Inputs().Size(); ++i) { + auto input = outerStage.Inputs().Item(i); + auto arg = outerStage.Program().Args().Ptr()->Child(i); + + if (!IsArgumentUsed(outerStage.Program().Body().Ptr(), arg)) { + continue; + } + + outer.Add(input.Ptr(), arg, ctx, TStringBuilder() << "_kqp_outer_stage_arg_" << i); + } + + std::vector<std::pair<TDqPhyPrecompute, TStageData>> innerPrecomputes; + innerPrecomputes.reserve(innerPrecomputePtrs.size()); + + for (auto& precomputePtr: innerPrecomputePtrs) { + innerPrecomputes.emplace_back( + std::make_pair<TDqPhyPrecompute, struct TStageData>(std::move(TDqPhyPrecompute(precomputePtr)), TStageData()) + ); + } + + ui64 precomputeIndex = 0; + for (auto& item: innerPrecomputes) { + auto innerStage = item.first.Connection().Output().Stage(); + + for (ui64 i = 0; i < outerStage.Inputs().Size(); ++i) { + auto input = outerStage.Inputs().Item(i); + auto arg = outerStage.Program().Args().Ptr()->Child(i); + + if (!IsArgumentUsed(innerStage.Program().Body().Ptr(), arg)) { + continue; + } + + item.second.Add(input.Ptr(), arg, ctx, TStringBuilder() << "_kqp_inner_stage_arg_" << i); + } + + TExprNode::TPtr newConnection; + + if (!item.second.Empty()) { + // Deep copy inner stage + auto newInnerBody = ctx.DeepCopy( + innerStage.Program().Body().Ref(), ctx, item.second.Replaces, + true /* internStrings */, true /* copyTypes */, false, nullptr + ); + + auto newInnerStage = Build<TDqStage>(ctx, innerStage.Pos()) + .Inputs() + .Add(item.second.Inputs) + .Build() + .Program() + .Args(item.second.Args) + .Body(newInnerBody) + .Build() + .Settings() + .Build() + .Done(); + + newConnection = ctx.ReplaceNode( + std::move(item.first.Connection().Ptr()), innerStage.Ref(), newInnerStage.Ptr() + ); + } else { + newConnection = item.first.Connection().Ptr(); + } + + outer.Inputs.emplace_back( + Build<TDqPhyPrecompute>(ctx, outerStage.Pos()) + .Connection(newConnection) + .Done().Ptr() + ); + + auto argument = ctx.NewArgument( + outerStage.Pos(), + TStringBuilder() << "_kqp_inner_to_outer_arg_" << ++precomputeIndex + ); + outer.Args.emplace_back(argument); + outer.Replaces.emplace(item.first.Ptr().Get(), argument); + } + + // Change TDqPhyPrecompute to arguments + auto newOuterBody = ctx.ReplaceNodes(outerStage.Program().Body().Ptr(), outer.Replaces); + + return Build<TDqStage>(ctx, outerStage.Pos()) + .Inputs() + .Add(outer.Inputs) + .Build() + .Program() + .Args(outer.Args) + .Body(newOuterBody) + .Build() + .Settings() + .Build() + .Done(); +} + } // namespace NKikimr::NKqp::NOpt - + diff --git a/ydb/core/kqp/opt/physical/ya.make b/ydb/core/kqp/opt/physical/ya.make index dd3509b9547..81c4b319b98 100644 --- a/ydb/core/kqp/opt/physical/ya.make +++ b/ydb/core/kqp/opt/physical/ya.make @@ -1,27 +1,27 @@ -LIBRARY() - -OWNER( - spuchin - g:kikimr -) - -SRCS( - kqp_opt_phy_build_stage.cpp - kqp_opt_phy_limit.cpp - kqp_opt_phy_olap_filter.cpp - kqp_opt_phy_sort.cpp - kqp_opt_phy_helpers.cpp - kqp_opt_phy_stage_float_up.cpp +LIBRARY() + +OWNER( + spuchin + g:kikimr +) + +SRCS( + kqp_opt_phy_build_stage.cpp + kqp_opt_phy_limit.cpp + kqp_opt_phy_olap_filter.cpp + kqp_opt_phy_sort.cpp + kqp_opt_phy_helpers.cpp + kqp_opt_phy_stage_float_up.cpp kqp_opt_phy.cpp -) - -PEERDIR( +) + +PEERDIR( ydb/core/kqp/common ydb/core/kqp/opt/physical/effects ydb/library/yql/dq/common ydb/library/yql/dq/opt -) - -YQL_LAST_ABI_VERSION() - -END() +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/kqp/opt/ya.make b/ydb/core/kqp/opt/ya.make index 3f49a0fe271..6287365b191 100644 --- a/ydb/core/kqp/opt/ya.make +++ b/ydb/core/kqp/opt/ya.make @@ -25,6 +25,6 @@ PEERDIR( ydb/library/yql/dq/opt ) -YQL_LAST_ABI_VERSION() +YQL_LAST_ABI_VERSION() END() diff --git a/ydb/core/kqp/prepare/kqp_prepare.h b/ydb/core/kqp/prepare/kqp_prepare.h index cec5c556d7f..d114a449458 100644 --- a/ydb/core/kqp/prepare/kqp_prepare.h +++ b/ydb/core/kqp/prepare/kqp_prepare.h @@ -119,7 +119,7 @@ struct TKqlTransformContext : TThrRefBase { return *PreparedKql; } - IKqpGateway::TMkqlSettings GetMkqlSettings(bool hasDataEffects, TInstant now) const; + IKqpGateway::TMkqlSettings GetMkqlSettings(bool hasDataEffects, TInstant now) const; void AddMkqlStats(const TString& program, NKikimrQueryStats::TTxStats&& txStats); void Reset() { diff --git a/ydb/core/kqp/prepare/kqp_query_exec.cpp b/ydb/core/kqp/prepare/kqp_query_exec.cpp index 6088e952ffe..bc2d4e94ce9 100644 --- a/ydb/core/kqp/prepare/kqp_query_exec.cpp +++ b/ydb/core/kqp/prepare/kqp_query_exec.cpp @@ -459,9 +459,9 @@ void TKqlTransformContext::AddMkqlStats(const TString& program, NKikimrQueryStat ExtractQueryStats(QueryStats, txStats); } -IKqpGateway::TMkqlSettings TKqlTransformContext::GetMkqlSettings(bool hasDataEffects, TInstant now) const { +IKqpGateway::TMkqlSettings TKqlTransformContext::GetMkqlSettings(bool hasDataEffects, TInstant now) const { IKqpGateway::TMkqlSettings mkqlSettings; - mkqlSettings.LlvmRuntime = false; + mkqlSettings.LlvmRuntime = false; mkqlSettings.CollectStats = QueryCtx->StatsMode >= EKikimrStatsMode::Basic; if (hasDataEffects) { diff --git a/ydb/core/kqp/prepare/kqp_query_plan.cpp b/ydb/core/kqp/prepare/kqp_query_plan.cpp index fe42cb3c981..346a8b15cbe 100644 --- a/ydb/core/kqp/prepare/kqp_query_plan.cpp +++ b/ydb/core/kqp/prepare/kqp_query_plan.cpp @@ -51,13 +51,13 @@ struct TTableInfo { struct TSerializerCtx { TSerializerCtx(TExprContext& exprCtx, const TString& cluster, - const TIntrusivePtr<NYql::TKikimrTablesData> tablesData, + const TIntrusivePtr<NYql::TKikimrTablesData> tablesData, const TKikimrConfiguration::TPtr config, THashMap<ui32, TVector<NKikimrMiniKQL::TResult>> pureTxResults) : ExprCtx(exprCtx) , Cluster(cluster) , TablesData(tablesData) - , Config(config) + , Config(config) , PureTxResults(std::move(pureTxResults)) {} @@ -72,7 +72,7 @@ struct TSerializerCtx { const TExprContext& ExprCtx; const TString& Cluster; const TIntrusivePtr<NYql::TKikimrTablesData> TablesData; - const TKikimrConfiguration::TPtr Config; + const TKikimrConfiguration::TPtr Config; THashMap<ui32, TVector<NKikimrMiniKQL::TResult>> PureTxResults; }; @@ -985,7 +985,7 @@ private: planNode.NodeInfo["Tables"].AppendValue(op.Properties["Table"]); auto rangesDesc = PrettyExprStr(read.Ranges()); - if (rangesDesc == "Void" || explainPrompt.UsedKeyColumns.empty()) { + if (rangesDesc == "Void" || explainPrompt.UsedKeyColumns.empty()) { readInfo.Type = ETableReadType::FullScan; auto& ranges = op.Properties["ReadRanges"]; @@ -1030,17 +1030,17 @@ private: Y_ENSURE(false, rangesDesc); } - if (!explainPrompt.UsedKeyColumns.empty()) { + if (!explainPrompt.UsedKeyColumns.empty()) { auto& usedColumns = op.Properties["ReadRangesKeys"]; - for (const auto& col : explainPrompt.UsedKeyColumns) { + for (const auto& col : explainPrompt.UsedKeyColumns) { usedColumns.AppendValue(col); - } - } - + } + } + if (explainPrompt.ExpectedMaxRanges) { op.Properties["ReadRangesExpectedSize"] = explainPrompt.ExpectedMaxRanges; } - + auto& columns = op.Properties["ReadColumns"]; for (const auto& col : read.Columns()) { readInfo.Columns.emplace_back(TString(col.Value())); diff --git a/ydb/core/kqp/prepare/kqp_type_ann.cpp b/ydb/core/kqp/prepare/kqp_type_ann.cpp index 182ca5807f9..ee51c7349c2 100644 --- a/ydb/core/kqp/prepare/kqp_type_ann.cpp +++ b/ydb/core/kqp/prepare/kqp_type_ann.cpp @@ -125,22 +125,22 @@ std::pair<TString, const TKikimrTableDescription*> ResolveTable(const TExprNode* return {std::move(tableName), tableDesc}; } -const TFlowExprType* GetWideRowsType(TExprContext& ctx, const TStructExprType* rowType) { - YQL_ENSURE(rowType); - - const auto& columns = rowType->GetItems(); - - TTypeAnnotationNode::TListType items; - items.reserve(columns.size()); - - for (const auto& column: columns) { - items.push_back(column->GetItemType()); - } - - auto wideRowType = ctx.MakeType<TMultiExprType>(items); - return ctx.MakeType<TFlowExprType>(wideRowType); -} - +const TFlowExprType* GetWideRowsType(TExprContext& ctx, const TStructExprType* rowType) { + YQL_ENSURE(rowType); + + const auto& columns = rowType->GetItems(); + + TTypeAnnotationNode::TListType items; + items.reserve(columns.size()); + + for (const auto& column: columns) { + items.push_back(column->GetItemType()); + } + + auto wideRowType = ctx.MakeType<TMultiExprType>(items); + return ctx.MakeType<TFlowExprType>(wideRowType); +} + bool CalcKeyColumnsCount(TExprContext& ctx, const TPositionHandle pos, const TStructExprType& structType, const TKikimrTableDescription& tableDesc, const TKikimrTableMetadata& metadata, ui32& keyColumnsCount) { @@ -174,7 +174,7 @@ TStatus AnnotateReadTable(const TExprNode::TPtr& node, TExprContext& ctx, const return TStatus::Error; } - if (!readIndex && !EnsureArgsCount(*node, 4, ctx)) { + if (!readIndex && !EnsureArgsCount(*node, 4, ctx)) { return TStatus::Error; } @@ -226,7 +226,7 @@ TStatus AnnotateReadTable(const TExprNode::TPtr& node, TExprContext& ctx, const } else if (TKqpReadTable::Match(node.Get())) { node->SetTypeAnn(ctx.MakeType<TFlowExprType>(rowType)); } else if (TKqpWideReadTable::Match(node.Get())) { - node->SetTypeAnn(GetWideRowsType(ctx, rowType->Cast<TStructExprType>())); + node->SetTypeAnn(GetWideRowsType(ctx, rowType->Cast<TStructExprType>())); } else { YQL_ENSURE(false, "Unexpected ReadTable callable: " << node->Content()); } @@ -234,89 +234,89 @@ TStatus AnnotateReadTable(const TExprNode::TPtr& node, TExprContext& ctx, const return TStatus::Ok; } -TStatus AnnotateReadTableRanges(const TExprNode::TPtr& node, TExprContext& ctx, const TString& cluster, - const TKikimrTablesData& tablesData, bool withSystemColumns) -{ - bool olapTable = TKqpReadOlapTableRangesBase::Match(node.Get()); - - if ((olapTable && !EnsureArgsCount(*node, 6, ctx)) || (!olapTable && !EnsureArgsCount(*node, 5, ctx))) { - return TStatus::Error; - } - - auto table = ResolveTable(node->Child(TKqlReadTableRangesBase::idx_Table), ctx, cluster, tablesData); - if (!table.second) { - return TStatus::Error; - } - - const auto& columns = node->ChildPtr(TKqlReadTableRangesBase::idx_Columns); - if (!EnsureTupleOfAtoms(*columns, ctx)) { - return TStatus::Error; - } - - auto rowType = GetReadTableRowType(ctx, tablesData, cluster, table.first, TCoAtomList(columns), withSystemColumns); - if (!rowType) { - return TStatus::Error; - } - - auto ranges = node->Child(TKqlReadTableRangesBase::idx_Ranges); - if (!TCoVoid::Match(ranges) && - !TCoArgument::Match(ranges) && - !TCoParameter::Match(ranges) && - !TCoRangeFinalize::Match(ranges)) - { - ctx.AddError(TIssue( - ctx.GetPosition(ranges->Pos()), - TStringBuilder() - << "Expected Void, Parameter, Argument or RangeFinalize in ranges, but got: " - << ranges->Content() - )); - return TStatus::Error; - } - - if (TKqlReadTableRanges::Match(node.Get())) { - node->SetTypeAnn(ctx.MakeType<TListExprType>(rowType)); - } else if (TKqpReadTableRanges::Match(node.Get())) { - node->SetTypeAnn(ctx.MakeType<TFlowExprType>(rowType)); - } else if (TKqpWideReadTableRanges::Match(node.Get())) { - node->SetTypeAnn(GetWideRowsType(ctx, rowType->Cast<TStructExprType>())); - } else if (TKqpReadOlapTableRangesBase::Match(node.Get())) { - if (!EnsureLambda(*node->Child(TKqpReadOlapTableRangesBase::idx_Process), ctx)) { - return TStatus::Error; - } - - auto& processLambda = node->ChildRef(TKqpReadOlapTableRangesBase::idx_Process); - if (!UpdateLambdaAllArgumentsTypes(processLambda, {ctx.MakeType<TFlowExprType>(rowType)}, ctx)) { - return IGraphTransformer::TStatus::Error; - } - - if (!processLambda->GetTypeAnn()) { - return IGraphTransformer::TStatus::Repeat; - } - - auto processType = processLambda->GetTypeAnn(); - const TTypeAnnotationNode* processRowType; - if (!EnsureNewSeqType<false, false, true>(node->Pos(), *processType, ctx, &processRowType)) { - return TStatus::Error; - } - - if (!EnsureStructType(node->Pos(), *processRowType, ctx)) { - return IGraphTransformer::TStatus::Error; - } - - if (TKqpReadOlapTableRanges::Match(node.Get())) { - node->SetTypeAnn(ctx.MakeType<TFlowExprType>(processRowType)); - } else if (TKqpWideReadOlapTableRanges::Match(node.Get())) { - node->SetTypeAnn(GetWideRowsType(ctx, processRowType->Cast<TStructExprType>())); - } else { - YQL_ENSURE(false, "Unexpected ReadOlapTable callable." << node->Content()); - } - } else { - YQL_ENSURE(false, "Unexpected ReadTableRanges callable." << node->Content()); - } - - return TStatus::Ok; -} - +TStatus AnnotateReadTableRanges(const TExprNode::TPtr& node, TExprContext& ctx, const TString& cluster, + const TKikimrTablesData& tablesData, bool withSystemColumns) +{ + bool olapTable = TKqpReadOlapTableRangesBase::Match(node.Get()); + + if ((olapTable && !EnsureArgsCount(*node, 6, ctx)) || (!olapTable && !EnsureArgsCount(*node, 5, ctx))) { + return TStatus::Error; + } + + auto table = ResolveTable(node->Child(TKqlReadTableRangesBase::idx_Table), ctx, cluster, tablesData); + if (!table.second) { + return TStatus::Error; + } + + const auto& columns = node->ChildPtr(TKqlReadTableRangesBase::idx_Columns); + if (!EnsureTupleOfAtoms(*columns, ctx)) { + return TStatus::Error; + } + + auto rowType = GetReadTableRowType(ctx, tablesData, cluster, table.first, TCoAtomList(columns), withSystemColumns); + if (!rowType) { + return TStatus::Error; + } + + auto ranges = node->Child(TKqlReadTableRangesBase::idx_Ranges); + if (!TCoVoid::Match(ranges) && + !TCoArgument::Match(ranges) && + !TCoParameter::Match(ranges) && + !TCoRangeFinalize::Match(ranges)) + { + ctx.AddError(TIssue( + ctx.GetPosition(ranges->Pos()), + TStringBuilder() + << "Expected Void, Parameter, Argument or RangeFinalize in ranges, but got: " + << ranges->Content() + )); + return TStatus::Error; + } + + if (TKqlReadTableRanges::Match(node.Get())) { + node->SetTypeAnn(ctx.MakeType<TListExprType>(rowType)); + } else if (TKqpReadTableRanges::Match(node.Get())) { + node->SetTypeAnn(ctx.MakeType<TFlowExprType>(rowType)); + } else if (TKqpWideReadTableRanges::Match(node.Get())) { + node->SetTypeAnn(GetWideRowsType(ctx, rowType->Cast<TStructExprType>())); + } else if (TKqpReadOlapTableRangesBase::Match(node.Get())) { + if (!EnsureLambda(*node->Child(TKqpReadOlapTableRangesBase::idx_Process), ctx)) { + return TStatus::Error; + } + + auto& processLambda = node->ChildRef(TKqpReadOlapTableRangesBase::idx_Process); + if (!UpdateLambdaAllArgumentsTypes(processLambda, {ctx.MakeType<TFlowExprType>(rowType)}, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + if (!processLambda->GetTypeAnn()) { + return IGraphTransformer::TStatus::Repeat; + } + + auto processType = processLambda->GetTypeAnn(); + const TTypeAnnotationNode* processRowType; + if (!EnsureNewSeqType<false, false, true>(node->Pos(), *processType, ctx, &processRowType)) { + return TStatus::Error; + } + + if (!EnsureStructType(node->Pos(), *processRowType, ctx)) { + return IGraphTransformer::TStatus::Error; + } + + if (TKqpReadOlapTableRanges::Match(node.Get())) { + node->SetTypeAnn(ctx.MakeType<TFlowExprType>(processRowType)); + } else if (TKqpWideReadOlapTableRanges::Match(node.Get())) { + node->SetTypeAnn(GetWideRowsType(ctx, processRowType->Cast<TStructExprType>())); + } else { + YQL_ENSURE(false, "Unexpected ReadOlapTable callable." << node->Content()); + } + } else { + YQL_ENSURE(false, "Unexpected ReadTableRanges callable." << node->Content()); + } + + return TStatus::Ok; +} + TStatus AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, const TString& cluster, const TKikimrTablesData& tablesData, bool withSystemColumns) { @@ -696,146 +696,146 @@ TStatus AnnotateDeleteRows(const TExprNode::TPtr& node, TExprContext& ctx, const return TStatus::Ok; } -TStatus AnnotateOlapFilter(const TExprNode::TPtr& node, TExprContext& ctx) { - if (!EnsureArgsCount(*node, 2, ctx)) { - return TStatus::Error; - } - - auto* input = node->Child(TKqpOlapFilter::idx_Input); - - const TTypeAnnotationNode* itemType; - if (!EnsureNewSeqType<false, false, true>(*input, ctx, &itemType)) { - return TStatus::Error; - } - - if (!EnsureStructType(input->Pos(), *itemType, ctx)) { - return TStatus::Error; - } - - if (!EnsureSpecificDataType(*node->Child(TKqpOlapFilter::idx_Condition), EDataSlot::Bool, ctx)) { - return TStatus::Error; - } - - node->SetTypeAnn(input->GetTypeAnn()); - return TStatus::Ok; -} - -TStatus AnnotateOlapFilterCompare(const TExprNode::TPtr& node, TExprContext& ctx) { +TStatus AnnotateOlapFilter(const TExprNode::TPtr& node, TExprContext& ctx) { + if (!EnsureArgsCount(*node, 2, ctx)) { + return TStatus::Error; + } + + auto* input = node->Child(TKqpOlapFilter::idx_Input); + + const TTypeAnnotationNode* itemType; + if (!EnsureNewSeqType<false, false, true>(*input, ctx, &itemType)) { + return TStatus::Error; + } + + if (!EnsureStructType(input->Pos(), *itemType, ctx)) { + return TStatus::Error; + } + + if (!EnsureSpecificDataType(*node->Child(TKqpOlapFilter::idx_Condition), EDataSlot::Bool, ctx)) { + return TStatus::Error; + } + + node->SetTypeAnn(input->GetTypeAnn()); + return TStatus::Ok; +} + +TStatus AnnotateOlapFilterCompare(const TExprNode::TPtr& node, TExprContext& ctx) { if (!EnsureArgsCount(*node, 3, ctx)) { return TStatus::Error; } - auto* input = node->Child(TKqpOlapFilterCompare::idx_Input); + auto* input = node->Child(TKqpOlapFilterCompare::idx_Input); + + const TTypeAnnotationNode* itemType; + + if (!EnsureNewSeqType<false, false, true>(*input, ctx, &itemType)) { + return TStatus::Error; + } + + if (!EnsureStructType(input->Pos(), *itemType, ctx)) { + return TStatus::Error; + } + + auto validateNode = [itemType, &ctx](TExprNode* node) { + // Column name, validate that it is present in Input node + if (TCoAtom::Match(node)) { + auto rowType = itemType->Cast<TStructExprType>(); + + if (rowType->FindItem(node->Content())) { + return true; + } + + ctx.AddError(TIssue( + ctx.GetPosition(node->Pos()), + TStringBuilder() << "Missing column in input type: " << node->Content() + )); + + return false; + } + + // Null argument for IS NULL/NOT NULL + if (TCoNull::Match(node)) { + return true; + } + + // Incoming parameter + if (TCoParameter::Match(node)) { + return true; + } + + // Any supported literal + if (TCoDataCtor::Match(node)) { + return true; + } + + ctx.AddError(TIssue( + ctx.GetPosition(node->Pos()), + TStringBuilder() + << "Expected literal or column as OLAP filter value, got: " << node->Content() + )); + + return false; + }; + + auto leftNode = node->Child(TKqpOlapFilterCompare::idx_Left); + auto rightNode = node->Child(TKqpOlapFilterCompare::idx_Right); + + if (!validateNode(leftNode)) { + return TStatus::Error; + } + + if (!validateNode(rightNode)) { + return TStatus::Error; + } + + node->SetTypeAnn(ctx.MakeType<TDataExprType>(EDataSlot::Bool)); + + return TStatus::Ok; +} + +TStatus AnnotateOlapFilterExists(const TExprNode::TPtr& node, TExprContext& ctx) { + if (!EnsureArgsCount(*node, 2, ctx)) { + return TStatus::Error; + } + + auto* input = node->Child(TKqpOlapFilterExists::idx_Input); const TTypeAnnotationNode* itemType; - + if (!EnsureNewSeqType<false, false, true>(*input, ctx, &itemType)) { return TStatus::Error; } - + if (!EnsureStructType(input->Pos(), *itemType, ctx)) { return TStatus::Error; } - auto validateNode = [itemType, &ctx](TExprNode* node) { - // Column name, validate that it is present in Input node - if (TCoAtom::Match(node)) { - auto rowType = itemType->Cast<TStructExprType>(); - - if (rowType->FindItem(node->Content())) { - return true; - } - - ctx.AddError(TIssue( - ctx.GetPosition(node->Pos()), - TStringBuilder() << "Missing column in input type: " << node->Content() - )); - - return false; - } - - // Null argument for IS NULL/NOT NULL - if (TCoNull::Match(node)) { - return true; - } - - // Incoming parameter - if (TCoParameter::Match(node)) { - return true; - } - - // Any supported literal - if (TCoDataCtor::Match(node)) { - return true; - } - - ctx.AddError(TIssue( - ctx.GetPosition(node->Pos()), - TStringBuilder() - << "Expected literal or column as OLAP filter value, got: " << node->Content() - )); - - return false; - }; - - auto leftNode = node->Child(TKqpOlapFilterCompare::idx_Left); - auto rightNode = node->Child(TKqpOlapFilterCompare::idx_Right); - - if (!validateNode(leftNode)) { - return TStatus::Error; - } - - if (!validateNode(rightNode)) { - return TStatus::Error; - } - - node->SetTypeAnn(ctx.MakeType<TDataExprType>(EDataSlot::Bool)); - + auto column = node->Child(TKqpOlapFilterExists::idx_Column); + + if (!EnsureAtom(*column, ctx)) { + ctx.AddError(TIssue( + ctx.GetPosition(node->Pos()), + TStringBuilder() + << "Expected column in OLAP Exists filter, got: " << column->Content() + )); + + return TStatus::Error; + } + + auto rowType = itemType->Cast<TStructExprType>(); + + if (!rowType->FindItem(column->Content())) { + ctx.AddError(TIssue( + ctx.GetPosition(node->Pos()), + TStringBuilder() << "Missing column in OLAP Exists filter in input type: " << column->Content() + )); + } + + node->SetTypeAnn(ctx.MakeType<TDataExprType>(EDataSlot::Bool)); return TStatus::Ok; } -TStatus AnnotateOlapFilterExists(const TExprNode::TPtr& node, TExprContext& ctx) { - if (!EnsureArgsCount(*node, 2, ctx)) { - return TStatus::Error; - } - - auto* input = node->Child(TKqpOlapFilterExists::idx_Input); - - const TTypeAnnotationNode* itemType; - - if (!EnsureNewSeqType<false, false, true>(*input, ctx, &itemType)) { - return TStatus::Error; - } - - if (!EnsureStructType(input->Pos(), *itemType, ctx)) { - return TStatus::Error; - } - - auto column = node->Child(TKqpOlapFilterExists::idx_Column); - - if (!EnsureAtom(*column, ctx)) { - ctx.AddError(TIssue( - ctx.GetPosition(node->Pos()), - TStringBuilder() - << "Expected column in OLAP Exists filter, got: " << column->Content() - )); - - return TStatus::Error; - } - - auto rowType = itemType->Cast<TStructExprType>(); - - if (!rowType->FindItem(column->Content())) { - ctx.AddError(TIssue( - ctx.GetPosition(node->Pos()), - TStringBuilder() << "Missing column in OLAP Exists filter in input type: " << column->Content() - )); - } - - node->SetTypeAnn(ctx.MakeType<TDataExprType>(EDataSlot::Bool)); - return TStatus::Ok; -} - TStatus AnnotateKqpTxInternalBinding(const TExprNode::TPtr& node, TExprContext& ctx) { if (!EnsureArgsCount(*node, 2, ctx)) { return TStatus::Error; @@ -1050,10 +1050,10 @@ TAutoPtr<IGraphTransformer> CreateKqpTypeAnnotationTransformer(const TString& cl return AnnotateReadTable(input, ctx, cluster, *tablesData, config->SystemColumnsEnabled()); } - if (TKqlReadTableRangesBase::Match(input.Get())) { - return AnnotateReadTableRanges(input, ctx, cluster, *tablesData, config->SystemColumnsEnabled()); - } - + if (TKqlReadTableRangesBase::Match(input.Get())) { + return AnnotateReadTableRanges(input, ctx, cluster, *tablesData, config->SystemColumnsEnabled()); + } + if (TKqlLookupTableBase::Match(input.Get())) { return AnnotateLookupTable(input, ctx, cluster, *tablesData, config->SystemColumnsEnabled()); } @@ -1078,18 +1078,18 @@ TAutoPtr<IGraphTransformer> CreateKqpTypeAnnotationTransformer(const TString& cl return AnnotateDeleteRows(input, ctx, cluster, *tablesData); } - if (TKqpOlapFilter::Match(input.Get())) { - return AnnotateOlapFilter(input, ctx); - } - - if (TKqpOlapFilterCompare::Match(input.Get())) { - return AnnotateOlapFilterCompare(input, ctx); + if (TKqpOlapFilter::Match(input.Get())) { + return AnnotateOlapFilter(input, ctx); + } + + if (TKqpOlapFilterCompare::Match(input.Get())) { + return AnnotateOlapFilterCompare(input, ctx); + } + + if (TKqpOlapFilterExists::Match(input.Get())) { + return AnnotateOlapFilterExists(input, ctx); } - if (TKqpOlapFilterExists::Match(input.Get())) { - return AnnotateOlapFilterExists(input, ctx); - } - if (TKqpCnMapShard::Match(input.Get()) || TKqpCnShuffleShard::Match(input.Get())) { return AnnotateDqConnection(input, ctx); } diff --git a/ydb/core/kqp/provider/yql_kikimr_kql.cpp b/ydb/core/kqp/provider/yql_kikimr_kql.cpp index 428687672b9..95f5915a57d 100644 --- a/ydb/core/kqp/provider/yql_kikimr_kql.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_kql.cpp @@ -1166,7 +1166,7 @@ TKiProgram BuildKiProgram(TKiDataQuery query, const TKikimrTablesData& tablesDat } TExprBase UnwrapKiReadTableValues(TExprBase input, const TKikimrTableDescription& tableDesc, - const TCoAtomList columns, TExprContext& ctx) + const TCoAtomList columns, TExprContext& ctx) { TCoArgument itemArg = Build<TCoArgument>(ctx, input.Pos()) .Name("item") diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.cpp b/ydb/core/kqp/provider/yql_kikimr_settings.cpp index 4388e37b118..ebd392e16c1 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_settings.cpp @@ -4,27 +4,27 @@ namespace NYql { using namespace NCommon; -namespace { - -template <typename TType> -EOptionalFlag GetOptionalFlagValue(const TMaybe<TType>& flag) { - if (!flag) { - return EOptionalFlag::Auto; - } - - if (flag.GetRef()) { - return EOptionalFlag::Enabled; - } - - return EOptionalFlag::Disabled; -} - -static inline bool GetFlagValue(const TMaybe<bool>& flag) { +namespace { + +template <typename TType> +EOptionalFlag GetOptionalFlagValue(const TMaybe<TType>& flag) { + if (!flag) { + return EOptionalFlag::Auto; + } + + if (flag.GetRef()) { + return EOptionalFlag::Enabled; + } + + return EOptionalFlag::Disabled; +} + +static inline bool GetFlagValue(const TMaybe<bool>& flag) { return flag ? flag.GetRef() : false; } -} // anonymous namespace end - +} // anonymous namespace end + TKikimrConfiguration::TKikimrConfiguration() { /* KQP */ REGISTER_SETTING(*this, _KqpQueryTimeoutSec); @@ -46,7 +46,7 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, _KqpEnableSpilling); REGISTER_SETTING(*this, _KqpDisableLlvmForUdfStages); REGISTER_SETTING(*this, _KqpPushOlapProcess); - REGISTER_SETTING(*this, KqpPushOlapProcess); + REGISTER_SETTING(*this, KqpPushOlapProcess); /* Compile time */ REGISTER_SETTING(*this, _CommitPerShardKeysSizeLimitBytes); @@ -59,7 +59,7 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, UnwrapReadTableValues); REGISTER_SETTING(*this, AllowNullCompareInIndex); REGISTER_SETTING(*this, EnableSystemColumns); - REGISTER_SETTING(*this, EnableLlvm); + REGISTER_SETTING(*this, EnableLlvm); REGISTER_SETTING(*this, OptDisableJoinRewrite); REGISTER_SETTING(*this, OptDisableJoinTableLookup); @@ -68,7 +68,7 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, OptDisableTopSort); REGISTER_SETTING(*this, OptDisableSqlInToJoin); REGISTER_SETTING(*this, OptEnableInplaceUpdate); - REGISTER_SETTING(*this, OptEnablePredicateExtract); + REGISTER_SETTING(*this, OptEnablePredicateExtract); /* Runtime */ REGISTER_SETTING(*this, _RestrictModifyPermissions); @@ -124,21 +124,21 @@ bool TKikimrSettings::DisableLlvmForUdfStages() const { } bool TKikimrSettings::PushOlapProcess() const { - auto settingsFlag = GetFlagValue(_KqpPushOlapProcess.Get()); - auto runtimeFlag = GetFlagValue(KqpPushOlapProcess.Get()); - - // There are no settings or it set to False, but pragma enable pushdown - if (!settingsFlag && runtimeFlag) { - return true; - } - - // Settings are set to True but no pragma present - enable pushdown - if (settingsFlag && !KqpPushOlapProcess.Get()) { - return true; - } - - // Other cases handled by AND - return settingsFlag && runtimeFlag; + auto settingsFlag = GetFlagValue(_KqpPushOlapProcess.Get()); + auto runtimeFlag = GetFlagValue(KqpPushOlapProcess.Get()); + + // There are no settings or it set to False, but pragma enable pushdown + if (!settingsFlag && runtimeFlag) { + return true; + } + + // Settings are set to True but no pragma present - enable pushdown + if (settingsFlag && !KqpPushOlapProcess.Get()) { + return true; + } + + // Other cases handled by AND + return settingsFlag && runtimeFlag; } bool TKikimrSettings::HasOptDisableJoinRewrite() const { @@ -169,14 +169,14 @@ bool TKikimrSettings::HasOptEnableInplaceUpdate() const { return GetFlagValue(OptEnableInplaceUpdate.Get()); } -EOptionalFlag TKikimrSettings::GetOptPredicateExtract() const { - return GetOptionalFlagValue(OptEnablePredicateExtract.Get()); -} - -EOptionalFlag TKikimrSettings::GetEnableLlvm() const { - return GetOptionalFlagValue(EnableLlvm.Get()); -} - +EOptionalFlag TKikimrSettings::GetOptPredicateExtract() const { + return GetOptionalFlagValue(OptEnablePredicateExtract.Get()); +} + +EOptionalFlag TKikimrSettings::GetEnableLlvm() const { + return GetOptionalFlagValue(EnableLlvm.Get()); +} + TKikimrSettings::TConstPtr TKikimrConfiguration::Snapshot() const { return std::make_shared<const TKikimrSettings>(*this); } diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h index a06afdf0aa6..530cfb28558 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -6,12 +6,12 @@ namespace NYql { -enum EOptionalFlag { - Disabled = 0, - Enabled = 1, - Auto = 2 -}; - +enum EOptionalFlag { + Disabled = 0, + Enabled = 1, + Auto = 2 +}; + struct TKikimrSettings { using TConstPtr = std::shared_ptr<const TKikimrSettings>; @@ -34,11 +34,11 @@ struct TKikimrSettings { NCommon::TConfSetting<ui32, false> _KqpMaxComputeActors; NCommon::TConfSetting<bool, false> _KqpEnableSpilling; NCommon::TConfSetting<bool, false> _KqpDisableLlvmForUdfStages; - /* - * Both settings for predicates push are needed. - */ + /* + * Both settings for predicates push are needed. + */ NCommon::TConfSetting<bool, false> _KqpPushOlapProcess; - NCommon::TConfSetting<bool, false> KqpPushOlapProcess; + NCommon::TConfSetting<bool, false> KqpPushOlapProcess; /* Compile time */ NCommon::TConfSetting<bool, false> _AllowReverseRange; @@ -51,7 +51,7 @@ struct TKikimrSettings { NCommon::TConfSetting<bool, false> UnwrapReadTableValues; NCommon::TConfSetting<bool, false> AllowNullCompareInIndex; NCommon::TConfSetting<bool, false> EnableSystemColumns; - NCommon::TConfSetting<bool, false> EnableLlvm; + NCommon::TConfSetting<bool, false> EnableLlvm; /* Disable optimizer rules */ NCommon::TConfSetting<bool, false> OptDisableJoinRewrite; @@ -61,7 +61,7 @@ struct TKikimrSettings { NCommon::TConfSetting<bool, false> OptDisableTopSort; NCommon::TConfSetting<bool, false> OptDisableSqlInToJoin; NCommon::TConfSetting<bool, false> OptEnableInplaceUpdate; - NCommon::TConfSetting<bool, false> OptEnablePredicateExtract; + NCommon::TConfSetting<bool, false> OptEnablePredicateExtract; /* Runtime */ NCommon::TConfSetting<bool, true> _UseLocalProvider; @@ -91,8 +91,8 @@ struct TKikimrSettings { bool HasOptDisableJoinReverseTableLookupLeftSemi() const; bool HasOptDisableTopSort() const; bool HasOptDisableSqlInToJoin() const; - EOptionalFlag GetOptPredicateExtract() const; - EOptionalFlag GetEnableLlvm() const; + EOptionalFlag GetOptPredicateExtract() const; + EOptionalFlag GetEnableLlvm() const; // WARNING: For testing purposes only, inplace update is not ready for production usage. bool HasOptEnableInplaceUpdate() const; diff --git a/ydb/core/kqp/runtime/kqp_program_builder.cpp b/ydb/core/kqp/runtime/kqp_program_builder.cpp index c418bb62342..b5169c19d82 100644 --- a/ydb/core/kqp/runtime/kqp_program_builder.cpp +++ b/ydb/core/kqp/runtime/kqp_program_builder.cpp @@ -93,11 +93,11 @@ TRuntimeNode BuildKeyRangeNode(TProgramBuilder& builder, const TKqpKeyRange& ran return builder.NewTuple(rangeItems); } -TRuntimeNode BuildKeyRangesNode(TProgramBuilder& builder, const TKqpKeyRanges& range) { - TVector<TRuntimeNode> rangeItems{range.Ranges}; - return builder.NewTuple(rangeItems); -} - +TRuntimeNode BuildKeyRangesNode(TProgramBuilder& builder, const TKqpKeyRanges& range) { + TVector<TRuntimeNode> rangeItems{range.Ranges}; + return builder.NewTuple(rangeItems); +} + TRuntimeNode BuildSkipNullKeysNode(TProgramBuilder& builder, const TKqpKeyRange& range) { TListLiteralBuilder skipNullKeysBuilder( builder.GetTypeEnvironment(), @@ -142,12 +142,12 @@ TRuntimeNode TKqpProgramBuilder::KqpReadTable(const TTableId& tableId, const TKq } TRuntimeNode TKqpProgramBuilder::KqpWideReadTable(const TTableId& tableId, const TKqpKeyRange& range, - const TArrayRef<TKqpTableColumn>& columns) + const TArrayRef<TKqpTableColumn>& columns) { - auto rowType = GetRowType(*this, columns); - auto structType = AS_TYPE(TStructType, rowType); - auto returnType = MakeWideFlowType(*this, structType); - + auto rowType = GetRowType(*this, columns); + auto structType = AS_TYPE(TStructType, rowType); + auto returnType = MakeWideFlowType(*this, structType); + MKQL_ENSURE_S(returnType); MKQL_ENSURE_S(returnType->IsFlow()); const auto itemType = AS_TYPE(TFlowType, returnType)->GetItemType(); @@ -164,30 +164,30 @@ TRuntimeNode TKqpProgramBuilder::KqpWideReadTable(const TTableId& tableId, const return TRuntimeNode(builder.Build(), false); } -TRuntimeNode TKqpProgramBuilder::KqpWideReadTableRanges(const TTableId& tableId, const TKqpKeyRanges& ranges, - const TArrayRef<TKqpTableColumn>& columns, TType* returnType) -{ - if (returnType == nullptr) { - auto rowType = GetRowType(*this, columns); - auto structType = AS_TYPE(TStructType, rowType); - returnType = MakeWideFlowType(*this, structType); - } else { - MKQL_ENSURE_S(returnType); - MKQL_ENSURE_S(returnType->IsFlow()); - const auto itemType = AS_TYPE(TFlowType, returnType)->GetItemType(); - MKQL_ENSURE_S(itemType->IsTuple()); - } - - TCallableBuilder builder(Env, __func__, returnType); - builder.Add(BuildTableIdLiteral(tableId, *this)); - builder.Add(BuildKeyRangesNode(*this, ranges)); - builder.Add(BuildColumnTags(*this, columns)); - builder.Add(ranges.ItemsLimit); - builder.Add(NewDataLiteral(ranges.Reverse)); - - return TRuntimeNode(builder.Build(), false); -} - +TRuntimeNode TKqpProgramBuilder::KqpWideReadTableRanges(const TTableId& tableId, const TKqpKeyRanges& ranges, + const TArrayRef<TKqpTableColumn>& columns, TType* returnType) +{ + if (returnType == nullptr) { + auto rowType = GetRowType(*this, columns); + auto structType = AS_TYPE(TStructType, rowType); + returnType = MakeWideFlowType(*this, structType); + } else { + MKQL_ENSURE_S(returnType); + MKQL_ENSURE_S(returnType->IsFlow()); + const auto itemType = AS_TYPE(TFlowType, returnType)->GetItemType(); + MKQL_ENSURE_S(itemType->IsTuple()); + } + + TCallableBuilder builder(Env, __func__, returnType); + builder.Add(BuildTableIdLiteral(tableId, *this)); + builder.Add(BuildKeyRangesNode(*this, ranges)); + builder.Add(BuildColumnTags(*this, columns)); + builder.Add(ranges.ItemsLimit); + builder.Add(NewDataLiteral(ranges.Reverse)); + + return TRuntimeNode(builder.Build(), false); +} + TRuntimeNode TKqpProgramBuilder::KqpLookupTable(const TTableId& tableId, const TRuntimeNode& lookupKeys, const TArrayRef<TKqpTableColumn>& keyColumns, const TArrayRef<TKqpTableColumn>& columns) { diff --git a/ydb/core/kqp/runtime/kqp_program_builder.h b/ydb/core/kqp/runtime/kqp_program_builder.h index 21e7b494ec7..348d25eeea3 100644 --- a/ydb/core/kqp/runtime/kqp_program_builder.h +++ b/ydb/core/kqp/runtime/kqp_program_builder.h @@ -33,13 +33,13 @@ struct TKqpKeyRange { bool Reverse = false; }; -struct TKqpKeyRanges { - TRuntimeNode Ranges; - TSmallVec<bool> SkipNullKeys; - TRuntimeNode ItemsLimit; - bool Reverse = false; -}; - +struct TKqpKeyRanges { + TRuntimeNode Ranges; + TSmallVec<bool> SkipNullKeys; + TRuntimeNode ItemsLimit; + bool Reverse = false; +}; + class TKqpProgramBuilder: public TProgramBuilder { public: TKqpProgramBuilder(const TTypeEnvironment& env, const IFunctionRegistry& functionRegistry); @@ -50,9 +50,9 @@ public: TRuntimeNode KqpWideReadTable(const TTableId& tableId, const TKqpKeyRange& range, const TArrayRef<TKqpTableColumn>& columns); - TRuntimeNode KqpWideReadTableRanges(const TTableId& tableId, const TKqpKeyRanges& range, - const TArrayRef<TKqpTableColumn>& columns, TType* returnType); - + TRuntimeNode KqpWideReadTableRanges(const TTableId& tableId, const TKqpKeyRanges& range, + const TArrayRef<TKqpTableColumn>& columns, TType* returnType); + TRuntimeNode KqpLookupTable(const TTableId& tableId, const TRuntimeNode& lookupKeys, const TArrayRef<TKqpTableColumn>& keyColumns, const TArrayRef<TKqpTableColumn>& columns); diff --git a/ydb/core/kqp/runtime/kqp_read_table.cpp b/ydb/core/kqp/runtime/kqp_read_table.cpp index f8a630541fe..358ebd0a3eb 100644 --- a/ydb/core/kqp/runtime/kqp_read_table.cpp +++ b/ydb/core/kqp/runtime/kqp_read_table.cpp @@ -69,30 +69,30 @@ void BuildKeyTupleCells(const TTupleType* tupleType, const TUnboxedValue& tupleV } } -void ParseReadColumns(const TType* readType, const TRuntimeNode& tagsNode, +void ParseReadColumns(const TType* readType, const TRuntimeNode& tagsNode, TSmallVec<TKqpComputeContextBase::TColumn>& columns, TSmallVec<TKqpComputeContextBase::TColumn>& systemColumns) { MKQL_ENSURE_S(readType); MKQL_ENSURE_S(readType->GetKind() == TType::EKind::Flow); - - auto tags = AS_VALUE(TStructLiteral, tagsNode); - MKQL_ENSURE_S(tags); - + + auto tags = AS_VALUE(TStructLiteral, tagsNode); + MKQL_ENSURE_S(tags); + auto itemType = AS_TYPE(TFlowType, readType)->GetItemType(); MKQL_ENSURE_S(itemType->GetKind() == TType::EKind::Struct); - + auto structType = AS_TYPE(TStructType, itemType); - MKQL_ENSURE_S(tags->GetValuesCount() == structType->GetMembersCount()); - + MKQL_ENSURE_S(tags->GetValuesCount() == structType->GetMembersCount()); + columns.reserve(structType->GetMembersCount()); - + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { auto memberType = structType->GetMemberType(i); if (memberType->GetKind() == TType::EKind::Optional) { memberType = AS_TYPE(TOptionalType, memberType)->GetItemType(); } MKQL_ENSURE_S(memberType->GetKind() == TType::EKind::Data); - NTable::TTag columnId = AS_VALUE(TDataLiteral, tags->GetValue(i))->AsValue().Get<ui32>(); + NTable::TTag columnId = AS_VALUE(TDataLiteral, tags->GetValue(i))->AsValue().Get<ui32>(); if (IsSystemColumn(columnId)) { systemColumns.push_back({columnId, AS_TYPE(TDataType, memberType)->GetSchemeType()}); } else { @@ -101,38 +101,38 @@ void ParseReadColumns(const TType* readType, const TRuntimeNode& tagsNode, } } -void ParseWideReadColumns(const TCallable& callable, const TRuntimeNode& tagsNode, - TSmallVec<TKqpComputeContextBase::TColumn>& columns, TSmallVec<TKqpComputeContextBase::TColumn>& systemColumns) -{ - auto tags = AS_VALUE(TStructLiteral, tagsNode); - MKQL_ENSURE_S(tags); +void ParseWideReadColumns(const TCallable& callable, const TRuntimeNode& tagsNode, + TSmallVec<TKqpComputeContextBase::TColumn>& columns, TSmallVec<TKqpComputeContextBase::TColumn>& systemColumns) +{ + auto tags = AS_VALUE(TStructLiteral, tagsNode); + MKQL_ENSURE_S(tags); - TType* returnType = callable.GetType()->GetReturnType(); - MKQL_ENSURE_S(returnType->GetKind() == TType::EKind::Flow); + TType* returnType = callable.GetType()->GetReturnType(); + MKQL_ENSURE_S(returnType->GetKind() == TType::EKind::Flow); - auto itemType = AS_TYPE(TFlowType, returnType)->GetItemType(); - MKQL_ENSURE_S(itemType->GetKind() == TType::EKind::Tuple); + auto itemType = AS_TYPE(TFlowType, returnType)->GetItemType(); + MKQL_ENSURE_S(itemType->GetKind() == TType::EKind::Tuple); - auto tupleType = AS_TYPE(TTupleType, itemType); - MKQL_ENSURE_S(tags->GetValuesCount() == tupleType->GetElementsCount()); + auto tupleType = AS_TYPE(TTupleType, itemType); + MKQL_ENSURE_S(tags->GetValuesCount() == tupleType->GetElementsCount()); - columns.reserve(tupleType->GetElementsCount()); + columns.reserve(tupleType->GetElementsCount()); - for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { - auto memberType = tupleType->GetElementType(i); + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + auto memberType = tupleType->GetElementType(i); - if (memberType->GetKind() == TType::EKind::Optional) { - memberType = AS_TYPE(TOptionalType, memberType)->GetItemType(); - } + if (memberType->GetKind() == TType::EKind::Optional) { + memberType = AS_TYPE(TOptionalType, memberType)->GetItemType(); + } - MKQL_ENSURE_S(memberType->GetKind() == TType::EKind::Data); + MKQL_ENSURE_S(memberType->GetKind() == TType::EKind::Data); - NTable::TTag columnId = AS_VALUE(TDataLiteral, tags->GetValue(i))->AsValue().Get<ui32>(); + NTable::TTag columnId = AS_VALUE(TDataLiteral, tags->GetValue(i))->AsValue().Get<ui32>(); - if (IsSystemColumn(columnId)) { - systemColumns.push_back({columnId, AS_TYPE(TDataType, memberType)->GetSchemeType()}); + if (IsSystemColumn(columnId)) { + systemColumns.push_back({columnId, AS_TYPE(TDataType, memberType)->GetSchemeType()}); } else { - columns.push_back({columnId, AS_TYPE(TDataType, memberType)->GetSchemeType()}); + columns.push_back({columnId, AS_TYPE(TDataType, memberType)->GetSchemeType()}); } } } @@ -161,7 +161,7 @@ TParseReadTableResult ParseWideReadTable(TCallable& callable) { MKQL_ENSURE_S(result.ToTuple); result.ToInclusive = AS_VALUE(TDataLiteral, range->GetValue(3))->AsValue().Get<bool>(); - ParseWideReadColumns(callable, tagsNode, result.Columns, result.SystemColumns); + ParseWideReadColumns(callable, tagsNode, result.Columns, result.SystemColumns); auto skipNullKeys = AS_VALUE(TListLiteral, callable.GetInput(3)); result.SkipNullKeys.reserve(skipNullKeys->GetItemsCount()); @@ -187,54 +187,54 @@ TParseReadTableResult ParseWideReadTable(TCallable& callable) { return result; } -TParseReadTableRangesResult ParseWideReadTableRanges(TCallable& callable) { - MKQL_ENSURE_S(callable.GetInputsCount() == 5); - - TParseReadTableRangesResult result; - - result.CallableId = 0; // callable.GetUniqueId(); - - auto tableNode = callable.GetInput(0); - auto rangeNode = callable.GetInput(1); - auto tagsNode = callable.GetInput(2); - auto limit = callable.GetInput(3); - auto reverse = callable.GetInput(4); - - result.TableId = NKqp::ParseTableId(tableNode); - - result.Ranges = AS_VALUE(TTupleLiteral, rangeNode); - MKQL_ENSURE_S(result.Ranges); - MKQL_ENSURE_S(result.Ranges->GetValuesCount() == 1); - - ParseWideReadColumns(callable, tagsNode, result.Columns, result.SystemColumns); - - auto limitNode = limit.GetNode(); - - switch (limitNode->GetType()->GetKind()) { - case TType::EKind::Callable: - MKQL_ENSURE_S( - AS_TYPE(TDataType, AS_TYPE(TCallableType, limitNode->GetType())->GetReturnType())->GetSchemeType() - == NUdf::TDataType<ui64>::Id, "ItemsLimit must be () -> ui64" - ); - result.ItemsLimit = limitNode; - break; - case TType::EKind::Null: - break; - default: - MKQL_ENSURE(false, "ItemsLimit expected to be Callable or Null"); - } - - result.Reverse = AS_VALUE(TDataLiteral, reverse)->AsValue().Get<bool>(); - - return result; -} - +TParseReadTableRangesResult ParseWideReadTableRanges(TCallable& callable) { + MKQL_ENSURE_S(callable.GetInputsCount() == 5); + + TParseReadTableRangesResult result; + + result.CallableId = 0; // callable.GetUniqueId(); + + auto tableNode = callable.GetInput(0); + auto rangeNode = callable.GetInput(1); + auto tagsNode = callable.GetInput(2); + auto limit = callable.GetInput(3); + auto reverse = callable.GetInput(4); + + result.TableId = NKqp::ParseTableId(tableNode); + + result.Ranges = AS_VALUE(TTupleLiteral, rangeNode); + MKQL_ENSURE_S(result.Ranges); + MKQL_ENSURE_S(result.Ranges->GetValuesCount() == 1); + + ParseWideReadColumns(callable, tagsNode, result.Columns, result.SystemColumns); + + auto limitNode = limit.GetNode(); + + switch (limitNode->GetType()->GetKind()) { + case TType::EKind::Callable: + MKQL_ENSURE_S( + AS_TYPE(TDataType, AS_TYPE(TCallableType, limitNode->GetType())->GetReturnType())->GetSchemeType() + == NUdf::TDataType<ui64>::Id, "ItemsLimit must be () -> ui64" + ); + result.ItemsLimit = limitNode; + break; + case TType::EKind::Null: + break; + default: + MKQL_ENSURE(false, "ItemsLimit expected to be Callable or Null"); + } + + result.Reverse = AS_VALUE(TDataLiteral, reverse)->AsValue().Get<bool>(); + + return result; +} + namespace { -class TKqpScanWideReadTableWrapperBase : public TStatelessWideFlowCodegeneratorNode<TKqpScanWideReadTableWrapperBase> { - using TBase = TStatelessWideFlowCodegeneratorNode<TKqpScanWideReadTableWrapperBase>; +class TKqpScanWideReadTableWrapperBase : public TStatelessWideFlowCodegeneratorNode<TKqpScanWideReadTableWrapperBase> { + using TBase = TStatelessWideFlowCodegeneratorNode<TKqpScanWideReadTableWrapperBase>; public: - TKqpScanWideReadTableWrapperBase(TKqpScanComputeContext& computeCtx, std::vector<EValueRepresentation>&& representations) + TKqpScanWideReadTableWrapperBase(TKqpScanComputeContext& computeCtx, std::vector<EValueRepresentation>&& representations) : TBase(this) , ComputeCtx(computeCtx) , Representations(std::move(representations)) @@ -244,7 +244,7 @@ public: Y_UNUSED(ctx); if (!TableReader) { - TableReader = ComputeCtx.ReadTable(GetCallableId()); + TableReader = ComputeCtx.ReadTable(GetCallableId()); } return TableReader->Next(output); @@ -253,7 +253,7 @@ public: #ifndef MKQL_DISABLE_CODEGEN ICodegeneratorInlineWideNode::TGenerateResult DoGenGetValues(const TCodegenContext& ctx, BasicBlock*& block) const { auto& context = ctx.Codegen->GetContext(); - const auto size = GetAllColumnsSize(); + const auto size = GetAllColumnsSize(); ICodegeneratorInlineWideNode::TGettersList getters(size); const auto valueType = Type::getInt128Ty(context); @@ -294,7 +294,7 @@ public: } const auto ptrType = PointerType::getUnqual(StructType::get(context)); - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TKqpScanWideReadTableWrapperBase::DoCalculate)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TKqpScanWideReadTableWrapperBase::DoCalculate)); const auto self = CastInst::Create(Instruction::IntToPtr, ConstantInt::get(Type::getInt64Ty(context), uintptr_t(this)), ptrType, "self", block); const auto funcType = FunctionType::get(Type::getInt32Ty(context), { self->getType(), ctx.Ctx->getType(), fields->getType() }, false); const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funcType), "fetch_func", block); @@ -311,34 +311,34 @@ public: } #endif - virtual ui32 GetCallableId() const = 0; - virtual ui32 GetAllColumnsSize() const = 0; - + virtual ui32 GetCallableId() const = 0; + virtual ui32 GetAllColumnsSize() const = 0; + +private: + TKqpScanComputeContext& ComputeCtx; + mutable TIntrusivePtr<IKqpTableReader> TableReader; + const std::vector<EValueRepresentation> Representations; +}; + +class TKqpScanWideReadTableWrapper : public TKqpScanWideReadTableWrapperBase { +public: + TKqpScanWideReadTableWrapper(TKqpScanComputeContext& computeCtx, const TParseReadTableResult& parseResult, + IComputationNode* fromNode, IComputationNode* toNode, std::vector<EValueRepresentation>&& representations) + : TKqpScanWideReadTableWrapperBase(computeCtx, std::move(representations)) + , FromNode(fromNode) + , ToNode(toNode) + , ParseResult(parseResult) + {} + private: - TKqpScanComputeContext& ComputeCtx; - mutable TIntrusivePtr<IKqpTableReader> TableReader; - const std::vector<EValueRepresentation> Representations; -}; - -class TKqpScanWideReadTableWrapper : public TKqpScanWideReadTableWrapperBase { -public: - TKqpScanWideReadTableWrapper(TKqpScanComputeContext& computeCtx, const TParseReadTableResult& parseResult, - IComputationNode* fromNode, IComputationNode* toNode, std::vector<EValueRepresentation>&& representations) - : TKqpScanWideReadTableWrapperBase(computeCtx, std::move(representations)) - , FromNode(fromNode) - , ToNode(toNode) - , ParseResult(parseResult) - {} - -private: - ui32 GetCallableId() const { - return ParseResult.CallableId; - } - - ui32 GetAllColumnsSize() const { - return ParseResult.Columns.size() + ParseResult.SystemColumns.size(); - } - + ui32 GetCallableId() const { + return ParseResult.CallableId; + } + + ui32 GetAllColumnsSize() const { + return ParseResult.Columns.size() + ParseResult.SystemColumns.size(); + } + void RegisterDependencies() const { FlowDependsOn(FromNode); FlowDependsOn(ToNode); @@ -347,60 +347,60 @@ private: private: IComputationNode* FromNode; IComputationNode* ToNode; - TParseReadTableResult ParseResult; + TParseReadTableResult ParseResult; +}; + +class TKqpScanWideReadTableRangesWrapper : public TKqpScanWideReadTableWrapperBase { +public: + TKqpScanWideReadTableRangesWrapper(TKqpScanComputeContext& computeCtx, const TParseReadTableRangesResult& parseResult, + IComputationNode* rangesNode, std::vector<EValueRepresentation>&& representations) + : TKqpScanWideReadTableWrapperBase(computeCtx, std::move(representations)) + , RangesNode(rangesNode) + , ParseResult(parseResult) + {} + +private: + ui32 GetCallableId() const { + return ParseResult.CallableId; + } + + ui32 GetAllColumnsSize() const { + return ParseResult.Columns.size() + ParseResult.SystemColumns.size(); + } + + void RegisterDependencies() const { + this->FlowDependsOn(RangesNode); + } + +private: + IComputationNode* RangesNode; + TParseReadTableRangesResult ParseResult; }; -class TKqpScanWideReadTableRangesWrapper : public TKqpScanWideReadTableWrapperBase { -public: - TKqpScanWideReadTableRangesWrapper(TKqpScanComputeContext& computeCtx, const TParseReadTableRangesResult& parseResult, - IComputationNode* rangesNode, std::vector<EValueRepresentation>&& representations) - : TKqpScanWideReadTableWrapperBase(computeCtx, std::move(representations)) - , RangesNode(rangesNode) - , ParseResult(parseResult) - {} - -private: - ui32 GetCallableId() const { - return ParseResult.CallableId; - } - - ui32 GetAllColumnsSize() const { - return ParseResult.Columns.size() + ParseResult.SystemColumns.size(); - } - - void RegisterDependencies() const { - this->FlowDependsOn(RangesNode); - } - -private: - IComputationNode* RangesNode; - TParseReadTableRangesResult ParseResult; -}; - } // namespace -IComputationNode* WrapKqpScanWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, - TKqpScanComputeContext& computeCtx) -{ - std::vector<EValueRepresentation> representations; - - auto parseResult = ParseWideReadTableRanges(callable); - auto rangesNode = LocateNode(ctx.NodeLocator, *parseResult.Ranges); - - const auto type = callable.GetType()->GetReturnType(); - const auto returnItemType = type->IsFlow() ? - AS_TYPE(TFlowType, callable.GetType()->GetReturnType())->GetItemType(): - AS_TYPE(TStreamType, callable.GetType()->GetReturnType())->GetItemType(); - - const auto tupleType = AS_TYPE(TTupleType, returnItemType); - - representations.reserve(tupleType->GetElementsCount()); - for (ui32 i = 0U; i < tupleType->GetElementsCount(); ++i) - representations.emplace_back(GetValueRepresentation(tupleType->GetElementType(i))); - - return new TKqpScanWideReadTableRangesWrapper(computeCtx, parseResult, rangesNode, std::move(representations)); -} - +IComputationNode* WrapKqpScanWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, + TKqpScanComputeContext& computeCtx) +{ + std::vector<EValueRepresentation> representations; + + auto parseResult = ParseWideReadTableRanges(callable); + auto rangesNode = LocateNode(ctx.NodeLocator, *parseResult.Ranges); + + const auto type = callable.GetType()->GetReturnType(); + const auto returnItemType = type->IsFlow() ? + AS_TYPE(TFlowType, callable.GetType()->GetReturnType())->GetItemType(): + AS_TYPE(TStreamType, callable.GetType()->GetReturnType())->GetItemType(); + + const auto tupleType = AS_TYPE(TTupleType, returnItemType); + + representations.reserve(tupleType->GetElementsCount()); + for (ui32 i = 0U; i < tupleType->GetElementsCount(); ++i) + representations.emplace_back(GetValueRepresentation(tupleType->GetElementType(i))); + + return new TKqpScanWideReadTableRangesWrapper(computeCtx, parseResult, rangesNode, std::move(representations)); +} + IComputationNode* WrapKqpScanWideReadTable(TCallable& callable, const TComputationNodeFactoryContext& ctx, TKqpScanComputeContext& computeCtx) { diff --git a/ydb/core/kqp/runtime/kqp_read_table.h b/ydb/core/kqp/runtime/kqp_read_table.h index 7c3ea521d94..99c9cc3f050 100644 --- a/ydb/core/kqp/runtime/kqp_read_table.h +++ b/ydb/core/kqp/runtime/kqp_read_table.h @@ -13,7 +13,7 @@ namespace NMiniKQL { void BuildKeyTupleCells(const TTupleType* tupleType, const NUdf::TUnboxedValue& tupleValue, TVector<TCell>& cells, const TTypeEnvironment& env); -struct TParseReadTableResultBase { +struct TParseReadTableResultBase { ui32 CallableId = 0; TTableId TableId; @@ -24,25 +24,25 @@ struct TParseReadTableResultBase { bool Reverse = false; }; -struct TParseReadTableResult : TParseReadTableResultBase { - TTupleLiteral* FromTuple = nullptr; - bool FromInclusive = false; - TTupleLiteral* ToTuple = nullptr; - bool ToInclusive = false; -}; - -struct TParseReadTableRangesResult : TParseReadTableResultBase { - TTupleLiteral* Ranges = nullptr; -}; - -void ParseReadColumns(const TType* readType, const TRuntimeNode& tagsNode, +struct TParseReadTableResult : TParseReadTableResultBase { + TTupleLiteral* FromTuple = nullptr; + bool FromInclusive = false; + TTupleLiteral* ToTuple = nullptr; + bool ToInclusive = false; +}; + +struct TParseReadTableRangesResult : TParseReadTableResultBase { + TTupleLiteral* Ranges = nullptr; +}; + +void ParseReadColumns(const TType* readType, const TRuntimeNode& tagsNode, TSmallVec<TKqpComputeContextBase::TColumn>& columns, TSmallVec<TKqpComputeContextBase::TColumn>& systemColumns); TParseReadTableResult ParseWideReadTable(TCallable& callable); TParseReadTableRangesResult ParseWideReadTableRanges(TCallable& callable); -IComputationNode* WrapKqpScanWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, - TKqpScanComputeContext& computeCtx); +IComputationNode* WrapKqpScanWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, + TKqpScanComputeContext& computeCtx); IComputationNode* WrapKqpScanWideReadTable(TCallable& callable, const TComputationNodeFactoryContext& ctx, TKqpScanComputeContext& computeCtx); diff --git a/ydb/core/kqp/runtime/kqp_scan_data.cpp b/ydb/core/kqp/runtime/kqp_scan_data.cpp index ae3a73ec0bc..c4d8e241966 100644 --- a/ydb/core/kqp/runtime/kqp_scan_data.cpp +++ b/ydb/core/kqp/runtime/kqp_scan_data.cpp @@ -107,25 +107,25 @@ void FillSystemColumns(NUdf::TUnboxedValue* rowItems, TMaybe<ui64> shardId, cons } template <typename TArrayType, typename TValueType = typename TArrayType::value_type> -NUdf::TUnboxedValue MakeUnboxedValue(arrow::Array* column, ui32 row) { - auto array = reinterpret_cast<TArrayType*>(column); +NUdf::TUnboxedValue MakeUnboxedValue(arrow::Array* column, ui32 row) { + auto array = reinterpret_cast<TArrayType*>(column); return NUdf::TUnboxedValuePod(static_cast<TValueType>(array->Value(row))); } -NUdf::TUnboxedValue MakeUnboxedValueFromBinaryData(arrow::Array* column, ui32 row) { - auto array = reinterpret_cast<arrow::BinaryArray*>(column); +NUdf::TUnboxedValue MakeUnboxedValueFromBinaryData(arrow::Array* column, ui32 row) { + auto array = reinterpret_cast<arrow::BinaryArray*>(column); auto data = array->GetView(row); return MakeString(NUdf::TStringRef(data.data(), data.size())); } -NUdf::TUnboxedValue MakeUnboxedValueFromFixedSizeBinaryData(arrow::Array* column, ui32 row) { - auto array = reinterpret_cast<arrow::FixedSizeBinaryArray*>(column); +NUdf::TUnboxedValue MakeUnboxedValueFromFixedSizeBinaryData(arrow::Array* column, ui32 row) { + auto array = reinterpret_cast<arrow::FixedSizeBinaryArray*>(column); auto data = array->GetView(row); return MakeString(NUdf::TStringRef(data.data(), data.size()-1)); } -NUdf::TUnboxedValue MakeUnboxedValueFromDecimal128Array(arrow::Array* column, ui32 row) { - auto array = reinterpret_cast<arrow::Decimal128Array*>(column); +NUdf::TUnboxedValue MakeUnboxedValueFromDecimal128Array(arrow::Array* column, ui32 row) { + auto array = reinterpret_cast<arrow::Decimal128Array*>(column); auto data = array->GetView(row); // It's known that Decimal params are always Decimal(22,9), // so we verify Decimal type here before store it in UnboxedValue. @@ -138,62 +138,62 @@ NUdf::TUnboxedValue MakeUnboxedValueFromDecimal128Array(arrow::Array* column, ui return NUdf::TUnboxedValuePod(val); } -TBytesStatistics WriteColumnValuesFromArrow(const TVector<NUdf::TUnboxedValue*>& editAccessors, - const arrow::RecordBatch& batch, i64 columnIndex, NScheme::TTypeId columnType) -{ +TBytesStatistics WriteColumnValuesFromArrow(const TVector<NUdf::TUnboxedValue*>& editAccessors, + const arrow::RecordBatch& batch, i64 columnIndex, NScheme::TTypeId columnType) +{ TBytesStatistics columnStats; - // Hold pointer to column until function end - std::shared_ptr<arrow::Array> columnSharedPtr = batch.column(columnIndex); - arrow::Array* columnPtr = columnSharedPtr.get(); + // Hold pointer to column until function end + std::shared_ptr<arrow::Array> columnSharedPtr = batch.column(columnIndex); + arrow::Array* columnPtr = columnSharedPtr.get(); namespace NTypeIds = NScheme::NTypeIds; for (i64 rowIndex = 0; rowIndex < batch.num_rows(); ++rowIndex) { auto& rowItem = editAccessors[rowIndex][columnIndex]; - if (columnPtr->IsNull(rowIndex)) { + if (columnPtr->IsNull(rowIndex)) { rowItem = NUdf::TUnboxedValue(); } else { switch(columnType) { case NTypeIds::Bool: { - rowItem = MakeUnboxedValue<arrow::BooleanArray, bool>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::BooleanArray, bool>(columnPtr, rowIndex); break; } case NTypeIds::Int8: { - rowItem = MakeUnboxedValue<arrow::Int8Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::Int8Array>(columnPtr, rowIndex); break; } case NTypeIds::Int16: { - rowItem = MakeUnboxedValue<arrow::Int16Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::Int16Array>(columnPtr, rowIndex); break; } case NTypeIds::Int32: { - rowItem = MakeUnboxedValue<arrow::Int32Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::Int32Array>(columnPtr, rowIndex); break; } case NTypeIds::Int64: { - rowItem = MakeUnboxedValue<arrow::Int64Array, i64>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::Int64Array, i64>(columnPtr, rowIndex); break; } case NTypeIds::Uint8: { - rowItem = MakeUnboxedValue<arrow::UInt8Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::UInt8Array>(columnPtr, rowIndex); break; } case NTypeIds::Uint16: { - rowItem = MakeUnboxedValue<arrow::UInt16Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::UInt16Array>(columnPtr, rowIndex); break; } case NTypeIds::Uint32: { - rowItem = MakeUnboxedValue<arrow::UInt32Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::UInt32Array>(columnPtr, rowIndex); break; } case NTypeIds::Uint64: { - rowItem = MakeUnboxedValue<arrow::UInt64Array, ui64>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::UInt64Array, ui64>(columnPtr, rowIndex); break; } case NTypeIds::Float: { - rowItem = MakeUnboxedValue<arrow::FloatArray>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::FloatArray>(columnPtr, rowIndex); break; } case NTypeIds::Double: { - rowItem = MakeUnboxedValue<arrow::DoubleArray>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::DoubleArray>(columnPtr, rowIndex); break; } case NTypeIds::String: @@ -202,34 +202,34 @@ TBytesStatistics WriteColumnValuesFromArrow(const TVector<NUdf::TUnboxedValue*>& case NTypeIds::Yson: case NTypeIds::JsonDocument: case NTypeIds::DyNumber: { - rowItem = MakeUnboxedValueFromBinaryData(columnPtr, rowIndex); + rowItem = MakeUnboxedValueFromBinaryData(columnPtr, rowIndex); break; } case NTypeIds::Date: { - rowItem = MakeUnboxedValue<arrow::UInt16Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::UInt16Array>(columnPtr, rowIndex); break; } case NTypeIds::Datetime: { - rowItem = MakeUnboxedValue<arrow::UInt32Array>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::UInt32Array>(columnPtr, rowIndex); break; } case NTypeIds::Timestamp: { - rowItem = MakeUnboxedValue<arrow::TimestampArray, ui64>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::TimestampArray, ui64>(columnPtr, rowIndex); break; } case NTypeIds::Interval: { - rowItem = MakeUnboxedValue<arrow::DurationArray, ui64>(columnPtr, rowIndex); + rowItem = MakeUnboxedValue<arrow::DurationArray, ui64>(columnPtr, rowIndex); break; } case NTypeIds::Decimal: { - rowItem = MakeUnboxedValueFromDecimal128Array(columnPtr, rowIndex); + rowItem = MakeUnboxedValueFromDecimal128Array(columnPtr, rowIndex); break; } case NTypeIds::PairUi64Ui64: case NTypeIds::ActorId: case NTypeIds::StepOrderId: { Y_VERIFY_DEBUG_S(false, "Unsupported (deprecated) type: " << NScheme::TypeName(columnType)); - rowItem = MakeUnboxedValueFromFixedSizeBinaryData(columnPtr, rowIndex); + rowItem = MakeUnboxedValueFromFixedSizeBinaryData(columnPtr, rowIndex); break; } default: @@ -294,14 +294,14 @@ ui64 TKqpScanComputeContext::TScanData::AddRows(const TVector<TOwnedCellVec>& ba return 0; } - TBytesStatistics stats; - + TBytesStatistics stats; + TVector<ui64> bytesList; bytesList.reserve(batch.size()); TUnboxedValueVector rows; rows.reserve(batch.size()); - + for (size_t rowIndex = 0; rowIndex < batch.size(); ++rowIndex) { auto& row = batch[rowIndex]; @@ -326,9 +326,9 @@ ui64 TKqpScanComputeContext::TScanData::AddRows(const TVector<TOwnedCellVec>& ba return stats.AllocatedBytes; } -ui64 TKqpScanComputeContext::TScanData::AddRows(const arrow::RecordBatch& batch, TMaybe<ui64> shardId, - const THolderFactory& holderFactory) -{ +ui64 TKqpScanComputeContext::TScanData::AddRows(const arrow::RecordBatch& batch, TMaybe<ui64> shardId, + const THolderFactory& holderFactory) +{ // RecordBatch hasn't empty method so check the number of rows if (Finished || batch.num_rows() == 0) { return 0; @@ -337,34 +337,34 @@ ui64 TKqpScanComputeContext::TScanData::AddRows(const arrow::RecordBatch& batch, TBytesStatistics stats; TUnboxedValueVector rows; - if (Columns.empty() && SystemColumns.empty()) { - rows.resize(batch.num_rows(), holderFactory.GetEmptyContainer()); - } else { - TVector<NUdf::TUnboxedValue*> editAccessors(batch.num_rows()); - rows.reserve(batch.num_rows()); - + if (Columns.empty() && SystemColumns.empty()) { + rows.resize(batch.num_rows(), holderFactory.GetEmptyContainer()); + } else { + TVector<NUdf::TUnboxedValue*> editAccessors(batch.num_rows()); + rows.reserve(batch.num_rows()); + for (i64 rowIndex = 0; rowIndex < batch.num_rows(); ++rowIndex) { - rows.emplace_back(holderFactory.CreateDirectArrayHolder( - Columns.size() + SystemColumns.size(), - editAccessors[rowIndex]) - ); + rows.emplace_back(holderFactory.CreateDirectArrayHolder( + Columns.size() + SystemColumns.size(), + editAccessors[rowIndex]) + ); + } + + for (size_t columnIndex = 0; columnIndex < Columns.size(); ++columnIndex) { + stats.AddStatistics( + WriteColumnValuesFromArrow(editAccessors, batch, columnIndex, Columns[columnIndex].Type) + ); + } + + if (!SystemColumns.empty()) { + for (i64 rowIndex = 0; rowIndex < batch.num_rows(); ++rowIndex) { + FillSystemColumns(&editAccessors[rowIndex][Columns.size()], shardId, SystemColumns); + } + + stats.AllocatedBytes += batch.num_rows() * SystemColumns.size() * sizeof(NUdf::TUnboxedValue); } - - for (size_t columnIndex = 0; columnIndex < Columns.size(); ++columnIndex) { - stats.AddStatistics( - WriteColumnValuesFromArrow(editAccessors, batch, columnIndex, Columns[columnIndex].Type) - ); - } - - if (!SystemColumns.empty()) { - for (i64 rowIndex = 0; rowIndex < batch.num_rows(); ++rowIndex) { - FillSystemColumns(&editAccessors[rowIndex][Columns.size()], shardId, SystemColumns); - } - - stats.AllocatedBytes += batch.num_rows() * SystemColumns.size() * sizeof(NUdf::TUnboxedValue); - } } - + if (Columns.empty()) { stats.AddStatistics({sizeof(ui64) * batch.num_rows(), sizeof(ui64) * batch.num_rows()}); } diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index 4fae1726c69..9bc7889f92f 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -754,7 +754,7 @@ NJson::TJsonValue FindPlanNodeByKv(const NJson::TJsonValue& plan, const TString& } } else if (plan.IsMap()) { auto map = plan.GetMap(); - if (map.contains(key) && map.at(key).GetStringRobust() == value) { + if (map.contains(key) && map.at(key).GetStringRobust() == value) { return plan; } if (map.contains("Plans")) { diff --git a/ydb/core/kqp/ut/kqp_explain_ut.cpp b/ydb/core/kqp/ut/kqp_explain_ut.cpp index f54fb14d400..c3a06836367 100644 --- a/ydb/core/kqp/ut/kqp_explain_ut.cpp +++ b/ydb/core/kqp/ut/kqp_explain_ut.cpp @@ -48,11 +48,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(Explain) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT count(*) FROM `/Root/EightShard` AS t JOIN `/Root/KeyValue` AS kv ON t.Data = kv.Key; )", settings).GetValueSync(); @@ -118,11 +118,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(AggGroupLimit) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT min(Message), max(Message) FROM `/Root/Logs` WHERE Ts > 1 and Ts <= 4 or App="ydb" GROUP BY App; )", settings).GetValueSync(); @@ -152,11 +152,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(ComplexJoin) { TKikimrRunner kikimr; CreateSampleTables(kikimr); - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( $join = ( SELECT l.Key as Key, l.Text as Text, l.Data as Data, r.Value1 as Value1, r.Value2 as Value2 FROM `/Root/EightShard` AS l JOIN `/Root/FourShard` AS r ON l.Key = r.Key @@ -176,11 +176,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { NJson::TJsonValue plan; NJson::ReadJsonTree(*res.PlanJson, &plan, true); - auto join = FindPlanNodeByKv( - plan, - "Node Type", + auto join = FindPlanNodeByKv( + plan, + "Node Type", "Aggregate-InnerJoin (MapJoin)-Filter-TableFullScan" - ); + ); UNIT_ASSERT(join.IsDefined()); auto left = FindPlanNodeByKv(join, "Table", "EightShard"); UNIT_ASSERT(left.IsDefined()); @@ -190,11 +190,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(PrecomputeRange) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT * FROM `/Root/EightShard` WHERE Key BETWEEN 150 AND 266 ORDER BY Data LIMIT 4; )", settings).GetValueSync(); @@ -218,12 +218,12 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(CompoundKeyRange) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "false"; + auto it = db.StreamExecuteScanQuery(R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "false"; SELECT * FROM `/Root/Logs` WHERE App = "new_app_1" AND Host < "xyz" AND Ts = (42+7) Limit 10; )", settings).GetValueSync(); @@ -246,11 +246,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(SortStage) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT * FROM `/Root/EightShard` WHERE Key BETWEEN 150 AND 266 ORDER BY Text; )", settings).GetValueSync(); @@ -261,18 +261,18 @@ Y_UNIT_TEST_SUITE(KqpExplain) { NJson::TJsonValue plan; NJson::ReadJsonTree(*res.PlanJson, &plan, true); - + auto scanSort = FindPlanNodeByKv(plan, "Node Type", "Sort-TableRangesScan"); UNIT_ASSERT(scanSort.IsDefined()); } Y_UNIT_TEST(LimitOffset) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT * FROM `/Root/EightShard` ORDER BY Text LIMIT 10 OFFSET 15; )", settings).GetValueSync(); @@ -292,11 +292,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(SelfJoin3xSameLabels) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( $foo = ( SELECT t1.Key AS Key FROM `/Root/KeyValue` AS t1 @@ -327,11 +327,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(PureExpr) { TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT 1,2,3 UNION ALL SELECT 4,5,6; )", settings).GetValueSync(); @@ -354,11 +354,11 @@ Y_UNIT_TEST_SUITE(KqpExplain) { spilling->SetRoot("./spilling/"); TKikimrRunner kikimr(appCfg); - auto db = kikimr.GetTableClient(); - TStreamExecScanQuerySettings settings; + auto db = kikimr.GetTableClient(); + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( select count(*) from `/Root/KeyValue` AS t1 join `/Root/KeyValue` AS t2 on t1.Key = t2.Key; )", settings).GetValueSync(); @@ -385,19 +385,19 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(SqlIn) { TKikimrRunner kikimr; - CreateSampleTables(kikimr); - - TStreamExecScanQuerySettings settings; + CreateSampleTables(kikimr); + + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto db = kikimr.GetTableClient(); + auto db = kikimr.GetTableClient(); auto query = R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "false"; + PRAGMA Kikimr.OptEnablePredicateExtract = "false"; SELECT Key, Value FROM `/Root/KeyValue` WHERE Key IN (1, 2, 3, 42) ORDER BY Key )"; - auto it = db.StreamExecuteScanQuery(query, settings).GetValueSync(); + auto it = db.StreamExecuteScanQuery(query, settings).GetValueSync(); auto res = CollectStreamResult(it); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); UNIT_ASSERT(res.PlanJson); @@ -527,12 +527,12 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(FullOuterJoin) { TKikimrRunner kikimr; CreateSampleTables(kikimr); - - TStreamExecScanQuerySettings settings; + + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto db = kikimr.GetTableClient(); + auto db = kikimr.GetTableClient(); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT l.Key, l.Text, l.Data, r.Value1, r.Value2 FROM `/Root/EightShard` AS l FULL OUTER JOIN `/Root/FourShard` AS r ON l.Key = r.Key @@ -555,76 +555,76 @@ Y_UNIT_TEST_SUITE(KqpExplain) { Y_UNIT_TEST(ReadTableRangesFullScan) { TKikimrRunner kikimr; - TStreamExecScanQuerySettings settings; + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto db = kikimr.GetTableClient(); - - auto session = db.CreateSession().GetValueSync().GetSession(); - - auto res = session.ExecuteSchemeQuery(R"( - CREATE TABLE `/Root/TwoKeys` ( - Key1 Int32, - Key2 Int32, - Value Int32, - PRIMARY KEY (Key1, Key2) - ); - )").GetValueSync(); - UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); - - auto result = session.ExecuteDataQuery(R"( - REPLACE INTO `TwoKeys` (Key1, Key2, Value) VALUES - (1, 1, 1), - (2, 1, 2), - (3, 2, 3), - (4, 2, 4), - (1000, 100, 5), - (1001, 101, 6), - (1002, 102, 7), - (1003, 103, 8); - )", TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - TVector<std::pair<TString, TString>> testData = { - { - "SELECT * FROM `/Root/TwoKeys`;", - "TableFullScan" - }, - { - "SELECT * FROM `/Root/TwoKeys` WHERE Key2 > 101;", + auto db = kikimr.GetTableClient(); + + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto res = session.ExecuteSchemeQuery(R"( + CREATE TABLE `/Root/TwoKeys` ( + Key1 Int32, + Key2 Int32, + Value Int32, + PRIMARY KEY (Key1, Key2) + ); + )").GetValueSync(); + UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); + + auto result = session.ExecuteDataQuery(R"( + REPLACE INTO `TwoKeys` (Key1, Key2, Value) VALUES + (1, 1, 1), + (2, 1, 2), + (3, 2, 3), + (4, 2, 4), + (1000, 100, 5), + (1001, 101, 6), + (1002, 102, 7), + (1003, 103, 8); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + TVector<std::pair<TString, TString>> testData = { + { + "SELECT * FROM `/Root/TwoKeys`;", + "TableFullScan" + }, + { + "SELECT * FROM `/Root/TwoKeys` WHERE Key2 > 101;", "Filter-TableFullScan" - } - }; - - for (auto& data: testData) { - auto it = db.StreamExecuteScanQuery(data.first, settings).GetValueSync(); - - auto res = CollectStreamResult(it); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - UNIT_ASSERT(res.PlanJson); - - NJson::TJsonValue plan; - NJson::ReadJsonTree(*res.PlanJson, &plan, true); - - auto read = FindPlanNodeByKv(plan, "Node Type", data.second); - UNIT_ASSERT(read.IsDefined()); - - auto rangesKeys = FindPlanNodeByKv(plan, "ReadRangesKeys", "[]"); - UNIT_ASSERT(!rangesKeys.IsDefined()); - - auto expected = FindPlanNodeByKv(plan, "ReadRangesExpectedSize", ""); - UNIT_ASSERT(!expected.IsDefined()); - } + } + }; + + for (auto& data: testData) { + auto it = db.StreamExecuteScanQuery(data.first, settings).GetValueSync(); + + auto res = CollectStreamResult(it); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + UNIT_ASSERT(res.PlanJson); + + NJson::TJsonValue plan; + NJson::ReadJsonTree(*res.PlanJson, &plan, true); + + auto read = FindPlanNodeByKv(plan, "Node Type", data.second); + UNIT_ASSERT(read.IsDefined()); + + auto rangesKeys = FindPlanNodeByKv(plan, "ReadRangesKeys", "[]"); + UNIT_ASSERT(!rangesKeys.IsDefined()); + + auto expected = FindPlanNodeByKv(plan, "ReadRangesExpectedSize", ""); + UNIT_ASSERT(!expected.IsDefined()); + } } Y_UNIT_TEST(ReadTableRanges) { TKikimrRunner kikimr; CreateSampleTables(kikimr); - - TStreamExecScanQuerySettings settings; + + TStreamExecScanQuerySettings settings; settings.Explain(true); - auto db = kikimr.GetTableClient(); + auto db = kikimr.GetTableClient(); - auto it = db.StreamExecuteScanQuery(R"( + auto it = db.StreamExecuteScanQuery(R"( SELECT * FROM `/Root/KeyValue` WHERE Key >= 2000 OR Key < 100; )", settings).GetValueSync(); @@ -638,10 +638,10 @@ Y_UNIT_TEST_SUITE(KqpExplain) { auto read = FindPlanNodeByKv(plan, "Node Type", "TableRangesScan"); UNIT_ASSERT(read.IsDefined()); - auto keys = FindPlanNodeByKv(plan, "ReadRangesKeys", "[\"Key\"]"); - UNIT_ASSERT(keys.IsDefined()); - auto count = FindPlanNodeByKv(plan, "ReadRangesExpectedSize", "2"); - UNIT_ASSERT(count.IsDefined()); + auto keys = FindPlanNodeByKv(plan, "ReadRangesKeys", "[\"Key\"]"); + UNIT_ASSERT(keys.IsDefined()); + auto count = FindPlanNodeByKv(plan, "ReadRangesExpectedSize", "2"); + UNIT_ASSERT(count.IsDefined()); } Y_UNIT_TEST(Predicates) { diff --git a/ydb/core/kqp/ut/kqp_join_ut.cpp b/ydb/core/kqp/ut/kqp_join_ut.cpp index cd76d10e707..8fa59d31b36 100644 --- a/ydb/core/kqp/ut/kqp_join_ut.cpp +++ b/ydb/core/kqp/ut/kqp_join_ut.cpp @@ -83,8 +83,8 @@ static void CreateSampleTables(TSession session) { (105, "One", "Name2", "Value27"), (105, "Two", "Name4", "Value28"), (106, "One", "Name3", "Value29"), - (108, "One", NULL, "Value31"), - (109, "Four", NULL, "Value41"); + (108, "One", NULL, "Value31"), + (109, "Four", NULL, "Value41"); REPLACE INTO `/Root/Join1_3` (Key, Value) VALUES ("Name1", 1001), @@ -833,47 +833,47 @@ Y_UNIT_TEST_SUITE(KqpJoin) { CompareYson(R"([[["Value5"];["Value31"];[108]]])", FormatResultSetYson(result.GetResultSet(0))); } - - Y_UNIT_TEST_NEW_ENGINE(ExclusionJoin) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - CreateSampleTables(session); - + + Y_UNIT_TEST_NEW_ENGINE(ExclusionJoin) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + CreateSampleTables(session); + auto result = session.ExecuteDataQuery(Q_(R"( - SELECT left.Fk21, left.Key, left.Value, right.Key1, right.Value - FROM `/Root/Join1_1` as left - EXCLUSION JOIN `/Root/Join1_2` as right - ON left.Fk21 = right.Key1 + SELECT left.Fk21, left.Key, left.Value, right.Key1, right.Value + FROM `/Root/Join1_1` as left + EXCLUSION JOIN `/Root/Join1_2` as right + ON left.Fk21 = right.Key1 )"), TTxControl::BeginTx().CommitTx()).GetValueSync(); - - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - CompareYson( - R"([[ - [107];[7];["Value4"];#;#]; - [#;#;#;[109];["Value41"] - ]])", + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + CompareYson( + R"([[ + [107];[7];["Value4"];#;#]; + [#;#;#;[109];["Value41"] + ]])", FormatResultSetYson(result.GetResultSet(0)) - ); - } - - Y_UNIT_TEST_NEW_ENGINE(FullOuterJoin) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - CreateSampleTables(session); - + ); + } + + Y_UNIT_TEST_NEW_ENGINE(FullOuterJoin) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + CreateSampleTables(session); + auto result = session.ExecuteDataQuery(Q_(R"( - SELECT left.Fk21, left.Key, left.Value, right.Key1, right.Value - FROM `/Root/Join1_1` as left - FULL OUTER JOIN `/Root/Join1_2` as right - ON left.Fk21 = right.Key1 + SELECT left.Fk21, left.Key, left.Value, right.Key1, right.Value + FROM `/Root/Join1_1` as left + FULL OUTER JOIN `/Root/Join1_2` as right + ON left.Fk21 = right.Key1 ORDER BY left.Fk21, left.Key, left.Value, right.Key1, right.Value )"), TTxControl::BeginTx().CommitTx()).GetValueSync(); - - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - CompareYson( - R"([ + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + CompareYson( + R"([ [#;#;#;[109];["Value41"]]; [[101];[1];["Value1"];[101];["Value21"]]; [[101];[1];["Value1"];[101];["Value22"]]; @@ -881,15 +881,15 @@ Y_UNIT_TEST_SUITE(KqpJoin) { [[102];[2];["Value1"];[102];["Value24"]]; [[103];[3];["Value2"];[103];["Value25"]]; [[104];[4];["Value2"];[104];["Value26"]]; - [[105];[5];["Value3"];[105];["Value27"]]; - [[105];[5];["Value3"];[105];["Value28"]]; + [[105];[5];["Value3"];[105];["Value27"]]; + [[105];[5];["Value3"];[105];["Value28"]]; [[106];[6];["Value3"];[106];["Value29"]]; - [[107];[7];["Value4"];#;#]; + [[107];[7];["Value4"];#;#]; [[108];[8];["Value5"];[108];["Value31"]] - ])", + ])", FormatResultSetYson(result.GetResultSet(0)) - ); - } + ); + } Y_UNIT_TEST_NEW_ENGINE(FullOuterJoin2) { TKikimrRunner kikimr; diff --git a/ydb/core/kqp/ut/kqp_newengine_ut.cpp b/ydb/core/kqp/ut/kqp_newengine_ut.cpp index 1203061eb0c..3737687d8d6 100644 --- a/ydb/core/kqp/ut/kqp_newengine_ut.cpp +++ b/ydb/core/kqp/ut/kqp_newengine_ut.cpp @@ -2194,7 +2194,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { auto range = FindPlanNodeByKv(plan, "ReadRange", "[\"Key [$min_key, $max_key]\"]"); UNIT_ASSERT(range.IsDefined()); } - + Y_UNIT_TEST(Nondeterministic) { // TODO: KIKIMR-4759 TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); @@ -2221,193 +2221,193 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_VALUES_EQUAL(result.GetResultSet(0).RowsCount(), 1); } - Y_UNIT_TEST(ScalarFunctions) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - - auto result = session.ExecuteSchemeQuery(R"( - --!syntax_v1 - CREATE TABLE `/Root/TableOne` ( - Key Uint32, - Value Uint32, - PRIMARY KEY (Key) - ); - CREATE TABLE `/Root/TableTwo` ( - Key Uint32, - Value Uint32, - PRIMARY KEY (Key) - ); - CREATE TABLE `/Root/TableThree` ( - Key Uint32, - Value Uint32, - PRIMARY KEY (Key) - ); - CREATE TABLE `/Root/TableEmpty` ( - Key Uint32, - Value Uint32, - PRIMARY KEY (Key) - ); - )").GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - result = session.ExecuteDataQuery(R"( - --!syntax_v1 - PRAGMA kikimr.UseNewEngine = "true"; - - REPLACE INTO `/Root/TableOne` (Key, Value) VALUES - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5), - (6, 6); - REPLACE INTO `/Root/TableTwo` (Key, Value) VALUES - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5), - (6, 6); - REPLACE INTO `/Root/TableThree` (Key, Value) VALUES - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5), - (6, 6); - )", TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - TVector<std::pair<TString,TString>> testData = { - { - R"( - $lmt1 = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); - $lmt2 = (SELECT Value FROM `/Root/TableTwo` WHERE Key = 3); - - SELECT Value FROM `/Root/TableThree` ORDER BY Value - LIMIT (CAST($lmt1 AS Uint64) ?? 0) * (CAST($lmt2 AS Uint64) ?? 0); - )", - R"([ - [[1u]];[[2u]];[[3u]];[[4u]];[[5u]];[[6u]] - ])" - }, - { - R"( - $lmt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); - - SELECT Value FROM `/Root/TableTwo` ORDER BY Value LIMIT CAST($lmt AS Uint64) ?? 0; - )", - R"([ - [[1u]];[[2u]] - ])" - }, - { - R"( - $lmt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); - - SELECT Value FROM `/Root/TableTwo` ORDER BY Value DESC LIMIT COALESCE($lmt, 1u); - )", - R"([ - [[6u]];[[5u]] - ])" - }, - { - R"( - $lmt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); - $offt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 3); - - SELECT Value FROM `/Root/TableTwo` ORDER BY Value DESC - LIMIT COALESCE($lmt, 1u) OFFSET COALESCE($offt, 1u); - )", - R"([ - [[3u]];[[2u]] - ])" - }, - { - R"( - $key = (SELECT Value FROM `/Root/TableOne` WHERE Key = 5); - - SELECT Value FROM `/Root/TableTwo` WHERE Key >= $key ORDER BY Value ASC; - )", - R"([ - [[5u]];[[6u]] - ])" - }, - { - R"( - $key = (SELECT Value FROM `/Root/TableOne` WHERE Key = 5); - - SELECT Value FROM `/Root/TableTwo` WHERE Key >= $key ORDER BY Value DESC; - )", - R"([ - [[6u]];[[5u]] - ])" - }, - { - R"( - $key1 = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); - $key2 = (SELECT Value FROM `/Root/TableTwo` WHERE Key = 3); - - SELECT Value FROM `/Root/TableTwo` WHERE Key = $key1 * $key2 ORDER BY Value; - )", - R"([ - [[6u]] - ])" - }, - { - R"( - $keys = (SELECT Value FROM `/Root/TableOne` WHERE Key > 2); - - SELECT Value FROM `/Root/TableTwo` WHERE Key IN $keys ORDER BY Value; - )", - R"([ - [[3u]];[[4u]];[[5u]];[[6u]] - ])" - }, - { - R"( - $keys = (SELECT Value FROM `/Root/TableOne` WHERE Key > 2); - - SELECT Value FROM `/Root/TableTwo` WHERE Value IN COMPACT $keys ORDER BY Value; - )", - R"([ - [[3u]];[[4u]];[[5u]];[[6u]] - ])" - }, -#if 0 - // Count IF is not supported in DqBuildAggregationResultStage, there is no AsStruct and - // optimizer failed. Need to fix later. - { - R"( - $divisor = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); - - SELECT COUNT_IF(Value % $divisor == 1) AS odd_count FROM `/Root/TableTwo`; - )", - R"([ - [3u] - ])" - }, -#endif - }; - - TString useNewEngine = "PRAGMA kikimr.UseNewEngine = \"true\";"; - - for (auto& item: testData) { - auto result = session.ExecuteDataQuery(useNewEngine + item.first, - TTxControl::BeginTx(TTxSettings::OnlineRO()).CommitTx()).ExtractValueSync(); - AssertSuccessResult(result); - auto resultYson = FormatResultSetYson(result.GetResultSet(0)); - CompareYson(item.second, resultYson); - } - - for (auto& item: testData) { - auto it = db.StreamExecuteScanQuery(useNewEngine + item.first).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - CompareYson(item.second, CollectStreamResult(it).ResultSetYson); - } - } + Y_UNIT_TEST(ScalarFunctions) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteSchemeQuery(R"( + --!syntax_v1 + CREATE TABLE `/Root/TableOne` ( + Key Uint32, + Value Uint32, + PRIMARY KEY (Key) + ); + CREATE TABLE `/Root/TableTwo` ( + Key Uint32, + Value Uint32, + PRIMARY KEY (Key) + ); + CREATE TABLE `/Root/TableThree` ( + Key Uint32, + Value Uint32, + PRIMARY KEY (Key) + ); + CREATE TABLE `/Root/TableEmpty` ( + Key Uint32, + Value Uint32, + PRIMARY KEY (Key) + ); + )").GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + result = session.ExecuteDataQuery(R"( + --!syntax_v1 + PRAGMA kikimr.UseNewEngine = "true"; + + REPLACE INTO `/Root/TableOne` (Key, Value) VALUES + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6); + REPLACE INTO `/Root/TableTwo` (Key, Value) VALUES + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6); + REPLACE INTO `/Root/TableThree` (Key, Value) VALUES + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + TVector<std::pair<TString,TString>> testData = { + { + R"( + $lmt1 = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); + $lmt2 = (SELECT Value FROM `/Root/TableTwo` WHERE Key = 3); + + SELECT Value FROM `/Root/TableThree` ORDER BY Value + LIMIT (CAST($lmt1 AS Uint64) ?? 0) * (CAST($lmt2 AS Uint64) ?? 0); + )", + R"([ + [[1u]];[[2u]];[[3u]];[[4u]];[[5u]];[[6u]] + ])" + }, + { + R"( + $lmt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); + + SELECT Value FROM `/Root/TableTwo` ORDER BY Value LIMIT CAST($lmt AS Uint64) ?? 0; + )", + R"([ + [[1u]];[[2u]] + ])" + }, + { + R"( + $lmt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); + + SELECT Value FROM `/Root/TableTwo` ORDER BY Value DESC LIMIT COALESCE($lmt, 1u); + )", + R"([ + [[6u]];[[5u]] + ])" + }, + { + R"( + $lmt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); + $offt = (SELECT Value FROM `/Root/TableOne` WHERE Key = 3); + + SELECT Value FROM `/Root/TableTwo` ORDER BY Value DESC + LIMIT COALESCE($lmt, 1u) OFFSET COALESCE($offt, 1u); + )", + R"([ + [[3u]];[[2u]] + ])" + }, + { + R"( + $key = (SELECT Value FROM `/Root/TableOne` WHERE Key = 5); + + SELECT Value FROM `/Root/TableTwo` WHERE Key >= $key ORDER BY Value ASC; + )", + R"([ + [[5u]];[[6u]] + ])" + }, + { + R"( + $key = (SELECT Value FROM `/Root/TableOne` WHERE Key = 5); + + SELECT Value FROM `/Root/TableTwo` WHERE Key >= $key ORDER BY Value DESC; + )", + R"([ + [[6u]];[[5u]] + ])" + }, + { + R"( + $key1 = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); + $key2 = (SELECT Value FROM `/Root/TableTwo` WHERE Key = 3); + + SELECT Value FROM `/Root/TableTwo` WHERE Key = $key1 * $key2 ORDER BY Value; + )", + R"([ + [[6u]] + ])" + }, + { + R"( + $keys = (SELECT Value FROM `/Root/TableOne` WHERE Key > 2); + + SELECT Value FROM `/Root/TableTwo` WHERE Key IN $keys ORDER BY Value; + )", + R"([ + [[3u]];[[4u]];[[5u]];[[6u]] + ])" + }, + { + R"( + $keys = (SELECT Value FROM `/Root/TableOne` WHERE Key > 2); + + SELECT Value FROM `/Root/TableTwo` WHERE Value IN COMPACT $keys ORDER BY Value; + )", + R"([ + [[3u]];[[4u]];[[5u]];[[6u]] + ])" + }, +#if 0 + // Count IF is not supported in DqBuildAggregationResultStage, there is no AsStruct and + // optimizer failed. Need to fix later. + { + R"( + $divisor = (SELECT Value FROM `/Root/TableOne` WHERE Key = 2); + + SELECT COUNT_IF(Value % $divisor == 1) AS odd_count FROM `/Root/TableTwo`; + )", + R"([ + [3u] + ])" + }, +#endif + }; + + TString useNewEngine = "PRAGMA kikimr.UseNewEngine = \"true\";"; + + for (auto& item: testData) { + auto result = session.ExecuteDataQuery(useNewEngine + item.first, + TTxControl::BeginTx(TTxSettings::OnlineRO()).CommitTx()).ExtractValueSync(); + AssertSuccessResult(result); + auto resultYson = FormatResultSetYson(result.GetResultSet(0)); + CompareYson(item.second, resultYson); + } + + for (auto& item: testData) { + auto it = db.StreamExecuteScanQuery(useNewEngine + item.first).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + CompareYson(item.second, CollectStreamResult(it).ResultSetYson); + } + } Y_UNIT_TEST(DeleteWithBuiltin) { TKikimrRunner kikimr; @@ -2482,70 +2482,70 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { CompareYson("[[[1]];[[1]];[[1]];[[2]];[[2]]]", FormatResultSetYson(result.GetResultSet(0))); } - - Y_UNIT_TEST(SqlInFromCompact) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - - auto result = session.ExecuteSchemeQuery(R"( - --!syntax_v1 - - CREATE TABLE `/Root/table1` ( - key String, - cached String, - PRIMARY KEY (key) - ); - - CREATE TABLE `/Root/table2` ( - key String, - in_cache String, - value String, - PRIMARY KEY (key) - ); - )").GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - - REPLACE INTO [/Root/table1] (key, cached) VALUES - ("Key1", "CachedValue1"), - ("Key2", "CachedValue2"); - )", TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - - REPLACE INTO [/Root/table2] ( - key, in_cache, value - ) VALUES - ("Key1", "CachedValue1", "Value 1"), - ("Key2", "CachedValue2", "Value 2"); - )", TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - result = session.ExecuteDataQuery(R"( - --!syntax_v1 - PRAGMA Kikimr.UseNewEngine = "true"; - - $t1 = SELECT `cached`, `key` FROM `/Root/table1` - WHERE `key` = "Key1"; - - $cache = (SELECT `cached` FROM $t1); - - $t2 = SELECT `value`, `in_cache` FROM `table2` - WHERE `key` = "Key1" AND `in_cache` IN COMPACT $cache; - - SELECT `in_cache`, `value` FROM $t1 AS a - INNER JOIN $t2 AS b - ON a.`cached` == b.`in_cache` - WHERE a.`cached` = b.`in_cache`; - )", TTxControl::BeginTx().CommitTx()).GetValueSync(); - - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - } + + Y_UNIT_TEST(SqlInFromCompact) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + CREATE TABLE `/Root/table1` ( + key String, + cached String, + PRIMARY KEY (key) + ); + + CREATE TABLE `/Root/table2` ( + key String, + in_cache String, + value String, + PRIMARY KEY (key) + ); + )").GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + result = session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + + REPLACE INTO [/Root/table1] (key, cached) VALUES + ("Key1", "CachedValue1"), + ("Key2", "CachedValue2"); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + result = session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + + REPLACE INTO [/Root/table2] ( + key, in_cache, value + ) VALUES + ("Key1", "CachedValue1", "Value 1"), + ("Key2", "CachedValue2", "Value 2"); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + result = session.ExecuteDataQuery(R"( + --!syntax_v1 + PRAGMA Kikimr.UseNewEngine = "true"; + + $t1 = SELECT `cached`, `key` FROM `/Root/table1` + WHERE `key` = "Key1"; + + $cache = (SELECT `cached` FROM $t1); + + $t2 = SELECT `value`, `in_cache` FROM `table2` + WHERE `key` = "Key1" AND `in_cache` IN COMPACT $cache; + + SELECT `in_cache`, `value` FROM $t1 AS a + INNER JOIN $t2 AS b + ON a.`cached` == b.`in_cache` + WHERE a.`cached` = b.`in_cache`; + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } Y_UNIT_TEST(PrecomputeKey) { TKikimrRunner kikimr; diff --git a/ydb/core/kqp/ut/kqp_olap_ut.cpp b/ydb/core/kqp/ut/kqp_olap_ut.cpp index 1c9e5c9a1e4..908fc34a13d 100644 --- a/ydb/core/kqp/ut/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/kqp_olap_ut.cpp @@ -45,7 +45,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { #Columns { Name: "resource_type" Type: "Utf8" } Columns { Name: "resource_id" Type: "Utf8" } Columns { Name: "uid" Type: "Utf8" } - Columns { Name: "level" Type: "Int32" } + Columns { Name: "level" Type: "Int32" } Columns { Name: "message" Type: "Utf8" } #Columns { Name: "json_payload" Type: "Json" } #Columns { Name: "ingested_at" Type: "Timestamp" } @@ -77,15 +77,15 @@ Y_UNIT_TEST_SUITE(KqpOlap) { arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::TimeUnit::MICRO)), arrow::field("resource_id", arrow::utf8()), arrow::field("uid", arrow::utf8()), - arrow::field("level", arrow::int32()), + arrow::field("level", arrow::int32()), arrow::field("message", arrow::utf8()) }); arrow::TimestampBuilder b1(arrow::timestamp(arrow::TimeUnit::TimeUnit::MICRO), arrow::default_memory_pool()); arrow::StringBuilder b2; arrow::StringBuilder b3; - arrow::Int32Builder b4; - arrow::StringBuilder b5; + arrow::Int32Builder b4; + arrow::StringBuilder b5; for (size_t i = 0; i < rowCount; ++i) { std::string uid("uid_" + std::to_string(tsBegin + i)); @@ -93,23 +93,23 @@ Y_UNIT_TEST_SUITE(KqpOlap) { Y_VERIFY(b1.Append(tsBegin + i).ok()); Y_VERIFY(b2.Append(std::to_string(pathIdBegin + i)).ok()); Y_VERIFY(b3.Append(uid).ok()); - Y_VERIFY(b4.Append(i % 5).ok()); - Y_VERIFY(b5.Append(message).ok()); + Y_VERIFY(b4.Append(i % 5).ok()); + Y_VERIFY(b5.Append(message).ok()); } std::shared_ptr<arrow::TimestampArray> a1; std::shared_ptr<arrow::StringArray> a2; std::shared_ptr<arrow::StringArray> a3; - std::shared_ptr<arrow::Int32Array> a4; - std::shared_ptr<arrow::StringArray> a5; + std::shared_ptr<arrow::Int32Array> a4; + std::shared_ptr<arrow::StringArray> a5; Y_VERIFY(b1.Finish(&a1).ok()); Y_VERIFY(b2.Finish(&a2).ok()); Y_VERIFY(b3.Finish(&a3).ok()); Y_VERIFY(b4.Finish(&a4).ok()); - Y_VERIFY(b5.Finish(&a5).ok()); + Y_VERIFY(b5.Finish(&a5).ok()); - return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3, a4, a5 }); + return arrow::RecordBatch::Make(schema, rowCount, { a1, a2, a3, a4, a5 }); } TString TestBlob(ui64 pathIdBegin, ui64 tsBegin, size_t rowCount) { @@ -266,139 +266,139 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } - void CreateTableOfAllTypes(TKikimrRunner& kikimr) { - auto& legacyClient = kikimr.GetTestClient(); - - legacyClient.CreateOlapStore("/Root", R"( - Name: "olapStore" - ColumnShardCount: 1 - SchemaPresets { - Name: "default" - Schema { - Columns { Name: "key" Type: "Int32" } - Columns { Name: "Bool_column" Type: "Bool" } - # Int8, Int16, UInt8, UInt16 is not supported by engine - Columns { Name: "Int8_column" Type: "Int32" } - Columns { Name: "Int16_column" Type: "Int32" } - Columns { Name: "Int32_column" Type: "Int32" } - Columns { Name: "Int64_column" Type: "Int64" } - Columns { Name: "UInt8_column" Type: "Uint32" } - Columns { Name: "UInt16_column" Type: "Uint32" } - Columns { Name: "UInt32_column" Type: "Uint32" } - Columns { Name: "UInt64_column" Type: "Uint64" } - Columns { Name: "Double_column" Type: "Double" } - Columns { Name: "Float_column" Type: "Float" } - Columns { Name: "Decimal_column" Type: "Decimal" } - Columns { Name: "String_column" Type: "String" } - Columns { Name: "Utf8_column" Type: "Utf8" } - Columns { Name: "Json_column" Type: "Json" } - Columns { Name: "Yson_column" Type: "Yson" } - Columns { Name: "Timestamp_column" Type: "Timestamp" } - Columns { Name: "Date_column" Type: "Date" } - Columns { Name: "Datetime_column" Type: "Datetime" } - Columns { Name: "Interval_column" Type: "Interval" } - KeyColumnNames: "key" + void CreateTableOfAllTypes(TKikimrRunner& kikimr) { + auto& legacyClient = kikimr.GetTestClient(); + + legacyClient.CreateOlapStore("/Root", R"( + Name: "olapStore" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: "key" Type: "Int32" } + Columns { Name: "Bool_column" Type: "Bool" } + # Int8, Int16, UInt8, UInt16 is not supported by engine + Columns { Name: "Int8_column" Type: "Int32" } + Columns { Name: "Int16_column" Type: "Int32" } + Columns { Name: "Int32_column" Type: "Int32" } + Columns { Name: "Int64_column" Type: "Int64" } + Columns { Name: "UInt8_column" Type: "Uint32" } + Columns { Name: "UInt16_column" Type: "Uint32" } + Columns { Name: "UInt32_column" Type: "Uint32" } + Columns { Name: "UInt64_column" Type: "Uint64" } + Columns { Name: "Double_column" Type: "Double" } + Columns { Name: "Float_column" Type: "Float" } + Columns { Name: "Decimal_column" Type: "Decimal" } + Columns { Name: "String_column" Type: "String" } + Columns { Name: "Utf8_column" Type: "Utf8" } + Columns { Name: "Json_column" Type: "Json" } + Columns { Name: "Yson_column" Type: "Yson" } + Columns { Name: "Timestamp_column" Type: "Timestamp" } + Columns { Name: "Date_column" Type: "Date" } + Columns { Name: "Datetime_column" Type: "Datetime" } + Columns { Name: "Interval_column" Type: "Interval" } + KeyColumnNames: "key" Engine: COLUMN_ENGINE_REPLACING_TIMESERIES - } - } - )"); - - legacyClient.CreateOlapTable("/Root/olapStore", R"( - Name: "OlapParametersTable" - ColumnShardCount: 1 - )"); - legacyClient.Ls("/Root"); - legacyClient.Ls("/Root/olapStore"); - legacyClient.Ls("/Root/olapStore/OlapParametersTable"); - } - - std::map<std::string, TParams> CreateParametersOfAllTypes(NYdb::NTable::TTableClient& tableClient) { - return { - { - "Bool", - tableClient.GetParamsBuilder().AddParam("$in_value").Bool(false).Build().Build() - }, - { - "Int8", - tableClient.GetParamsBuilder().AddParam("$in_value").Int8(0).Build().Build() - }, - { - "Int16", - tableClient.GetParamsBuilder().AddParam("$in_value").Int16(0).Build().Build() - }, - { - "Int32", - tableClient.GetParamsBuilder().AddParam("$in_value").Int32(0).Build().Build() - }, - { - "Int64", - tableClient.GetParamsBuilder().AddParam("$in_value").Int64(0).Build().Build() - }, - { - "UInt8", - tableClient.GetParamsBuilder().AddParam("$in_value").Uint8(0).Build().Build() - }, - { - "UInt16", - tableClient.GetParamsBuilder().AddParam("$in_value").Uint16(0).Build().Build() - }, - { - "UInt32", - tableClient.GetParamsBuilder().AddParam("$in_value").Uint32(0).Build().Build() - }, - { - "UInt64", - tableClient.GetParamsBuilder().AddParam("$in_value").Uint64(0).Build().Build() - }, - { - "Float", - tableClient.GetParamsBuilder().AddParam("$in_value").Float(0).Build().Build() - }, - { - "Double", - tableClient.GetParamsBuilder().AddParam("$in_value").Double(0).Build().Build() - }, - { - "String", - tableClient.GetParamsBuilder().AddParam("$in_value").String("XX").Build().Build() - }, - { - "Utf8", - tableClient.GetParamsBuilder().AddParam("$in_value").Utf8("XX").Build().Build() - }, - { - "Timestamp", - tableClient.GetParamsBuilder().AddParam("$in_value").Timestamp(TInstant::Now()).Build().Build() - }, - { - "Date", - tableClient.GetParamsBuilder().AddParam("$in_value").Date(TInstant::Now()).Build().Build() - }, - { - "Datetime", - tableClient.GetParamsBuilder().AddParam("$in_value").Datetime(TInstant::Now()).Build().Build() - }, - { - "Interval", - tableClient.GetParamsBuilder().AddParam("$in_value").Interval(1010).Build().Build() - }, - { - "Decimal(12,9)", - tableClient.GetParamsBuilder().AddParam("$in_value").Decimal(TDecimalValue("10.123456789", 12, 9)).Build().Build() - }, -#if 0 - { - "Json", - tableClient.GetParamsBuilder().AddParam("$in_value").Json(R"({"XX":"YY"})").Build().Build() - }, - { - "Yson", - tableClient.GetParamsBuilder().AddParam("$in_value").Yson("[[[]]]").Build().Build() - }, -#endif - }; - } - + } + } + )"); + + legacyClient.CreateOlapTable("/Root/olapStore", R"( + Name: "OlapParametersTable" + ColumnShardCount: 1 + )"); + legacyClient.Ls("/Root"); + legacyClient.Ls("/Root/olapStore"); + legacyClient.Ls("/Root/olapStore/OlapParametersTable"); + } + + std::map<std::string, TParams> CreateParametersOfAllTypes(NYdb::NTable::TTableClient& tableClient) { + return { + { + "Bool", + tableClient.GetParamsBuilder().AddParam("$in_value").Bool(false).Build().Build() + }, + { + "Int8", + tableClient.GetParamsBuilder().AddParam("$in_value").Int8(0).Build().Build() + }, + { + "Int16", + tableClient.GetParamsBuilder().AddParam("$in_value").Int16(0).Build().Build() + }, + { + "Int32", + tableClient.GetParamsBuilder().AddParam("$in_value").Int32(0).Build().Build() + }, + { + "Int64", + tableClient.GetParamsBuilder().AddParam("$in_value").Int64(0).Build().Build() + }, + { + "UInt8", + tableClient.GetParamsBuilder().AddParam("$in_value").Uint8(0).Build().Build() + }, + { + "UInt16", + tableClient.GetParamsBuilder().AddParam("$in_value").Uint16(0).Build().Build() + }, + { + "UInt32", + tableClient.GetParamsBuilder().AddParam("$in_value").Uint32(0).Build().Build() + }, + { + "UInt64", + tableClient.GetParamsBuilder().AddParam("$in_value").Uint64(0).Build().Build() + }, + { + "Float", + tableClient.GetParamsBuilder().AddParam("$in_value").Float(0).Build().Build() + }, + { + "Double", + tableClient.GetParamsBuilder().AddParam("$in_value").Double(0).Build().Build() + }, + { + "String", + tableClient.GetParamsBuilder().AddParam("$in_value").String("XX").Build().Build() + }, + { + "Utf8", + tableClient.GetParamsBuilder().AddParam("$in_value").Utf8("XX").Build().Build() + }, + { + "Timestamp", + tableClient.GetParamsBuilder().AddParam("$in_value").Timestamp(TInstant::Now()).Build().Build() + }, + { + "Date", + tableClient.GetParamsBuilder().AddParam("$in_value").Date(TInstant::Now()).Build().Build() + }, + { + "Datetime", + tableClient.GetParamsBuilder().AddParam("$in_value").Datetime(TInstant::Now()).Build().Build() + }, + { + "Interval", + tableClient.GetParamsBuilder().AddParam("$in_value").Interval(1010).Build().Build() + }, + { + "Decimal(12,9)", + tableClient.GetParamsBuilder().AddParam("$in_value").Decimal(TDecimalValue("10.123456789", 12, 9)).Build().Build() + }, +#if 0 + { + "Json", + tableClient.GetParamsBuilder().AddParam("$in_value").Json(R"({"XX":"YY"})").Build().Build() + }, + { + "Yson", + tableClient.GetParamsBuilder().AddParam("$in_value").Yson("[[[]]]").Build().Build() + }, +#endif + }; + } + Y_UNIT_TEST(SimpleQueryOlap) { auto settings = TKikimrSettings() .SetWithSampleTables(false) @@ -686,29 +686,29 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } - Y_UNIT_TEST(EmptyRange) { + Y_UNIT_TEST(EmptyRange) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - CreateTestOlapTable(kikimr); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - - auto tableClient = kikimr.GetTableClient(); - - auto it = tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 - - SELECT * - FROM `/Root/olapStore/olapTable` - WHERE `timestamp` < CAST(3000001 AS Timestamp) AND `timestamp` > CAST(3000005 AS Timestamp) - )").GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - CompareYson(StreamResultToYson(it), "[]"); - } - + + CreateTestOlapTable(kikimr); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + + auto tableClient = kikimr.GetTableClient(); + + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT * + FROM `/Root/olapStore/olapTable` + WHERE `timestamp` < CAST(3000001 AS Timestamp) AND `timestamp` > CAST(3000005 AS Timestamp) + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + CompareYson(StreamResultToYson(it), "[]"); + } + Y_UNIT_TEST(Aggregation) { auto settings = TKikimrSettings() .SetWithSampleTables(false) @@ -784,35 +784,35 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } Y_UNIT_TEST(PushdownFilter) { - static bool enableLog = false; - - auto doTest = [](std::optional<bool> viaSettings, std::optional<bool> viaPragma, bool pushdownPresent) { + static bool enableLog = false; + + auto doTest = [](std::optional<bool> viaSettings, std::optional<bool> viaPragma, bool pushdownPresent) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); - if (enableLog) { - Cerr << "Run test:" << Endl; - Cerr << "viaSettings is " << (viaSettings.has_value() ? "" : "not ") << "present."; - if (viaSettings.has_value()) { - Cerr << " Value: " << viaSettings.value(); - } - Cerr << Endl; - Cerr << "viaPragma is " << (viaPragma.has_value() ? "" : "not ") << "present."; - if (viaPragma.has_value()) { - Cerr << " Value: " << viaPragma.value(); - } - Cerr << Endl; - Cerr << "Expected result: " << pushdownPresent << Endl; - } - - if (viaSettings.has_value()) { - auto setting = NKikimrKqp::TKqpSetting(); - setting.SetName("_KqpPushOlapProcess"); - setting.SetValue(viaSettings.value() ? "true" : "false"); + if (enableLog) { + Cerr << "Run test:" << Endl; + Cerr << "viaSettings is " << (viaSettings.has_value() ? "" : "not ") << "present."; + if (viaSettings.has_value()) { + Cerr << " Value: " << viaSettings.value(); + } + Cerr << Endl; + Cerr << "viaPragma is " << (viaPragma.has_value() ? "" : "not ") << "present."; + if (viaPragma.has_value()) { + Cerr << " Value: " << viaPragma.value(); + } + Cerr << Endl; + Cerr << "Expected result: " << pushdownPresent << Endl; + } + + if (viaSettings.has_value()) { + auto setting = NKikimrKqp::TKqpSetting(); + setting.SetName("_KqpPushOlapProcess"); + setting.SetValue(viaSettings.value() ? "true" : "false"); settings.KqpSettings = { setting }; - } - + } + TKikimrRunner kikimr(settings); kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); @@ -821,152 +821,152 @@ Y_UNIT_TEST_SUITE(KqpOlap) { CreateTestOlapTable(kikimr); WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 10); - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + { - TString query = TString(R"( + TString query = TString(R"( --!syntax_v1 SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id = "5"u; - )"); - - if (viaPragma.has_value()) { - TString pragma = TString(R"( - PRAGMA Kikimr.KqpPushOlapProcess = "<ENABLE_PUSH>"; - )"); - SubstGlobal(pragma, "<ENABLE_PUSH>", viaPragma.value() ? "true" : "false"); - query = pragma + query; - } - - auto it = client.StreamExecuteScanQuery(query).GetValueSync(); - + )"); + + if (viaPragma.has_value()) { + TString pragma = TString(R"( + PRAGMA Kikimr.KqpPushOlapProcess = "<ENABLE_PUSH>"; + )"); + SubstGlobal(pragma, "<ENABLE_PUSH>", viaPragma.value() ? "true" : "false"); + query = pragma + query; + } + + auto it = client.StreamExecuteScanQuery(query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); CompareYson(result, R"([[ - [0]; + [0]; ["some prefix xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"]; ["5"]; [1000005u]; ["uid_1000005"] ]])"); - - it = client.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - auto explainResult = CollectStreamResult(it); - NJson::TJsonValue plan, pushdown; - NJson::ReadJsonTree(*explainResult.PlanJson, &plan, true); - - if (pushdownPresent) { - pushdown = FindPlanNodeByKv(plan, "PredicatePushdown", "true"); - } else { - pushdown = FindPlanNodeByKv(plan, "PredicatePushdown", "false"); - } - - UNIT_ASSERT(pushdown.IsDefined()); + + it = client.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + auto explainResult = CollectStreamResult(it); + NJson::TJsonValue plan, pushdown; + NJson::ReadJsonTree(*explainResult.PlanJson, &plan, true); + + if (pushdownPresent) { + pushdown = FindPlanNodeByKv(plan, "PredicatePushdown", "true"); + } else { + pushdown = FindPlanNodeByKv(plan, "PredicatePushdown", "false"); + } + + UNIT_ASSERT(pushdown.IsDefined()); } }; - TVector<std::tuple<std::optional<bool>, std::optional<bool>, bool>> testData = { - {std::nullopt, std::nullopt, false}, - {false, std::nullopt, false}, - {true, std::nullopt, true}, - {std::nullopt, false, false}, - {std::nullopt, true, true}, - {false, false, false}, - {true, false, false}, - {false, true, true}, - }; - - for (auto &data: testData) { - doTest(std::get<0>(data), std::get<1>(data), std::get<2>(data)); - } + TVector<std::tuple<std::optional<bool>, std::optional<bool>, bool>> testData = { + {std::nullopt, std::nullopt, false}, + {false, std::nullopt, false}, + {true, std::nullopt, true}, + {std::nullopt, false, false}, + {std::nullopt, true, true}, + {false, false, false}, + {true, false, false}, + {false, true, true}, + }; + + for (auto &data: testData) { + doTest(std::get<0>(data), std::get<1>(data), std::get<2>(data)); + } } - - Y_UNIT_TEST(PKDescScan) { + + Y_UNIT_TEST(PKDescScan) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + // EnableDebugLogging(kikimr); - CreateTestOlapTable(kikimr); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 128); - - auto tableClient = kikimr.GetTableClient(); - auto selectQueryWithSort = TString(R"( - --!syntax_v1 - SELECT `timestamp` FROM `/Root/olapStore/olapTable` ORDER BY `timestamp` DESC LIMIT 4; - )"); - auto selectQuery = TString(R"( - --!syntax_v1 - SELECT `timestamp` FROM `/Root/olapStore/olapTable` ORDER BY `timestamp`; - )"); - - auto it = tableClient.StreamExecuteScanQuery(selectQuery, scanSettings).GetValueSync(); - auto result = CollectStreamResult(it); - - NJson::TJsonValue plan, node, reverse, limit; - NJson::ReadJsonTree(*result.PlanJson, &plan, true); - - node = FindPlanNodeByKv(plan, "Node Type", "TableFullScan"); - UNIT_ASSERT(node.IsDefined()); - reverse = FindPlanNodeByKv(node, "Reverse", "false"); - UNIT_ASSERT(!reverse.IsDefined()); - - // Check that Reverse flag is set in query plan - it = tableClient.StreamExecuteScanQuery(selectQueryWithSort, scanSettings).GetValueSync(); - result = CollectStreamResult(it); - - NJson::ReadJsonTree(*result.PlanJson, &plan, true); - Cerr << *result.PlanJson << Endl; - + CreateTestOlapTable(kikimr); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 128); + + auto tableClient = kikimr.GetTableClient(); + auto selectQueryWithSort = TString(R"( + --!syntax_v1 + SELECT `timestamp` FROM `/Root/olapStore/olapTable` ORDER BY `timestamp` DESC LIMIT 4; + )"); + auto selectQuery = TString(R"( + --!syntax_v1 + SELECT `timestamp` FROM `/Root/olapStore/olapTable` ORDER BY `timestamp`; + )"); + + auto it = tableClient.StreamExecuteScanQuery(selectQuery, scanSettings).GetValueSync(); + auto result = CollectStreamResult(it); + + NJson::TJsonValue plan, node, reverse, limit; + NJson::ReadJsonTree(*result.PlanJson, &plan, true); + + node = FindPlanNodeByKv(plan, "Node Type", "TableFullScan"); + UNIT_ASSERT(node.IsDefined()); + reverse = FindPlanNodeByKv(node, "Reverse", "false"); + UNIT_ASSERT(!reverse.IsDefined()); + + // Check that Reverse flag is set in query plan + it = tableClient.StreamExecuteScanQuery(selectQueryWithSort, scanSettings).GetValueSync(); + result = CollectStreamResult(it); + + NJson::ReadJsonTree(*result.PlanJson, &plan, true); + Cerr << *result.PlanJson << Endl; + node = FindPlanNodeByKv(plan, "Node Type", "Limit-TableFullScan"); - UNIT_ASSERT(node.IsDefined()); - reverse = FindPlanNodeByKv(node, "Reverse", "true"); - UNIT_ASSERT(reverse.IsDefined()); - limit = FindPlanNodeByKv(node, "Limit", "4"); - UNIT_ASSERT(limit.IsDefined()); - - // Run actual request in case explain did not execute anything - it = tableClient.StreamExecuteScanQuery(selectQueryWithSort).GetValueSync(); - - UNIT_ASSERT(it.IsSuccess()); - - auto ysonResult = CollectStreamResult(it).ResultSetYson; - - auto expectedYson = TString(R"([ - [[1000127u]]; - [[1000126u]]; - [[1000125u]]; - [[1000124u]] - ])"); - - CompareYson(expectedYson, ysonResult); - } - - Y_UNIT_TEST(ExtractRanges) { + UNIT_ASSERT(node.IsDefined()); + reverse = FindPlanNodeByKv(node, "Reverse", "true"); + UNIT_ASSERT(reverse.IsDefined()); + limit = FindPlanNodeByKv(node, "Limit", "4"); + UNIT_ASSERT(limit.IsDefined()); + + // Run actual request in case explain did not execute anything + it = tableClient.StreamExecuteScanQuery(selectQueryWithSort).GetValueSync(); + + UNIT_ASSERT(it.IsSuccess()); + + auto ysonResult = CollectStreamResult(it).ResultSetYson; + + auto expectedYson = TString(R"([ + [[1000127u]]; + [[1000126u]]; + [[1000125u]]; + [[1000124u]] + ])"); + + CompareYson(expectedYson, ysonResult); + } + + Y_UNIT_TEST(ExtractRanges) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - CreateTestOlapTable(kikimr); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2000); - - auto tableClient = kikimr.GetTableClient(); - auto selectQuery = TString(R"( - SELECT `timestamp` FROM `/Root/olapStore/olapTable` - WHERE - (`timestamp` < CAST(1000100 AS Timestamp) AND `timestamp` > CAST(1000095 AS Timestamp)) OR - (`timestamp` <= CAST(1001000 AS Timestamp) AND `timestamp` >= CAST(1000999 AS Timestamp)) OR - (`timestamp` > CAST(1002000 AS Timestamp)) - ORDER BY `timestamp`; - )"); - + + CreateTestOlapTable(kikimr); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2000); + + auto tableClient = kikimr.GetTableClient(); + auto selectQuery = TString(R"( + SELECT `timestamp` FROM `/Root/olapStore/olapTable` + WHERE + (`timestamp` < CAST(1000100 AS Timestamp) AND `timestamp` > CAST(1000095 AS Timestamp)) OR + (`timestamp` <= CAST(1001000 AS Timestamp) AND `timestamp` >= CAST(1000999 AS Timestamp)) OR + (`timestamp` > CAST(1002000 AS Timestamp)) + ORDER BY `timestamp`; + )"); + auto rows = ExecuteScanQuery(tableClient, selectQuery); TInstant tsPrev = TInstant::MicroSeconds(1000000); for (const auto& r : rows) { @@ -974,8 +974,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_GE_C(ts, tsPrev, "result is not sorted in ASC order"); tsPrev = ts; } - } - + } + Y_UNIT_TEST(ExtractRangesReverse) { auto settings = TKikimrSettings() .SetWithSampleTables(false) @@ -1006,129 +1006,129 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } - Y_UNIT_TEST(PredicatePushdown) { - constexpr bool logQueries = false; + Y_UNIT_TEST(PredicatePushdown) { + constexpr bool logQueries = false; auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - CreateTestOlapTable(kikimr); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 5); - - auto tableClient = kikimr.GetTableClient(); - - std::vector<TString> testData = { - R"(`resource_id` = `uid`)", - R"(`resource_id` = "10001")", - R"(`level` = 1)", - R"(`level` = Int8("1"))", - R"(`level` = Int16("1"))", - R"(`level` = Int32("1"))", - R"((`level`, `uid`, `resource_id`) = (Int32("1"), "uid_3000001", "10001"))", - R"(`level` > Int32("3"))", - R"(`level` < Int32("1"))", - R"(`level` >= Int32("4"))", - R"(`level` <= Int32("0"))", - R"(`level` != Int32("0"))", - R"((`level`, `uid`, `resource_id`) > (Int32("1"), "uid_3000001", "10001"))", - R"((`level`, `uid`, `resource_id`) > (Int32("1"), "uid_3000000", "10001"))", - R"((`level`, `uid`, `resource_id`) < (Int32("1"), "uid_3000002", "10001"))", - R"((`level`, `uid`, `resource_id`) >= (Int32("2"), "uid_3000000", "10001"))", - R"((`level`, `uid`, `resource_id`) != (Int32("1"), "uid_3000001", "10001"))", - R"((`level`, `uid`, `resource_id`) != (Int32("0"), "uid_3000001", "10011"))", - R"(`level` = 0 OR `level` = 2 OR `level` = 1)", - R"(`level` = 0 OR (`level` = 2 AND `uid` = "uid_3000002"))", - R"(`level` = 0 OR `uid` = "uid_3000003")", - R"(`level` = 0 AND `uid` = "uid_3000003")", - R"(`level` = 0 AND `uid` = "uid_3000000")", - // Timestamp will be removed by predicate extraction now. - R"(`timestamp` >= CAST(3000001 AS Timestamp) AND `level` > 3)", - R"((`level`, `uid`) > (Int32("2"), "uid_3000004") OR (`level`, `uid`) < (Int32("1"), "uid_3000002"))", - R"(Int32("3") > `level`)", - R"((Int32("1"), "uid_3000001", "10001") = (`level`, `uid`, `resource_id`))", - R"((Int32("1"), `uid`, "10001") = (`level`, "uid_3000001", `resource_id`))", - R"(`level` = 0 AND "uid_3000000" = `uid`)", - R"(`uid` > `resource_id`)", - R"(`level` IS NULL)", - R"(`level` IS NOT NULL)", - R"((`level`, `uid`) > (Int32("1"), NULL))", - R"((`level`, `uid`) != (Int32("1"), NULL))", - //R"((`timestamp`, `level`) >= (CAST(3000001 AS Timestamp), 3))", - }; - - std::vector<TString> testDataNoPush = { - R"(`level` != NULL)", - R"(`level` > NULL)", - }; - - auto buildQuery = [](const TString& predicate, bool pushEnabled) { - TStringBuilder qBuilder; - - qBuilder << "--!syntax_v1" << Endl; - - if (pushEnabled) { - qBuilder << R"(PRAGMA Kikimr.KqpPushOlapProcess = "true";)" << Endl; - } - - qBuilder << "SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE "; - qBuilder << predicate; - qBuilder << " ORDER BY `timestamp`"; - - return TString(qBuilder); - }; - - for (const auto& predicate: testData) { - auto normalQuery = buildQuery(predicate, false); - auto pushQuery = buildQuery(predicate, true); - - auto it = tableClient.StreamExecuteScanQuery(normalQuery).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - auto goodResult = CollectStreamResult(it); - - it = tableClient.StreamExecuteScanQuery(pushQuery).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - auto pushResult = CollectStreamResult(it); - - if (logQueries) { - Cerr << "Query: " << normalQuery << Endl; - Cerr << "Expected: " << goodResult.ResultSetYson << Endl; - Cerr << "Received: " << pushResult.ResultSetYson << Endl; - } - - CompareYson(goodResult.ResultSetYson, pushResult.ResultSetYson); - - it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - - UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, - TStringBuilder() << "Predicate not pushed down. Query: " << pushQuery); - } - - for (const auto& predicate: testDataNoPush) { - auto pushQuery = buildQuery(predicate, true); - - if (logQueries) { - Cerr << "Query: " << pushQuery << Endl; - } - - auto it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - - UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, - TStringBuilder() << "Predicate pushed down. Query: " << pushQuery); - } - } - + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + CreateTestOlapTable(kikimr); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 5); + + auto tableClient = kikimr.GetTableClient(); + + std::vector<TString> testData = { + R"(`resource_id` = `uid`)", + R"(`resource_id` = "10001")", + R"(`level` = 1)", + R"(`level` = Int8("1"))", + R"(`level` = Int16("1"))", + R"(`level` = Int32("1"))", + R"((`level`, `uid`, `resource_id`) = (Int32("1"), "uid_3000001", "10001"))", + R"(`level` > Int32("3"))", + R"(`level` < Int32("1"))", + R"(`level` >= Int32("4"))", + R"(`level` <= Int32("0"))", + R"(`level` != Int32("0"))", + R"((`level`, `uid`, `resource_id`) > (Int32("1"), "uid_3000001", "10001"))", + R"((`level`, `uid`, `resource_id`) > (Int32("1"), "uid_3000000", "10001"))", + R"((`level`, `uid`, `resource_id`) < (Int32("1"), "uid_3000002", "10001"))", + R"((`level`, `uid`, `resource_id`) >= (Int32("2"), "uid_3000000", "10001"))", + R"((`level`, `uid`, `resource_id`) != (Int32("1"), "uid_3000001", "10001"))", + R"((`level`, `uid`, `resource_id`) != (Int32("0"), "uid_3000001", "10011"))", + R"(`level` = 0 OR `level` = 2 OR `level` = 1)", + R"(`level` = 0 OR (`level` = 2 AND `uid` = "uid_3000002"))", + R"(`level` = 0 OR `uid` = "uid_3000003")", + R"(`level` = 0 AND `uid` = "uid_3000003")", + R"(`level` = 0 AND `uid` = "uid_3000000")", + // Timestamp will be removed by predicate extraction now. + R"(`timestamp` >= CAST(3000001 AS Timestamp) AND `level` > 3)", + R"((`level`, `uid`) > (Int32("2"), "uid_3000004") OR (`level`, `uid`) < (Int32("1"), "uid_3000002"))", + R"(Int32("3") > `level`)", + R"((Int32("1"), "uid_3000001", "10001") = (`level`, `uid`, `resource_id`))", + R"((Int32("1"), `uid`, "10001") = (`level`, "uid_3000001", `resource_id`))", + R"(`level` = 0 AND "uid_3000000" = `uid`)", + R"(`uid` > `resource_id`)", + R"(`level` IS NULL)", + R"(`level` IS NOT NULL)", + R"((`level`, `uid`) > (Int32("1"), NULL))", + R"((`level`, `uid`) != (Int32("1"), NULL))", + //R"((`timestamp`, `level`) >= (CAST(3000001 AS Timestamp), 3))", + }; + + std::vector<TString> testDataNoPush = { + R"(`level` != NULL)", + R"(`level` > NULL)", + }; + + auto buildQuery = [](const TString& predicate, bool pushEnabled) { + TStringBuilder qBuilder; + + qBuilder << "--!syntax_v1" << Endl; + + if (pushEnabled) { + qBuilder << R"(PRAGMA Kikimr.KqpPushOlapProcess = "true";)" << Endl; + } + + qBuilder << "SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE "; + qBuilder << predicate; + qBuilder << " ORDER BY `timestamp`"; + + return TString(qBuilder); + }; + + for (const auto& predicate: testData) { + auto normalQuery = buildQuery(predicate, false); + auto pushQuery = buildQuery(predicate, true); + + auto it = tableClient.StreamExecuteScanQuery(normalQuery).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto goodResult = CollectStreamResult(it); + + it = tableClient.StreamExecuteScanQuery(pushQuery).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto pushResult = CollectStreamResult(it); + + if (logQueries) { + Cerr << "Query: " << normalQuery << Endl; + Cerr << "Expected: " << goodResult.ResultSetYson << Endl; + Cerr << "Received: " << pushResult.ResultSetYson << Endl; + } + + CompareYson(goodResult.ResultSetYson, pushResult.ResultSetYson); + + it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << pushQuery); + } + + for (const auto& predicate: testDataNoPush) { + auto pushQuery = buildQuery(predicate, true); + + if (logQueries) { + Cerr << "Query: " << pushQuery << Endl; + } + + auto it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, + TStringBuilder() << "Predicate pushed down. Query: " << pushQuery); + } + } + Y_UNIT_TEST(StatsSysView) { auto settings = TKikimrSettings() .SetWithSampleTables(false) @@ -1221,7 +1221,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT * FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` WHERE - PathId > UInt64("3") + PathId > UInt64("3") ORDER BY PathId, Kind, TabletId )"); @@ -1317,7 +1317,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind < UInt32("4") + PathId == UInt64("3") AND Kind < UInt32("4") ORDER BY PathId, Kind, TabletId; )"); @@ -1357,8 +1357,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId > UInt64("0") AND PathId < UInt32("4") - OR PathId > UInt64("4") AND PathId <= UInt64("5") + PathId > UInt64("0") AND PathId < UInt32("4") + OR PathId > UInt64("4") AND PathId <= UInt64("5") ORDER BY PathId DESC, Kind DESC, TabletId DESC ; @@ -1395,7 +1395,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Bytes > UInt64("0") + WHERE Bytes > UInt64("0") ORDER BY PathId, Kind, TabletId; )"); @@ -1410,7 +1410,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Bytes > UInt64("0") + WHERE Bytes > UInt64("0") ORDER BY PathId, Kind, TabletId; )"); @@ -1424,7 +1424,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PRAGMA Kikimr.KqpPushOlapProcess = "true"; SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind == UInt32("5") + WHERE Kind == UInt32("5") ORDER BY PathId, Kind, TabletId; )"); @@ -1438,7 +1438,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PRAGMA Kikimr.KqpPushOlapProcess = "true"; SELECT * FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind >= UInt32("3") + WHERE Kind >= UInt32("3") ORDER BY PathId, Kind, TabletId; )"); @@ -1476,7 +1476,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Rows) as rows, FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind != UInt32("4") -- not INACTIVE + Kind != UInt32("4") -- not INACTIVE )"); auto rows = ExecuteScanQuery(tableClient, selectQuery); @@ -1492,7 +1492,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Rows) as rows, FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind != UInt32("4") -- not INACTIVE + Kind != UInt32("4") -- not INACTIVE GROUP BY PathId ORDER BY @@ -1519,7 +1519,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Blobs) as blobs FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - Kind < UInt32("4") + Kind < UInt32("4") GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -1545,7 +1545,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Blobs) as blobs FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Kind < UInt32("4") + PathId == UInt64("3") AND Kind < UInt32("4") GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -1569,7 +1569,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { SUM(Blobs) as blobs FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId >= UInt64("4") AND Kind < UInt32("4") + PathId >= UInt64("4") AND Kind < UInt32("4") GROUP BY PathId ORDER BY rows DESC LIMIT 10 @@ -1624,299 +1624,299 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } - Y_UNIT_TEST(PredicatePushdownWithParameters) { - constexpr bool logQueries = true; + Y_UNIT_TEST(PredicatePushdownWithParameters) { + constexpr bool logQueries = true; auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - CreateTestOlapTable(kikimr); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - - auto tableClient = kikimr.GetTableClient(); - - auto buildQuery = [](bool pushEnabled) { - TStringBuilder builder; - - builder << "--!syntax_v1" << Endl; - - if (pushEnabled) { - builder << "PRAGMA Kikimr.KqpPushOlapProcess = \"true\";" << Endl; - builder << "PRAGMA Kikimr.OptEnablePredicateExtract=\"false\";" << Endl; - } - - builder << R"( - DECLARE $in_timestamp AS Timestamp; - DECLARE $in_uid AS Utf8; - DECLARE $in_level AS Int32; - - SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE - `timestamp` > $in_timestamp AND uid > $in_uid AND level > $in_level - ORDER BY `timestamp`; - )" << Endl; - - return builder; - }; - - auto normalQuery = buildQuery(false); - auto pushQuery = buildQuery(true); - - auto params = tableClient.GetParamsBuilder() - .AddParam("$in_timestamp") - .Timestamp(TInstant::MicroSeconds(3000990)) - .Build() - .AddParam("$in_uid") - .Utf8("uid_3000980") - .Build() - .AddParam("$in_level") - .Int32(2) - .Build() - .Build(); - - auto it = tableClient.StreamExecuteScanQuery(normalQuery, params).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - auto goodResult = CollectStreamResult(it); - - it = tableClient.StreamExecuteScanQuery(pushQuery, params).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - auto pushResult = CollectStreamResult(it); - - if (logQueries) { - Cerr << "Query: " << normalQuery << Endl; - Cerr << "Expected: " << goodResult.ResultSetYson << Endl; - Cerr << "Received: " << pushResult.ResultSetYson << Endl; - } - - CompareYson(goodResult.ResultSetYson, pushResult.ResultSetYson); - - it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - - UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, - TStringBuilder() << "Predicate not pushed down. Query: " << pushQuery); - - NJson::TJsonValue plan, readRange; - NJson::ReadJsonTree(*result.PlanJson, &plan, true); - - readRange = FindPlanNodeByKv(plan, "Name", "TableFullScan"); - UNIT_ASSERT(readRange.IsDefined()); - } - - Y_UNIT_TEST(PredicatePushdownParameterTypesValidation) { + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + CreateTestOlapTable(kikimr); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + + auto tableClient = kikimr.GetTableClient(); + + auto buildQuery = [](bool pushEnabled) { + TStringBuilder builder; + + builder << "--!syntax_v1" << Endl; + + if (pushEnabled) { + builder << "PRAGMA Kikimr.KqpPushOlapProcess = \"true\";" << Endl; + builder << "PRAGMA Kikimr.OptEnablePredicateExtract=\"false\";" << Endl; + } + + builder << R"( + DECLARE $in_timestamp AS Timestamp; + DECLARE $in_uid AS Utf8; + DECLARE $in_level AS Int32; + + SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE + `timestamp` > $in_timestamp AND uid > $in_uid AND level > $in_level + ORDER BY `timestamp`; + )" << Endl; + + return builder; + }; + + auto normalQuery = buildQuery(false); + auto pushQuery = buildQuery(true); + + auto params = tableClient.GetParamsBuilder() + .AddParam("$in_timestamp") + .Timestamp(TInstant::MicroSeconds(3000990)) + .Build() + .AddParam("$in_uid") + .Utf8("uid_3000980") + .Build() + .AddParam("$in_level") + .Int32(2) + .Build() + .Build(); + + auto it = tableClient.StreamExecuteScanQuery(normalQuery, params).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto goodResult = CollectStreamResult(it); + + it = tableClient.StreamExecuteScanQuery(pushQuery, params).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto pushResult = CollectStreamResult(it); + + if (logQueries) { + Cerr << "Query: " << normalQuery << Endl; + Cerr << "Expected: " << goodResult.ResultSetYson << Endl; + Cerr << "Received: " << pushResult.ResultSetYson << Endl; + } + + CompareYson(goodResult.ResultSetYson, pushResult.ResultSetYson); + + it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << pushQuery); + + NJson::TJsonValue plan, readRange; + NJson::ReadJsonTree(*result.PlanJson, &plan, true); + + readRange = FindPlanNodeByKv(plan, "Name", "TableFullScan"); + UNIT_ASSERT(readRange.IsDefined()); + } + + Y_UNIT_TEST(PredicatePushdownParameterTypesValidation) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - CreateTableOfAllTypes(kikimr); - - auto tableClient = kikimr.GetTableClient(); - - std::map<std::string, TParams> testData = CreateParametersOfAllTypes(tableClient); - - const TString queryTemplate = R"( - --!syntax_v1 - PRAGMA Kikimr.KqpPushOlapProcess = "true"; - DECLARE $in_value AS <--TYPE-->; - - SELECT `key` FROM `/Root/olapStore/OlapParametersTable` WHERE <--NAME-->_column > $in_value; - )"; - - for (auto& [type, parameter]: testData) { - TString query(queryTemplate); - std::string clearType = type; - - size_t pos = clearType.find('('); - - if (std::string::npos != pos) { - clearType = clearType.substr(0, pos); - } - - SubstGlobal(query, "<--TYPE-->", type); - SubstGlobal(query, "<--NAME-->", clearType); - - TStringBuilder b; - - b << "----------------------------" << Endl; - b << query << Endl; - b << "----------------------------" << Endl; - - auto it = tableClient.StreamExecuteScanQuery(query, parameter).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString() << Endl << b); - auto goodResult = CollectStreamResult(it); - - it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString() << Endl << b); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - - UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, - TStringBuilder() << "Predicate not pushed down. Query: " << query); - } - } - - Y_UNIT_TEST(PredicatePushdownCastErrors) { + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + CreateTableOfAllTypes(kikimr); + + auto tableClient = kikimr.GetTableClient(); + + std::map<std::string, TParams> testData = CreateParametersOfAllTypes(tableClient); + + const TString queryTemplate = R"( + --!syntax_v1 + PRAGMA Kikimr.KqpPushOlapProcess = "true"; + DECLARE $in_value AS <--TYPE-->; + + SELECT `key` FROM `/Root/olapStore/OlapParametersTable` WHERE <--NAME-->_column > $in_value; + )"; + + for (auto& [type, parameter]: testData) { + TString query(queryTemplate); + std::string clearType = type; + + size_t pos = clearType.find('('); + + if (std::string::npos != pos) { + clearType = clearType.substr(0, pos); + } + + SubstGlobal(query, "<--TYPE-->", type); + SubstGlobal(query, "<--NAME-->", clearType); + + TStringBuilder b; + + b << "----------------------------" << Endl; + b << query << Endl; + b << "----------------------------" << Endl; + + auto it = tableClient.StreamExecuteScanQuery(query, parameter).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString() << Endl << b); + auto goodResult = CollectStreamResult(it); + + it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString() << Endl << b); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << query); + } + } + + Y_UNIT_TEST(PredicatePushdownCastErrors) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - CreateTableOfAllTypes(kikimr); - - auto tableClient = kikimr.GetTableClient(); - - std::map<std::string, std::set<std::string>> exceptions = { - {"Int8", {"Int16", "Int32"}}, - {"Int16", {"Int8", "Int32"}}, - {"Int32", {"Int8", "Int16"}}, - {"UInt8", {"UInt16", "UInt32"}}, - {"UInt16", {"UInt8", "UInt32"}}, - {"UInt32", {"UInt8", "UInt16"}}, - {"String", {"Utf8"}}, - {"Utf8", {"String", "Json", "Yson"}}, - {"Json", {"Utf8", "Yson"}}, - {"Yson", {"Utf8", "Json"}}, - }; - - std::vector<std::string> allTypes = { - "Bool", - "Int8", - "Int16", - "Int32", - "Int64", - "UInt8", - "UInt16", - "UInt32", - "UInt64", - "Double", - "Float", - "Decimal(12,9)", - "String", - "Utf8", - "Timestamp", - "Date", - "Datetime", - "Interval" - }; - - std::map<std::string, TParams> parameters = CreateParametersOfAllTypes(tableClient); - - const std::vector<std::string> predicates = { - "<--NAME-->_column > $in_value", - "<--NAME-->_column = $in_value", - "$in_value > <--NAME-->_column", - "$in_value = <--NAME-->_column", - }; - - const TString queryBegin = R"( - --!syntax_v1 - PRAGMA Kikimr.KqpPushOlapProcess = "true"; - - DECLARE $in_value AS <--TYPE-->; - - SELECT `key` FROM `/Root/olapStore/OlapParametersTable` WHERE - )"; - - std::vector<std::string> falsePositive; - std::vector<std::string> falseNegative; - - for (const auto& predicateTemplate: predicates) { - for (const auto& type: allTypes) { - for (const auto& checkType: allTypes) { - bool error = true; - - auto exc = exceptions.find(checkType); - - if (exc != exceptions.end() && exc->second.contains(type)) { - error = false; - } else if (type == checkType) { - error = false; - } - - std::string clearType = type; - - size_t pos = clearType.find('('); - - if (std::string::npos != pos) { - clearType = clearType.substr(0, pos); - } - - TString query(queryBegin); - TString predicate(predicateTemplate); - SubstGlobal(query, "<--TYPE-->", checkType); - SubstGlobal(predicate, "<--NAME-->", clearType); - - auto parameter = parameters.find(checkType); - - UNIT_ASSERT_C(parameter != parameters.end(), "No type " << checkType << " in parameters"); - - auto it = tableClient.StreamExecuteScanQuery(query + predicate, parameter->second).GetValueSync(); - // Check for successful execution - auto streamPart = it.ReadNext().GetValueSync(); - - bool pushdown; - - if (streamPart.IsSuccess()) { - it = tableClient.StreamExecuteScanQuery( - query + predicate, parameter->second, scanSettings - ).GetValueSync(); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - - pushdown = ast.find("KqpOlapFilter") != std::string::npos; - } else { - // Error means that predicate not pushed down - pushdown = false; - } - - if (error && pushdown) { - falsePositive.emplace_back( - TStringBuilder() << type << " vs " << checkType << " at " << predicate - ); - continue; - } - - if (!error && !pushdown) { - falseNegative.emplace_back( - TStringBuilder() << type << " vs " << checkType << " at " << predicate - ); - } - } - } - } - - TStringBuilder b; - b << "Errors found:" << Endl; - b << "------------------------------------------------" << Endl; - b << "False positive" << Endl; - - for (const auto& txt: falsePositive) { - b << txt << Endl; - } - - b << "False negative" << Endl; - for (const auto& txt: falseNegative) { - b << txt << Endl; - } - - b << "------------------------------------------------" << Endl; - UNIT_ASSERT_C(falsePositive.empty() && falseNegative.empty(), b); - } + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + CreateTableOfAllTypes(kikimr); + + auto tableClient = kikimr.GetTableClient(); + + std::map<std::string, std::set<std::string>> exceptions = { + {"Int8", {"Int16", "Int32"}}, + {"Int16", {"Int8", "Int32"}}, + {"Int32", {"Int8", "Int16"}}, + {"UInt8", {"UInt16", "UInt32"}}, + {"UInt16", {"UInt8", "UInt32"}}, + {"UInt32", {"UInt8", "UInt16"}}, + {"String", {"Utf8"}}, + {"Utf8", {"String", "Json", "Yson"}}, + {"Json", {"Utf8", "Yson"}}, + {"Yson", {"Utf8", "Json"}}, + }; + + std::vector<std::string> allTypes = { + "Bool", + "Int8", + "Int16", + "Int32", + "Int64", + "UInt8", + "UInt16", + "UInt32", + "UInt64", + "Double", + "Float", + "Decimal(12,9)", + "String", + "Utf8", + "Timestamp", + "Date", + "Datetime", + "Interval" + }; + + std::map<std::string, TParams> parameters = CreateParametersOfAllTypes(tableClient); + + const std::vector<std::string> predicates = { + "<--NAME-->_column > $in_value", + "<--NAME-->_column = $in_value", + "$in_value > <--NAME-->_column", + "$in_value = <--NAME-->_column", + }; + + const TString queryBegin = R"( + --!syntax_v1 + PRAGMA Kikimr.KqpPushOlapProcess = "true"; + + DECLARE $in_value AS <--TYPE-->; + + SELECT `key` FROM `/Root/olapStore/OlapParametersTable` WHERE + )"; + + std::vector<std::string> falsePositive; + std::vector<std::string> falseNegative; + + for (const auto& predicateTemplate: predicates) { + for (const auto& type: allTypes) { + for (const auto& checkType: allTypes) { + bool error = true; + + auto exc = exceptions.find(checkType); + + if (exc != exceptions.end() && exc->second.contains(type)) { + error = false; + } else if (type == checkType) { + error = false; + } + + std::string clearType = type; + + size_t pos = clearType.find('('); + + if (std::string::npos != pos) { + clearType = clearType.substr(0, pos); + } + + TString query(queryBegin); + TString predicate(predicateTemplate); + SubstGlobal(query, "<--TYPE-->", checkType); + SubstGlobal(predicate, "<--NAME-->", clearType); + + auto parameter = parameters.find(checkType); + + UNIT_ASSERT_C(parameter != parameters.end(), "No type " << checkType << " in parameters"); + + auto it = tableClient.StreamExecuteScanQuery(query + predicate, parameter->second).GetValueSync(); + // Check for successful execution + auto streamPart = it.ReadNext().GetValueSync(); + + bool pushdown; + + if (streamPart.IsSuccess()) { + it = tableClient.StreamExecuteScanQuery( + query + predicate, parameter->second, scanSettings + ).GetValueSync(); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + pushdown = ast.find("KqpOlapFilter") != std::string::npos; + } else { + // Error means that predicate not pushed down + pushdown = false; + } + + if (error && pushdown) { + falsePositive.emplace_back( + TStringBuilder() << type << " vs " << checkType << " at " << predicate + ); + continue; + } + + if (!error && !pushdown) { + falseNegative.emplace_back( + TStringBuilder() << type << " vs " << checkType << " at " << predicate + ); + } + } + } + } + + TStringBuilder b; + b << "Errors found:" << Endl; + b << "------------------------------------------------" << Endl; + b << "False positive" << Endl; + + for (const auto& txt: falsePositive) { + b << txt << Endl; + } + + b << "False negative" << Endl; + for (const auto& txt: falseNegative) { + b << txt << Endl; + } + + b << "------------------------------------------------" << Endl; + UNIT_ASSERT_C(falsePositive.empty() && falseNegative.empty(), b); + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/kqp_query_ut.cpp b/ydb/core/kqp/ut/kqp_query_ut.cpp index 7e1bc968a5f..5ef6d18f381 100644 --- a/ydb/core/kqp/ut/kqp_query_ut.cpp +++ b/ydb/core/kqp/ut/kqp_query_ut.cpp @@ -1065,9 +1065,9 @@ Y_UNIT_TEST_SUITE(KqpQuery) { EVALUATE IF true DO $hello() )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::UNSUPPORTED, result.GetIssues().ToString()); - UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_UNSUPPORTED, [](const NYql::TIssue& issue) { - return issue.Message.Contains("EVALUATE IF is not supported in YDB queries."); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::UNSUPPORTED, result.GetIssues().ToString()); + UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_UNSUPPORTED, [](const NYql::TIssue& issue) { + return issue.Message.Contains("EVALUATE IF is not supported in YDB queries."); })); } @@ -1077,49 +1077,49 @@ Y_UNIT_TEST_SUITE(KqpQuery) { SELECT $i; END DO; )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::UNSUPPORTED, result.GetIssues().ToString()); - UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_UNSUPPORTED, [](const NYql::TIssue& issue) { - return issue.Message.Contains("EVALUATE is not supported in YDB queries."); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::UNSUPPORTED, result.GetIssues().ToString()); + UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_UNSUPPORTED, [](const NYql::TIssue& issue) { + return issue.Message.Contains("EVALUATE is not supported in YDB queries."); + })); + } + + { + auto params = db.GetParamsBuilder() + .AddParam("$table").String("StringValue").Build() + .AddParam("$login").String("LoginString").Build() + .AddParam("$email").String("Email@String").Build() + .AddParam("$id").Uint64(1).Build() + .Build(); + + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + DECLARE $id AS Uint64; + DECLARE $login AS String; + DECLARE $email AS String; + + SELECT $id, $login, $email; + )", TTxControl::BeginTx().CommitTx(), params).ExtractValueSync(); + + CompareYson( + R"([[1u;"LoginString";"Email@String"]])", + FormatResultSetYson(result.GetResultSet(0)) + ); + + result = session.ExecuteDataQuery(R"( + --!syntax_v1 + DECLARE $table AS String; + DECLARE $id AS Uint64; + DECLARE $login AS String; + DECLARE $email AS String; + + INSERT INTO $table ( id, login, email ) VALUES ($id, $login, $email ); + )", TTxControl::BeginTx().CommitTx(), params).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::UNSUPPORTED, result.GetIssues().ToString()); + UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_UNSUPPORTED, [](const NYql::TIssue& issue) { + return issue.Message.Contains("ATOM evaluation is not supported in YDB queries."); })); } - - { - auto params = db.GetParamsBuilder() - .AddParam("$table").String("StringValue").Build() - .AddParam("$login").String("LoginString").Build() - .AddParam("$email").String("Email@String").Build() - .AddParam("$id").Uint64(1).Build() - .Build(); - - auto result = session.ExecuteDataQuery(R"( - --!syntax_v1 - DECLARE $id AS Uint64; - DECLARE $login AS String; - DECLARE $email AS String; - - SELECT $id, $login, $email; - )", TTxControl::BeginTx().CommitTx(), params).ExtractValueSync(); - - CompareYson( - R"([[1u;"LoginString";"Email@String"]])", - FormatResultSetYson(result.GetResultSet(0)) - ); - - result = session.ExecuteDataQuery(R"( - --!syntax_v1 - DECLARE $table AS String; - DECLARE $id AS Uint64; - DECLARE $login AS String; - DECLARE $email AS String; - - INSERT INTO $table ( id, login, email ) VALUES ($id, $login, $email ); - )", TTxControl::BeginTx().CommitTx(), params).ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::UNSUPPORTED, result.GetIssues().ToString()); - UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_UNSUPPORTED, [](const NYql::TIssue& issue) { - return issue.Message.Contains("ATOM evaluation is not supported in YDB queries."); - })); - } } Y_UNIT_TEST_NEW_ENGINE(UdfTerminate) { diff --git a/ydb/core/kqp/ut/kqp_scan_ut.cpp b/ydb/core/kqp/ut/kqp_scan_ut.cpp index eb0dccc44fc..347ca354412 100644 --- a/ydb/core/kqp/ut/kqp_scan_ut.cpp +++ b/ydb/core/kqp/ut/kqp_scan_ut.cpp @@ -1770,50 +1770,50 @@ Y_UNIT_TEST_SUITE(KqpScan) { } } - Y_UNIT_TEST(LMapFunction) { + Y_UNIT_TEST(LMapFunction) { auto settings = TKikimrSettings() .SetWithSampleTables(false) .SetEnableOlapSchemaOperations(true); TKikimrRunner kikimr(settings); - auto tableClient = kikimr.GetTableClient(); - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - - // EnableDebugLogging(kikimr); - - UNIT_ASSERT(session.ExecuteSchemeQuery(R"( - CREATE TABLE [/Root/SampleMapTable] ( - Key Int32, - Value String, - Price Int32, - PRIMARY KEY (Key) - ); - )").GetValueSync().IsSuccess()); - - UNIT_ASSERT(session.ExecuteDataQuery(R"( - REPLACE INTO [/Root/SampleMapTable] (Key, Value, Price) VALUES - (1, "Bitcoin", 50000), - (2, "Dogecoin", 1000), - (3, "Ethereum", 5000), - (4, "XTC", 1), - (5, "Cardano", 2), - (6, "Tether", 3); - )", TTxControl::BeginTx().CommitTx()).GetValueSync().IsSuccess()); - - auto it = tableClient.StreamExecuteScanQuery(R"( - $func = ($stream) -> { - RETURN YQL::Filter($stream, ($r) -> { RETURN Coalesce($r.Price <= 1000, False); }); - }; - - $inputTable = SELECT Key, Value, Price FROM SampleMapTable; - - PROCESS $inputTable USING $func(TableRows()); - )").GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - std::cerr << result << std::endl; - CompareYson(result, R"([[[2];[1000];["Dogecoin"]];[[4];[1];["XTC"]];[[5];[2];["Cardano"]];[[6];[3];["Tether"]]])"); - } + auto tableClient = kikimr.GetTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + // EnableDebugLogging(kikimr); + + UNIT_ASSERT(session.ExecuteSchemeQuery(R"( + CREATE TABLE [/Root/SampleMapTable] ( + Key Int32, + Value String, + Price Int32, + PRIMARY KEY (Key) + ); + )").GetValueSync().IsSuccess()); + + UNIT_ASSERT(session.ExecuteDataQuery(R"( + REPLACE INTO [/Root/SampleMapTable] (Key, Value, Price) VALUES + (1, "Bitcoin", 50000), + (2, "Dogecoin", 1000), + (3, "Ethereum", 5000), + (4, "XTC", 1), + (5, "Cardano", 2), + (6, "Tether", 3); + )", TTxControl::BeginTx().CommitTx()).GetValueSync().IsSuccess()); + + auto it = tableClient.StreamExecuteScanQuery(R"( + $func = ($stream) -> { + RETURN YQL::Filter($stream, ($r) -> { RETURN Coalesce($r.Price <= 1000, False); }); + }; + + $inputTable = SELECT Key, Value, Price FROM SampleMapTable; + + PROCESS $inputTable USING $func(TableRows()); + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + std::cerr << result << std::endl; + CompareYson(result, R"([[[2];[1000];["Dogecoin"]];[[4];[1];["XTC"]];[[5];[2];["Cardano"]];[[6];[3];["Tether"]]])"); + } Y_UNIT_TEST(YqlTableSample) { auto setting = NKikimrKqp::TKqpSetting(); @@ -1874,31 +1874,31 @@ Y_UNIT_TEST_SUITE(KqpScan) { UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); CompareYson(R"([[192u]])", StreamResultToYson(result)); } - - Y_UNIT_TEST(SelectExistsUnexpected) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - CreateSampleTables(kikimr); - - auto result = db.StreamExecuteScanQuery(R"( - SELECT EXISTS( - SELECT * FROM `/Root/EightShard` WHERE Key > 100 - ) as dataPresent; - )").GetValueSync(); - - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - CompareYson("[[%true]]", StreamResultToYson(result)); - - result = db.StreamExecuteScanQuery(R"( - SELECT EXISTS( - SELECT * FROM `/Root/EightShard` WHERE Key > 10000 - ) as dataPresent; - )").GetValueSync(); - - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - CompareYson("[[%false]]", StreamResultToYson(result)); - } + + Y_UNIT_TEST(SelectExistsUnexpected) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + CreateSampleTables(kikimr); + + auto result = db.StreamExecuteScanQuery(R"( + SELECT EXISTS( + SELECT * FROM `/Root/EightShard` WHERE Key > 100 + ) as dataPresent; + )").GetValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + CompareYson("[[%true]]", StreamResultToYson(result)); + + result = db.StreamExecuteScanQuery(R"( + SELECT EXISTS( + SELECT * FROM `/Root/EightShard` WHERE Key > 10000 + ) as dataPresent; + )").GetValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + CompareYson("[[%false]]", StreamResultToYson(result)); + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/kqp_scripting_ut.cpp b/ydb/core/kqp/ut/kqp_scripting_ut.cpp index d566b91e91c..65e75ac303e 100644 --- a/ydb/core/kqp/ut/kqp_scripting_ut.cpp +++ b/ydb/core/kqp/ut/kqp_scripting_ut.cpp @@ -168,21 +168,21 @@ Y_UNIT_TEST_SUITE(KqpScripting) { auto stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); UNIT_ASSERT(stats.process_cpu_time_us() > 0); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 4); - ui32 phaseNo = 0; - + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 4); + ui32 phaseNo = 0; + uint64_t totalDurationUs = 0; uint64_t totalCpuTimeUs = 0; for (auto& phase : stats.query_phases()) { - if (phaseNo++ == 3) { - UNIT_ASSERT_VALUES_EQUAL(phase.table_access().size(), 0); - UNIT_ASSERT(phase.cpu_time_us() > 0); - UNIT_ASSERT(phase.affected_shards() == 0); + if (phaseNo++ == 3) { + UNIT_ASSERT_VALUES_EQUAL(phase.table_access().size(), 0); + UNIT_ASSERT(phase.cpu_time_us() > 0); + UNIT_ASSERT(phase.affected_shards() == 0); totalDurationUs += phase.duration_us(); totalCpuTimeUs += phase.cpu_time_us(); - continue; - } + continue; + } UNIT_ASSERT_VALUES_EQUAL(phase.table_access().size(), 1); UNIT_ASSERT(phase.table_access(0).partitions_count() > 0); UNIT_ASSERT(phase.table_access(0).reads().rows() > 0); @@ -444,21 +444,21 @@ Y_UNIT_TEST_SUITE(KqpScripting) { CompareYson(R"([[[8u]]])", StreamResultToYson(it)); } - Y_UNIT_TEST(StreamExecuteYqlScriptScanScalar) { - TKikimrRunner kikimr; - TScriptingClient client(kikimr.GetDriver()); - auto it = client.StreamExecuteYqlScript(R"( - PRAGMA kikimr.ScanQuery = "true"; - $key1 = (SELECT Fk21 FROM `/Root/Join1` WHERE Key = 1); - $key2 = (SELECT Fk21 FROM `/Root/Join1` WHERE Key = 2); - $limit = (SELECT Key FROM `/Root/Join1` WHERE Fk21 = 105); - - SELECT Data FROM [/Root/EightShard] WHERE Key = $key1 OR Key = $key2 LIMIT COALESCE($limit, 1u); - )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + Y_UNIT_TEST(StreamExecuteYqlScriptScanScalar) { + TKikimrRunner kikimr; + TScriptingClient client(kikimr.GetDriver()); + auto it = client.StreamExecuteYqlScript(R"( + PRAGMA kikimr.ScanQuery = "true"; + $key1 = (SELECT Fk21 FROM `/Root/Join1` WHERE Key = 1); + $key2 = (SELECT Fk21 FROM `/Root/Join1` WHERE Key = 2); + $limit = (SELECT Key FROM `/Root/Join1` WHERE Fk21 = 105); + + SELECT Data FROM [/Root/EightShard] WHERE Key = $key1 OR Key = $key2 LIMIT COALESCE($limit, 1u); + )").GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); CompareYson(R"([[[[1]];[[3]]]])", StreamResultToYson(it)); - } - + } + Y_UNIT_TEST(StreamExecuteYqlScriptData) { TKikimrRunner kikimr; TScriptingClient client(kikimr.GetDriver()); diff --git a/ydb/core/kqp/ut/kqp_sort_ut.cpp b/ydb/core/kqp/ut/kqp_sort_ut.cpp index 516afd28ffc..0b7c65a3d5c 100644 --- a/ydb/core/kqp/ut/kqp_sort_ut.cpp +++ b/ydb/core/kqp/ut/kqp_sort_ut.cpp @@ -780,16 +780,16 @@ Y_UNIT_TEST_SUITE(KqpSort) { } } - Y_UNIT_TEST_NEW_ENGINE(TopSortTableExpr) { + Y_UNIT_TEST_NEW_ENGINE(TopSortTableExpr) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - TString query = Q_(R"( + TString query = Q_(R"( DECLARE $key AS Uint32; $fetch = ( - SELECT Value2 + 1 AS ComputedLimit FROM `/Root/TwoShard` + SELECT Value2 + 1 AS ComputedLimit FROM `/Root/TwoShard` WHERE Key = $key ); @@ -823,12 +823,12 @@ Y_UNIT_TEST_SUITE(KqpSort) { } } - Y_UNIT_TEST_NEW_ENGINE(TopSortTableExprOffset) { + Y_UNIT_TEST_NEW_ENGINE(TopSortTableExprOffset) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - TString query = Q_(R"( + TString query = Q_(R"( DECLARE $key AS Uint32; $fetch = ( diff --git a/ydb/core/kqp/ut/kqp_stats_ut.cpp b/ydb/core/kqp/ut/kqp_stats_ut.cpp index 0c4e2a54ec9..f7815c016ee 100644 --- a/ydb/core/kqp/ut/kqp_stats_ut.cpp +++ b/ydb/core/kqp/ut/kqp_stats_ut.cpp @@ -69,7 +69,7 @@ Y_UNIT_TEST(JoinStatsBasic) { UNIT_ASSERT_VALUES_EQUAL(res.ResultSetYson, "[[16u]]"); UNIT_ASSERT(res.QueryStats); - UNIT_ASSERT_VALUES_EQUAL(res.QueryStats->query_phases().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(res.QueryStats->query_phases().size(), 2); if (res.QueryStats->query_phases(0).table_access(0).name() == "/Root/KeyValue") { UNIT_ASSERT_VALUES_EQUAL(res.QueryStats->query_phases(0).table_access(0).name(), "/Root/KeyValue"); UNIT_ASSERT_VALUES_EQUAL(res.QueryStats->query_phases(0).table_access(0).partitions_count(), 1); @@ -97,10 +97,10 @@ Y_UNIT_TEST(MultiTxStatsFull) { auto res = CollectStreamResult(it); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - UNIT_ASSERT_VALUES_EQUAL( - res.ResultSetYson, - R"([[[1];[202u];["Value2"]];[[2];[201u];["Value1"]];[[3];[203u];["Value3"]]])" - ); + UNIT_ASSERT_VALUES_EQUAL( + res.ResultSetYson, + R"([[[1];[202u];["Value2"]];[[2];[201u];["Value1"]];[[3];[203u];["Value3"]]])" + ); UNIT_ASSERT(res.QueryStats); UNIT_ASSERT_VALUES_EQUAL(res.QueryStats->query_phases().size(), 2); diff --git a/ydb/core/kqp/ut/kqp_sys_view_ut.cpp b/ydb/core/kqp/ut/kqp_sys_view_ut.cpp index 5b0c5e816d6..1f8988517c4 100644 --- a/ydb/core/kqp/ut/kqp_sys_view_ut.cpp +++ b/ydb/core/kqp/ut/kqp_sys_view_ut.cpp @@ -94,61 +94,61 @@ Y_UNIT_TEST_SUITE(KqpSystemView) { TKikimrRunner kikimr; auto client = kikimr.GetTableClient(); - TString enablePredicateExtractor = R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "true"; - )"; - - TString query = R"( + TString enablePredicateExtractor = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "true"; + )"; + + TString query = R"( SELECT OwnerId, PathId, PartIdx, Path FROM `/Root/.sys/partition_stats` WHERE OwnerId = 72057594046644480ul AND PathId > 5u AND PathId <= 9u ORDER BY PathId, PartIdx; - )"; + )"; - TString expectedYson = R"([ + TString expectedYson = R"([ [[72057594046644480u];[6u];[0u];["/Root/KeyValue"]]; [[72057594046644480u];[7u];[0u];["/Root/KeyValue2"]]; [[72057594046644480u];[8u];[0u];["/Root/Test"]]; [[72057594046644480u];[9u];[0u];["/Root/Join1"]]; [[72057594046644480u];[9u];[1u];["/Root/Join1"]] - ])"; - - auto it = client.StreamExecuteScanQuery(query).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - CompareYson(expectedYson, StreamResultToYson(it)); - - it = client.StreamExecuteScanQuery(enablePredicateExtractor + query).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - CompareYson(expectedYson, StreamResultToYson(it)); + ])"; + + auto it = client.StreamExecuteScanQuery(query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + CompareYson(expectedYson, StreamResultToYson(it)); + + it = client.StreamExecuteScanQuery(enablePredicateExtractor + query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + CompareYson(expectedYson, StreamResultToYson(it)); } Y_UNIT_TEST(PartitionStatsRange2) { TKikimrRunner kikimr; auto client = kikimr.GetTableClient(); - TString enablePredicateExtractor = R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "true"; - )"; + TString enablePredicateExtractor = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "true"; + )"; - TString query = R"( + TString query = R"( SELECT OwnerId, PathId, PartIdx, Path FROM `/Root/.sys/partition_stats` WHERE OwnerId = 72057594046644480ul AND PathId >= 6u AND PathId < 9u ORDER BY PathId, PartIdx; - )"; + )"; - TString expectedYson = R"([ + TString expectedYson = R"([ [[72057594046644480u];[6u];[0u];["/Root/KeyValue"]]; [[72057594046644480u];[7u];[0u];["/Root/KeyValue2"]]; [[72057594046644480u];[8u];[0u];["/Root/Test"]] - ])"; - - auto it = client.StreamExecuteScanQuery(query).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - CompareYson(expectedYson, StreamResultToYson(it)); - - it = client.StreamExecuteScanQuery(enablePredicateExtractor + query).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - CompareYson(expectedYson, StreamResultToYson(it)); + ])"; + + auto it = client.StreamExecuteScanQuery(query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + CompareYson(expectedYson, StreamResultToYson(it)); + + it = client.StreamExecuteScanQuery(enablePredicateExtractor + query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + CompareYson(expectedYson, StreamResultToYson(it)); } Y_UNIT_TEST(PartitionStatsRange3) { diff --git a/ydb/core/kqp/ut/kqp_table_predicate_ut.cpp b/ydb/core/kqp/ut/kqp_table_predicate_ut.cpp index 41e67d31aac..5ab5108968f 100644 --- a/ydb/core/kqp/ut/kqp_table_predicate_ut.cpp +++ b/ydb/core/kqp/ut/kqp_table_predicate_ut.cpp @@ -117,231 +117,231 @@ static void CreateSampleTables(TSession session) { } } -namespace { - -void CreateTableWithIntKey(TSession session, ui64 partitions, ui32 rangesPerPartition) { - /* - * Generate table with partitioning. - * Every partition will start from 1000 * partitionNumber and have rangesPerPartition subranges - * from 0 to 100 * rangeNumber - */ - TExplicitPartitions explicitPartitions; - static const ui32 itemsPerRange = 5; - auto splitPoint = [](ui32 partitionNo) { - return partitionNo * 100000; - }; - - YQL_ENSURE(rangesPerPartition < 100); - - for (ui32 i = 0; i < partitions; i++) { - explicitPartitions.AppendSplitPoints( - TValueBuilder().BeginTuple().AddElement().OptionalInt32(splitPoint(i)).EndTuple().Build() - ); - } - - auto builder = TTableBuilder() - .AddNullableColumn("Key1", EPrimitiveType::Int32) - .AddNullableColumn("Key2", EPrimitiveType::Int32) - .SetPrimaryKeyColumns({"Key1", "Key2"}) - .SetPartitionAtKeys(explicitPartitions); - - UNIT_ASSERT(session.CreateTable("/Root/TableWithIntKey", builder.Build()).GetValueSync().IsSuccess()); - - TStringBuilder query; - - query << "REPLACE INTO [/Root/TableWithIntKey] (Key1, Key2) VALUES" << Endl; - - for (ui32 i = 0; i < partitions; i++) { - ui32 partitionStart = splitPoint(i); - - for (ui32 j = 0; j < rangesPerPartition; j++) { - ui32 rangeStart = j * 1000; - - for (ui32 k = 0; k < itemsPerRange; k++) { - query << "(" << partitionStart + rangeStart + k << ", "; - - if (k % 2) { - query << "NULL)"; - } else { - query << k << ")"; - } - - query << "," << Endl; - } - } - } - - query << "(NULL, NULL);" << Endl; - - bool success = session.ExecuteDataQuery( - query, - TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx() - ).GetValueSync().IsSuccess(); - - UNIT_ASSERT(success); -} - -void ExecuteStreamQueryAndCheck(NExperimental::TStreamQueryClient& db, const TString& query, - const TString& expectedYson) -{ - auto settings = NExperimental::TExecuteStreamQuerySettings() - .ProfileMode(NExperimental::EStreamQueryProfileMode::Basic); - - auto it = db.ExecuteStreamQuery(query, settings).GetValueSync(); - UNIT_ASSERT(it.IsSuccess()); - - TVector<TString> profiles; - auto resultYson = StreamResultToYson(it, &profiles); - - Cerr << "---------QUERY----------" << Endl; - Cerr << query << Endl; - Cerr << "---------RESULT---------" << Endl; - Cerr << resultYson << Endl; - Cerr << "------------------------" << Endl; - - CompareYson(expectedYson, resultYson); - +namespace { + +void CreateTableWithIntKey(TSession session, ui64 partitions, ui32 rangesPerPartition) { + /* + * Generate table with partitioning. + * Every partition will start from 1000 * partitionNumber and have rangesPerPartition subranges + * from 0 to 100 * rangeNumber + */ + TExplicitPartitions explicitPartitions; + static const ui32 itemsPerRange = 5; + auto splitPoint = [](ui32 partitionNo) { + return partitionNo * 100000; + }; + + YQL_ENSURE(rangesPerPartition < 100); + + for (ui32 i = 0; i < partitions; i++) { + explicitPartitions.AppendSplitPoints( + TValueBuilder().BeginTuple().AddElement().OptionalInt32(splitPoint(i)).EndTuple().Build() + ); + } + + auto builder = TTableBuilder() + .AddNullableColumn("Key1", EPrimitiveType::Int32) + .AddNullableColumn("Key2", EPrimitiveType::Int32) + .SetPrimaryKeyColumns({"Key1", "Key2"}) + .SetPartitionAtKeys(explicitPartitions); + + UNIT_ASSERT(session.CreateTable("/Root/TableWithIntKey", builder.Build()).GetValueSync().IsSuccess()); + + TStringBuilder query; + + query << "REPLACE INTO [/Root/TableWithIntKey] (Key1, Key2) VALUES" << Endl; + + for (ui32 i = 0; i < partitions; i++) { + ui32 partitionStart = splitPoint(i); + + for (ui32 j = 0; j < rangesPerPartition; j++) { + ui32 rangeStart = j * 1000; + + for (ui32 k = 0; k < itemsPerRange; k++) { + query << "(" << partitionStart + rangeStart + k << ", "; + + if (k % 2) { + query << "NULL)"; + } else { + query << k << ")"; + } + + query << "," << Endl; + } + } + } + + query << "(NULL, NULL);" << Endl; + + bool success = session.ExecuteDataQuery( + query, + TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx() + ).GetValueSync().IsSuccess(); + + UNIT_ASSERT(success); +} + +void ExecuteStreamQueryAndCheck(NExperimental::TStreamQueryClient& db, const TString& query, + const TString& expectedYson) +{ + auto settings = NExperimental::TExecuteStreamQuerySettings() + .ProfileMode(NExperimental::EStreamQueryProfileMode::Basic); + + auto it = db.ExecuteStreamQuery(query, settings).GetValueSync(); + UNIT_ASSERT(it.IsSuccess()); + + TVector<TString> profiles; + auto resultYson = StreamResultToYson(it, &profiles); + + Cerr << "---------QUERY----------" << Endl; + Cerr << query << Endl; + Cerr << "---------RESULT---------" << Endl; + Cerr << resultYson << Endl; + Cerr << "------------------------" << Endl; + + CompareYson(expectedYson, resultYson); + NYql::NDqProto::TDqExecutionStats stats; - // First stage is computation, second scan read. - google::protobuf::TextFormat::ParseFromString(profiles[1], &stats); - - ui64 resultRows = stats.GetResultRows(); - ui64 readRows = stats.GetTables(0).GetReadRows(); - UNIT_ASSERT_EQUAL_C(resultRows, readRows, "There are " << resultRows << " in result, but read " << readRows << " !"); -} - -void RunTestOverIntTable(const TString& query, const TString& expectedYson, ui64 partitions, ui32 rangesPerPartition) { - TKikimrSettings kikimrSettings; - TKikimrRunner kikimr(kikimrSettings); - - NExperimental::TStreamQueryClient db(kikimr.GetDriver()); - - auto client = kikimr.GetTableClient(); - auto session = client.CreateSession().GetValueSync().GetSession(); - CreateTableWithIntKey(session, partitions, rangesPerPartition); - - ExecuteStreamQueryAndCheck(db, query, expectedYson); -} - -void RunPredicateTest(const std::vector<TString>& predicates, bool withNulls) { - TKikimrRunner kikimr; - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - - auto builder = TTableBuilder() - .AddNullableColumn("Key1", EPrimitiveType::Uint32) - .AddNullableColumn("Key2", EPrimitiveType::Uint32) - .AddNullableColumn("Key3", EPrimitiveType::String) - .AddNullableColumn("Key4", EPrimitiveType::String) - .AddNullableColumn("Value", EPrimitiveType::Uint32) - .SetPrimaryKeyColumns({"Key1", "Key2", "Key3", "Key4"}); - - UNIT_ASSERT(session.CreateTable( - "/Root/TestPredicates", - builder.Build(), - TCreateTableSettings() - .PartitioningPolicy( - TPartitioningPolicy() - .UniformPartitions(3) - ) - ).GetValueSync().IsSuccess()); - - TString query; - - if (withNulls) { - query = TString(R"( - REPLACE INTO [/Root/TestPredicates] (Key1, Key2, Key3, Key4, Value) VALUES - (NULL, NULL, NULL, NULL, 1), - (NULL, NULL, NULL, "uid:10", 2), - (NULL, NULL, "resource_1", "uid:10", 3), - (NULL, 1, "resource_1", "uid:10", 4), - (1000, 1, "resource_1", "uid:10", 5), - (NULL, NULL, "resource_1", NULL, 6), - (NULL, NULL, NULL, "uid:11", 7), - (NULL, NULL, "resource_2", "uid:11", 8), - (NULL, 2, "resource_2", "uid:11", 9), - (2000, 2, "resource_3", "uid:11", 10), - (3000, 3, "resource_3", "uid:11", 11), - (4000, 4, "resource_3", "uid:11", 12), - (5000, 5, "resource_4", "uid:12", 13), - (6000, 5, "resource_4", "uid:12", 14), - (7000, 5, "resource_4", "uid:12", 15), - (8000, 8, "resource_4", "uid:12", 16), - (8000, NULL, "resource_5", "uid:12", 17), - (9000, NULL, "resource_5", NULL, 18), - (9000, 9, NULL, "uid:12", 19), - (9000, 9, NULL, NULL, 20); - )"); - } else { - query = TString(R"( - REPLACE INTO [/Root/TestPredicates] (Key1, Key2, Key3, Key4, Value) VALUES - (1, 0, "resource_0", "uid_0", 1), - (2, 0, "resource_0", "uid:10", 2), - (3, 0, "resource_1", "uid:10", 3), - (4, 1, "resource_1", "uid:10", 4), - (1000, 1, "resource_1", "uid:10", 5), - (1001, 0, "resource_1", "uid:0", 6), - (1002, 0, "resource_0", "uid:11", 7), - (1003, 0, "resource_2", "uid:11", 8), - (1004, 2, "resource_2", "uid:11", 9), - (2000, 2, "resource_3", "uid:11", 10), - (3000, 3, "resource_3", "uid:11", 11), - (4000, 4, "resource_3", "uid:11", 12), - (5000, 5, "resource_4", "uid:12", 13), - (6000, 5, "resource_4", "uid:12", 14), - (7000, 5, "resource_4", "uid:12", 15), - (8000, 8, "resource_4", "uid:12", 16), - (8000, 0, "resource_5", "uid:12", 17), - (9000, 0, "resource_5", "uid:0", 18), - (9000, 9, "resource_0", "uid:12", 19), - (9000, 9, "resource_0", "uid:0", 20); - )"); - } - - UNIT_ASSERT( - session.ExecuteDataQuery( - query, - TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx() - ).GetValueSync().IsSuccess() - ); - - NExperimental::TStreamQueryClient streamDb(kikimr.GetDriver()); - - for (auto& item: predicates) { - TString disablePredicateExtractor = R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "false"; - )"; - TString query = R"( - SELECT `Value` FROM `/Root/TestPredicates` WHERE <PREDICATE> ORDER BY `Value`; - )"; - - SubstGlobal(query, "<PREDICATE>", item); - - Cerr << "Execute query" << Endl << query << Endl; - - auto it = streamDb.ExecuteStreamQuery(disablePredicateExtractor + query).GetValueSync(); - UNIT_ASSERT(it.IsSuccess()); - - auto expectedYson = StreamResultToYson(it); - it = streamDb.ExecuteStreamQuery(query).GetValueSync(); - UNIT_ASSERT(it.IsSuccess()); - - auto resultYson = StreamResultToYson(it); - - Cerr << "EXPECTED: " << expectedYson << Endl; - Cerr << "RECEIVED: " << resultYson << Endl; - - CompareYson(expectedYson, resultYson); - } - - UNIT_ASSERT(session.ExecuteSchemeQuery(R"( - DROP TABLE [/Root/TestPredicates]; - )").GetValueSync().IsSuccess()); - -} - -} // anonymous namespace end - + // First stage is computation, second scan read. + google::protobuf::TextFormat::ParseFromString(profiles[1], &stats); + + ui64 resultRows = stats.GetResultRows(); + ui64 readRows = stats.GetTables(0).GetReadRows(); + UNIT_ASSERT_EQUAL_C(resultRows, readRows, "There are " << resultRows << " in result, but read " << readRows << " !"); +} + +void RunTestOverIntTable(const TString& query, const TString& expectedYson, ui64 partitions, ui32 rangesPerPartition) { + TKikimrSettings kikimrSettings; + TKikimrRunner kikimr(kikimrSettings); + + NExperimental::TStreamQueryClient db(kikimr.GetDriver()); + + auto client = kikimr.GetTableClient(); + auto session = client.CreateSession().GetValueSync().GetSession(); + CreateTableWithIntKey(session, partitions, rangesPerPartition); + + ExecuteStreamQueryAndCheck(db, query, expectedYson); +} + +void RunPredicateTest(const std::vector<TString>& predicates, bool withNulls) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto builder = TTableBuilder() + .AddNullableColumn("Key1", EPrimitiveType::Uint32) + .AddNullableColumn("Key2", EPrimitiveType::Uint32) + .AddNullableColumn("Key3", EPrimitiveType::String) + .AddNullableColumn("Key4", EPrimitiveType::String) + .AddNullableColumn("Value", EPrimitiveType::Uint32) + .SetPrimaryKeyColumns({"Key1", "Key2", "Key3", "Key4"}); + + UNIT_ASSERT(session.CreateTable( + "/Root/TestPredicates", + builder.Build(), + TCreateTableSettings() + .PartitioningPolicy( + TPartitioningPolicy() + .UniformPartitions(3) + ) + ).GetValueSync().IsSuccess()); + + TString query; + + if (withNulls) { + query = TString(R"( + REPLACE INTO [/Root/TestPredicates] (Key1, Key2, Key3, Key4, Value) VALUES + (NULL, NULL, NULL, NULL, 1), + (NULL, NULL, NULL, "uid:10", 2), + (NULL, NULL, "resource_1", "uid:10", 3), + (NULL, 1, "resource_1", "uid:10", 4), + (1000, 1, "resource_1", "uid:10", 5), + (NULL, NULL, "resource_1", NULL, 6), + (NULL, NULL, NULL, "uid:11", 7), + (NULL, NULL, "resource_2", "uid:11", 8), + (NULL, 2, "resource_2", "uid:11", 9), + (2000, 2, "resource_3", "uid:11", 10), + (3000, 3, "resource_3", "uid:11", 11), + (4000, 4, "resource_3", "uid:11", 12), + (5000, 5, "resource_4", "uid:12", 13), + (6000, 5, "resource_4", "uid:12", 14), + (7000, 5, "resource_4", "uid:12", 15), + (8000, 8, "resource_4", "uid:12", 16), + (8000, NULL, "resource_5", "uid:12", 17), + (9000, NULL, "resource_5", NULL, 18), + (9000, 9, NULL, "uid:12", 19), + (9000, 9, NULL, NULL, 20); + )"); + } else { + query = TString(R"( + REPLACE INTO [/Root/TestPredicates] (Key1, Key2, Key3, Key4, Value) VALUES + (1, 0, "resource_0", "uid_0", 1), + (2, 0, "resource_0", "uid:10", 2), + (3, 0, "resource_1", "uid:10", 3), + (4, 1, "resource_1", "uid:10", 4), + (1000, 1, "resource_1", "uid:10", 5), + (1001, 0, "resource_1", "uid:0", 6), + (1002, 0, "resource_0", "uid:11", 7), + (1003, 0, "resource_2", "uid:11", 8), + (1004, 2, "resource_2", "uid:11", 9), + (2000, 2, "resource_3", "uid:11", 10), + (3000, 3, "resource_3", "uid:11", 11), + (4000, 4, "resource_3", "uid:11", 12), + (5000, 5, "resource_4", "uid:12", 13), + (6000, 5, "resource_4", "uid:12", 14), + (7000, 5, "resource_4", "uid:12", 15), + (8000, 8, "resource_4", "uid:12", 16), + (8000, 0, "resource_5", "uid:12", 17), + (9000, 0, "resource_5", "uid:0", 18), + (9000, 9, "resource_0", "uid:12", 19), + (9000, 9, "resource_0", "uid:0", 20); + )"); + } + + UNIT_ASSERT( + session.ExecuteDataQuery( + query, + TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx() + ).GetValueSync().IsSuccess() + ); + + NExperimental::TStreamQueryClient streamDb(kikimr.GetDriver()); + + for (auto& item: predicates) { + TString disablePredicateExtractor = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "false"; + )"; + TString query = R"( + SELECT `Value` FROM `/Root/TestPredicates` WHERE <PREDICATE> ORDER BY `Value`; + )"; + + SubstGlobal(query, "<PREDICATE>", item); + + Cerr << "Execute query" << Endl << query << Endl; + + auto it = streamDb.ExecuteStreamQuery(disablePredicateExtractor + query).GetValueSync(); + UNIT_ASSERT(it.IsSuccess()); + + auto expectedYson = StreamResultToYson(it); + it = streamDb.ExecuteStreamQuery(query).GetValueSync(); + UNIT_ASSERT(it.IsSuccess()); + + auto resultYson = StreamResultToYson(it); + + Cerr << "EXPECTED: " << expectedYson << Endl; + Cerr << "RECEIVED: " << resultYson << Endl; + + CompareYson(expectedYson, resultYson); + } + + UNIT_ASSERT(session.ExecuteSchemeQuery(R"( + DROP TABLE [/Root/TestPredicates]; + )").GetValueSync().IsSuccess()); + +} + +} // anonymous namespace end + Y_UNIT_TEST_SUITE(KqpTablePredicate) { Y_UNIT_TEST_NEW_ENGINE(IsNull) { TKikimrRunner kikimr; @@ -1150,247 +1150,247 @@ Y_UNIT_TEST_SUITE(KqpTablePredicate) { UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); } } - - Y_UNIT_TEST(NoFullScanAtScanQuery) { - TVector<std::tuple<TString, TString, ui64, ui32>> testData = { - /* - * Predicate : Expected result : shards : ranges per shard - */ - { - "Key1 >= 2000 OR Key1 < 100", - R"([ - [[0];[0]]; - [[1];#]; - [[2];[2]]; - [[3];#]; - [[4];[4]]; - [[2000];[0]]; - [[2001];#]; - [[2002];[2]]; - [[2003];#]; - [[2004];[4]]; - ])", - 1, 3, - }, - { - "(Key1 >= 200003 AND Key1 <= 301003) OR (Key1 > 3 AND Key1 < 1003)", - R"([ - [[4];[4]]; - [[1000];[0]]; - [[1001];#]; - [[1002];[2]]; - [[200003];#]; - [[200004];[4]]; - [[201000];[0]]; - [[201001];#]; - [[201002];[2]]; - [[201003];#]; - [[201004];[4]]; - [[300000];[0]]; - [[300001];#]; - [[300002];[2]]; - [[300003];#]; - [[300004];[4]]; - [[301000];[0]]; - [[301001];#]; - [[301002];[2]]; - [[301003];#]; - ])", - 4, 2, - }, - { - R"( - (Key1 > 1 AND Key1 < 3) OR - (Key1 > 2002 AND Key1 < 2004) OR - (Key1 >= 4001 AND Key1 <= 4004) - )", - R"([ - [[2];[2]]; - [[2003];#]; - [[4001];#]; - [[4002];[2]]; - [[4003];#]; - [[4004];[4]] - ])", - 1, 10, - }, - { - "Key1 IN (1, 2, 100, 101, 102, 200, 201, 201, 1000, 1001, 1002, 2000, 2001, 2002) AND (Key1 > 2000)", - R"([ - [[2001];#];[[2002];[2]] - ])", - 1, 10, - } - }; - - for (auto& data: testData) { - auto query = TString(R"( - --!syntax_v1 - SELECT * FROM `/Root/TableWithIntKey` - WHERE <PREDICATE> - ORDER BY Key1; - )"); - - SubstGlobal(query, "<PREDICATE>", std::get<0>(data)); - RunTestOverIntTable(query, std::get<1>(data), std::get<2>(data), std::get<3>(data)); - } - } - - Y_UNIT_TEST(NoFullScanAtDNFPredicate) { - TKikimrRunner kikimr; - NExperimental::TStreamQueryClient streamDb(kikimr.GetDriver()); - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - - UNIT_ASSERT(session.ExecuteSchemeQuery(R"( - CREATE TABLE [/Root/TestDNF] ( - Key1 Uint32, - Key2 Uint32, - Value Uint32, - PRIMARY KEY (Key1, Key2) - ); - )").GetValueSync().IsSuccess()); - - UNIT_ASSERT(session.ExecuteDataQuery(R"( - REPLACE INTO [/Root/TestDNF] (Key1, Key2, Value) VALUES - (NULL, NULL, 1), - (NULL, 100u, 2), - (NULL, 200u, 3), - (1u, NULL, 4), - (1u, 100u, 5), - (1u, 200u, 6), - (1u, 200u, 7), - (1u, 200u, 8), - (1u, 300u, 9), - (1u, 400u, 10), - (2u, 100u, 11), - (2u, 200u, 12); - )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync().IsSuccess()); - - TVector<std::pair<TString, TString>> testData = { - /* - * Predicate : Expected result - */ - { - "Key1 = 1 AND (Key2 = 100 OR Key2 = 300)", - "[[[5u]];[[9u]]]" - }, - { - "Key1 = 1 AND Key2 IN (100, 300, 400)", - "[[[5u]];[[9u]];[[10u]]]" - } - }; - - for (auto& data: testData) { - auto query = TString(R"( - --!syntax_v1 - SELECT Value FROM `/Root/TestDNF` - WHERE <PREDICATE> - ORDER BY Value; - )"); - SubstGlobal(query, "<PREDICATE>", data.first); - ExecuteStreamQueryAndCheck(streamDb, query, data.second); - } - } - - Y_UNIT_TEST(ValidatePredicates) { - /* Table format: - * Key1 Uint32, - * Key2 Uint32, - * Key3 String, - * Key4 String, - * Value Int32, - * PRIMARY KEY (Key1, Key2, Key3, Key4) - */ - std::vector<TString> testData = { - "Key1 < 2000", - "Key1 > 1000", - "Key1 = 1000", - "Key1 >= 1000", - "Key1 < 2000", - "Key1 <= 2000", - "Key1 = 1000 AND Key2 > 0", - "Key1 >= 1000 AND Key2 > 8", - "Key1 >= 1000 AND Key2 = 8", - "Key1 >= 8000 AND Key2 >= 8", - "Key1 < 2000 AND Key2 < 2", - "Key1 <= 2000 AND Key2 < 2", - "Key1 <= 2000 AND Key2 <= 2", - "Key1 > 4000 AND Key2 > 4 AND Key3 > \"resource_3\" AND Key4 > \"uid:11\"", - "Key1 >= 4000 AND Key2 >= 4 AND Key3 >= \"resource_3\" AND Key4 >= \"uid:11\"", - "Key1 < 2000 AND Key2 < 2 AND Key3 < \"resource_3\" AND Key4 < \"uid:11\"", - "Key1 <= 2000 AND Key2 <= 2 AND Key3 <= \"resource_3\" AND Key4 <= \"uid:11\"", - "Key2 > 8", - "Key2 < 9", - "Key2 <= 2 AND Key3 <= \"resource_3\" AND Key4 <= \"uid:11\"", - "Key1 = 2000 AND Key2 = 2 AND Key3 = \"resource_3\" AND Key4 = \"uid:11\"", - "Key1 != 2000 AND Key2 != 2 AND Key3 != \"resource_3\" AND Key4 != \"uid:11\"", - "Key1 IS NULL", - "Key2 IS NULL", - "Key1 IS NOT NULL", - "Key1 > 1000 AND Key2 IS NULL", - "Key1 > 1000 OR Key2 IS NULL", - "Key1 >= 1000 OR Key2 IS NOT NULL", - "Key1 < 9000 OR Key3 IS NOT NULL", - "Key1 < 9000 OR Key3 IS NULL", - "Value = 20", - "(Key1 <= 1000) OR (Key1 > 2000 AND Key1 < 5000) OR (Key1 >= 8000)", - "Key1 < NULL" - }; - - RunPredicateTest(testData, /* withNulls */ true); - RunPredicateTest(testData, /* withNulls */ false); - } - - Y_UNIT_TEST(MergeRanges) { - TKikimrRunner kikimr; - NExperimental::TStreamQueryClient streamDb(kikimr.GetDriver()); - auto db = kikimr.GetTableClient(); - auto session = db.CreateSession().GetValueSync().GetSession(); - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - UNIT_ASSERT(session.ExecuteSchemeQuery(R"( - CREATE TABLE [/Root/TestTable] ( - Key1 Uint32, - Key2 Uint32, - Value Uint32, - PRIMARY KEY (Key1, Key2) - ); - )").GetValueSync().IsSuccess()); - - auto replaceResult = session.ExecuteDataQuery(R"( - REPLACE INTO [/Root/TestTable] (Key1, Key2, Value) VALUES - (1u, 10u, 1), - (2u, 20u, 2), - (3u, 30u, 3), - (4u, 40u, 4), - (5u, 50u, 5), - (6u, 60u, 6); - )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync(); - - UNIT_ASSERT_C(replaceResult.IsSuccess(), replaceResult.GetIssues().ToString()); - - auto query = TString(R"( - --!syntax_v1 - SELECT Value FROM `/Root/TestTable` WHERE - Key1 = 1 OR Key1 = 2 OR Key1 = 3 - ORDER BY Value; - )"); - - auto it = db.StreamExecuteScanQuery(query).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - CollectStreamResult(it); - - it = db.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - auto result = CollectStreamResult(it); - NJson::TJsonValue plan, readRange; - NJson::ReadJsonTree(*result.PlanJson, &plan, true); - // TODO: Need to get real ranges from explain, no anything in JSON - } - + + Y_UNIT_TEST(NoFullScanAtScanQuery) { + TVector<std::tuple<TString, TString, ui64, ui32>> testData = { + /* + * Predicate : Expected result : shards : ranges per shard + */ + { + "Key1 >= 2000 OR Key1 < 100", + R"([ + [[0];[0]]; + [[1];#]; + [[2];[2]]; + [[3];#]; + [[4];[4]]; + [[2000];[0]]; + [[2001];#]; + [[2002];[2]]; + [[2003];#]; + [[2004];[4]]; + ])", + 1, 3, + }, + { + "(Key1 >= 200003 AND Key1 <= 301003) OR (Key1 > 3 AND Key1 < 1003)", + R"([ + [[4];[4]]; + [[1000];[0]]; + [[1001];#]; + [[1002];[2]]; + [[200003];#]; + [[200004];[4]]; + [[201000];[0]]; + [[201001];#]; + [[201002];[2]]; + [[201003];#]; + [[201004];[4]]; + [[300000];[0]]; + [[300001];#]; + [[300002];[2]]; + [[300003];#]; + [[300004];[4]]; + [[301000];[0]]; + [[301001];#]; + [[301002];[2]]; + [[301003];#]; + ])", + 4, 2, + }, + { + R"( + (Key1 > 1 AND Key1 < 3) OR + (Key1 > 2002 AND Key1 < 2004) OR + (Key1 >= 4001 AND Key1 <= 4004) + )", + R"([ + [[2];[2]]; + [[2003];#]; + [[4001];#]; + [[4002];[2]]; + [[4003];#]; + [[4004];[4]] + ])", + 1, 10, + }, + { + "Key1 IN (1, 2, 100, 101, 102, 200, 201, 201, 1000, 1001, 1002, 2000, 2001, 2002) AND (Key1 > 2000)", + R"([ + [[2001];#];[[2002];[2]] + ])", + 1, 10, + } + }; + + for (auto& data: testData) { + auto query = TString(R"( + --!syntax_v1 + SELECT * FROM `/Root/TableWithIntKey` + WHERE <PREDICATE> + ORDER BY Key1; + )"); + + SubstGlobal(query, "<PREDICATE>", std::get<0>(data)); + RunTestOverIntTable(query, std::get<1>(data), std::get<2>(data), std::get<3>(data)); + } + } + + Y_UNIT_TEST(NoFullScanAtDNFPredicate) { + TKikimrRunner kikimr; + NExperimental::TStreamQueryClient streamDb(kikimr.GetDriver()); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + UNIT_ASSERT(session.ExecuteSchemeQuery(R"( + CREATE TABLE [/Root/TestDNF] ( + Key1 Uint32, + Key2 Uint32, + Value Uint32, + PRIMARY KEY (Key1, Key2) + ); + )").GetValueSync().IsSuccess()); + + UNIT_ASSERT(session.ExecuteDataQuery(R"( + REPLACE INTO [/Root/TestDNF] (Key1, Key2, Value) VALUES + (NULL, NULL, 1), + (NULL, 100u, 2), + (NULL, 200u, 3), + (1u, NULL, 4), + (1u, 100u, 5), + (1u, 200u, 6), + (1u, 200u, 7), + (1u, 200u, 8), + (1u, 300u, 9), + (1u, 400u, 10), + (2u, 100u, 11), + (2u, 200u, 12); + )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync().IsSuccess()); + + TVector<std::pair<TString, TString>> testData = { + /* + * Predicate : Expected result + */ + { + "Key1 = 1 AND (Key2 = 100 OR Key2 = 300)", + "[[[5u]];[[9u]]]" + }, + { + "Key1 = 1 AND Key2 IN (100, 300, 400)", + "[[[5u]];[[9u]];[[10u]]]" + } + }; + + for (auto& data: testData) { + auto query = TString(R"( + --!syntax_v1 + SELECT Value FROM `/Root/TestDNF` + WHERE <PREDICATE> + ORDER BY Value; + )"); + SubstGlobal(query, "<PREDICATE>", data.first); + ExecuteStreamQueryAndCheck(streamDb, query, data.second); + } + } + + Y_UNIT_TEST(ValidatePredicates) { + /* Table format: + * Key1 Uint32, + * Key2 Uint32, + * Key3 String, + * Key4 String, + * Value Int32, + * PRIMARY KEY (Key1, Key2, Key3, Key4) + */ + std::vector<TString> testData = { + "Key1 < 2000", + "Key1 > 1000", + "Key1 = 1000", + "Key1 >= 1000", + "Key1 < 2000", + "Key1 <= 2000", + "Key1 = 1000 AND Key2 > 0", + "Key1 >= 1000 AND Key2 > 8", + "Key1 >= 1000 AND Key2 = 8", + "Key1 >= 8000 AND Key2 >= 8", + "Key1 < 2000 AND Key2 < 2", + "Key1 <= 2000 AND Key2 < 2", + "Key1 <= 2000 AND Key2 <= 2", + "Key1 > 4000 AND Key2 > 4 AND Key3 > \"resource_3\" AND Key4 > \"uid:11\"", + "Key1 >= 4000 AND Key2 >= 4 AND Key3 >= \"resource_3\" AND Key4 >= \"uid:11\"", + "Key1 < 2000 AND Key2 < 2 AND Key3 < \"resource_3\" AND Key4 < \"uid:11\"", + "Key1 <= 2000 AND Key2 <= 2 AND Key3 <= \"resource_3\" AND Key4 <= \"uid:11\"", + "Key2 > 8", + "Key2 < 9", + "Key2 <= 2 AND Key3 <= \"resource_3\" AND Key4 <= \"uid:11\"", + "Key1 = 2000 AND Key2 = 2 AND Key3 = \"resource_3\" AND Key4 = \"uid:11\"", + "Key1 != 2000 AND Key2 != 2 AND Key3 != \"resource_3\" AND Key4 != \"uid:11\"", + "Key1 IS NULL", + "Key2 IS NULL", + "Key1 IS NOT NULL", + "Key1 > 1000 AND Key2 IS NULL", + "Key1 > 1000 OR Key2 IS NULL", + "Key1 >= 1000 OR Key2 IS NOT NULL", + "Key1 < 9000 OR Key3 IS NOT NULL", + "Key1 < 9000 OR Key3 IS NULL", + "Value = 20", + "(Key1 <= 1000) OR (Key1 > 2000 AND Key1 < 5000) OR (Key1 >= 8000)", + "Key1 < NULL" + }; + + RunPredicateTest(testData, /* withNulls */ true); + RunPredicateTest(testData, /* withNulls */ false); + } + + Y_UNIT_TEST(MergeRanges) { + TKikimrRunner kikimr; + NExperimental::TStreamQueryClient streamDb(kikimr.GetDriver()); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + UNIT_ASSERT(session.ExecuteSchemeQuery(R"( + CREATE TABLE [/Root/TestTable] ( + Key1 Uint32, + Key2 Uint32, + Value Uint32, + PRIMARY KEY (Key1, Key2) + ); + )").GetValueSync().IsSuccess()); + + auto replaceResult = session.ExecuteDataQuery(R"( + REPLACE INTO [/Root/TestTable] (Key1, Key2, Value) VALUES + (1u, 10u, 1), + (2u, 20u, 2), + (3u, 30u, 3), + (4u, 40u, 4), + (5u, 50u, 5), + (6u, 60u, 6); + )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync(); + + UNIT_ASSERT_C(replaceResult.IsSuccess(), replaceResult.GetIssues().ToString()); + + auto query = TString(R"( + --!syntax_v1 + SELECT Value FROM `/Root/TestTable` WHERE + Key1 = 1 OR Key1 = 2 OR Key1 = 3 + ORDER BY Value; + )"); + + auto it = db.StreamExecuteScanQuery(query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + CollectStreamResult(it); + + it = db.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + NJson::TJsonValue plan, readRange; + NJson::ReadJsonTree(*result.PlanJson, &plan, true); + // TODO: Need to get real ranges from explain, no anything in JSON + } + Y_UNIT_TEST(ValidatePredicatesDataQuery) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 62dc18e458f..33e598c1c25 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -392,11 +392,11 @@ enum EColumnTableEngine { COLUMN_ENGINE_REPLACING_TIMESERIES = 1; } -enum EOlapProgramType { - OLAP_PROGRAM_SSA_PROGRAM = 1; - OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS = 2; -} - +enum EOlapProgramType { + OLAP_PROGRAM_SSA_PROGRAM = 1; + OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS = 2; +} + message TStorageTierConfig { optional string Name = 1; optional TS3Settings ObjectStorage = 2; diff --git a/ydb/core/protos/kqp_physical.proto b/ydb/core/protos/kqp_physical.proto index 4b5ce5b166d..44d8f587166 100644 --- a/ydb/core/protos/kqp_physical.proto +++ b/ydb/core/protos/kqp_physical.proto @@ -126,37 +126,37 @@ message TKqpPhyOpLookup { TKqpPhyValue KeysValue = 1; } -message TKqpPhyOpReadOlapRanges { - // Parameter come here from computation stage. It has type Tuple(List(Tuple(RangeBegin, RangeEnd)))) - // Where RangeBegin and RangeEnd are Tuple(KeyColumns, Inclusive) - // Where KeyColumns is values of start/end of range for corresponding key column in table - TKqpPhyParamValue KeyRanges = 1; - // Limit value, shard may stop after reading limit rows +message TKqpPhyOpReadOlapRanges { + // Parameter come here from computation stage. It has type Tuple(List(Tuple(RangeBegin, RangeEnd)))) + // Where RangeBegin and RangeEnd are Tuple(KeyColumns, Inclusive) + // Where KeyColumns is values of start/end of range for corresponding key column in table + TKqpPhyParamValue KeyRanges = 1; + // Limit value, shard may stop after reading limit rows TKqpPhyParamValue ItemsLimit = 2; - // Reverse sign, i.e. if user ask ORDER BY ... DESC we need to read table in reverse direction + // Reverse sign, i.e. if user ask ORDER BY ... DESC we need to read table in reverse direction bool Reverse = 3; - // Program in NKikimrSSA.TProgram format - bytes OlapProgram = 4; - /* - * Serialized parameters names for current program, this used for extract parameters used by - * program only from all parameters. Unfortunately we can not fill schema when compile OLAP program, - * it is done a bit later in executor, so we need separate field for parameter names. - */ - repeated string OlapProgramParameterNames = 5; -} - -message TKqpPhyOpReadRanges { - // Parameter come here from computation stage. It has type Tuple(List(Tuple(RangeBegin, RangeEnd)))) - // Where RangeBegin and RangeEnd are Tuple(KeyColumns, Inclusive) - // Where KeyColumns is values of start/end of range for corresponding key column in table - // Also it has special case - empty name. This means full scan. - TKqpPhyParamValue KeyRanges = 1; - // Limit value, shard may stop after reading limit rows - TKqpPhyParamValue ItemsLimit = 2; - // Reverse sign, i.e. if user ask ORDER BY ... DESC we need to read table in reverse direction - bool Reverse = 3; -} - + // Program in NKikimrSSA.TProgram format + bytes OlapProgram = 4; + /* + * Serialized parameters names for current program, this used for extract parameters used by + * program only from all parameters. Unfortunately we can not fill schema when compile OLAP program, + * it is done a bit later in executor, so we need separate field for parameter names. + */ + repeated string OlapProgramParameterNames = 5; +} + +message TKqpPhyOpReadRanges { + // Parameter come here from computation stage. It has type Tuple(List(Tuple(RangeBegin, RangeEnd)))) + // Where RangeBegin and RangeEnd are Tuple(KeyColumns, Inclusive) + // Where KeyColumns is values of start/end of range for corresponding key column in table + // Also it has special case - empty name. This means full scan. + TKqpPhyParamValue KeyRanges = 1; + // Limit value, shard may stop after reading limit rows + TKqpPhyParamValue ItemsLimit = 2; + // Reverse sign, i.e. if user ask ORDER BY ... DESC we need to read table in reverse direction + bool Reverse = 3; +} + message TKqpPhyTableOperation { TKqpPhyTable Table = 1; repeated TKqpPhyColumn Columns = 2; @@ -166,8 +166,8 @@ message TKqpPhyTableOperation { TKqpPhyOpUpsertRows UpsertRows = 4; TKqpPhyOpDeleteRows DeleteRows = 5; TKqpPhyOpLookup Lookup = 6; - TKqpPhyOpReadOlapRanges ReadOlapRange = 7; - TKqpPhyOpReadRanges ReadRanges = 8; + TKqpPhyOpReadOlapRanges ReadOlapRange = 7; + TKqpPhyOpReadRanges ReadRanges = 8; } } diff --git a/ydb/core/protos/ssa.proto b/ydb/core/protos/ssa.proto index 7680cf8d88f..c035c9f5dfe 100644 --- a/ydb/core/protos/ssa.proto +++ b/ydb/core/protos/ssa.proto @@ -35,10 +35,10 @@ message TProgram { } } - message TParameter { - optional string Name = 1; - } - + message TParameter { + optional string Name = 1; + } + message TAssignment { enum EFunction { FUNC_UNSPECIFIED = 0; @@ -77,7 +77,7 @@ message TProgram { TExternalFunction ExternalFunction = 3; TConstant Constant = 4; bool Null = 5; - TParameter Parameter = 6; + TParameter Parameter = 6; } } @@ -125,12 +125,12 @@ message TProgram { repeated TCommand Command = 1; optional uint32 Version = 2; } - -message TOlapProgram { - // Store OLAP program in serialized format in case we do not need to deserialize it in TScanTaskMeta - // Note: when this message exists the program must be present. - optional bytes Program = 1; - // RecordBatch deserialization require arrow::Schema, thus store it here - optional bytes ParametersSchema = 2; - optional bytes Parameters = 3; -}
\ No newline at end of file + +message TOlapProgram { + // Store OLAP program in serialized format in case we do not need to deserialize it in TScanTaskMeta + // Note: when this message exists the program must be present. + optional bytes Program = 1; + // RecordBatch deserialization require arrow::Schema, thus store it here + optional bytes ParametersSchema = 2; + optional bytes Parameters = 3; +}
\ No newline at end of file diff --git a/ydb/core/protos/tx_columnshard.proto b/ydb/core/protos/tx_columnshard.proto index d5e4c53a180..d407b2cc413 100644 --- a/ydb/core/protos/tx_columnshard.proto +++ b/ydb/core/protos/tx_columnshard.proto @@ -89,9 +89,9 @@ message TEvRead { repeated string ColumnNames = 7; optional TPredicate GreaterPredicate = 8; // Greater[OrEqual] (or From for range) optional TPredicate LessPredicate = 9; // Less[OrEqual] (or To for range) - // Serialized Olap program - optional bytes OlapProgram = 10; - optional NKikimrSchemeOp.EOlapProgramType OlapProgramType = 11; + // Serialized Olap program + optional bytes OlapProgram = 10; + optional NKikimrSchemeOp.EOlapProgramType OlapProgramType = 11; } message TEvReadResult { diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index ad151f40ae3..89863b25400 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -208,11 +208,11 @@ message TKqpTransaction { repeated uint32 KeyColumnTypes = 3; // for debug logs only repeated bool SkipNullKeys = 4; repeated TReadOpMeta Reads = 5; - optional uint64 ItemsLimit = 6; - optional bool Reverse = 7; - reserved 8; // optional bytes ProcessProgram = 8; + optional uint64 ItemsLimit = 6; + optional bool Reverse = 7; + reserved 8; // optional bytes ProcessProgram = 8; optional EScanDataFormat DataFormat = 9; - optional NKikimrSSA.TOlapProgram OlapProgram = 10; // Currently only for OLAP tables + optional NKikimrSSA.TOlapProgram OlapProgram = 10; // Currently only for OLAP tables } optional EKqpTransactionType Type = 1; @@ -1383,19 +1383,19 @@ message TEvKqpScan { repeated uint32 ColumnTags = 6; repeated uint32 ColumnTypes = 7; repeated bool SkipNullKeys = 8; - repeated NKikimrTx.TKeyRange Ranges = 9; + repeated NKikimrTx.TKeyRange Ranges = 9; optional NKikimrKqp.TKqpSnapshot Snapshot = 10; reserved 11; // optional NKqpProto.EKqpStatsMode StatsMode = 11; optional uint64 TimeoutMs = 12; optional uint32 Generation = 13; - // Seralized OlapProgram in old format without parameters - reserved 14; - optional uint64 ItemsLimit = 15; - optional bool Reverse = 16; + // Seralized OlapProgram in old format without parameters + reserved 14; + optional uint64 ItemsLimit = 15; + optional bool Reverse = 16; optional EScanDataFormat DataFormat = 17; - optional NYql.NDqProto.EDqStatsMode StatsMode = 18; - optional bytes OlapProgram = 19; - optional NKikimrSchemeOp.EOlapProgramType OlapProgramType = 20; + optional NYql.NDqProto.EDqStatsMode StatsMode = 18; + optional bytes OlapProgram = 19; + optional NKikimrSchemeOp.EOlapProgramType OlapProgramType = 20; } message TEvCompactTable { diff --git a/ydb/core/scheme/scheme_tabledefs.h b/ydb/core/scheme/scheme_tabledefs.h index 27d89cbcc6c..190233fdeee 100644 --- a/ydb/core/scheme/scheme_tabledefs.h +++ b/ydb/core/scheme/scheme_tabledefs.h @@ -191,14 +191,14 @@ public: : From(from) , To(to) , FromInclusive(fromInclusive) - , ToInclusive(toInclusive) {} + , ToInclusive(toInclusive) {} TSerializedTableRange(TConstArrayRef<TCell> fromValues, bool inclusiveFrom, TConstArrayRef<TCell> toValues, bool inclusiveTo) : From(TSerializedCellVec::Serialize(fromValues)) , To(TSerializedCellVec::Serialize(toValues)) , FromInclusive(inclusiveFrom) - , ToInclusive(inclusiveTo) {} + , ToInclusive(inclusiveTo) {} explicit TSerializedTableRange(const TTableRange& range) @@ -284,9 +284,9 @@ int ComparePointAndRange(const TConstArrayRef<TCell>& point, const TTableRange& // E.g. CompareBorders<true, true>(...) compares borders of ranges lying on the left // of compared borders (or in other words upper range borders are compared). template<bool FirstLeft, bool SecondLeft> -int CompareBorders(TConstArrayRef<TCell> first, TConstArrayRef<TCell> second, bool inclusiveFirst, bool inclusiveSecond, - TConstArrayRef<NScheme::TTypeId> cellTypes) -{ +int CompareBorders(TConstArrayRef<TCell> first, TConstArrayRef<TCell> second, bool inclusiveFirst, bool inclusiveSecond, + TConstArrayRef<NScheme::TTypeId> cellTypes) +{ const ui32 firstSize = first.size(); const ui32 secondSize = second.size(); diff --git a/ydb/core/sys_view/scan.cpp b/ydb/core/sys_view/scan.cpp index f3aba7581e7..f337cb6ba77 100644 --- a/ydb/core/sys_view/scan.cpp +++ b/ydb/core/sys_view/scan.cpp @@ -21,11 +21,11 @@ THolder<IActor> CreateSystemViewScan(const TActorId& ownerId, ui32 scanId, const if (tableId.SysViewInfo == PartitionStatsName) { return CreatePartitionStatsScan(ownerId, scanId, tableId, tableRange, columns); } - + if (tableId.SysViewInfo == NodesName) { return CreateNodesScan(ownerId, scanId, tableId, tableRange, columns); } - + if (tableId.SysViewInfo == TopQueriesByDuration1MinuteName || tableId.SysViewInfo == TopQueriesByDuration1HourName || tableId.SysViewInfo == TopQueriesByReadBytes1MinuteName || @@ -41,15 +41,15 @@ THolder<IActor> CreateSystemViewScan(const TActorId& ownerId, ui32 scanId, const if (tableId.SysViewInfo == PDisksName) { return CreatePDisksScan(ownerId, scanId, tableId, tableRange, columns); } - + if (tableId.SysViewInfo == VSlotsName) { return CreateVSlotsScan(ownerId, scanId, tableId, tableRange, columns); } - + if (tableId.SysViewInfo == GroupsName) { return CreateGroupsScan(ownerId, scanId, tableId, tableRange, columns); } - + if (tableId.SysViewInfo == StoragePoolsName) { return CreateStoragePoolsScan(ownerId, scanId, tableId, tableRange, columns); } diff --git a/ydb/core/sys_view/tablets/tablets.cpp b/ydb/core/sys_view/tablets/tablets.cpp index d60e1a821e6..88627748d8c 100644 --- a/ydb/core/sys_view/tablets/tablets.cpp +++ b/ydb/core/sys_view/tablets/tablets.cpp @@ -60,117 +60,117 @@ private: } } - bool CalculateRangeFrom() { - /* + bool CalculateRangeFrom() { + /* * Please note that TabletId and FollowerId do not have NULLs in columns - */ + */ const auto& cellsFrom = TableRange.From.GetCells(); - // Empty means that we read from +inf, it is impossible - if (cellsFrom.empty()) { - YQL_ENSURE(false, "Range starts from +inf, can't read anything."); - return false; - } - - if (cellsFrom[0].IsNull()) { - return true; - } - - FromTabletId = cellsFrom[0].AsValue<ui64>(); - - if (cellsFrom.size() == 1 && TableRange.FromInclusive) { - return true; - } - - if (cellsFrom.size() == 2) { - if (!cellsFrom[1].IsNull()) { + // Empty means that we read from +inf, it is impossible + if (cellsFrom.empty()) { + YQL_ENSURE(false, "Range starts from +inf, can't read anything."); + return false; + } + + if (cellsFrom[0].IsNull()) { + return true; + } + + FromTabletId = cellsFrom[0].AsValue<ui64>(); + + if (cellsFrom.size() == 1 && TableRange.FromInclusive) { + return true; + } + + if (cellsFrom.size() == 2) { + if (!cellsFrom[1].IsNull()) { FromFollowerId = cellsFrom[1].AsValue<ui32>(); } - if (TableRange.FromInclusive) { - return true; + if (TableRange.FromInclusive) { + return true; } - - // The range start from NULL exclusive. So, the next value after NULL will be used. + + // The range start from NULL exclusive. So, the next value after NULL will be used. if (!FromFollowerId.has_value()) { FromFollowerId = Min<ui32>(); - return true; - } - + return true; + } + if (FromFollowerId.value() < Max<ui32>()) { FromFollowerId = FromFollowerId.value() + 1; - return true; - } - + return true; + } + FromFollowerId.reset(); } - if (FromTabletId < Max<ui64>()) { - ++FromTabletId; - return true; - } - - return false; - } - - bool CalculateRangeTo() { + if (FromTabletId < Max<ui64>()) { + ++FromTabletId; + return true; + } + + return false; + } + + bool CalculateRangeTo() { const auto& cellsTo = TableRange.To.GetCells(); - if (cellsTo.empty()) { - return true; - } - - YQL_ENSURE(!cellsTo[0].IsNull(), "Read to -inf range"); - - ToTabletId = cellsTo[0].AsValue<ui64>(); - - if (cellsTo.size() == 1 && TableRange.ToInclusive) { - return true; - } - - auto decreaseTabletId = [this]() { - if (ToTabletId > Min<ui64>()) { - --ToTabletId; - return true; - } - - return false; - }; - - if (cellsTo.size() == 2) { - if (!cellsTo[1].IsNull()) { + if (cellsTo.empty()) { + return true; + } + + YQL_ENSURE(!cellsTo[0].IsNull(), "Read to -inf range"); + + ToTabletId = cellsTo[0].AsValue<ui64>(); + + if (cellsTo.size() == 1 && TableRange.ToInclusive) { + return true; + } + + auto decreaseTabletId = [this]() { + if (ToTabletId > Min<ui64>()) { + --ToTabletId; + return true; + } + + return false; + }; + + if (cellsTo.size() == 2) { + if (!cellsTo[1].IsNull()) { ToFollowerId = cellsTo[1].AsValue<ui32>(); } - if (TableRange.ToInclusive) { - return true; + if (TableRange.ToInclusive) { + return true; } - - // The range ends at NULL exclusive. So, the value before NULL will be used. + + // The range ends at NULL exclusive. So, the value before NULL will be used. if (!ToFollowerId.has_value()) { ToFollowerId = Max<ui32>(); - return decreaseTabletId(); - } - + return decreaseTabletId(); + } + if (ToFollowerId > Min<ui32>()) { ToFollowerId = ToFollowerId.value() - 1; - return true; - } - + return true; + } + ToFollowerId.reset(); } - return decreaseTabletId(); - } - - void RequestTabletIds() { - auto request = MakeHolder<TEvSysView::TEvGetTabletIdsRequest>(); - - if (!CalculateRangeFrom() || !CalculateRangeTo()) { - ReplyEmptyAndDie(); - return; - } - + return decreaseTabletId(); + } + + void RequestTabletIds() { + auto request = MakeHolder<TEvSysView::TEvGetTabletIdsRequest>(); + + if (!CalculateRangeFrom() || !CalculateRangeTo()) { + ReplyEmptyAndDie(); + return; + } + if (ToTabletId < FromTabletId) { ReplyEmptyAndDie(); return; @@ -286,13 +286,13 @@ private: size_t index = 0; ui32 fromFollowerId = FromFollowerId.value_or(Min<ui32>()); - - if (record.EntriesSize() > 0 + + if (record.EntriesSize() > 0 && record.GetEntries(0).GetTabletId() == FromTabletId) { for (; index < record.EntriesSize(); ++index) { const auto& entry = record.GetEntries(index); - + if (entry.GetTabletId() != FromTabletId || entry.GetFollowerId() >= fromFollowerId) { break; } diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp index b4448612588..5e90eb59927 100644 --- a/ydb/core/sys_view/ut_kqp.cpp +++ b/ydb/core/sys_view/ut_kqp.cpp @@ -1525,119 +1525,119 @@ Y_UNIT_TEST_SUITE(SystemView) { std::move(desc), settings).GetValueSync(); UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - std::vector<std::pair<TString, TString>> testData = { - { + std::vector<std::pair<TString, TString>> testData = { + { "TabletId = 72075186224037888ul AND FollowerId > 1u", - R"([ - [[2u];[72075186224037888u]]; - [[3u];[72075186224037888u]]; - ])" - }, - { + R"([ + [[2u];[72075186224037888u]]; + [[3u];[72075186224037888u]]; + ])" + }, + { "TabletId = 72075186224037888ul AND FollowerId >= 1u", - R"([ - [[1u];[72075186224037888u]]; - [[2u];[72075186224037888u]]; - [[3u];[72075186224037888u]]; - ])" - }, - { + R"([ + [[1u];[72075186224037888u]]; + [[2u];[72075186224037888u]]; + [[3u];[72075186224037888u]]; + ])" + }, + { "TabletId = 72075186224037888ul AND FollowerId < 2u", - R"([ - [[0u];[72075186224037888u]]; - [[1u];[72075186224037888u]]; - ])" - }, - { + R"([ + [[0u];[72075186224037888u]]; + [[1u];[72075186224037888u]]; + ])" + }, + { "TabletId = 72075186224037888ul AND FollowerId <= 2u", - R"([ - [[0u];[72075186224037888u]]; - [[1u];[72075186224037888u]]; - [[2u];[72075186224037888u]]; - ])" - }, - { - "TabletId > 72075186224037888ul AND TabletId < 72075186224037890ul", - R"([ - [[0u];[72075186224037889u]]; - [[1u];[72075186224037889u]]; - [[2u];[72075186224037889u]]; - [[3u];[72075186224037889u]]; - ])" - } - }; - - TString enablePredicateExtractor = R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "true"; - )"; - - for (auto& data: testData) { - TString query = R"( + R"([ + [[0u];[72075186224037888u]]; + [[1u];[72075186224037888u]]; + [[2u];[72075186224037888u]]; + ])" + }, + { + "TabletId > 72075186224037888ul AND TabletId < 72075186224037890ul", + R"([ + [[0u];[72075186224037889u]]; + [[1u];[72075186224037889u]]; + [[2u];[72075186224037889u]]; + [[3u];[72075186224037889u]]; + ])" + } + }; + + TString enablePredicateExtractor = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "true"; + )"; + + for (auto& data: testData) { + TString query = R"( SELECT FollowerId, TabletId FROM `/Root/.sys/hive_tablets` - WHERE <PREDICATE>; - )"; - - SubstGlobal(query, "<PREDICATE>", data.first); - - auto it = client.StreamExecuteScanQuery(enablePredicateExtractor + query).GetValueSync(); + WHERE <PREDICATE>; + )"; + + SubstGlobal(query, "<PREDICATE>", data.first); + + auto it = client.StreamExecuteScanQuery(enablePredicateExtractor + query).GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - auto streamed = NKqp::StreamResultToYson(it); - - it = client.StreamExecuteScanQuery(query).GetValueSync(); + auto streamed = NKqp::StreamResultToYson(it); + + it = client.StreamExecuteScanQuery(query).GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - auto expected = NKqp::StreamResultToYson(it); - - // Compare two ways of execution - NKqp::CompareYson(expected, streamed); - // And check with expected result from test description - NKqp::CompareYson(data.second, streamed); + auto expected = NKqp::StreamResultToYson(it); + + // Compare two ways of execution + NKqp::CompareYson(expected, streamed); + // And check with expected result from test description + NKqp::CompareYson(data.second, streamed); } } - Y_UNIT_TEST(TabletsRangesPredicateExtractDisabled) { - TTestEnv env(1, 0); - - TTableClient client(env.GetDriver()); - auto session = client.CreateSession().GetValueSync().GetSession(); - - auto desc = TTableBuilder() - .AddNullableColumn("Column1", EPrimitiveType::Uint64) - .SetPrimaryKeyColumn("Column1") - .Build(); - - auto settings = TCreateTableSettings() - .ReplicationPolicy(TReplicationPolicy().ReplicasCount(3)) - .PartitioningPolicy(TPartitioningPolicy().UniformPartitions(3)); - - auto result = session.CreateTable("/Root/Table0", - std::move(desc), settings).GetValueSync(); - UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); - - TString query = R"( + Y_UNIT_TEST(TabletsRangesPredicateExtractDisabled) { + TTestEnv env(1, 0); + + TTableClient client(env.GetDriver()); + auto session = client.CreateSession().GetValueSync().GetSession(); + + auto desc = TTableBuilder() + .AddNullableColumn("Column1", EPrimitiveType::Uint64) + .SetPrimaryKeyColumn("Column1") + .Build(); + + auto settings = TCreateTableSettings() + .ReplicationPolicy(TReplicationPolicy().ReplicasCount(3)) + .PartitioningPolicy(TPartitioningPolicy().UniformPartitions(3)); + + auto result = session.CreateTable("/Root/Table0", + std::move(desc), settings).GetValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + TString query = R"( SELECT FollowerId, TabletId - FROM `/Root/.sys/hive_tablets` - WHERE TabletId <= 72075186224037888ul OR TabletId >= 72075186224037890ul; - )"; - - TString expected = R"([ - [[0u];[72075186224037888u]]; - [[1u];[72075186224037888u]]; - [[2u];[72075186224037888u]]; - [[3u];[72075186224037888u]]; - [[0u];[72075186224037890u]]; - [[1u];[72075186224037890u]]; - [[2u];[72075186224037890u]]; - [[3u];[72075186224037890u]]; - ])"; - - auto it = client.StreamExecuteScanQuery(query).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - // System view dows not support multiple ranges, thus here will be an error if - // predicate extraction occurs. - NKqp::CompareYson(expected, NKqp::StreamResultToYson(it)); - } - + FROM `/Root/.sys/hive_tablets` + WHERE TabletId <= 72075186224037888ul OR TabletId >= 72075186224037890ul; + )"; + + TString expected = R"([ + [[0u];[72075186224037888u]]; + [[1u];[72075186224037888u]]; + [[2u];[72075186224037888u]]; + [[3u];[72075186224037888u]]; + [[0u];[72075186224037890u]]; + [[1u];[72075186224037890u]]; + [[2u];[72075186224037890u]]; + [[3u];[72075186224037890u]]; + ])"; + + auto it = client.StreamExecuteScanQuery(query).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + // System view dows not support multiple ranges, thus here will be an error if + // predicate extraction occurs. + NKqp::CompareYson(expected, NKqp::StreamResultToYson(it)); + } + void TestQueryType( std::function<void(const TTestEnv&, const TString&)> execQuery, const TString& type) diff --git a/ydb/core/tablet_flat/flat_scan_iface.h b/ydb/core/tablet_flat/flat_scan_iface.h index 659a2ca63f6..6b0fa8e42d7 100644 --- a/ydb/core/tablet_flat/flat_scan_iface.h +++ b/ydb/core/tablet_flat/flat_scan_iface.h @@ -18,13 +18,13 @@ namespace NTable { next position for seeking followed by a series of Feed(..) with rows. Thus the next call after Prepare(..) is always Seek(, 0). - If Seek call succeeded and return EScan::Feed, then Feed called to pass - data to reader. When range supplied by Seek completed then Exhausted method is - called. Default implementation of Exhausted method return EScan::Reset, - thus Seek method is called again with incrementing seq parameter. Also - Feed method may return EScan::Reset indicating that current range is - completed and next one should be set up in Seek method. - + If Seek call succeeded and return EScan::Feed, then Feed called to pass + data to reader. When range supplied by Seek completed then Exhausted method is + called. Default implementation of Exhausted method return EScan::Reset, + thus Seek method is called again with incrementing seq parameter. Also + Feed method may return EScan::Reset indicating that current range is + completed and next one should be set up in Seek method. + IScan may express its desire of futher IDriver env behaviour with EScan codes where applicable. diff --git a/ydb/core/tx/columnshard/columnshard__index_scan.h b/ydb/core/tx/columnshard/columnshard__index_scan.h index e6c6f769f06..44e9b77cc01 100644 --- a/ydb/core/tx/columnshard/columnshard__index_scan.h +++ b/ydb/core/tx/columnshard/columnshard__index_scan.h @@ -149,4 +149,4 @@ private: } }; -} +} diff --git a/ydb/core/tx/columnshard/columnshard__read.cpp b/ydb/core/tx/columnshard/columnshard__read.cpp index c6a1f02ed7a..6dcfc3d703e 100644 --- a/ydb/core/tx/columnshard/columnshard__read.cpp +++ b/ydb/core/tx/columnshard/columnshard__read.cpp @@ -151,54 +151,54 @@ TTxReadBase::PrepareReadMetadata(const TActorContext& ctx, const TReadDescriptio return spOut; } -bool TTxReadBase::ParseProgram(const TActorContext& ctx, NKikimrSchemeOp::EOlapProgramType programType, - TString serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) -{ - if (serializedProgram.empty()) { - return true; - } - - NKikimrSSA::TProgram program; - NKikimrSSA::TOlapProgram olapProgram; - - switch (programType) { - case NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS: - if (!olapProgram.ParseFromString(serializedProgram)) { - ErrorDescription = TStringBuilder() << "Can't parse TOlapProgram at " << Self->TabletID(); - return false; - } - - if (!program.ParseFromString(olapProgram.GetProgram())) { - ErrorDescription = TStringBuilder() << "Can't parse TProgram at " << Self->TabletID(); - return false; - } - - break; - default: - ErrorDescription = TStringBuilder() << "Unsupported olap program version: " << (ui32)programType; - return false; - } - - if (ctx.LoggerSettings() && - ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD)) - { - TString out; - ::google::protobuf::TextFormat::PrintToString(program, &out); - LOG_S_DEBUG("Process program: " << Endl << out); - } - - if (olapProgram.HasParameters()) { - Y_VERIFY(olapProgram.HasParametersSchema(), "Parameters are present, but there is no schema."); - - auto schema = NArrow::DeserializeSchema(olapProgram.GetParametersSchema()); - read.ProgramParameters = NArrow::DeserializeBatch(olapProgram.GetParameters(), schema); - } - - read.AddProgram(columnResolver, program); - - return true; -} - +bool TTxReadBase::ParseProgram(const TActorContext& ctx, NKikimrSchemeOp::EOlapProgramType programType, + TString serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) +{ + if (serializedProgram.empty()) { + return true; + } + + NKikimrSSA::TProgram program; + NKikimrSSA::TOlapProgram olapProgram; + + switch (programType) { + case NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS: + if (!olapProgram.ParseFromString(serializedProgram)) { + ErrorDescription = TStringBuilder() << "Can't parse TOlapProgram at " << Self->TabletID(); + return false; + } + + if (!program.ParseFromString(olapProgram.GetProgram())) { + ErrorDescription = TStringBuilder() << "Can't parse TProgram at " << Self->TabletID(); + return false; + } + + break; + default: + ErrorDescription = TStringBuilder() << "Unsupported olap program version: " << (ui32)programType; + return false; + } + + if (ctx.LoggerSettings() && + ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD)) + { + TString out; + ::google::protobuf::TextFormat::PrintToString(program, &out); + LOG_S_DEBUG("Process program: " << Endl << out); + } + + if (olapProgram.HasParameters()) { + Y_VERIFY(olapProgram.HasParametersSchema(), "Parameters are present, but there is no schema."); + + auto schema = NArrow::DeserializeSchema(olapProgram.GetParametersSchema()); + read.ProgramParameters = NArrow::DeserializeBatch(olapProgram.GetParameters(), schema); + } + + read.AddProgram(columnResolver, program); + + return true; +} + bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& ctx) { Y_VERIFY(Ev); Y_VERIFY(Self->PrimaryIndex); @@ -237,14 +237,14 @@ bool TTxRead::Execute(TTransactionContext& txc, const TActorContext& ctx) { NArrow::EOperation::Less, proto.GetRow(), schema, proto.GetInclusive()); } - bool parseResult = ParseProgram(ctx, record.GetOlapProgramType(), record.GetOlapProgram(), read, - TIndexColumnResolver(Self->PrimaryIndex->GetIndexInfo())); + bool parseResult = ParseProgram(ctx, record.GetOlapProgramType(), record.GetOlapProgram(), read, + TIndexColumnResolver(Self->PrimaryIndex->GetIndexInfo())); - if (parseResult) { - ReadMetadata = PrepareReadMetadata(ctx, read, Self->InsertTable, Self->PrimaryIndex, ErrorDescription); + if (parseResult) { + ReadMetadata = PrepareReadMetadata(ctx, read, Self->InsertTable, Self->PrimaryIndex, ErrorDescription); } - ui32 status = NKikimrTxColumnShard::EResultStatus::ERROR; + ui32 status = NKikimrTxColumnShard::EResultStatus::ERROR; if (ReadMetadata) { status = NKikimrTxColumnShard::EResultStatus::SUCCESS; diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index 92e0fd8327a..e2f7ba5a515 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -43,7 +43,7 @@ public: , DataFormat(dataFormat) , TablePath(table) , ReadMetadataRanges(std::move(readMetadataList)) - , ReadMetadataIndex(0) + , ReadMetadataIndex(0) , Deadline(TInstant::Now() + (timeout ? timeout + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) { KeyYqlSchema = ReadMetadataRanges[ReadMetadataIndex]->GetKeyYqlSchema(); @@ -144,7 +144,7 @@ private: bool ProduceResults() { Y_VERIFY(!Finished); - if (ScanIterator->Finished()) { + if (ScanIterator->Finished()) { return false; } @@ -435,7 +435,7 @@ private: const TString TablePath; TVector<NOlap::TReadMetadataBase::TConstPtr> ReadMetadataRanges; - ui32 ReadMetadataIndex; + ui32 ReadMetadataIndex; std::unique_ptr<TScanIteratorBase> ScanIterator; TVector<std::pair<TString, NScheme::TTypeId>> ResultYqlSchema; @@ -461,30 +461,30 @@ private: static void FillPredicatesFromRange(TReadDescription& read, const ::NKikimrTx::TKeyRange& keyRange, const TVector<std::pair<TString, NScheme::TTypeId>>& ydbPk, ui64 tabletId) { - TSerializedTableRange range(keyRange); + TSerializedTableRange range(keyRange); - LOG_S_DEBUG("TTxScan.Execute range predicate. From key size: " - << range.From.GetCells().size() << " To key size: " << range.To.GetCells().size() + LOG_S_DEBUG("TTxScan.Execute range predicate. From key size: " + << range.From.GetCells().size() << " To key size: " << range.To.GetCells().size() << " at tablet " << tabletId); - - read.GreaterPredicate = std::make_shared<NOlap::TPredicate>(); - read.LessPredicate = std::make_shared<NOlap::TPredicate>(); + + read.GreaterPredicate = std::make_shared<NOlap::TPredicate>(); + read.LessPredicate = std::make_shared<NOlap::TPredicate>(); std::tie(*read.GreaterPredicate, *read.LessPredicate) = RangePredicates(range, ydbPk); - - LOG_S_DEBUG("TTxScan.Execute greater predicate over columns: " << read.GreaterPredicate->ToString() + + LOG_S_DEBUG("TTxScan.Execute greater predicate over columns: " << read.GreaterPredicate->ToString() << " at tablet " << tabletId); - LOG_S_DEBUG("TTxScan.Execute less predicate over columns: " << read.LessPredicate->ToString() + LOG_S_DEBUG("TTxScan.Execute less predicate over columns: " << read.LessPredicate->ToString() << " at tablet " << tabletId); - + if (read.GreaterPredicate && read.GreaterPredicate->Empty()) { - read.GreaterPredicate.reset(); - } - + read.GreaterPredicate.reset(); + } + if (read.LessPredicate && read.LessPredicate->Empty()) { - read.LessPredicate.reset(); - } -} - + read.LessPredicate.reset(); + } +} + NOlap::TReadStatsMetadata::TPtr PrepareStatsReadMetadata(ui64 tabletId, const TReadDescription& read, const std::unique_ptr<NOlap::IColumnEngine>& index, TString& error) { THashSet<ui32> readColumnIds(read.ColumnIds.begin(), read.ColumnIds.end()); @@ -542,7 +542,7 @@ PrepareStatsReadMetadata(ui64 tabletId, const TReadDescription& read, const std: } return out; } - + NOlap::TReadMetadataBase::TConstPtr TTxScan::CreateReadMetadata(const TActorContext& ctx, TReadDescription& read, bool indexStats, bool isReverse, ui64 itemsLimit) { @@ -557,18 +557,18 @@ NOlap::TReadMetadataBase::TConstPtr TTxScan::CreateReadMetadata(const TActorCont return {}; } - if (isReverse) { - metadata->SetDescSorting(); - } - - if (itemsLimit) { - metadata->Limit = itemsLimit; - } - - return metadata; -} - - + if (isReverse) { + metadata->SetDescSorting(); + } + + if (itemsLimit) { + metadata->Limit = itemsLimit; + } + + return metadata; +} + + bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { Y_UNUSED(txc); Y_VERIFY(Ev); @@ -577,8 +577,8 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { auto& record = Ev->Get()->Record; const auto& snapshot = record.GetSnapshot(); - ui64 itemsLimit = record.HasItemsLimit() ? record.GetItemsLimit() : 0; - + ui64 itemsLimit = record.HasItemsLimit() ? record.GetItemsLimit() : 0; + TReadDescription read; read.PlanStep = snapshot.GetStep(); read.TxId = snapshot.GetTxId(); @@ -600,35 +600,35 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { } } - bool parseResult; + bool parseResult; - if (!isIndexStats) { - TIndexColumnResolver columnResolver(Self->PrimaryIndex->GetIndexInfo()); - parseResult = ParseProgram(ctx, record.GetOlapProgramType(), record.GetOlapProgram(), read, columnResolver); - } else { - TStatsColumnResolver columnResolver; - parseResult = ParseProgram(ctx, record.GetOlapProgramType(), record.GetOlapProgram(), read, columnResolver); - } + if (!isIndexStats) { + TIndexColumnResolver columnResolver(Self->PrimaryIndex->GetIndexInfo()); + parseResult = ParseProgram(ctx, record.GetOlapProgramType(), record.GetOlapProgram(), read, columnResolver); + } else { + TStatsColumnResolver columnResolver; + parseResult = ParseProgram(ctx, record.GetOlapProgramType(), record.GetOlapProgram(), read, columnResolver); + } - if (!parseResult) { - return true; + if (!parseResult) { + return true; } - if (!record.RangesSize()) { + if (!record.RangesSize()) { auto range = CreateReadMetadata(ctx, read, isIndexStats, record.GetReverse(), itemsLimit); if (range) { ReadMetadataRanges = {range}; } - return true; - } + return true; + } ReadMetadataRanges.reserve(record.RangesSize()); - + auto ydbKey = isIndexStats ? NOlap::GetColumns(PrimaryIndexStatsSchema, PrimaryIndexStatsSchema.KeyColumns) : Self->PrimaryIndex->GetIndexInfo().GetPK(); - for (auto& range: record.GetRanges()) { + for (auto& range: record.GetRanges()) { FillPredicatesFromRange(read, range, ydbKey, Self->TabletID()); auto newRange = CreateReadMetadata(ctx, read, isIndexStats, record.GetReverse(), itemsLimit); if (!newRange) { @@ -637,7 +637,7 @@ bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& ctx) { } ReadMetadataRanges.emplace_back(newRange); } - + if (record.GetReverse()) { std::reverse(ReadMetadataRanges.begin(), ReadMetadataRanges.end()); } diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp index 94dca1fae08..1e76247da84 100644 --- a/ydb/core/tx/columnshard/columnshard_common.cpp +++ b/ydb/core/tx/columnshard/columnshard_common.cpp @@ -162,29 +162,29 @@ NArrow::TAssign MakeConstant(const std::string& name, const NKikimrSSA::TProgram } } -NArrow::TAssign MaterializeParameter(const std::string& name, const NKikimrSSA::TProgram::TParameter& parameter, - const std::shared_ptr<arrow::RecordBatch>& parameterValues) -{ - using TAssign = NArrow::TAssign; - - auto parameterName = parameter.GetName(); - auto column = parameterValues->GetColumnByName(parameterName); - - Y_VERIFY( - column, - "No parameter %s in serialized parameters.", parameterName.c_str() - ); - Y_VERIFY( - column->length() == 1, - "Incorrect values count in parameter array" - ); - - return TAssign(name, *column->GetScalar(0)); -} - -void ExtractAssign(TContext& info, NArrow::TProgramStep& step, const NKikimrSSA::TProgram::TAssignment& assign, - const std::shared_ptr<arrow::RecordBatch>& parameterValues) -{ +NArrow::TAssign MaterializeParameter(const std::string& name, const NKikimrSSA::TProgram::TParameter& parameter, + const std::shared_ptr<arrow::RecordBatch>& parameterValues) +{ + using TAssign = NArrow::TAssign; + + auto parameterName = parameter.GetName(); + auto column = parameterValues->GetColumnByName(parameterName); + + Y_VERIFY( + column, + "No parameter %s in serialized parameters.", parameterName.c_str() + ); + Y_VERIFY( + column->length() == 1, + "Incorrect values count in parameter array" + ); + + return TAssign(name, *column->GetScalar(0)); +} + +void ExtractAssign(TContext& info, NArrow::TProgramStep& step, const NKikimrSSA::TProgram::TAssignment& assign, + const std::shared_ptr<arrow::RecordBatch>& parameterValues) +{ using TId = NKikimrSSA::TProgram::TAssignment; ui32 columnId = assign.GetColumn().GetId(); @@ -201,11 +201,11 @@ void ExtractAssign(TContext& info, NArrow::TProgramStep& step, const NKikimrSSA: step.Assignes.emplace_back(MakeConstant(columnName, assign.GetConstant())); break; } - case TId::kParameter: - { - step.Assignes.emplace_back(MaterializeParameter(columnName, assign.GetParameter(), parameterValues)); - break; - } + case TId::kParameter: + { + step.Assignes.emplace_back(MaterializeParameter(columnName, assign.GetParameter(), parameterValues)); + break; + } case TId::kExternalFunction: case TId::kNull: case TId::EXPRESSION_NOT_SET: @@ -234,7 +234,7 @@ std::pair<TPredicate, TPredicate> RangePredicates(const TSerializedTableRange& r const TVector<std::pair<TString, NScheme::TTypeId>>& columns) { TVector<TCell> leftCells; TVector<std::pair<TString, NScheme::TTypeId>> leftColumns; - bool leftTrailingNull = false; + bool leftTrailingNull = false; { TConstArrayRef<TCell> cells = range.From.GetCells(); size_t size = cells.size(); @@ -244,16 +244,16 @@ std::pair<TPredicate, TPredicate> RangePredicates(const TSerializedTableRange& r if (!cells[i].IsNull()) { leftCells.push_back(cells[i]); leftColumns.push_back(columns[i]); - leftTrailingNull = false; - } else { - leftTrailingNull = true; + leftTrailingNull = false; + } else { + leftTrailingNull = true; } } } TVector<TCell> rightCells; TVector<std::pair<TString, NScheme::TTypeId>> rightColumns; - bool rightTrailingNull = false; + bool rightTrailingNull = false; { TConstArrayRef<TCell> cells = range.To.GetCells(); size_t size = cells.size(); @@ -263,25 +263,25 @@ std::pair<TPredicate, TPredicate> RangePredicates(const TSerializedTableRange& r if (!cells[i].IsNull()) { rightCells.push_back(cells[i]); rightColumns.push_back(columns[i]); - rightTrailingNull = false; - } else { - rightTrailingNull = true; + rightTrailingNull = false; + } else { + rightTrailingNull = true; } } } - bool fromInclusive = range.FromInclusive || leftTrailingNull; - bool toInclusive = range.ToInclusive && !rightTrailingNull; - + bool fromInclusive = range.FromInclusive || leftTrailingNull; + bool toInclusive = range.ToInclusive && !rightTrailingNull; + TString leftBorder = FromCells(leftCells, leftColumns); TString rightBorder = FromCells(rightCells, rightColumns); return std::make_pair( - TPredicate(EOperation::Greater, leftBorder, NArrow::MakeArrowSchema(leftColumns), fromInclusive), - TPredicate(EOperation::Less, rightBorder, NArrow::MakeArrowSchema(rightColumns), toInclusive)); + TPredicate(EOperation::Greater, leftBorder, NArrow::MakeArrowSchema(leftColumns), fromInclusive), + TPredicate(EOperation::Less, rightBorder, NArrow::MakeArrowSchema(rightColumns), toInclusive)); } -void TReadDescription::AddProgram(const IColumnResolver& columnResolver, const NKikimrSSA::TProgram& program) -{ +void TReadDescription::AddProgram(const IColumnResolver& columnResolver, const NKikimrSSA::TProgram& program) +{ using TId = NKikimrSSA::TProgram::TCommand; TContext info(columnResolver); @@ -289,7 +289,7 @@ void TReadDescription::AddProgram(const IColumnResolver& columnResolver, const N for (auto& cmd : program.GetCommand()) { switch (cmd.GetLineCase()) { case TId::kAssign: - ExtractAssign(info, *step, cmd.GetAssign(), ProgramParameters); + ExtractAssign(info, *step, cmd.GetAssign(), ProgramParameters); break; case TId::kFilter: ExtractFilter(info, *step, cmd.GetFilter()); diff --git a/ydb/core/tx/columnshard/columnshard_txs.h b/ydb/core/tx/columnshard/columnshard_txs.h index ef4ca1cca89..b7267dd8ff3 100644 --- a/ydb/core/tx/columnshard/columnshard_txs.h +++ b/ydb/core/tx/columnshard/columnshard_txs.h @@ -203,18 +203,18 @@ protected: const std::unique_ptr<NOlap::TInsertTable>& insertTable, const std::unique_ptr<NOlap::IColumnEngine>& index, TString& error) const; - -protected: - bool ParseProgram( - const TActorContext& ctx, - NKikimrSchemeOp::EOlapProgramType programType, - TString serializedProgram, - TReadDescription& read, - const IColumnResolver& columnResolver - ); - -protected: - TString ErrorDescription; + +protected: + bool ParseProgram( + const TActorContext& ctx, + NKikimrSchemeOp::EOlapProgramType programType, + TString serializedProgram, + TReadDescription& read, + const IColumnResolver& columnResolver + ); + +protected: + TString ErrorDescription; }; class TTxRead : public TTxReadBase { @@ -250,8 +250,8 @@ public: private: NOlap::TReadMetadataBase::TConstPtr CreateReadMetadata(const TActorContext& ctx, TReadDescription& read, bool isIndexStats, bool isReverse, ui64 limit); - -private: + +private: TEvColumnShard::TEvScan::TPtr Ev; TVector<TReadMetadataPtr> ReadMetadataRanges; }; diff --git a/ydb/core/tx/columnshard/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_columnshard_read_write.cpp index 7f4d940f604..00e3143fc61 100644 --- a/ydb/core/tx/columnshard/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_columnshard_read_write.cpp @@ -796,70 +796,70 @@ void TestCompactionInGranuleImpl(bool reboots) { } } -void TestReadWithProgramImpl() -{ - TTestBasicRuntime runtime; - TTester::Setup(runtime); - - TActorId sender = runtime.AllocateEdgeActor(); +void TestReadWithProgramImpl() +{ + TTestBasicRuntime runtime; + TTester::Setup(runtime); + + TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::COLUMNSHARD), &CreateColumnShard); - - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - runtime.DispatchEvents(options); - + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + runtime.DispatchEvents(options); + ui64 metaShard = TTestTxConfig::TxTablet1; - ui64 tableId = 1; - - SetupSchema(runtime, sender, tableId); - { - auto* readEvent = new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId); - auto& readProto = Proto(readEvent); - - readProto.SetOlapProgramType(::NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM); - readProto.SetOlapProgram("XXXYYYZZZ"); - + ui64 tableId = 1; + + SetupSchema(runtime, sender, tableId); + { + auto* readEvent = new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId); + auto& readProto = Proto(readEvent); + + readProto.SetOlapProgramType(::NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM); + readProto.SetOlapProgram("XXXYYYZZZ"); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, readEvent); - - TAutoPtr<IEventHandle> handle; - auto result = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); - UNIT_ASSERT(result); - - auto& resRead = Proto(result); - + + TAutoPtr<IEventHandle> handle; + auto result = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); + UNIT_ASSERT(result); + + auto& resRead = Proto(result); + UNIT_ASSERT_EQUAL(resRead.GetOrigin(), TTestTxConfig::TxTablet0); - UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), metaShard); - UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::ERROR); - UNIT_ASSERT_EQUAL(resRead.GetBatch(), 0); - UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); - UNIT_ASSERT_EQUAL(resRead.GetData(), ""); - } - - { - auto* readEvent = new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId); - auto& readProto = Proto(readEvent); - - readProto.SetOlapProgramType(::NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS); - readProto.SetOlapProgram("XXXYYYZZZ"); - + UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), metaShard); + UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::ERROR); + UNIT_ASSERT_EQUAL(resRead.GetBatch(), 0); + UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); + UNIT_ASSERT_EQUAL(resRead.GetData(), ""); + } + + { + auto* readEvent = new TEvColumnShard::TEvRead(sender, metaShard, 0, 0, tableId); + auto& readProto = Proto(readEvent); + + readProto.SetOlapProgramType(::NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS); + readProto.SetOlapProgram("XXXYYYZZZ"); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, readEvent); - - TAutoPtr<IEventHandle> handle; - auto result = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); - UNIT_ASSERT(result); - - auto& resRead = Proto(result); + + TAutoPtr<IEventHandle> handle; + auto result = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); + UNIT_ASSERT(result); + + auto& resRead = Proto(result); UNIT_ASSERT_EQUAL(resRead.GetOrigin(), TTestTxConfig::TxTablet0); - UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), metaShard); - UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::ERROR); - UNIT_ASSERT_EQUAL(resRead.GetBatch(), 0); - UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); - UNIT_ASSERT_EQUAL(resRead.GetData(), ""); - } + UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), metaShard); + UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::ERROR); + UNIT_ASSERT_EQUAL(resRead.GetBatch(), 0); + UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); + UNIT_ASSERT_EQUAL(resRead.GetData(), ""); + } +} + } -} - Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Y_UNIT_TEST(Write) { TestWriteImpl(TTestSchema::YdbSchema()); @@ -897,10 +897,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { TestCompactionInGranuleImpl(true); } - Y_UNIT_TEST(TestReadWithProgram) { - TestReadWithProgramImpl(); - } - + Y_UNIT_TEST(TestReadWithProgram) { + TestReadWithProgramImpl(); + } + Y_UNIT_TEST(CompactionSplitGranule) { TTestBasicRuntime runtime; TTester::Setup(runtime); diff --git a/ydb/core/tx/datashard/datashard__kqp_scan.cpp b/ydb/core/tx/datashard/datashard__kqp_scan.cpp index 85ae2790646..b6b6b7e337d 100644 --- a/ydb/core/tx/datashard/datashard__kqp_scan.cpp +++ b/ydb/core/tx/datashard/datashard__kqp_scan.cpp @@ -1,5 +1,5 @@ #include "datashard_impl.h" -#include "range_ops.h" +#include "range_ops.h" #include <util/string/vector.h> #include <ydb/core/actorlib_impl/long_timer.h> @@ -36,7 +36,7 @@ public: public: TKqpScan(const TActorId& computeActorId, const TActorId& datashardActorId, ui32 scanId, NDataShard::TUserTable::TCPtr tableInfo, const TSmallVec<TSerializedTableRange>&& tableRanges, - const TSmallVec<NTable::TTag>&& columnTags, const TSmallVec<bool>&& skipNullKeys, + const TSmallVec<NTable::TTag>&& columnTags, const TSmallVec<bool>&& skipNullKeys, const NYql::NDqProto::EDqStatsMode& statsMode, ui64 timeoutMs, ui32 generation, NKikimrTxDataShard::EScanDataFormat dataFormat) : TActor(&TKqpScan::StateScan) @@ -45,10 +45,10 @@ public: , ScanId(scanId) , TableInfo(tableInfo) , TablePath(TableInfo->Path) - , TableRanges(std::move(tableRanges)) - , CurrentRange(0) - , Tags(std::move(columnTags)) - , SkipNullKeys(std::move(skipNullKeys)) + , TableRanges(std::move(tableRanges)) + , CurrentRange(0) + , Tags(std::move(columnTags)) + , SkipNullKeys(std::move(skipNullKeys)) , StatsMode(statsMode) , Deadline(TInstant::Now() + (timeoutMs ? TDuration::MilliSeconds(timeoutMs) + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) , Generation(generation) @@ -214,9 +214,9 @@ private: } EScan Seek(TLead& lead, ui64 seq) noexcept final { - YQL_ENSURE(seq == CurrentRange); - - if (CurrentRange == TableRanges.size()) { + YQL_ENSURE(seq == CurrentRange); + + if (CurrentRange == TableRanges.size()) { LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, "TableRanges is over" << ", at: " << ScanActorId << ", scanId: " << ScanId @@ -224,33 +224,33 @@ private: return EScan::Final; } - auto& range = TableRanges[CurrentRange]; - + auto& range = TableRanges[CurrentRange]; + int cmpFrom; int cmpTo; cmpFrom = CompareBorders<false, false>( - range.From.GetCells(), + range.From.GetCells(), TableInfo->Range.From.GetCells(), - range.FromInclusive, + range.FromInclusive, TableInfo->Range.FromInclusive, TableInfo->KeyColumnTypes); cmpTo = CompareBorders<true, true>( - range.To.GetCells(), + range.To.GetCells(), TableInfo->Range.To.GetCells(), - range.ToInclusive, + range.ToInclusive, TableInfo->Range.ToInclusive, TableInfo->KeyColumnTypes); if (cmpFrom > 0) { - auto seek = range.FromInclusive ? NTable::ESeek::Lower : NTable::ESeek::Upper; - lead.To(Tags, range.From.GetCells(), seek); + auto seek = range.FromInclusive ? NTable::ESeek::Lower : NTable::ESeek::Upper; + lead.To(Tags, range.From.GetCells(), seek); } else { lead.To(Tags, {}, NTable::ESeek::Lower); } - if (cmpTo < 0) { - lead.Until(range.To.GetCells(), range.ToInclusive); + if (cmpTo < 0) { + lead.Until(range.To.GetCells(), range.ToInclusive); } return EScan::Feed; @@ -282,35 +282,35 @@ private: auto sent = SendResult(/* pageFault */ false); - if (!sent) { - // There is free space in memory and results are not sent to caller - return EScan::Feed; - } + if (!sent) { + // There is free space in memory and results are not sent to caller + return EScan::Feed; + } - if (PeerFreeSpace <= 0) { + if (PeerFreeSpace <= 0) { Sleep = true; return EScan::Sleep; } - return EScan::Feed; // sent by rows limit, can send one more batch + return EScan::Feed; // sent by rows limit, can send one more batch } EScan Exhausted() noexcept override { - LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, - "Range " << CurrentRange << " of " << TableRanges.size() << " exhausted: try next one." - << " table: " << TablePath - << " range: " << DebugPrintRange( - TableInfo->KeyColumnTypes, TableRanges[CurrentRange].ToTableRange(), *AppData()->TypeRegistry - ) - << " next range: " << ((CurrentRange + 1) >= TableRanges.size() ? "<none>" : DebugPrintRange( - TableInfo->KeyColumnTypes, TableRanges[CurrentRange + 1].ToTableRange(), *AppData()->TypeRegistry - )) - ); - - ++CurrentRange; - return EScan::Reset; - } - + LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, + "Range " << CurrentRange << " of " << TableRanges.size() << " exhausted: try next one." + << " table: " << TablePath + << " range: " << DebugPrintRange( + TableInfo->KeyColumnTypes, TableRanges[CurrentRange].ToTableRange(), *AppData()->TypeRegistry + ) + << " next range: " << ((CurrentRange + 1) >= TableRanges.size() ? "<none>" : DebugPrintRange( + TableInfo->KeyColumnTypes, TableRanges[CurrentRange + 1].ToTableRange(), *AppData()->TypeRegistry + )) + ); + + ++CurrentRange; + return EScan::Reset; + } + EScan PageFault() noexcept override final { ++PageFaults; if (Result && !Result->Rows.empty()) { @@ -505,8 +505,8 @@ private: const ui32 ScanId; const NDataShard::TUserTable::TCPtr TableInfo; const TString TablePath; - const TSmallVec<TSerializedTableRange> TableRanges; - ui32 CurrentRange; + const TSmallVec<TSerializedTableRange> TableRanges; + ui32 CurrentRange; const TSmallVec<NTable::TTag> Tags; TSmallVec<NScheme::TTypeId> Types; const TSmallVec<bool> SkipNullKeys; @@ -614,21 +614,21 @@ void TDataShard::Handle(TEvDataShard::TEvKqpScan::TPtr& ev, const TActorContext& Pipeline.StartStreamingTx(snapshot.GetTxId(), 1); - TSmallVec<TSerializedTableRange> ranges; - ranges.reserve(request.RangesSize()); - - for (auto range: request.GetRanges()) { - ranges.emplace_back(std::move(TSerializedTableRange(range))); - } - + TSmallVec<TSerializedTableRange> ranges; + ranges.reserve(request.RangesSize()); + + for (auto range: request.GetRanges()) { + ranges.emplace_back(std::move(TSerializedTableRange(range))); + } + auto* tableScan = new TKqpScan( scanComputeActor, SelfId(), request.GetScanId(), tableInfo, - std::move(ranges), - std::move(TSmallVec<NTable::TTag>(request.GetColumnTags().begin(), request.GetColumnTags().end())), - std::move(TSmallVec<bool>(request.GetSkipNullKeys().begin(), request.GetSkipNullKeys().end())), + std::move(ranges), + std::move(TSmallVec<NTable::TTag>(request.GetColumnTags().begin(), request.GetColumnTags().end())), + std::move(TSmallVec<bool>(request.GetSkipNullKeys().begin(), request.GetSkipNullKeys().end())), request.GetStatsMode(), request.GetTimeoutMs(), generation, diff --git a/ydb/core/tx/datashard/datashard_kqp_compute.cpp b/ydb/core/tx/datashard/datashard_kqp_compute.cpp index 110900aeac4..edd6ca8e194 100644 --- a/ydb/core/tx/datashard/datashard_kqp_compute.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_compute.cpp @@ -28,7 +28,7 @@ typedef IComputationNode* (*TCallableDatashardBuilderFunc)(TCallable& callable, struct TKqpDatashardComputationMap { TKqpDatashardComputationMap() { Map["KqpWideReadTable"] = &WrapKqpWideReadTable; - Map["KqpWideReadTableRanges"] = &WrapKqpWideReadTableRanges; + Map["KqpWideReadTableRanges"] = &WrapKqpWideReadTableRanges; Map["KqpLookupTable"] = &WrapKqpLookupTable; Map["KqpUpsertRows"] = &WrapKqpUpsertRows; Map["KqpDeleteRows"] = &WrapKqpDeleteRows; @@ -66,7 +66,7 @@ typedef IComputationNode* (*TCallableScanBuilderFunc)(TCallable& callable, struct TKqpScanComputationMap { TKqpScanComputationMap() { Map["KqpWideReadTable"] = &WrapKqpScanWideReadTable; - Map["KqpWideReadTableRanges"] = &WrapKqpScanWideReadTableRanges; + Map["KqpWideReadTableRanges"] = &WrapKqpScanWideReadTableRanges; } THashMap<TString, TCallableScanBuilderFunc> Map; diff --git a/ydb/core/tx/datashard/datashard_kqp_compute.h b/ydb/core/tx/datashard/datashard_kqp_compute.h index 39a6845c86d..4bbb8fa0de7 100644 --- a/ydb/core/tx/datashard/datashard_kqp_compute.h +++ b/ydb/core/tx/datashard/datashard_kqp_compute.h @@ -92,8 +92,8 @@ void FetchRow(const TDbTupleRef& dbTuple, NYql::NUdf::TUnboxedValue& row, TCompu TKqpTableStats& tableStats, const TKqpDatashardComputeContext& computeCtx, const TSmallVec<NTable::TTag>& systemColumnTags); -IComputationNode* WrapKqpWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, - TKqpDatashardComputeContext& computeCtx); +IComputationNode* WrapKqpWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, + TKqpDatashardComputeContext& computeCtx); IComputationNode* WrapKqpLookupTable(TCallable& callable, const TComputationNodeFactoryContext& ctx, TKqpDatashardComputeContext& computeCtx); IComputationNode* WrapKqpUpsertRows(TCallable& callable, const TComputationNodeFactoryContext& ctx, diff --git a/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp b/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp index 1ae6c4ad6c9..24582c2664d 100644 --- a/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_lookup_table.cpp @@ -72,7 +72,7 @@ TParseLookupTableResult ParseLookupTable(TCallable& callable) { } } - ParseReadColumns(callable.GetType()->GetReturnType(), tagsNode, result.Columns, result.SystemColumns); + ParseReadColumns(callable.GetType()->GetReturnType(), tagsNode, result.Columns, result.SystemColumns); return result; } diff --git a/ydb/core/tx/datashard/datashard_kqp_read_table.cpp b/ydb/core/tx/datashard/datashard_kqp_read_table.cpp index c2512a1fc1c..9fbd8ef0ad2 100644 --- a/ydb/core/tx/datashard/datashard_kqp_read_table.cpp +++ b/ydb/core/tx/datashard/datashard_kqp_read_table.cpp @@ -557,9 +557,9 @@ void FetchRow(const TDbTupleRef& dbTuple, TUnboxedValue& row, TComputationContex return FetchRowImpl(dbTuple, row, ctx, tableStats, computeCtx, systemColumnTags); } -IComputationNode* WrapKqpWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, - TKqpDatashardComputeContext& computeCtx) -{ +IComputationNode* WrapKqpWideReadTableRanges(TCallable& callable, const TComputationNodeFactoryContext& ctx, + TKqpDatashardComputeContext& computeCtx) +{ auto parseResult = ParseWideReadTableRanges(callable); auto rangesNode = LocateNode(ctx.NodeLocator, *parseResult.Ranges); @@ -580,8 +580,8 @@ IComputationNode* WrapKqpWideReadTableRanges(TCallable& callable, const TComputa } return new TKqpWideReadTableRangesWrapper<false>(computeCtx, ctx.Env, parseResult, rangesNode, itemsLimit); -} - +} + IComputationNode* WrapKqpWideReadTable(TCallable& callable, const TComputationNodeFactoryContext& ctx, TKqpDatashardComputeContext& computeCtx) { diff --git a/ydb/core/tx/datashard/datashard_ut_kqp_scan.cpp b/ydb/core/tx/datashard/datashard_ut_kqp_scan.cpp index 53a22986c16..b2b14de1904 100644 --- a/ydb/core/tx/datashard/datashard_ut_kqp_scan.cpp +++ b/ydb/core/tx/datashard/datashard_ut_kqp_scan.cpp @@ -26,16 +26,16 @@ namespace { return sql; } - void EnableLogging(TTestActorRuntime& runtime) { - runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); + void EnableLogging(TTestActorRuntime& runtime) { + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); //runtime.SetLogPriority(NKikimrServices::TX_PROXY, NLog::PRI_DEBUG); runtime.SetLogPriority(NKikimrServices::KQP_EXECUTER, NActors::NLog::PRI_TRACE); //runtime.SetLogPriority(NKikimrServices::KQP_WORKER, NActors::NLog::PRI_DEBUG); //runtime.SetLogPriority(NKikimrServices::KQP_RESOURCE_MANAGER, NActors::NLog::PRI_DEBUG); //runtime.SetLogPriority(NKikimrServices::KQP_NODE, NActors::NLog::PRI_DEBUG); runtime.SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_TRACE); - } - + } + } Y_UNIT_TEST_SUITE(KqpScan) { @@ -67,7 +67,7 @@ Y_UNIT_TEST_SUITE(KqpScan) { auto &runtime = *server->GetRuntime(); auto sender = runtime.AllocateEdgeActor(); - // EnableLogging(runtime); + // EnableLogging(runtime); InitRoot(server, sender); CreateShardedTable(server, sender, "/Root", "table-1", 1); @@ -172,7 +172,7 @@ Y_UNIT_TEST_SUITE(KqpScan) { auto &runtime = *server->GetRuntime(); auto sender = runtime.AllocateEdgeActor(); - // EnableLogging(runtime); + // EnableLogging(runtime); InitRoot(server, sender); CreateShardedTable(server, sender, "/Root", "table-1", 7); @@ -264,7 +264,7 @@ Y_UNIT_TEST_SUITE(KqpScan) { auto sender = runtime.AllocateEdgeActor(); auto senderSplit = runtime.AllocateEdgeActor(); - // EnableLogging(runtime); + // EnableLogging(runtime); SetSplitMergePartCountLimit(&runtime, -1); @@ -374,74 +374,74 @@ Y_UNIT_TEST_SUITE(KqpScan) { UNIT_ASSERT_VALUES_EQUAL(result, 596400); } - Y_UNIT_TEST_WITH_MVCC(ScanRetryReadRanges) { - Y_UNUSED(EnableLogging); - - NKikimrConfig::TAppConfig appCfg; - - auto* rm = appCfg.MutableTableServiceConfig()->MutableResourceManager(); - rm->SetChannelBufferSize(100); - rm->SetMinChannelBufferSize(100); - rm->SetScanBufferSize(100); - - TPortManager pm; - TServerSettings serverSettings(pm.GetPort(2134)); - serverSettings.SetDomainName("Root") - .SetEnableMvcc(WithMvcc) - .SetNodeCount(2) - .SetAppConfig(appCfg) - .SetUseRealThreads(false); - - Tests::TServer::TPtr server = new TServer(serverSettings); - auto &runtime = *server->GetRuntime(); - auto sender = runtime.AllocateEdgeActor(); - - // EnableLogging(runtime); - - InitRoot(server, sender); - CreateShardedTable(server, sender, "/Root", "table-1", 1); - ExecSQL(server, sender, FillTableQuery()); - - TSet<TActorId> scans; + Y_UNIT_TEST_WITH_MVCC(ScanRetryReadRanges) { + Y_UNUSED(EnableLogging); + + NKikimrConfig::TAppConfig appCfg; + + auto* rm = appCfg.MutableTableServiceConfig()->MutableResourceManager(); + rm->SetChannelBufferSize(100); + rm->SetMinChannelBufferSize(100); + rm->SetScanBufferSize(100); + + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetEnableMvcc(WithMvcc) + .SetNodeCount(2) + .SetAppConfig(appCfg) + .SetUseRealThreads(false); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto &runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + // EnableLogging(runtime); + + InitRoot(server, sender); + CreateShardedTable(server, sender, "/Root", "table-1", 1); + ExecSQL(server, sender, FillTableQuery()); + + TSet<TActorId> scans; TSet<TActorId> killedTablets; - - ui64 result = 0; - ui64 incomingRangesSize = 0; - - auto captureEvents = [&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle> &ev) -> auto { - switch (ev->GetTypeRewrite()) { - /* - * Trick executor to think that all datashard are located on node 1. - */ - case NKqp::TKqpExecuterEvents::EvShardsResolveStatus: { - auto* msg = ev->Get<NKqp::TEvKqpExecuter::TEvShardsResolveStatus>(); - for (auto& [shardId, nodeId]: msg->ShardNodes) { - Cerr << "-- nodeId: " << nodeId << Endl; - nodeId = runtime.GetNodeId(0); - } - break; - } - - case TEvDataShard::EvKqpScan: { - Cerr << (TStringBuilder() << "-- EvScan " << ev->Sender << " -> " << ev->Recipient << Endl); - - if (!incomingRangesSize) { - auto& request = ev->Get<TEvDataShard::TEvKqpScan>()->Record; - incomingRangesSize = request.RangesSize(); - } - - break; - } - - /* - * Respond to streamData with acks. Without that execution pipeline will stop - * producing new tuples. - */ - case NKqp::TKqpExecuterEvents::EvStreamData: { - auto& record = ev->Get<NKqp::TEvKqpExecuter::TEvStreamData>()->Record; - - Cerr << (TStringBuilder() << "-- EvStreamData: " << record.AsJSON() << Endl); - + + ui64 result = 0; + ui64 incomingRangesSize = 0; + + auto captureEvents = [&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle> &ev) -> auto { + switch (ev->GetTypeRewrite()) { + /* + * Trick executor to think that all datashard are located on node 1. + */ + case NKqp::TKqpExecuterEvents::EvShardsResolveStatus: { + auto* msg = ev->Get<NKqp::TEvKqpExecuter::TEvShardsResolveStatus>(); + for (auto& [shardId, nodeId]: msg->ShardNodes) { + Cerr << "-- nodeId: " << nodeId << Endl; + nodeId = runtime.GetNodeId(0); + } + break; + } + + case TEvDataShard::EvKqpScan: { + Cerr << (TStringBuilder() << "-- EvScan " << ev->Sender << " -> " << ev->Recipient << Endl); + + if (!incomingRangesSize) { + auto& request = ev->Get<TEvDataShard::TEvKqpScan>()->Record; + incomingRangesSize = request.RangesSize(); + } + + break; + } + + /* + * Respond to streamData with acks. Without that execution pipeline will stop + * producing new tuples. + */ + case NKqp::TKqpExecuterEvents::EvStreamData: { + auto& record = ev->Get<NKqp::TEvKqpExecuter::TEvStreamData>()->Record; + + Cerr << (TStringBuilder() << "-- EvStreamData: " << record.AsJSON() << Endl); + // Empty message can come on finish if (!record.GetResultSet().rows().empty()) { Y_ASSERT(record.GetResultSet().rows().at(0).items().size() == 2); @@ -450,70 +450,70 @@ Y_UNIT_TEST_SUITE(KqpScan) { auto val = record.GetResultSet().rows().at(i).items().at(1).uint32_value(); result += val; } - } - - auto resp = MakeHolder<NKqp::TEvKqpExecuter::TEvStreamDataAck>(); - resp->Record.SetEnough(false); - resp->Record.SetSeqNo(ev->Get<NKqp::TEvKqpExecuter::TEvStreamData>()->Record.GetSeqNo()); - resp->Record.SetFreeSpace(100); - runtime.Send(new IEventHandle(ev->Sender, sender, resp.Release())); - return TTestActorRuntime::EEventAction::DROP; - } - - /* Drop message and kill tablet if we already had seen this tablet */ - case NKqp::TKqpComputeEvents::EvScanData: { - if (scans.contains(ev->Sender)) { + } + + auto resp = MakeHolder<NKqp::TEvKqpExecuter::TEvStreamDataAck>(); + resp->Record.SetEnough(false); + resp->Record.SetSeqNo(ev->Get<NKqp::TEvKqpExecuter::TEvStreamData>()->Record.GetSeqNo()); + resp->Record.SetFreeSpace(100); + runtime.Send(new IEventHandle(ev->Sender, sender, resp.Release())); + return TTestActorRuntime::EEventAction::DROP; + } + + /* Drop message and kill tablet if we already had seen this tablet */ + case NKqp::TKqpComputeEvents::EvScanData: { + if (scans.contains(ev->Sender)) { if (killedTablets.empty()) { // do only 1 kill per test runtime.Send(new IEventHandle(ev->Sender, ev->Sender, new NKqp::TEvKqpCompute::TEvKillScanTablet)); Cerr << (TStringBuilder() << "-- EvScanData from " << ev->Sender << ": hijack event, kill tablet " << ev->Sender << Endl); Cerr.Flush(); } - } else { - scans.insert(ev->Sender); + } else { + scans.insert(ev->Sender); runtime.EnableScheduleForActor(ev->Sender); - Cerr << (TStringBuilder() << "-- EvScanData from " << ev->Sender << ": pass" << Endl); - - auto scanEvent = ev->Get<NKqp::TEvKqpCompute::TEvScanData>(); - - for (auto& item: scanEvent->Rows) { - // Row consists of 'key', 'value' - ui32 key = item[0].AsValue<ui32>(); - - // Check that key correspond to query - bool inRange = (key > 1 && key < 3) || (key > 20 && key < 30) || (key >= 40 && key <= 50); - UNIT_ASSERT_C(inRange, TStringBuilder() << "Key " << key << "not in query range"); - } - } - - break; - } - - default: - break; - } - return TTestActorRuntime::EEventAction::PROCESS; - }; - runtime.SetObserverFunc(captureEvents); - - auto query = TString(R"( - --!syntax_v1 - SELECT key, value FROM `/Root/table-1` - WHERE - (key > 1 AND key < 3) OR - (key > 20 AND key < 30) OR - (key >= 40 AND key <= 50) - ORDER BY key; - )"); - - auto streamSender = runtime.AllocateEdgeActor(); - SendRequest(runtime, streamSender, MakeStreamRequest(streamSender, query, false)); - auto ev = runtime.GrabEdgeEventRethrow<NKqp::TEvKqp::TEvQueryResponse>(streamSender); - - UNIT_ASSERT_VALUES_EQUAL(result, 72742); - UNIT_ASSERT_VALUES_EQUAL(incomingRangesSize, 3); - } - + Cerr << (TStringBuilder() << "-- EvScanData from " << ev->Sender << ": pass" << Endl); + + auto scanEvent = ev->Get<NKqp::TEvKqpCompute::TEvScanData>(); + + for (auto& item: scanEvent->Rows) { + // Row consists of 'key', 'value' + ui32 key = item[0].AsValue<ui32>(); + + // Check that key correspond to query + bool inRange = (key > 1 && key < 3) || (key > 20 && key < 30) || (key >= 40 && key <= 50); + UNIT_ASSERT_C(inRange, TStringBuilder() << "Key " << key << "not in query range"); + } + } + + break; + } + + default: + break; + } + return TTestActorRuntime::EEventAction::PROCESS; + }; + runtime.SetObserverFunc(captureEvents); + + auto query = TString(R"( + --!syntax_v1 + SELECT key, value FROM `/Root/table-1` + WHERE + (key > 1 AND key < 3) OR + (key > 20 AND key < 30) OR + (key >= 40 AND key <= 50) + ORDER BY key; + )"); + + auto streamSender = runtime.AllocateEdgeActor(); + SendRequest(runtime, streamSender, MakeStreamRequest(streamSender, query, false)); + auto ev = runtime.GrabEdgeEventRethrow<NKqp::TEvKqp::TEvQueryResponse>(streamSender); + + UNIT_ASSERT_VALUES_EQUAL(result, 72742); + UNIT_ASSERT_VALUES_EQUAL(incomingRangesSize, 3); + } + } } // namespace NKqp diff --git a/ydb/library/yql/ast/yql_expr.cpp b/ydb/library/yql/ast/yql_expr.cpp index 92c68ee67d9..82f6c5812ac 100644 --- a/ydb/library/yql/ast/yql_expr.cpp +++ b/ydb/library/yql/ast/yql_expr.cpp @@ -2390,16 +2390,16 @@ TAstParseResult ConvertToAst(const TExprNode& root, TExprContext& exprContext, c if (name.empty()) { const auto& ref = ctx.References[node.second]; if (!InlineNode(*node.second, ref.References, ref.Neighbors, settings)) { - if (settings.PrintArguments && node.second->IsArgument()) { - auto buffer = TStringBuilder() << "$" << ++uniqueNum - << "{" << node.second->Content() << ":" - << node.second->UniqueId() << "}"; - YQL_ENSURE(frame.Bindings.emplace(node.second, buffer).second); - } else { - char buffer[1 + 10 + 1]; - sprintf(buffer, "$%" PRIu32, ++uniqueNum); - YQL_ENSURE(frame.Bindings.emplace(node.second, buffer).second); - } + if (settings.PrintArguments && node.second->IsArgument()) { + auto buffer = TStringBuilder() << "$" << ++uniqueNum + << "{" << node.second->Content() << ":" + << node.second->UniqueId() << "}"; + YQL_ENSURE(frame.Bindings.emplace(node.second, buffer).second); + } else { + char buffer[1 + 10 + 1]; + sprintf(buffer, "$%" PRIu32, ++uniqueNum); + YQL_ENSURE(frame.Bindings.emplace(node.second, buffer).second); + } frame.TopoSortedNodes.emplace_back(node.second); } } diff --git a/ydb/library/yql/ast/yql_expr.h b/ydb/library/yql/ast/yql_expr.h index f010efcb67b..d6eb544f9af 100644 --- a/ydb/library/yql/ast/yql_expr.h +++ b/ydb/library/yql/ast/yql_expr.h @@ -2555,7 +2555,7 @@ struct TConvertToAstSettings { ui32 AnnotationFlags = 0; bool RefAtoms = false; std::function<bool(const TExprNode&)> NoInlineFunc; - bool PrintArguments = false; + bool PrintArguments = false; }; TAstParseResult ConvertToAst(const TExprNode& root, TExprContext& ctx, const TConvertToAstSettings& settings); diff --git a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json index 37ec25e62cc..edfcaa15302 100644 --- a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json +++ b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json @@ -1366,7 +1366,7 @@ }, { "Name": "TCoMapJoinCore", - "Base": "TCallable", + "Base": "TCallable", "Match": {"Type": "Callable", "Name": "MapJoinCore"}, "Children": [ {"Index": 0, "Name": "LeftInput", "Type": "TExprBase"}, @@ -1378,17 +1378,17 @@ ] }, { - "Name": "TCoJoinDict", - "Base": "TCallable", - "Match": {"Type": "Callable", "Name": "JoinDict"}, - "Children": [ - {"Index": 0, "Name": "LeftInput", "Type": "TExprBase"}, - {"Index": 1, "Name": "RightInput", "Type": "TExprBase"}, + "Name": "TCoJoinDict", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "JoinDict"}, + "Children": [ + {"Index": 0, "Name": "LeftInput", "Type": "TExprBase"}, + {"Index": 1, "Name": "RightInput", "Type": "TExprBase"}, {"Index": 2, "Name": "JoinKind", "Type": "TCoAtom"}, {"Index": 3, "Name": "Flags", "Type": "TCoAtomList", "Optional": true} - ] - }, - { + ] + }, + { "Name": "TCoSortBase", "Base": "TCoInputBase", "Match": {"Type": "CallableBase"}, @@ -1952,27 +1952,27 @@ {"Index": 4, "Name": "OnEmpty", "Type": "TCoAtom"}, {"Index": 5, "Name": "OnError", "Type": "TCoAtom"} ] - }, - { + }, + { "Name": "TCoAsRange", "VarArgBase": "TExprBase", "Match": {"Type": "Callable", "Name": "AsRange"} - }, - { + }, + { "Name": "TCoRangeCreate", "Base": "TCallable", "Match": {"Type": "Callable", "Name": "RangeCreate"}, - "Children": [ + "Children": [ {"Index": 0, "Name": "UserRange", "Type": "TExprBase"} - ] - }, - { - "Name": "TCoRangeFinalize", - "Base": "TCallable", - "Match": {"Type": "Callable", "Name": "RangeFinalize"}, - "Children": [ - {"Index": 0, "Name": "Range", "Type": "TExprBase"} - ] + ] + }, + { + "Name": "TCoRangeFinalize", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "RangeFinalize"}, + "Children": [ + {"Index": 0, "Name": "Range", "Type": "TExprBase"} + ] }, { "Name": "TCoCastStruct", diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index b545f47cd70..5846e6cb108 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -12175,7 +12175,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> YQL_ENSURE(components.front() && components.back()); if (!IsSameAnnotation(*components.front(), *components.back())) { ctx.AddError(TIssue(ctx.GetPosition(pos), - TStringBuilder() << "Range begin/end type mismatch. Begin: " << *components.front() + TStringBuilder() << "Range begin/end type mismatch. Begin: " << *components.front() << " End: " << *components.back())); return false; } @@ -12989,7 +12989,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["EmptyListType"] = &TypeWrapper<ETypeAnnotationKind::EmptyList>; Functions["EmptyDictType"] = &TypeWrapper<ETypeAnnotationKind::EmptyDict>; Functions["Join"] = &JoinWrapper; - Functions["JoinDict"] = &JoinDictWrapper; + Functions["JoinDict"] = &JoinDictWrapper; Functions["MapJoinCore"] = &MapJoinCoreWrapper; Functions["CommonJoinCore"] = &CommonJoinCoreWrapper; Functions["CombineCore"] = &CombineCoreWrapper; diff --git a/ydb/library/yql/core/type_ann/type_ann_impl.h b/ydb/library/yql/core/type_ann/type_ann_impl.h index 235c31a5832..3d940ebd737 100644 --- a/ydb/library/yql/core/type_ann/type_ann_impl.h +++ b/ydb/library/yql/core/type_ann/type_ann_impl.h @@ -26,7 +26,7 @@ namespace NTypeAnnImpl { // Implemented in type_ann_join.cpp IGraphTransformer::TStatus JoinWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); - IGraphTransformer::TStatus JoinDictWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus JoinDictWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus MapJoinCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus CommonJoinCoreWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus EquiJoinWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); diff --git a/ydb/library/yql/core/type_ann/type_ann_join.cpp b/ydb/library/yql/core/type_ann/type_ann_join.cpp index 26fe6eae2ec..18516a8ab8b 100644 --- a/ydb/library/yql/core/type_ann/type_ann_join.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_join.cpp @@ -127,31 +127,31 @@ namespace NTypeAnnImpl { return IGraphTransformer::TStatus::Ok; } - IGraphTransformer::TStatus JoinDictWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { - Y_UNUSED(output); - + IGraphTransformer::TStatus JoinDictWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureMinMaxArgsCount(*input, 3U, 4U, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - + return IGraphTransformer::TStatus::Error; + } + const auto& left = input->Head(); const auto& right = *input->Child(1U); const auto& kind = *input->Child(2U); if (!EnsureDictType(left, ctx.Expr) || !EnsureDictType(right, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - + return IGraphTransformer::TStatus::Error; + } + if (!EnsureAtom(kind, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } - + return IGraphTransformer::TStatus::Error; + } + bool leftUnique = false, rightUnique = false; if (input->ChildrenSize() > 3U) { if (!EnsureTupleOfAtoms(input->Tail(), ctx.Expr)) { return IGraphTransformer::TStatus::Error; } - + bool hasUnknown = false; input->Tail().ForEachChild([&](const TExprNode& flag) { if (const auto& content = flag.Content(); content == "LeftUnique") @@ -166,7 +166,7 @@ namespace NTypeAnnImpl { if (hasUnknown) return IGraphTransformer::TStatus::Error; } - + const auto keyType = left.GetTypeAnn()->Cast<TDictExprType>()->GetKeyType(); if (const auto rightKeyType = right.GetTypeAnn()->Cast<TDictExprType>()->GetKeyType(); !IsSameAnnotation(*keyType, *rightKeyType)) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "Mismatch dict key types: " << *keyType << " and " << *rightKeyType)); @@ -209,31 +209,31 @@ namespace NTypeAnnImpl { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(right.Pos()), TStringBuilder() << "Expected multi dict on right side but got " << *right.GetTypeAnn())); return IGraphTransformer::TStatus::Error; } - + TTypeAnnotationNode::TListType tupleItems = { leftUnique ? leftPayloadType : leftPayloadType->Cast<TListExprType>()->GetItemType(), rightUnique ? rightPayloadType : rightPayloadType->Cast<TListExprType>()->GetItemType() }; - + if (joinKind == "Right" || joinKind == "Full" || joinKind == "Exclusion") { tupleItems.front() = ctx.Expr.MakeType<TOptionalExprType>(tupleItems.front()); } if (joinKind == "Left" || joinKind == "Full" || joinKind == "Exclusion") { tupleItems.back() = ctx.Expr.MakeType<TOptionalExprType>(tupleItems.back()); } - + outputItemType = ctx.Expr.MakeType<TTupleExprType>(tupleItems); } else { ctx.Expr.AddError( TIssue(ctx.Expr.GetPosition(kind.Pos()), TStringBuilder() << "Unsupported join kind: " << joinKind) ); - return IGraphTransformer::TStatus::Error; - } - + return IGraphTransformer::TStatus::Error; + } + input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(outputItemType)); - return IGraphTransformer::TStatus::Ok; - } - + return IGraphTransformer::TStatus::Ok; + } + IGraphTransformer::TStatus EquiJoinWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { if (!EnsureMinArgsCount(*input, 4, ctx.Expr)) { return IGraphTransformer::TStatus::Error; diff --git a/ydb/library/yql/dq/expr_nodes/dq_expr_nodes.json b/ydb/library/yql/dq/expr_nodes/dq_expr_nodes.json index cd73653b9e4..67627b1b8bd 100644 --- a/ydb/library/yql/dq/expr_nodes/dq_expr_nodes.json +++ b/ydb/library/yql/dq/expr_nodes/dq_expr_nodes.json @@ -52,11 +52,11 @@ "Match": {"Type": "Callable", "Name": "DqPhyCrossJoin"} }, { - "Name": "TDqPhyJoinDict", - "Base": "TDqJoinBase", - "Match": {"Type": "Callable", "Name": "DqPhyJoinDict"} - }, - { + "Name": "TDqPhyJoinDict", + "Base": "TDqJoinBase", + "Match": {"Type": "Callable", "Name": "DqPhyJoinDict"} + }, + { "Name": "TDqSource", "Base": "TCallable", "Match": {"Type": "Callable", "Name": "DqSource"}, @@ -200,22 +200,22 @@ {"Index": 0, "Name": "World", "Type": "TExprBase"}, {"Index": 1, "Name": "SinkStages", "Type": "TDqStageList"} ] - }, - { - "Name": "TDqPrecompute", - "Base": "TCallable", - "Match": {"Type": "Callable", "Name": "DqPrecompute"}, - "Children": [ - {"Index": 0, "Name": "Input", "Type": "TExprBase"} - ] - }, - { - "Name": "TDqPhyPrecompute", - "Base": "TCallable", - "Match": {"Type": "Callable", "Name": "DqPhyPrecompute"}, - "Children": [ - {"Index": 0, "Name": "Connection", "Type": "TDqConnection"} - ] + }, + { + "Name": "TDqPrecompute", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "DqPrecompute"}, + "Children": [ + {"Index": 0, "Name": "Input", "Type": "TExprBase"} + ] + }, + { + "Name": "TDqPhyPrecompute", + "Base": "TCallable", + "Match": {"Type": "Callable", "Name": "DqPhyPrecompute"}, + "Children": [ + {"Index": 0, "Name": "Connection", "Type": "TDqConnection"} + ] }, { "Name": "TDqSqlExternalFunction", @@ -227,5 +227,5 @@ {"Index": 2, "Name": "Settings", "Type": "TExprBase"} ] } - ] + ] } diff --git a/ydb/library/yql/dq/opt/dq_opt.cpp b/ydb/library/yql/dq/opt/dq_opt.cpp index a9afbdab311..f60f29dd460 100644 --- a/ydb/library/yql/dq/opt/dq_opt.cpp +++ b/ydb/library/yql/dq/opt/dq_opt.cpp @@ -178,20 +178,20 @@ TVector<TDqConnection> FindDqConnections(const TExprBase& node) { } bool IsDqPureExpr(const TExprBase& node, bool isPrecomputePure) { - auto filter = [](const TExprNode::TPtr& node) { - return !TMaybeNode<TDqPhyPrecompute>(node).IsValid(); - }; - - auto predicate = [](const TExprNode::TPtr& node) { - return TMaybeNode<TDqSource>(node).IsValid() || - TMaybeNode<TDqConnection>(node).IsValid(); - }; - + auto filter = [](const TExprNode::TPtr& node) { + return !TMaybeNode<TDqPhyPrecompute>(node).IsValid(); + }; + + auto predicate = [](const TExprNode::TPtr& node) { + return TMaybeNode<TDqSource>(node).IsValid() || + TMaybeNode<TDqConnection>(node).IsValid(); + }; + if (isPrecomputePure) { - return !FindNode(node.Ptr(), filter, predicate); - } - - return !FindNode(node.Ptr(), predicate); + return !FindNode(node.Ptr(), filter, predicate); + } + + return !FindNode(node.Ptr(), predicate); } bool IsDqDependsOnStage(const TExprBase& node, const TDqStageBase& stage) { diff --git a/ydb/library/yql/dq/opt/dq_opt_join.cpp b/ydb/library/yql/dq/opt/dq_opt_join.cpp index b107d51d251..4097b49e24a 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join.cpp @@ -45,17 +45,17 @@ void CollectJoinColumns(const TExprBase& joinSettings, THashMap<TStringBuf, TVec } } -TExprBase BuildSkipNullKeys(TExprContext& ctx, TPositionHandle pos, - const TExprBase& input, const TVector<TCoAtom>& keys) -{ - return Build<TCoSkipNullMembers>(ctx, pos) - .Input(input) - .Members() - .Add(keys) - .Build() - .Done(); -}; - +TExprBase BuildSkipNullKeys(TExprContext& ctx, TPositionHandle pos, + const TExprBase& input, const TVector<TCoAtom>& keys) +{ + return Build<TCoSkipNullMembers>(ctx, pos) + .Input(input) + .Members() + .Add(keys) + .Build() + .Done(); +}; + TMaybe<TJoinInputDesc> BuildDqJoin(const TCoEquiJoinTuple& joinTuple, const THashMap<TStringBuf, TJoinInputDesc>& inputs, TExprContext& ctx) { @@ -186,7 +186,7 @@ TStringBuf RotateRightJoinType(TStringBuf joinType) { std::pair<TVector<TCoAtom>, TVector<TCoAtom>> GetJoinKeys(const TDqJoin& join, TExprContext& ctx) { TVector<TCoAtom> leftJoinKeys; TVector<TCoAtom> rightJoinKeys; - + auto size = join.JoinKeys().Size(); leftJoinKeys.reserve(size); rightJoinKeys.reserve(size); @@ -207,43 +207,43 @@ std::pair<TVector<TCoAtom>, TVector<TCoAtom>> GetJoinKeys(const TDqJoin& join, T : FullColumnName(rightLabel, keyTuple.RightColumn().Value())) .Done(); - leftJoinKeys.emplace_back(std::move(leftKey)); - rightJoinKeys.emplace_back(std::move(rightKey)); + leftJoinKeys.emplace_back(std::move(leftKey)); + rightJoinKeys.emplace_back(std::move(rightKey)); } - return std::make_pair(std::move(leftJoinKeys), std::move(rightJoinKeys)); -} - -TDqPhyMapJoin DqMakePhyMapJoin(const TDqJoin& join, const TExprBase& leftInput, const TExprBase& rightInput, - TExprContext& ctx) -{ + return std::make_pair(std::move(leftJoinKeys), std::move(rightJoinKeys)); +} + +TDqPhyMapJoin DqMakePhyMapJoin(const TDqJoin& join, const TExprBase& leftInput, const TExprBase& rightInput, + TExprContext& ctx) +{ static const std::set<std::string_view> supportedTypes = {"Inner"sv, "Left"sv, "LeftOnly"sv, "LeftSemi"sv}; - auto joinType = join.JoinType().Value(); - bool supportedJoin = supportedTypes.contains(joinType); + auto joinType = join.JoinType().Value(); + bool supportedJoin = supportedTypes.contains(joinType); YQL_ENSURE(supportedJoin, "" << joinType); - - auto [leftJoinKeys, rightJoinKeys] = GetJoinKeys(join, ctx); - + + auto [leftJoinKeys, rightJoinKeys] = GetJoinKeys(join, ctx); + TVector<TCoAtom> leftFilterKeys; TVector<TCoAtom> rightFilterKeys; if (joinType == "Inner"sv || joinType == "LeftSemi"sv) { - for (const auto& key : leftJoinKeys) { + for (const auto& key : leftJoinKeys) { leftFilterKeys.push_back(key); } } - for (const auto& key : rightJoinKeys) { - rightFilterKeys.push_back(key); + for (const auto& key : rightJoinKeys) { + rightFilterKeys.push_back(key); } - auto leftFilteredInput = BuildSkipNullKeys(ctx, join.Pos(), leftInput, leftFilterKeys); - auto rightFilteredInput = BuildSkipNullKeys(ctx, join.Pos(), rightInput, rightFilterKeys); - + auto leftFilteredInput = BuildSkipNullKeys(ctx, join.Pos(), leftInput, leftFilterKeys); + auto rightFilteredInput = BuildSkipNullKeys(ctx, join.Pos(), rightInput, rightFilterKeys); + return Build<TDqPhyMapJoin>(ctx, join.Pos()) - .LeftInput(leftFilteredInput) + .LeftInput(leftFilteredInput) .LeftLabel(join.LeftLabel()) - .RightInput(rightFilteredInput) + .RightInput(rightFilteredInput) .RightLabel(join.RightLabel()) .JoinType(join.JoinType()) .JoinKeys(join.JoinKeys()) @@ -482,8 +482,8 @@ TExprBase DqBuildPhyJoin(const TDqJoin& join, bool pushLeftStage, TExprContext& }; auto joinType = join.JoinType().Value(); - - if (!supportedTypes.contains(joinType)) { + + if (!supportedTypes.contains(joinType)) { return join; } @@ -664,48 +664,48 @@ TExprBase DqBuildPhyJoin(const TDqJoin& join, bool pushLeftStage, TExprContext& return newConnection.Cast(); } -TExprBase DqBuildJoinDict(const TDqJoin& join, TExprContext& ctx) { - auto joinType = join.JoinType().Value(); - +TExprBase DqBuildJoinDict(const TDqJoin& join, TExprContext& ctx) { + auto joinType = join.JoinType().Value(); + if (joinType != "Full"sv && joinType != "Exclusion"sv) { - return join; - } - + return join; + } + auto buildShuffle = [&ctx, &join](const TExprBase& input, const TVector<TCoAtom>& keys) { - auto stage = Build<TDqStage>(ctx, join.Pos()) - .Inputs() - .Add(input) - .Build() - .Program() - .Args({"stream"}) - .Body("stream") - .Build() - .Settings(TDqStageSettings().BuildNode(ctx, join.Pos())) - .Done(); - + auto stage = Build<TDqStage>(ctx, join.Pos()) + .Inputs() + .Add(input) + .Build() + .Program() + .Args({"stream"}) + .Body("stream") + .Build() + .Settings(TDqStageSettings().BuildNode(ctx, join.Pos())) + .Done(); + return Build<TDqCnHashShuffle>(ctx, join.Pos()) - .Output() - .Stage(stage) - .Index().Build("0") - .Build() - .KeyColumns() - .Add(keys) - .Build() - .Done(); - }; - + .Output() + .Stage(stage) + .Index().Build("0") + .Build() + .KeyColumns() + .Add(keys) + .Build() + .Done(); + }; + bool leftIsUnionAll = join.LeftInput().Maybe<TDqCnUnionAll>().IsValid(); bool rightIsUnionAll = join.RightInput().Maybe<TDqCnUnionAll>().IsValid(); - + TMaybeNode<TDqStage> joinStage; - + // join streams if (leftIsUnionAll && rightIsUnionAll) { auto leftCn = join.LeftInput().Cast<TDqCnUnionAll>(); auto rightCn = join.RightInput().Cast<TDqCnUnionAll>(); - + auto [leftJoinKeys, rightJoinKeys] = GetJoinKeys(join, ctx); - + auto rightShuffle = buildShuffle(rightCn, rightJoinKeys); auto leftShuffle = buildShuffle(leftCn, leftJoinKeys); @@ -812,6 +812,6 @@ TExprBase DqBuildJoinDict(const TDqJoin& join, TExprContext& ctx) { } return join; -} +} } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_log.cpp b/ydb/library/yql/dq/opt/dq_opt_log.cpp index 9b7830a201b..fed0f09f0f0 100644 --- a/ydb/library/yql/dq/opt/dq_opt_log.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_log.cpp @@ -186,9 +186,9 @@ NNodes::TExprBase DqMergeQueriesWithSinks(NNodes::TExprBase dqQueryNode, TExprCo return dqQueryNode; } -NNodes::TMaybeNode<NNodes::TExprBase> DqUnorderedInStage(NNodes::TExprBase node, - const std::function<bool(const TExprNode*)>& stopTraverse, TExprContext& ctx, TTypeAnnotationContext* typeCtx) -{ +NNodes::TMaybeNode<NNodes::TExprBase> DqUnorderedInStage(NNodes::TExprBase node, + const std::function<bool(const TExprNode*)>& stopTraverse, TExprContext& ctx, TTypeAnnotationContext* typeCtx) +{ auto stage = node.Cast<TDqStageBase>(); TExprNode::TPtr newProgram; diff --git a/ydb/library/yql/dq/opt/dq_opt_log.h b/ydb/library/yql/dq/opt/dq_opt_log.h index b50fa31bc7b..8e9ed97b063 100644 --- a/ydb/library/yql/dq/opt/dq_opt_log.h +++ b/ydb/library/yql/dq/opt/dq_opt_log.h @@ -26,7 +26,7 @@ NNodes::TExprBase DqMergeQueriesWithSinks(NNodes::TExprBase dqQueryNode, TExprCo NNodes::TExprBase DqFlatMapOverExtend(NNodes::TExprBase node, TExprContext& ctx); -NNodes::TMaybeNode<NNodes::TExprBase> DqUnorderedInStage(NNodes::TExprBase node, - const std::function<bool(const TExprNode*)>& stopTraverse, TExprContext& ctx, TTypeAnnotationContext* typeCtx); +NNodes::TMaybeNode<NNodes::TExprBase> DqUnorderedInStage(NNodes::TExprBase node, + const std::function<bool(const TExprNode*)>& stopTraverse, TExprContext& ctx, TTypeAnnotationContext* typeCtx); } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_peephole.cpp b/ydb/library/yql/dq/opt/dq_opt_peephole.cpp index aa97c21c00d..87ac170eeac 100644 --- a/ydb/library/yql/dq/opt/dq_opt_peephole.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_peephole.cpp @@ -12,60 +12,60 @@ namespace NYql::NDq { using namespace NYql::NNodes; -namespace { - +namespace { + inline std::string_view GetTableLabel(const TExprBase& node) { static const std::string_view empty; - - if (node.Maybe<TCoAtom>()) { - return node.Cast<TCoAtom>().Value(); - } - - return empty; -} - + + if (node.Maybe<TCoAtom>()) { + return node.Cast<TCoAtom>().Value(); + } + + return empty; +} + inline TString GetColumnName(std::string_view label, const TItemExprType *key) { if (!label.empty()) { - return FullColumnName(label, key->GetName()); - } - - return ToString(key->GetName()); -} - -std::pair<TExprNode::TListType, TExprNode::TListType> JoinKeysToAtoms(TExprContext& ctx, const TDqJoinBase& join, + return FullColumnName(label, key->GetName()); + } + + return ToString(key->GetName()); +} + +std::pair<TExprNode::TListType, TExprNode::TListType> JoinKeysToAtoms(TExprContext& ctx, const TDqJoinBase& join, std::string_view leftTableLabel, std::string_view rightTableLabel) -{ - TExprNode::TListType leftNodes; - TExprNode::TListType rightNodes; - - for (const auto& joinOn : join.JoinKeys()) { - TExprNode::TPtr leftValue, rightValue; - - if (leftTableLabel.empty()) { - leftValue = ctx.NewAtom( - join.Pos(), - FullColumnName(joinOn.LeftLabel().Value(), joinOn.LeftColumn().Value()) - ); - } else { - leftValue = joinOn.LeftColumn().Ptr(); - } - - if (rightTableLabel.empty()) { - rightValue = ctx.NewAtom( - join.Pos(), - FullColumnName(joinOn.RightLabel().Value(), joinOn.RightColumn().Value()) - ); - } else { - rightValue = joinOn.RightColumn().Ptr(); - } - - leftNodes.emplace_back(leftValue); - rightNodes.emplace_back(rightValue); - } - +{ + TExprNode::TListType leftNodes; + TExprNode::TListType rightNodes; + + for (const auto& joinOn : join.JoinKeys()) { + TExprNode::TPtr leftValue, rightValue; + + if (leftTableLabel.empty()) { + leftValue = ctx.NewAtom( + join.Pos(), + FullColumnName(joinOn.LeftLabel().Value(), joinOn.LeftColumn().Value()) + ); + } else { + leftValue = joinOn.LeftColumn().Ptr(); + } + + if (rightTableLabel.empty()) { + rightValue = ctx.NewAtom( + join.Pos(), + FullColumnName(joinOn.RightLabel().Value(), joinOn.RightColumn().Value()) + ); + } else { + rightValue = joinOn.RightColumn().Ptr(); + } + + leftNodes.emplace_back(leftValue); + rightNodes.emplace_back(rightValue); + } + return {std::move(leftNodes), std::move(rightNodes)}; -} - +} + TExprNode::TPtr BuildDictKeySelector(TExprContext& ctx, TPositionHandle pos, const TExprNode::TListType& keyAtoms, const TTypeAnnotationNode::TListType& keyDryTypes, bool optional) { @@ -120,8 +120,8 @@ TExprNode::TPtr BuildDictKeySelector(TExprContext& ctx, TPositionHandle pos, con .Done().Ptr(); } -} // anonymous namespace end - +} // anonymous namespace end + /** * Rewrites a `KqpMapJoin` to the `MapJoinCore`. * @@ -141,7 +141,7 @@ TExprBase DqPeepholeRewriteMapJoin(const TExprBase& node, TExprContext& ctx) { const auto leftTableLabel = GetTableLabel(mapJoin.LeftLabel()); const auto rightTableLabel = GetTableLabel(mapJoin.RightLabel()); - auto [leftKeyColumnNodes, rightKeyColumnNodes] = JoinKeysToAtoms(ctx, mapJoin, leftTableLabel, rightTableLabel); + auto [leftKeyColumnNodes, rightKeyColumnNodes] = JoinKeysToAtoms(ctx, mapJoin, leftTableLabel, rightTableLabel); const auto keyWidth = leftKeyColumnNodes.size(); const auto makeRenames = [&ctx, pos](TStringBuf label, const TStructExprType& type) { @@ -244,8 +244,8 @@ TExprBase DqPeepholeRewriteCrossJoin(const TExprBase& node, TExprContext& ctx) { } auto crossJoin = node.Cast<TDqPhyCrossJoin>(); - auto leftTableLabel = GetTableLabel(crossJoin.LeftLabel()); - auto rightTableLabel = GetTableLabel(crossJoin.RightLabel()); + auto leftTableLabel = GetTableLabel(crossJoin.LeftLabel()); + auto rightTableLabel = GetTableLabel(crossJoin.RightLabel()); TCoArgument leftArg{ctx.NewArgument(crossJoin.Pos(), "_kqp_left")}; TCoArgument rightArg{ctx.NewArgument(crossJoin.Pos(), "_kqp_right")}; @@ -253,7 +253,7 @@ TExprBase DqPeepholeRewriteCrossJoin(const TExprBase& node, TExprContext& ctx) { TExprNodeList keys; auto collectKeys = [&ctx, &keys](const TExprBase& input, TStringBuf label, const TCoArgument& arg) { for (auto key : GetSeqItemType(input.Ref().GetTypeAnn())->Cast<TStructExprType>()->GetItems()) { - auto fqColumnName = GetColumnName(label, key); + auto fqColumnName = GetColumnName(label, key); keys.emplace_back( Build<TCoNameValueTuple>(ctx, input.Pos()) .Name().Build(fqColumnName) @@ -319,77 +319,77 @@ TExprBase DqPeepholeRewriteCrossJoin(const TExprBase& node, TExprContext& ctx) { .Done(); } -namespace { - +namespace { + TExprNode::TPtr UnpackJoinedData(const TStructExprType* leftRowType, const TStructExprType* rightRowType, std::string_view leftLabel, std::string_view rightLabel, TPositionHandle pos, TExprContext& ctx) -{ +{ auto arg = Build<TCoArgument>(ctx, pos) - .Name("packedItem") - .Done(); - + .Name("packedItem") + .Done(); + const auto& leftScheme = leftRowType->GetItems(); const auto& rightScheme = rightRowType->GetItems(); - - TExprNode::TListType outValueItems; + + TExprNode::TListType outValueItems; outValueItems.reserve(leftScheme.size() + rightScheme.size()); - - for (int tableIndex = 0; tableIndex < 2; tableIndex++) { + + for (int tableIndex = 0; tableIndex < 2; tableIndex++) { const auto& scheme = tableIndex ? rightScheme : leftScheme; const auto label = tableIndex ? rightLabel : leftLabel; - - for (const auto& item : scheme) { - auto nameAtom = ctx.NewAtom(pos, item->GetName()); - - auto pair = ctx.Builder(pos) - .List() - .Atom(0, GetColumnName(label, item)) - .Callable(1, "Member") - .Callable(0, "Nth") - .Add(0, arg.Ptr()) - .Atom(1, ToString(tableIndex), TNodeFlags::Default) - .Seal() - .Atom(1, item->GetName()) - .Seal() - .Seal() - .Build(); - - outValueItems.push_back(pair); - } - } - - return Build<TCoLambda>(ctx, pos) - .Args({arg}) - .Body<TCoAsStruct>() - .Add(outValueItems) - .Build() - .Done().Ptr(); -} - -} //anonymous namespace end - -NNodes::TExprBase DqPeepholeRewriteJoinDict(const NNodes::TExprBase& node, TExprContext& ctx) { - if (!node.Maybe<TDqPhyJoinDict>()) { - return node; - } - + + for (const auto& item : scheme) { + auto nameAtom = ctx.NewAtom(pos, item->GetName()); + + auto pair = ctx.Builder(pos) + .List() + .Atom(0, GetColumnName(label, item)) + .Callable(1, "Member") + .Callable(0, "Nth") + .Add(0, arg.Ptr()) + .Atom(1, ToString(tableIndex), TNodeFlags::Default) + .Seal() + .Atom(1, item->GetName()) + .Seal() + .Seal() + .Build(); + + outValueItems.push_back(pair); + } + } + + return Build<TCoLambda>(ctx, pos) + .Args({arg}) + .Body<TCoAsStruct>() + .Add(outValueItems) + .Build() + .Done().Ptr(); +} + +} //anonymous namespace end + +NNodes::TExprBase DqPeepholeRewriteJoinDict(const NNodes::TExprBase& node, TExprContext& ctx) { + if (!node.Maybe<TDqPhyJoinDict>()) { + return node; + } + const auto joinDict = node.Cast<TDqPhyJoinDict>(); const auto joinKind = joinDict.JoinType().Value(); - + YQL_ENSURE(joinKind != "Cross"sv); const auto leftTableLabel = GetTableLabel(joinDict.LeftLabel()); const auto rightTableLabel = GetTableLabel(joinDict.RightLabel()); auto [leftKeys, rightKeys] = JoinKeysToAtoms(ctx, joinDict, leftTableLabel, rightTableLabel); - + YQL_CLOG(TRACE, CoreDq) << "[DqPeepholeRewriteJoinDict] join types" << ", left: " << *joinDict.LeftInput().Ref().GetTypeAnn() << ", right: " << *joinDict.RightInput().Ref().GetTypeAnn(); - + const auto* leftRowType = GetSeqItemType(joinDict.LeftInput().Ref().GetTypeAnn())->Cast<TStructExprType>(); const auto* rightRowType = GetSeqItemType(joinDict.RightInput().Ref().GetTypeAnn())->Cast<TStructExprType>(); - + bool optKeyLeft = false, optKeyRight = false, badKey = false; TTypeAnnotationNode::TListType keyTypeItems; keyTypeItems.reserve(leftKeys.size()); @@ -429,52 +429,52 @@ NNodes::TExprBase DqPeepholeRewriteJoinDict(const NNodes::TExprBase& node, TExpr auto streamToDict = [&ctx](const TExprBase& input, const TExprNode::TPtr& keySelector) { return Build<TCoSqueezeToDict>(ctx, input.Pos()) - .Stream(input) - .KeySelector(keySelector) - .PayloadSelector() + .Stream(input) + .KeySelector(keySelector) + .PayloadSelector() .Args({"item"}) .Body("item") - .Build() - .Settings() + .Build() + .Settings() .Add<TCoAtom>().Build("Hashed") .Add<TCoAtom>().Build("Many") .Add<TCoAtom>().Build("Compact") - .Build() - .Done(); - }; - - auto leftDict = streamToDict(joinDict.LeftInput(), leftKeySelector); - auto rightDict = streamToDict(joinDict.RightInput(), rightKeySelector); - - auto join = Build<TCoFlatMap>(ctx, joinDict.Pos()) + .Build() + .Done(); + }; + + auto leftDict = streamToDict(joinDict.LeftInput(), leftKeySelector); + auto rightDict = streamToDict(joinDict.RightInput(), rightKeySelector); + + auto join = Build<TCoFlatMap>(ctx, joinDict.Pos()) .Input(leftDict) // only 1 element with dict - .Lambda() + .Lambda() .Args({"left"}) - .Body<TCoFlatMap>() + .Body<TCoFlatMap>() .Input(rightDict) // only 1 element with dict - .Lambda() + .Lambda() .Args({"right"}) - .Body<TCoJoinDict>() + .Body<TCoJoinDict>() .LeftInput("left") .RightInput("right") - .JoinKind(joinDict.JoinType()) - .Build() - .Build() - .Build() - .Build() - .Done(); - - // Join return list of tuple of structs. I.e. if you have tables t1 and t2 with values t1.a, t1.b and t2.c, t2.d, - // you will receive List<Tuple<Struct<t1.a, t1.b>, Struct<t2.c, t2.d>>> and this data should be unpacked to - // List<Struct<t1.a, t1.b, t2.c, t2.d>> + .JoinKind(joinDict.JoinType()) + .Build() + .Build() + .Build() + .Build() + .Done(); + + // Join return list of tuple of structs. I.e. if you have tables t1 and t2 with values t1.a, t1.b and t2.c, t2.d, + // you will receive List<Tuple<Struct<t1.a, t1.b>, Struct<t2.c, t2.d>>> and this data should be unpacked to + // List<Struct<t1.a, t1.b, t2.c, t2.d>> auto unpackData = UnpackJoinedData(leftRowType, rightRowType, leftTableLabel, rightTableLabel, join.Pos(), ctx); - - return Build<TCoMap>(ctx, joinDict.Pos()) - .Input(join) - .Lambda(unpackData) - .Done(); -} - + + return Build<TCoMap>(ctx, joinDict.Pos()) + .Input(join) + .Lambda(unpackData) + .Done(); +} + NNodes::TExprBase DqPeepholeRewritePureJoin(const NNodes::TExprBase& node, TExprContext& ctx) { if (!node.Maybe<TDqJoin>()) { return node; diff --git a/ydb/library/yql/dq/opt/dq_opt_peephole.h b/ydb/library/yql/dq/opt/dq_opt_peephole.h index bf4637ff0a9..565ee4296d2 100644 --- a/ydb/library/yql/dq/opt/dq_opt_peephole.h +++ b/ydb/library/yql/dq/opt/dq_opt_peephole.h @@ -7,8 +7,8 @@ namespace NYql::NDq { -NNodes::TExprBase DqPeepholeRewriteCrossJoin(const NNodes::TExprBase& node, TExprContext& ctx); -NNodes::TExprBase DqPeepholeRewriteJoinDict(const NNodes::TExprBase& node, TExprContext& ctx); +NNodes::TExprBase DqPeepholeRewriteCrossJoin(const NNodes::TExprBase& node, TExprContext& ctx); +NNodes::TExprBase DqPeepholeRewriteJoinDict(const NNodes::TExprBase& node, TExprContext& ctx); NNodes::TExprBase DqPeepholeRewriteMapJoin(const NNodes::TExprBase& node, TExprContext& ctx); NNodes::TExprBase DqPeepholeRewriteReplicate(const NNodes::TExprBase& node, TExprContext& ctx); NNodes::TExprBase DqPeepholeRewritePureJoin(const NNodes::TExprBase& node, TExprContext& ctx); diff --git a/ydb/library/yql/dq/opt/dq_opt_phy.cpp b/ydb/library/yql/dq/opt/dq_opt_phy.cpp index 10497150534..cbe871059d7 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_phy.cpp @@ -249,10 +249,10 @@ TMaybeNode<TDqStage> DqPushLambdaToStage(const TDqStage& stage, const TCoAtom& o // YQL_CLOG(TRACE, CoreDq) << "-- newProgram: " << newProgram->Dump(); } - TNodeOnNodeOwnedMap inputArgReplaces; + TNodeOnNodeOwnedMap inputArgReplaces; TVector<TCoArgument> newArgs = PrepareArgumentsReplacement(TCoLambda(newProgram).Args(), lambdaInputs, ctx, inputArgReplaces); TVector<TExprBase> inputNodes; - + // if lambda contains precomputes -> move them to the stage inputs { TNodeOnNodeOwnedMap precomputesInsideLambda; @@ -652,50 +652,50 @@ TExprBase DqBuildAggregationResultStage(TExprBase node, TExprContext& ctx, IOpti TExprNode::TPtr connection; bool hasDirectConnection = false; bool dependsOnManyConnections = false; - bool valueConnection = false; - + bool valueConnection = false; + VisitExpr(asStruct.Ptr(), [&](const TExprNode::TPtr& exprPtr) { - // Do not try to visit any other nodes, it is useless. - if (hasDirectConnection || dependsOnManyConnections) { - return false; - } - + // Do not try to visit any other nodes, it is useless. + if (hasDirectConnection || dependsOnManyConnections) { + return false; + } + TExprBase expr{exprPtr}; - + if (expr.Maybe<TCoToOptional>().List().Maybe<TDqCnUnionAll>()) { - if (connection && (connection != expr.Cast<TCoToOptional>().List().Ptr())) { - dependsOnManyConnections = true; - return false; + if (connection && (connection != expr.Cast<TCoToOptional>().List().Ptr())) { + dependsOnManyConnections = true; + return false; + } + + connection = expr.Cast<TCoToOptional>().List().Ptr(); + return false; + } + + if (expr.Maybe<TDqPhyPrecompute>().IsValid()) { + auto precompute = expr.Cast<TDqPhyPrecompute>(); + auto maybeConnection = precompute.Connection().Maybe<TDqCnValue>(); + + // Here we should catch only TDqPhyPrecompute(DqCnValue) + if (!maybeConnection.IsValid()) { + return true; + } + + if (connection && (connection != maybeConnection.Cast().Ptr())) { + dependsOnManyConnections = true; + return false; } - - connection = expr.Cast<TCoToOptional>().List().Ptr(); + + connection = precompute.Ptr(); + valueConnection = true; return false; } - - if (expr.Maybe<TDqPhyPrecompute>().IsValid()) { - auto precompute = expr.Cast<TDqPhyPrecompute>(); - auto maybeConnection = precompute.Connection().Maybe<TDqCnValue>(); - - // Here we should catch only TDqPhyPrecompute(DqCnValue) - if (!maybeConnection.IsValid()) { - return true; - } - - if (connection && (connection != maybeConnection.Cast().Ptr())) { - dependsOnManyConnections = true; - return false; - } - - connection = precompute.Ptr(); - valueConnection = true; - return false; - } - + if (expr.Maybe<TDqConnection>()) { hasDirectConnection = true; return false; } - + return true; }); @@ -709,40 +709,40 @@ TExprBase DqBuildAggregationResultStage(TExprBase node, TExprContext& ctx, IOpti const auto pos = listItem.Pos(); auto newArg = ctx.NewArgument(pos, "result"); - auto lambda = ctx.NewLambda(pos, - ctx.NewArguments(pos, {newArg}), - ctx.ReplaceNode(asStruct.Ptr(), *connection, std::move(newArg)) - ); - auto programArg = TCoArgument(ctx.NewArgument(pos, "stage_lambda_arg")); - TExprNode::TPtr mapInput; - - if (valueConnection) { - // DqCnValue send only one element, need to convert it to stream - mapInput = Build<TCoToStream>(ctx, pos) - .Input<TCoAsList>() - .Add(programArg) - .Build() - .Done().Ptr(); - } else { - // Input came from UnionAll, thus need to gather all elements - mapInput = Build<TCoCondense>(ctx, pos) - .Input(programArg) - .State<TCoList>() - .ListType(ExpandType(pos, *connection->GetTypeAnn(), ctx)) - .Build() - .SwitchHandler() - .Args({"item", "stub"}) - .Body(MakeBool<false>(pos, ctx)) - .Build() - .UpdateHandler() - .Args({"item", "stub"}) - .Body<TCoAsList>() - .Add("item") - .Build() - .Build() - .Done().Ptr(); - } - + auto lambda = ctx.NewLambda(pos, + ctx.NewArguments(pos, {newArg}), + ctx.ReplaceNode(asStruct.Ptr(), *connection, std::move(newArg)) + ); + auto programArg = TCoArgument(ctx.NewArgument(pos, "stage_lambda_arg")); + TExprNode::TPtr mapInput; + + if (valueConnection) { + // DqCnValue send only one element, need to convert it to stream + mapInput = Build<TCoToStream>(ctx, pos) + .Input<TCoAsList>() + .Add(programArg) + .Build() + .Done().Ptr(); + } else { + // Input came from UnionAll, thus need to gather all elements + mapInput = Build<TCoCondense>(ctx, pos) + .Input(programArg) + .State<TCoList>() + .ListType(ExpandType(pos, *connection->GetTypeAnn(), ctx)) + .Build() + .SwitchHandler() + .Args({"item", "stub"}) + .Body(MakeBool<false>(pos, ctx)) + .Build() + .UpdateHandler() + .Args({"item", "stub"}) + .Body<TCoAsList>() + .Add("item") + .Build() + .Build() + .Done().Ptr(); + } + auto resultConnection = Build<TDqCnUnionAll>(ctx, pos) .Output() .Stage<TDqStage>() @@ -750,9 +750,9 @@ TExprBase DqBuildAggregationResultStage(TExprBase node, TExprContext& ctx, IOpti .Add(std::move(connection)) .Build() .Program() - .Args(programArg) + .Args(programArg) .Body<TCoMap>() - .Input(mapInput) + .Input(mapInput) .Lambda(std::move(lambda)) .Build() .Build() @@ -1123,7 +1123,7 @@ TExprBase DqBuildTakeStage(TExprBase node, TExprContext& ctx, IOptimizationConte } auto result = dqUnion.Output().Stage().Program().Body(); - auto stage = dqUnion.Output().Stage(); + auto stage = dqUnion.Output().Stage(); auto lambda = Build<TCoLambda>(ctx, take.Pos()) .Args({"stream"}) @@ -1426,45 +1426,45 @@ TExprBase DqBuildExtendStage(TExprBase node, TExprContext& ctx) { .Done(); } -/* - * Precompute input value in a separate stage. - */ -TExprBase DqBuildPrecomputeStage(TExprBase node, TExprContext& ctx) { - if (!node.Maybe<TDqPrecompute>()) { - return node; - } - - auto input = node.Cast<TDqPrecompute>().Input(); - - TExprNode::TPtr connection; - bool value = false; - - if (input.Maybe<TDqCnUnionAll>()) { - connection = input.Ptr(); - } else if (input.Maybe<TDqCnValue>()) { - connection = input.Ptr(); - value = true; - } else if (IsDqPureExpr(input)) { +/* + * Precompute input value in a separate stage. + */ +TExprBase DqBuildPrecomputeStage(TExprBase node, TExprContext& ctx) { + if (!node.Maybe<TDqPrecompute>()) { + return node; + } + + auto input = node.Cast<TDqPrecompute>().Input(); + + TExprNode::TPtr connection; + bool value = false; + + if (input.Maybe<TDqCnUnionAll>()) { + connection = input.Ptr(); + } else if (input.Maybe<TDqCnValue>()) { + connection = input.Ptr(); + value = true; + } else if (IsDqPureExpr(input)) { if (input.Ref().GetTypeAnn()->GetKind() != ETypeAnnotationKind::List && input.Ref().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Data) { - return node; - } - - auto dataStage = Build<TDqStage>(ctx, node.Pos()) - .Inputs() - .Build() - .Program() - .Args({}) + return node; + } + + auto dataStage = Build<TDqStage>(ctx, node.Pos()) + .Inputs() + .Build() + .Program() + .Args({}) .Body<TCoToStream>() .Input<TCoJust>() .Input(input) .Build() .Build() - .Build() - .Settings().Build() - .Done(); - + .Build() + .Settings().Build() + .Done(); + connection = Build<TDqCnValue>(ctx, node.Pos()) .Output() .Stage(dataStage) @@ -1473,197 +1473,197 @@ TExprBase DqBuildPrecomputeStage(TExprBase node, TExprContext& ctx) { .Done().Ptr(); value = true; - } else { - return node; - } - - auto phyPrecompute = Build<TDqPhyPrecompute>(ctx, node.Pos()) - .Connection(connection) - .Done(); - - if (value) { - return phyPrecompute; - } - - auto precomputeStage = Build<TDqStage>(ctx, node.Pos()) - .Inputs() - .Add(phyPrecompute) - .Build() - .Program() + } else { + return node; + } + + auto phyPrecompute = Build<TDqPhyPrecompute>(ctx, node.Pos()) + .Connection(connection) + .Done(); + + if (value) { + return phyPrecompute; + } + + auto precomputeStage = Build<TDqStage>(ctx, node.Pos()) + .Inputs() + .Add(phyPrecompute) + .Build() + .Program() .Args({"zzz"}) .Body<TCoIterator>() .List("zzz") .Build() - .Build() - .Settings().Build() - .Done(); - - return Build<TDqCnUnionAll>(ctx, node.Pos()) - .Output() - .Stage(precomputeStage) - .Index().Build("0") - .Build() - .Done(); -} - + .Build() + .Settings().Build() + .Done(); + + return Build<TDqCnUnionAll>(ctx, node.Pos()) + .Output() + .Stage(precomputeStage) + .Index().Build("0") + .Build() + .Done(); +} + TExprBase DqBuildHasItems(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx) { - if (!node.Maybe<TCoHasItems>()) { - return node; - } - - auto hasItems = node.Cast<TCoHasItems>(); - - if (!hasItems.List().Maybe<TDqCnUnionAll>()) { - return node; - } - - auto unionAll = hasItems.List().Cast<TDqCnUnionAll>(); - - // Add LIMIT 1 via Take - auto takeProgram = Build<TCoLambda>(ctx, node.Pos()) - .Args({"take_arg"}) - // DqOutput expects stream as input, thus form stream with one element - .Body<TCoToStream>() - .Input<TCoTake>() - .Input({"take_arg"}) - .Count<TCoUint64>() - .Literal().Build("1") - .Build() - .Build() - .Build() - .Done(); - + if (!node.Maybe<TCoHasItems>()) { + return node; + } + + auto hasItems = node.Cast<TCoHasItems>(); + + if (!hasItems.List().Maybe<TDqCnUnionAll>()) { + return node; + } + + auto unionAll = hasItems.List().Cast<TDqCnUnionAll>(); + + // Add LIMIT 1 via Take + auto takeProgram = Build<TCoLambda>(ctx, node.Pos()) + .Args({"take_arg"}) + // DqOutput expects stream as input, thus form stream with one element + .Body<TCoToStream>() + .Input<TCoTake>() + .Input({"take_arg"}) + .Count<TCoUint64>() + .Literal().Build("1") + .Build() + .Build() + .Build() + .Done(); + auto newUnion = DqPushLambdaToStageUnionAll(unionAll, takeProgram, {}, ctx, optCtx); - - if (!newUnion.IsValid()) { - return node; - } - - // Build stage simulating HasItems via Condense - auto hasItemsProgram = Build<TCoLambda>(ctx, node.Pos()) - .Args({"has_items_arg"}) - .Body<TCoCondense>() - .Input({"has_items_arg"}) - .State<TCoBool>() - .Literal().Build("false") - .Build() - .SwitchHandler() - .Args({"item", "state"}) - .Body<TCoBool>() - .Literal().Build("false") - .Build() - .Build() - .UpdateHandler() - .Args({"item", "state"}) - .Body<TCoBool>() - .Literal().Build("true") - .Build() - .Build() - .Build() - .Done(); - - auto hasItemsStage = Build<TDqStage>(ctx, node.Pos()) - .Inputs() - .Add(newUnion.Cast()) - .Build() - .Program(hasItemsProgram) - .Settings().Build() - .Done(); - - auto precompute = Build<TDqPrecompute>(ctx, node.Pos()) - .Input<TDqCnValue>() - .Output<TDqOutput>() - .Stage(hasItemsStage) - .Index().Build("0") - .Build() - .Build() - .Done(); - - return precompute; -} - + + if (!newUnion.IsValid()) { + return node; + } + + // Build stage simulating HasItems via Condense + auto hasItemsProgram = Build<TCoLambda>(ctx, node.Pos()) + .Args({"has_items_arg"}) + .Body<TCoCondense>() + .Input({"has_items_arg"}) + .State<TCoBool>() + .Literal().Build("false") + .Build() + .SwitchHandler() + .Args({"item", "state"}) + .Body<TCoBool>() + .Literal().Build("false") + .Build() + .Build() + .UpdateHandler() + .Args({"item", "state"}) + .Body<TCoBool>() + .Literal().Build("true") + .Build() + .Build() + .Build() + .Done(); + + auto hasItemsStage = Build<TDqStage>(ctx, node.Pos()) + .Inputs() + .Add(newUnion.Cast()) + .Build() + .Program(hasItemsProgram) + .Settings().Build() + .Done(); + + auto precompute = Build<TDqPrecompute>(ctx, node.Pos()) + .Input<TDqCnValue>() + .Output<TDqOutput>() + .Stage(hasItemsStage) + .Index().Build("0") + .Build() + .Build() + .Done(); + + return precompute; +} + TExprBase DqBuildScalarPrecompute(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx) { - if (!node.Maybe<TCoToOptional>()) { - return node; - } - - auto toOptional = node.Cast<TCoToOptional>(); - - if (!toOptional.List().Maybe<TDqCnUnionAll>()) { - return node; - } - - auto unionAll = toOptional.List().Cast<TDqCnUnionAll>(); - - if (!unionAll.Output().Maybe<TDqOutput>()) { - return node; - } - - auto output = unionAll.Output().Cast<TDqOutput>(); - - if (!output.Stage().Maybe<TDqStage>()) { - return node; - } - - auto stage = output.Stage().Cast<TDqStage>(); - - YQL_ENSURE(stage.Program().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Stream || - stage.Program().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Flow); - - auto lambdaArg = Build<TCoArgument>(ctx, node.Pos()) - .Name("scp_np_arg") - .Done(); - - /* - * Need to build ToOptional(..) but this callable can't be pushed inside stage, thus simulate it - * via Condense over Take(X, 1) - */ - TExprNode::TPtr valueExtractor = Build<TCoCondense>(ctx, node.Pos()) - .Input<TCoTake>() - .Input(lambdaArg) - .Count<TCoUint64>() - .Literal().Build("1") - .Build() - .Build() - .State<TCoNothing>() - .OptionalType(ExpandType(node.Pos(), *toOptional.Ptr()->GetTypeAnn(), ctx)) - .Build() - .SwitchHandler() - .Args({"item", "state"}) - .Body<TCoBool>() - .Literal().Build("false") - .Build() - .Build() - .UpdateHandler() - .Args({"item", "state"}) - .Body<TCoJust>() - .Input("item") - .Build() - .Build() - .Done().Ptr(); - - auto newProgram = Build<TCoLambda>(ctx, node.Pos()) - .Args({lambdaArg}) - // DqOutput expects stream as input, thus form stream with one element - .Body<TCoToStream>() - .Input(valueExtractor) - .Build() - .Done(); - + if (!node.Maybe<TCoToOptional>()) { + return node; + } + + auto toOptional = node.Cast<TCoToOptional>(); + + if (!toOptional.List().Maybe<TDqCnUnionAll>()) { + return node; + } + + auto unionAll = toOptional.List().Cast<TDqCnUnionAll>(); + + if (!unionAll.Output().Maybe<TDqOutput>()) { + return node; + } + + auto output = unionAll.Output().Cast<TDqOutput>(); + + if (!output.Stage().Maybe<TDqStage>()) { + return node; + } + + auto stage = output.Stage().Cast<TDqStage>(); + + YQL_ENSURE(stage.Program().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Stream || + stage.Program().Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Flow); + + auto lambdaArg = Build<TCoArgument>(ctx, node.Pos()) + .Name("scp_np_arg") + .Done(); + + /* + * Need to build ToOptional(..) but this callable can't be pushed inside stage, thus simulate it + * via Condense over Take(X, 1) + */ + TExprNode::TPtr valueExtractor = Build<TCoCondense>(ctx, node.Pos()) + .Input<TCoTake>() + .Input(lambdaArg) + .Count<TCoUint64>() + .Literal().Build("1") + .Build() + .Build() + .State<TCoNothing>() + .OptionalType(ExpandType(node.Pos(), *toOptional.Ptr()->GetTypeAnn(), ctx)) + .Build() + .SwitchHandler() + .Args({"item", "state"}) + .Body<TCoBool>() + .Literal().Build("false") + .Build() + .Build() + .UpdateHandler() + .Args({"item", "state"}) + .Body<TCoJust>() + .Input("item") + .Build() + .Build() + .Done().Ptr(); + + auto newProgram = Build<TCoLambda>(ctx, node.Pos()) + .Args({lambdaArg}) + // DqOutput expects stream as input, thus form stream with one element + .Body<TCoToStream>() + .Input(valueExtractor) + .Build() + .Done(); + auto newUnion = DqPushLambdaToStageUnionAll(unionAll, newProgram, {}, ctx, optCtx); - - if (!newUnion.IsValid()) { - return node; - } - - // Change connection to DqCnValue in case optional returns one element and wrap to precompute - auto precompute = Build<TDqPrecompute>(ctx, node.Pos()) - .Input<TDqCnValue>() - .Output(newUnion.Cast().Output()) - .Build() - .Done(); - - return precompute; -} - + + if (!newUnion.IsValid()) { + return node; + } + + // Change connection to DqCnValue in case optional returns one element and wrap to precompute + auto precompute = Build<TDqPrecompute>(ctx, node.Pos()) + .Input<TDqCnValue>() + .Output(newUnion.Cast().Output()) + .Build() + .Done(); + + return precompute; +} + } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_phy.h b/ydb/library/yql/dq/opt/dq_opt_phy.h index 960362d78e3..70a9d70082b 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy.h +++ b/ydb/library/yql/dq/opt/dq_opt_phy.h @@ -67,11 +67,11 @@ NNodes::TExprBase DqRewriteRightJoinToLeft(const NNodes::TExprBase node, TExprCo NNodes::TExprBase DqPushJoinToStage(const NNodes::TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx, const TParentsMap& parentsMap, bool allowStageMultiUsage = true); -NNodes::TExprBase DqBuildPhyJoin(const NNodes::TDqJoin& join, bool pushLeftStage, TExprContext& ctx, +NNodes::TExprBase DqBuildPhyJoin(const NNodes::TDqJoin& join, bool pushLeftStage, TExprContext& ctx, IOptimizationContext& optCtx); -NNodes::TExprBase DqBuildJoinDict(const NNodes::TDqJoin& join, TExprContext& ctx); - +NNodes::TExprBase DqBuildJoinDict(const NNodes::TDqJoin& join, TExprContext& ctx); + TMaybe<std::pair<NNodes::TExprBase, NNodes::TDqConnection>> ExtractPureExprStage(TExprNode::TPtr input, TExprContext& ctx); @@ -79,12 +79,12 @@ NNodes::TExprBase DqBuildPureExprStage(NNodes::TExprBase node, TExprContext& ctx NNodes::TExprBase DqBuildExtendStage(NNodes::TExprBase node, TExprContext& ctx); -NNodes::TExprBase DqBuildPrecomputeStage(NNodes::TExprBase node, TExprContext& ctx); - -NYql::NNodes::TExprBase DqBuildHasItems(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, +NNodes::TExprBase DqBuildPrecomputeStage(NNodes::TExprBase node, TExprContext& ctx); + +NYql::NNodes::TExprBase DqBuildHasItems(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, NYql::IOptimizationContext& optCtx); - -NYql::NNodes::TExprBase DqBuildScalarPrecompute(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, + +NYql::NNodes::TExprBase DqBuildScalarPrecompute(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, NYql::IOptimizationContext& optCtx); - + } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp b/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp index e34edbb8614..a1c8cf10c2c 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp @@ -1,4 +1,4 @@ -#include "dq_opt_phy_finalizing.h" +#include "dq_opt_phy_finalizing.h" #include <ydb/library/yql/dq/type_ann/dq_type_ann.h> #include <ydb/library/yql/utils/log/log.h> @@ -331,20 +331,20 @@ TExprNode::TPtr ReplicateDqConnection(TExprNode::TPtr&& input, const TMultiUsedC return ctx.ReplaceNode(std::move(result), dqStage.Ref(), newStage.Ptr()); } -template <typename TExpr> -TVector<TExpr> CollectNodes(const TExprNode::TPtr& input) { - TVector<TExpr> result; - - VisitExpr(input, [&result](const TExprNode::TPtr& node) { - if (TExpr::Match(node.Get())) { - result.emplace_back(TExpr(node)); - } - return true; - }); - - return result; -} - +template <typename TExpr> +TVector<TExpr> CollectNodes(const TExprNode::TPtr& input) { + TVector<TExpr> result; + + VisitExpr(input, [&result](const TExprNode::TPtr& node) { + if (TExpr::Match(node.Get())) { + result.emplace_back(TExpr(node)); + } + return true; + }); + + return result; +} + bool GatherConsumersImpl(const TExprNode& node, TNodeMap<TNodeMultiSet>& consumers, TNodeSet& visited) { if (!visited.emplace(&node).second) { return true; @@ -521,60 +521,60 @@ IGraphTransformer::TStatus DqReplicateStageMultiOutput(TExprNode::TPtr input, TE return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); } -IGraphTransformer::TStatus DqExtractPrecomputeToStageInput(const TExprNode::TPtr& input, TExprNode::TPtr& output, +IGraphTransformer::TStatus DqExtractPrecomputeToStageInput(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -{ - auto stages = CollectNodes<TDqStage>(input); - - TNodeOnNodeOwnedMap replaces; - for (auto& stage : stages) { - auto dqPrecomputes = CollectNodes<TDqPhyPrecompute>(stage.Program().Ptr()); - if (dqPrecomputes.empty()) { - continue; - } - +{ + auto stages = CollectNodes<TDqStage>(input); + + TNodeOnNodeOwnedMap replaces; + for (auto& stage : stages) { + auto dqPrecomputes = CollectNodes<TDqPhyPrecompute>(stage.Program().Ptr()); + if (dqPrecomputes.empty()) { + continue; + } + YQL_CLOG(TRACE, CoreDq) << "DqExtractPrecomputeToStageInput: stage: " << PrintDqStageOnly(stage, ctx) << ", DqPhyPrecompute: " << dqPrecomputes.size(); - - TVector<TExprNode::TPtr> inputs; - TVector<TExprNode::TPtr> args; - inputs.reserve(stage.Inputs().Size() + dqPrecomputes.size()); - args.reserve(stage.Inputs().Size() + dqPrecomputes.size()); - - auto exprApplier = Build<TExprApplier>(ctx, stage.Pos()) - .Apply(stage.Program()); - - for (ui64 i = 0; i < stage.Inputs().Size(); ++i) { - inputs.emplace_back(stage.Inputs().Item(i).Ptr()); - args.emplace_back(ctx.NewArgument(stage.Pos(), TStringBuilder() << "_kqp_arg_" << i)); - exprApplier.With(i, TCoArgument(args.back())); - } - - for (ui64 i = 0; i < dqPrecomputes.size(); ++i) { - inputs.emplace_back(dqPrecomputes[i].Ptr()); - args.emplace_back(ctx.NewArgument(stage.Pos(), TStringBuilder() << "_kqp_pc_arg_" << i)); - exprApplier.With(dqPrecomputes[i], TCoArgument(args.back())); - } - - auto newStage = Build<TDqStage>(ctx, stage.Pos()) - .Inputs() - .Add(inputs) - .Build() - .Program() - .Args(args) - .Body(exprApplier.Done()) - .Build() - .Settings().Build() - .Done(); - - replaces.emplace(stage.Raw(), newStage.Ptr()); - } - - if (replaces.empty()) { - return IGraphTransformer::TStatus::Ok; - } - - return RemapExpr(input, output, replaces, ctx, TOptimizeExprSettings(nullptr)); -} - + + TVector<TExprNode::TPtr> inputs; + TVector<TExprNode::TPtr> args; + inputs.reserve(stage.Inputs().Size() + dqPrecomputes.size()); + args.reserve(stage.Inputs().Size() + dqPrecomputes.size()); + + auto exprApplier = Build<TExprApplier>(ctx, stage.Pos()) + .Apply(stage.Program()); + + for (ui64 i = 0; i < stage.Inputs().Size(); ++i) { + inputs.emplace_back(stage.Inputs().Item(i).Ptr()); + args.emplace_back(ctx.NewArgument(stage.Pos(), TStringBuilder() << "_kqp_arg_" << i)); + exprApplier.With(i, TCoArgument(args.back())); + } + + for (ui64 i = 0; i < dqPrecomputes.size(); ++i) { + inputs.emplace_back(dqPrecomputes[i].Ptr()); + args.emplace_back(ctx.NewArgument(stage.Pos(), TStringBuilder() << "_kqp_pc_arg_" << i)); + exprApplier.With(dqPrecomputes[i], TCoArgument(args.back())); + } + + auto newStage = Build<TDqStage>(ctx, stage.Pos()) + .Inputs() + .Add(inputs) + .Build() + .Program() + .Args(args) + .Body(exprApplier.Done()) + .Build() + .Settings().Build() + .Done(); + + replaces.emplace(stage.Raw(), newStage.Ptr()); + } + + if (replaces.empty()) { + return IGraphTransformer::TStatus::Ok; + } + + return RemapExpr(input, output, replaces, ctx, TOptimizeExprSettings(nullptr)); +} + } // NKikimr::NKqp diff --git a/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.h b/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.h index f535cfe55f2..2f4ac58f369 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.h +++ b/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.h @@ -1,19 +1,19 @@ -#pragma once - -#include "dq_opt.h" - +#pragma once + +#include "dq_opt.h" + #include <ydb/library/yql/dq/common/dq_common.h> #include <ydb/library/yql/dq/expr_nodes/dq_expr_nodes.h> - + #include <ydb/library/yql/ast/yql_expr.h> #include <ydb/library/yql/core/yql_expr_optimize.h> - -namespace NYql::NDq { - -IGraphTransformer::TStatus DqReplicateStageMultiOutput(TExprNode::TPtr input, TExprNode::TPtr& output, + +namespace NYql::NDq { + +IGraphTransformer::TStatus DqReplicateStageMultiOutput(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx); - -IGraphTransformer::TStatus DqExtractPrecomputeToStageInput(const TExprNode::TPtr& input, TExprNode::TPtr& output, + +IGraphTransformer::TStatus DqExtractPrecomputeToStageInput(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx); - -} // namespace NYql::NDq + +} // namespace NYql::NDq diff --git a/ydb/library/yql/dq/runtime/dq_arrow_helpers.h b/ydb/library/yql/dq/runtime/dq_arrow_helpers.h index f3114179ff7..31c5248afcb 100644 --- a/ydb/library/yql/dq/runtime/dq_arrow_helpers.h +++ b/ydb/library/yql/dq/runtime/dq_arrow_helpers.h @@ -64,26 +64,26 @@ std::unique_ptr<arrow::ArrayBuilder> MakeArrowBuilder(const NKikimr::NMiniKQL::T * @param itemType type of each element to parse it and to construct corresponding arrow type * @return std::shared_ptr<arrow::Array> data in arrow format */ -std::shared_ptr<arrow::Array> MakeArray(NKikimr::NMiniKQL::TUnboxedValueVector& values, - const NKikimr::NMiniKQL::TType* itemType); +std::shared_ptr<arrow::Array> MakeArray(NKikimr::NMiniKQL::TUnboxedValueVector& values, + const NKikimr::NMiniKQL::TType* itemType); -NKikimr::NMiniKQL::TUnboxedValueVector ExtractUnboxedValues(const std::shared_ptr<arrow::Array>& array, - const NKikimr::NMiniKQL::TType* itemType, const NKikimr::NMiniKQL::THolderFactory& holderFactory); +NKikimr::NMiniKQL::TUnboxedValueVector ExtractUnboxedValues(const std::shared_ptr<arrow::Array>& array, + const NKikimr::NMiniKQL::TType* itemType, const NKikimr::NMiniKQL::THolderFactory& holderFactory); std::string SerializeArray(const std::shared_ptr<arrow::Array>& array); std::shared_ptr<arrow::Array> DeserializeArray(const std::string& blob, std::shared_ptr<arrow::DataType> type); -/** - * @brief Append UnboxedValue to arrow Array via arrow Builder - * - * @param value unboxed value to append - * @param builder arrow Builder with proper type used to append converted value array - * @param type type of element to parse it and to construct corresponding arrow type - * @return std::shared_ptr<arrow::Array> data in arrow format - */ -void AppendElement(NYql::NUdf::TUnboxedValue value, arrow::ArrayBuilder* builder, const NKikimr::NMiniKQL::TType* type); +/** + * @brief Append UnboxedValue to arrow Array via arrow Builder + * + * @param value unboxed value to append + * @param builder arrow Builder with proper type used to append converted value array + * @param type type of element to parse it and to construct corresponding arrow type + * @return std::shared_ptr<arrow::Array> data in arrow format + */ +void AppendElement(NYql::NUdf::TUnboxedValue value, arrow::ArrayBuilder* builder, const NKikimr::NMiniKQL::TType* type); + - } // NArrow } // NYql diff --git a/ydb/library/yql/dq/runtime/dq_tasks_runner.cpp b/ydb/library/yql/dq/runtime/dq_tasks_runner.cpp index d479c539534..10900352c3b 100644 --- a/ydb/library/yql/dq/runtime/dq_tasks_runner.cpp +++ b/ydb/library/yql/dq/runtime/dq_tasks_runner.cpp @@ -352,7 +352,7 @@ public: auto paramNode = ProgramParsed.CompGraph->GetEntryPoint(programInputsCount, /* require */ false); if (paramNode) { - // TODO: Remove serialized parameters that are used in OLAP program and not used in current program + // TODO: Remove serialized parameters that are used in OLAP program and not used in current program const auto& graphHolderFactory = ProgramParsed.CompGraph->GetHolderFactory(); NUdf::TUnboxedValue* structMembers; auto paramsStructValue = graphHolderFactory.CreateDirectArrayHolder(paramsCount, structMembers); @@ -377,11 +377,11 @@ public: paramNode->SetValue(ProgramParsed.CompGraph->GetContext(), std::move(paramsStructValue)); } else { - /* - * This situation is ok, when there are OLAP parameters only. There is no parameter node - * because there is no parameters in program. But there are parameters in paramsStruct, they are - * serialized somewhere before in executor. - */ + /* + * This situation is ok, when there are OLAP parameters only. There is no parameter node + * because there is no parameters in program. But there are parameters in paramsStruct, they are + * serialized somewhere before in executor. + */ } auto buildTime = TInstant::Now() - startTime; diff --git a/ydb/library/yql/dq/type_ann/dq_type_ann.cpp b/ydb/library/yql/dq/type_ann/dq_type_ann.cpp index fe5ad7c8343..fcbd3bb9ecf 100644 --- a/ydb/library/yql/dq/type_ann/dq_type_ann.cpp +++ b/ydb/library/yql/dq/type_ann/dq_type_ann.cpp @@ -372,30 +372,30 @@ const TStructExprType* GetDqJoinResultType(const TExprNode::TPtr& input, bool st rightTableLabel, join.JoinType(), join.JoinKeys(), ctx); } -TStatus AnnotateDqPrecompute(const TExprNode::TPtr& node, TExprContext& ctx) { - if (!EnsureArgsCount(*node, 1, ctx)) { - return TStatus::Error; - } - - node->SetTypeAnn(node->Child(TDqPrecompute::idx_Input)->GetTypeAnn()); - return TStatus::Ok; -} - -TStatus AnnotateDqPhyPrecompute(const TExprNode::TPtr& node, TExprContext& ctx) { - if (!EnsureArgsCount(*node, 1, ctx)) { - return TStatus::Error; - } - +TStatus AnnotateDqPrecompute(const TExprNode::TPtr& node, TExprContext& ctx) { + if (!EnsureArgsCount(*node, 1, ctx)) { + return TStatus::Error; + } + + node->SetTypeAnn(node->Child(TDqPrecompute::idx_Input)->GetTypeAnn()); + return TStatus::Ok; +} + +TStatus AnnotateDqPhyPrecompute(const TExprNode::TPtr& node, TExprContext& ctx) { + if (!EnsureArgsCount(*node, 1, ctx)) { + return TStatus::Error; + } + auto* cn = node->Child(TDqPhyPrecompute::idx_Connection); if (!TDqConnection::Match(cn)) { ctx.AddError(TIssue(ctx.GetPosition(cn->Pos()), TStringBuilder() << "Expected DqConnection, got " << cn->Content())); - return TStatus::Error; - } - + return TStatus::Error; + } + node->SetTypeAnn(cn->GetTypeAnn()); - return TStatus::Ok; -} - + return TStatus::Ok; +} + } // unnamed TStatus AnnotateDqStage(const TExprNode::TPtr& input, TExprContext& ctx) { @@ -683,7 +683,7 @@ TStatus AnnotateDqJoin(const TExprNode::TPtr& input, TExprContext& ctx) { return TStatus::Ok; } -TStatus AnnotateDqMapOrDictJoin(const TExprNode::TPtr& input, TExprContext& ctx) { +TStatus AnnotateDqMapOrDictJoin(const TExprNode::TPtr& input, TExprContext& ctx) { auto resultRowType = GetDqJoinResultType<true>(input, true, ctx); if (!resultRowType) { return TStatus::Error; @@ -829,13 +829,13 @@ THolder<IGraphTransformer> CreateDqTypeAnnotationTransformer(TTypeAnnotationCont } if (TDqPhyMapJoin::Match(input.Get())) { - return AnnotateDqMapOrDictJoin(input, ctx); + return AnnotateDqMapOrDictJoin(input, ctx); + } + + if (TDqPhyJoinDict::Match(input.Get())) { + return AnnotateDqMapOrDictJoin(input, ctx); } - if (TDqPhyJoinDict::Match(input.Get())) { - return AnnotateDqMapOrDictJoin(input, ctx); - } - if (TDqPhyCrossJoin::Match(input.Get())) { return AnnotateDqCrossJoin(input, ctx); } @@ -852,14 +852,14 @@ THolder<IGraphTransformer> CreateDqTypeAnnotationTransformer(TTypeAnnotationCont return AnnotateDqQuery(input, ctx); } - if (TDqPrecompute::Match(input.Get())) { - return AnnotateDqPrecompute(input, ctx); - } - - if (TDqPhyPrecompute::Match(input.Get())) { - return AnnotateDqPhyPrecompute(input, ctx); - } - + if (TDqPrecompute::Match(input.Get())) { + return AnnotateDqPrecompute(input, ctx); + } + + if (TDqPhyPrecompute::Match(input.Get())) { + return AnnotateDqPhyPrecompute(input, ctx); + } + return coreTransformer->Transform(input, output, ctx); }); } diff --git a/ydb/library/yql/dq/type_ann/dq_type_ann.h b/ydb/library/yql/dq/type_ann/dq_type_ann.h index 2b588c01381..03aefe701ff 100644 --- a/ydb/library/yql/dq/type_ann/dq_type_ann.h +++ b/ydb/library/yql/dq/type_ann/dq_type_ann.h @@ -17,7 +17,7 @@ IGraphTransformer::TStatus AnnotateDqReplicate(const TExprNode::TPtr& input, TEx IGraphTransformer::TStatus AnnotateDqConnection(const TExprNode::TPtr& input, TExprContext& ctx); IGraphTransformer::TStatus AnnotateDqCnMerge(const TExprNode::TPtr& input, TExprContext& ctx); IGraphTransformer::TStatus AnnotateDqJoin(const TExprNode::TPtr& input, TExprContext& ctx); -IGraphTransformer::TStatus AnnotateDqMapOrDictJoin(const TExprNode::TPtr& input, TExprContext& ctx); +IGraphTransformer::TStatus AnnotateDqMapOrDictJoin(const TExprNode::TPtr& input, TExprContext& ctx); IGraphTransformer::TStatus AnnotateDqCrossJoin(const TExprNode::TPtr& input, TExprContext& ctx); IGraphTransformer::TStatus AnnotateDqSource(const TExprNode::TPtr& input, TExprContext& ctx); IGraphTransformer::TStatus AnnotateDqSink(const TExprNode::TPtr& input, TExprContext& ctx); diff --git a/ydb/library/yql/minikql/defs.h b/ydb/library/yql/minikql/defs.h index 7af966eafa9..3e332363c6c 100644 --- a/ydb/library/yql/minikql/defs.h +++ b/ydb/library/yql/minikql/defs.h @@ -8,12 +8,12 @@ #define THROW ::NKikimr::TThrowable() , __LOCATION__ + -#define MKQL_ENSURE(condition, message) \ - do { \ - if (Y_UNLIKELY(!(condition))) { \ - (THROW yexception() << __FUNCTION__ << "(): requirement " \ - << #condition << " failed. " << message); \ - } \ +#define MKQL_ENSURE(condition, message) \ + do { \ + if (Y_UNLIKELY(!(condition))) { \ + (THROW yexception() << __FUNCTION__ << "(): requirement " \ + << #condition << " failed. " << message); \ + } \ } while (0) #define MKQL_ENSURE_WITH_LOC(location, condition, message) \ diff --git a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp index 59e81dbb0c4..dd1f162d784 100644 --- a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp +++ b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp @@ -2087,11 +2087,11 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return ctx.ProgramBuilder.JoinDict(dict1, true, dict2, true, joinKind); }); - AddCallable("JoinDict", [](const TExprNode& node, TMkqlBuildContext& ctx) { - const auto dict1 = MkqlBuildExpr(*node.Child(0), ctx); - const auto dict2 = MkqlBuildExpr(*node.Child(1), ctx); - const auto joinKind = GetJoinKind(node, node.Child(2)->Content()); - + AddCallable("JoinDict", [](const TExprNode& node, TMkqlBuildContext& ctx) { + const auto dict1 = MkqlBuildExpr(*node.Child(0), ctx); + const auto dict2 = MkqlBuildExpr(*node.Child(1), ctx); + const auto joinKind = GetJoinKind(node, node.Child(2)->Content()); + bool multi1 = true, multi2 = true; if (node.ChildrenSize() > 3) { node.Tail().ForEachChild([&](const TExprNode& flag){ @@ -2103,8 +2103,8 @@ TMkqlCommonCallableCompiler::TShared::TShared() { } return ctx.ProgramBuilder.JoinDict(dict1, multi1, dict2, multi2, joinKind); - }); - + }); + AddCallable({"FilePath", "FileContent", "FolderPath"}, [](const TExprNode& node, TMkqlBuildContext& ctx) { TCallableBuilder call(ctx.ProgramBuilder.GetTypeEnvironment(), node.Content(), ctx.ProgramBuilder.NewDataType(NUdf::TDataType<char*>::Id)); call.Add(ctx.ProgramBuilder.NewDataLiteral<NUdf::EDataSlot::String>(node.Head().Content())); diff --git a/ydb/library/yql/providers/dq/opt/dqs_opt.cpp b/ydb/library/yql/providers/dq/opt/dqs_opt.cpp index 8d78d82a370..b4bc07fbef1 100644 --- a/ydb/library/yql/providers/dq/opt/dqs_opt.cpp +++ b/ydb/library/yql/providers/dq/opt/dqs_opt.cpp @@ -106,8 +106,8 @@ namespace NYql::NDqs { return OptimizeExprEx(input, output, [](const TExprNode::TPtr& inputExpr, TExprContext& ctx, IOptimizationContext&) { TExprBase node{inputExpr}; - PERFORM_RULE(DqPeepholeRewriteCrossJoin, node, ctx); - PERFORM_RULE(DqPeepholeRewriteJoinDict, node, ctx); + PERFORM_RULE(DqPeepholeRewriteCrossJoin, node, ctx); + PERFORM_RULE(DqPeepholeRewriteJoinDict, node, ctx); PERFORM_RULE(DqPeepholeRewriteMapJoin, node, ctx); PERFORM_RULE(DqPeepholeRewritePureJoin, node, ctx); PERFORM_RULE(DqPeepholeRewriteReplicate, node, ctx); diff --git a/ydb/library/yql/providers/dq/opt/physical_optimize.cpp b/ydb/library/yql/providers/dq/opt/physical_optimize.cpp index a87d04db507..858e3da45a0 100644 --- a/ydb/library/yql/providers/dq/opt/physical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/physical_optimize.cpp @@ -44,10 +44,10 @@ public: AddHandler(0, &TCoLMap::Match, HNDL(PushLMapToStage<false>)); // (Apply (SqlExternalFunction ..) ..) to stage AddHandler(0, &TCoApply::Match, HNDL(BuildExtFunctionStage<false>)); -#if 0 - AddHandler(0, &TCoHasItems::Match, HNDL(BuildHasItems)); - AddHandler(0, &TCoToOptional::Match, HNDL(BuildScalarPrecompute)); -#endif +#if 0 + AddHandler(0, &TCoHasItems::Match, HNDL(BuildHasItems)); + AddHandler(0, &TCoToOptional::Match, HNDL(BuildScalarPrecompute)); +#endif AddHandler(1, &TCoSkipNullMembers::Match, HNDL(PushSkipNullMembersToStage<true>)); AddHandler(1, &TCoExtractMembers::Match, HNDL(PushExtractMembersToStage<true>)); @@ -296,12 +296,12 @@ protected: TMaybeNode<TExprBase> BuildHasItems(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx) { return DqBuildHasItems(node, ctx, optCtx); - } - + } + TMaybeNode<TExprBase> BuildScalarPrecompute(TExprBase node, TExprContext& ctx, IOptimizationContext& optCtx) { return DqBuildScalarPrecompute(node, ctx, optCtx); - } - + } + private: bool JoinPrerequisitesVerify(TDqJoin join, const TParentsMap* parentsMap, bool isGlobal) const { // KqpBuildJoin copy/paste diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_datasink_type_ann.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_datasink_type_ann.cpp index 240d7b94030..c8d9786a7a0 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_datasink_type_ann.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_datasink_type_ann.cpp @@ -28,9 +28,9 @@ public: AddHandler({TDqCnMerge::CallableName()}, Hndl(&NDq::AnnotateDqCnMerge)); AddHandler({TDqReplicate::CallableName()}, Hndl(&NDq::AnnotateDqReplicate)); AddHandler({TDqJoin::CallableName()}, Hndl(&NDq::AnnotateDqJoin)); - AddHandler({TDqPhyMapJoin::CallableName()}, Hndl(&NDq::AnnotateDqMapOrDictJoin)); + AddHandler({TDqPhyMapJoin::CallableName()}, Hndl(&NDq::AnnotateDqMapOrDictJoin)); AddHandler({TDqPhyCrossJoin::CallableName()}, Hndl(&NDq::AnnotateDqCrossJoin)); - AddHandler({TDqPhyJoinDict::CallableName()}, Hndl(&NDq::AnnotateDqMapOrDictJoin)); + AddHandler({TDqPhyJoinDict::CallableName()}, Hndl(&NDq::AnnotateDqMapOrDictJoin)); AddHandler({TDqSink::CallableName()}, Hndl(&NDq::AnnotateDqSink)); AddHandler({TDqWrite::CallableName()}, Hndl(&TDqsDataSinkTypeAnnotationTransformer::AnnotateDqWrite)); AddHandler({TDqQuery::CallableName()}, Hndl(&NDq::AnnotateDqQuery)); diff --git a/ydb/public/sdk/python/examples/reservations-bot-demo/ydb/ddl.yql b/ydb/public/sdk/python/examples/reservations-bot-demo/ydb/ddl.yql index 9c6b98b47aa..c944febbf6c 100644 --- a/ydb/public/sdk/python/examples/reservations-bot-demo/ydb/ddl.yql +++ b/ydb/public/sdk/python/examples/reservations-bot-demo/ydb/ddl.yql @@ -1,4 +1,4 @@ -CREATE TABLE tables +CREATE TABLE tables ( table_id Uint64, description Utf8, diff --git a/ydb/public/sdk/python/examples/time-series-serverless/README.md b/ydb/public/sdk/python/examples/time-series-serverless/README.md index 5955ea8099d..5f4ca7ced0f 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/README.md +++ b/ydb/public/sdk/python/examples/time-series-serverless/README.md @@ -1,63 +1,63 @@ -# Prepare your cloud environment -## Create serverless DB - -Go to [web console](https://console.cloud.yandex.ru/) and select Yandex Database, find your folder and create database via **Create database** button. Please select serverless option and name of your database. -Select your DB in the list of databases and note **Endpoint** and **Database** values, you will need them to connect to your DB from cloud function. - -## Create table for time series -Select **Navigation** in left pane and press **Create** in the top right corner, choose **Table** and fill table name, columns name and type. -You should select next values for this example. - -| Column name | Type | Key | -| ----------- | :-------: | :-----: | -| timestamp | Timestamp | Primary | -| value | Double | | - -Remember YDB endpoint, database and table name - -## Create Service account -To create service account you need to go to cloud console to **Service accounts** in left pane and create account with **editor** and **viewer** roles. -Remember account id, it will be used later to allow access to serverless database. - -# Create cloud function -Create cloud function from source code. You should archive source code directory to zip file and upload it into **Cloud functions** section. -Please do not forget to add requirements.txt to this archive. - -To upload function you may use CLI. You can refer to CLI documentation to understand -[how to install it](https://cloud.yandex.com/en/docs/cli/quickstart#install "CLI installation") and -[how to create profile](https://cloud.yandex.com/en-ru/docs/cli/operations/authentication/user "Get profile via CLI"). -Please note that you should select proper profile type in left pane to get right instructions. - -After successful installation of CLI and creating profile you should execute next command to create function: -```shell -yc serverless function create --name=time-series -``` - -Next you should upload code and create new version of function via following command: -```shell -yc sls fn version create --service-account-id=<service-account-id> --function-name=time-series --runtime python37 --entrypoint time_series.handler --memory 128m --execution-timeout 60s --source-path <path-to-archived-sources> --environment YDB_ENDPOINT=<db-endpoint>,YDB_DATABASE=<db-database>,YDB_TABLE=<db-table>,USE_METADATA_CREDENTIALS=1 -``` - -The environment variables passed to function -* YDB_ENDPOINT is the endpoint of your database -* YDB_DATABASE is the path to your database -* YDB_TABLE is the table name -* USE_METADATA_CREDENTIALS=1 means that your code automatically get [IAM token](https://cloud.yandex.com/en-ru/docs/iam/concepts/authorization/iam-token) for service account you specified - -After a while you receive message that function was created. Now you can test it in cloud console or in CLI. - -# Test cloud function -## Testing via CLI -You should invoke function via next command -```shell -yc serverless function invoke <function-id> -d '{"queryStringParameters": {"start": "1615000000000", "end": "1615000010000", "interval": "1", "mean": "12.3", "sigma": "5"}}' -``` - -## Testing via CURL -Before you can access your function via CURL you should make function public. You can do it in web console at -funtion overview page. - -Next you may call function with next command: -```shell -curl "https://functions.yandexcloud.net/<function-id>?start=1615000000000&end=1615000010000&interval=1&mean=12.3&sigma=5" -```
\ No newline at end of file +# Prepare your cloud environment +## Create serverless DB + +Go to [web console](https://console.cloud.yandex.ru/) and select Yandex Database, find your folder and create database via **Create database** button. Please select serverless option and name of your database. +Select your DB in the list of databases and note **Endpoint** and **Database** values, you will need them to connect to your DB from cloud function. + +## Create table for time series +Select **Navigation** in left pane and press **Create** in the top right corner, choose **Table** and fill table name, columns name and type. +You should select next values for this example. + +| Column name | Type | Key | +| ----------- | :-------: | :-----: | +| timestamp | Timestamp | Primary | +| value | Double | | + +Remember YDB endpoint, database and table name + +## Create Service account +To create service account you need to go to cloud console to **Service accounts** in left pane and create account with **editor** and **viewer** roles. +Remember account id, it will be used later to allow access to serverless database. + +# Create cloud function +Create cloud function from source code. You should archive source code directory to zip file and upload it into **Cloud functions** section. +Please do not forget to add requirements.txt to this archive. + +To upload function you may use CLI. You can refer to CLI documentation to understand +[how to install it](https://cloud.yandex.com/en/docs/cli/quickstart#install "CLI installation") and +[how to create profile](https://cloud.yandex.com/en-ru/docs/cli/operations/authentication/user "Get profile via CLI"). +Please note that you should select proper profile type in left pane to get right instructions. + +After successful installation of CLI and creating profile you should execute next command to create function: +```shell +yc serverless function create --name=time-series +``` + +Next you should upload code and create new version of function via following command: +```shell +yc sls fn version create --service-account-id=<service-account-id> --function-name=time-series --runtime python37 --entrypoint time_series.handler --memory 128m --execution-timeout 60s --source-path <path-to-archived-sources> --environment YDB_ENDPOINT=<db-endpoint>,YDB_DATABASE=<db-database>,YDB_TABLE=<db-table>,USE_METADATA_CREDENTIALS=1 +``` + +The environment variables passed to function +* YDB_ENDPOINT is the endpoint of your database +* YDB_DATABASE is the path to your database +* YDB_TABLE is the table name +* USE_METADATA_CREDENTIALS=1 means that your code automatically get [IAM token](https://cloud.yandex.com/en-ru/docs/iam/concepts/authorization/iam-token) for service account you specified + +After a while you receive message that function was created. Now you can test it in cloud console or in CLI. + +# Test cloud function +## Testing via CLI +You should invoke function via next command +```shell +yc serverless function invoke <function-id> -d '{"queryStringParameters": {"start": "1615000000000", "end": "1615000010000", "interval": "1", "mean": "12.3", "sigma": "5"}}' +``` + +## Testing via CURL +Before you can access your function via CURL you should make function public. You can do it in web console at +funtion overview page. + +Next you may call function with next command: +```shell +curl "https://functions.yandexcloud.net/<function-id>?start=1615000000000&end=1615000010000&interval=1&mean=12.3&sigma=5" +```
\ No newline at end of file diff --git a/ydb/public/sdk/python/examples/time-series-serverless/config.py b/ydb/public/sdk/python/examples/time-series-serverless/config.py index 60e902e7d4e..4fa10614528 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/config.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/config.py @@ -1,27 +1,27 @@ -import os - - -class Config(object): - def __init__(self): +import os + + +class Config(object): + def __init__(self): self._endpoint = os.getenv("YDB_ENDPOINT") self._database = os.getenv("YDB_DATABASE") self._path = os.getenv("YDB_PATH") - - @property - def endpoint(self): - return self._endpoint - - @property - def database(self): - return self._database - - @property - def path(self): - return self._path - - @property - def full_path(self): - return os.path.join(self.database, self._path) - - -ydb_configuration = Config() + + @property + def endpoint(self): + return self._endpoint + + @property + def database(self): + return self._database + + @property + def path(self): + return self._path + + @property + def full_path(self): + return os.path.join(self.database, self._path) + + +ydb_configuration = Config() diff --git a/ydb/public/sdk/python/examples/time-series-serverless/database.py b/ydb/public/sdk/python/examples/time-series-serverless/database.py index 630231473ad..df3e2f6acc2 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/database.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/database.py @@ -1,40 +1,40 @@ -import ydb -from typing import List - -from config import ydb_configuration -from exception import ConnectionFailure - - -class YDBClient: - def __init__(self): - self.config = ydb_configuration - self.driver = self.create_driver() - - def create_driver(self) -> ydb.Driver: - driver_config = ydb.DriverConfig( - self.config.endpoint, - self.config.database, - credentials=ydb.construct_credentials_from_environ(), - root_certificates=ydb.load_ydb_root_certificate(), - ) - - driver = ydb.Driver(driver_config) - - try: - driver.wait(timeout=5) - except Exception: - raise ConnectionFailure(driver.discovery_debug_details()) - - return driver - - @property - def table_client(self) -> ydb.TableClient: - return self.driver.table_client - - def bulk_upsert(self, rows: List, column_types: ydb.BulkUpsertColumns): - self.table_client.bulk_upsert(self.config.full_path, rows, column_types) - - -# Serverless functions can restore context, thus if we connect once we can use same client -# variable in next calls of function. -ydb_client = YDBClient() +import ydb +from typing import List + +from config import ydb_configuration +from exception import ConnectionFailure + + +class YDBClient: + def __init__(self): + self.config = ydb_configuration + self.driver = self.create_driver() + + def create_driver(self) -> ydb.Driver: + driver_config = ydb.DriverConfig( + self.config.endpoint, + self.config.database, + credentials=ydb.construct_credentials_from_environ(), + root_certificates=ydb.load_ydb_root_certificate(), + ) + + driver = ydb.Driver(driver_config) + + try: + driver.wait(timeout=5) + except Exception: + raise ConnectionFailure(driver.discovery_debug_details()) + + return driver + + @property + def table_client(self) -> ydb.TableClient: + return self.driver.table_client + + def bulk_upsert(self, rows: List, column_types: ydb.BulkUpsertColumns): + self.table_client.bulk_upsert(self.config.full_path, rows, column_types) + + +# Serverless functions can restore context, thus if we connect once we can use same client +# variable in next calls of function. +ydb_client = YDBClient() diff --git a/ydb/public/sdk/python/examples/time-series-serverless/entry.py b/ydb/public/sdk/python/examples/time-series-serverless/entry.py index 99ade9ed0c3..21d01caad80 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/entry.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/entry.py @@ -1,7 +1,7 @@ -from dataclasses import dataclass - - -@dataclass -class Entry: - timestamp: int - value: float +from dataclasses import dataclass + + +@dataclass +class Entry: + timestamp: int + value: float diff --git a/ydb/public/sdk/python/examples/time-series-serverless/exception.py b/ydb/public/sdk/python/examples/time-series-serverless/exception.py index 6e9e329cfc5..5631d554b9b 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/exception.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/exception.py @@ -1,13 +1,13 @@ -class CloudFunctionException(Exception): - error = "Generic exception" - - def __init__(self, reason: str): - self.reason = reason - - -class ConnectionFailure(CloudFunctionException): - error = "Connection failure" - - -class ValidationError(CloudFunctionException): - error = "Incoming parameters invalid" +class CloudFunctionException(Exception): + error = "Generic exception" + + def __init__(self, reason: str): + self.reason = reason + + +class ConnectionFailure(CloudFunctionException): + error = "Connection failure" + + +class ValidationError(CloudFunctionException): + error = "Incoming parameters invalid" diff --git a/ydb/public/sdk/python/examples/time-series-serverless/parameters.py b/ydb/public/sdk/python/examples/time-series-serverless/parameters.py index 3d7de53630f..193a78c25f7 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/parameters.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/parameters.py @@ -1,39 +1,39 @@ -from dataclasses import dataclass - -from exception import ValidationError - - -US_IN_MSEC = 1000 - - -@dataclass -class Parameters: - start_us: int - end_us: int - interval_us: int - mean: float - sigma: float - - @classmethod - def from_strings(cls, start: str, end: str, interval: str, mean: str, sigma: str): - try: - parameters = cls( - start_us=int(start) * US_IN_MSEC, - end_us=int(end) * US_IN_MSEC, - interval_us=int(interval) * US_IN_MSEC, - mean=float(mean), - sigma=float(sigma), - ) - except ValueError: - raise ValidationError - - parameters.validate() - - return parameters - - def validate(self): - if self.start_us >= self.end_us: - raise ValidationError - - if self.interval_us == 0: - raise ValidationError +from dataclasses import dataclass + +from exception import ValidationError + + +US_IN_MSEC = 1000 + + +@dataclass +class Parameters: + start_us: int + end_us: int + interval_us: int + mean: float + sigma: float + + @classmethod + def from_strings(cls, start: str, end: str, interval: str, mean: str, sigma: str): + try: + parameters = cls( + start_us=int(start) * US_IN_MSEC, + end_us=int(end) * US_IN_MSEC, + interval_us=int(interval) * US_IN_MSEC, + mean=float(mean), + sigma=float(sigma), + ) + except ValueError: + raise ValidationError + + parameters.validate() + + return parameters + + def validate(self): + if self.start_us >= self.end_us: + raise ValidationError + + if self.interval_us == 0: + raise ValidationError diff --git a/ydb/public/sdk/python/examples/time-series-serverless/requirements.txt b/ydb/public/sdk/python/examples/time-series-serverless/requirements.txt index 957cebe16c7..23c145a51fc 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/requirements.txt +++ b/ydb/public/sdk/python/examples/time-series-serverless/requirements.txt @@ -1,3 +1,3 @@ -iso8601==0.1.14 -yandexcloud==0.80.0 -ydb==0.0.50.843132462 +iso8601==0.1.14 +yandexcloud==0.80.0 +ydb==0.0.50.843132462 diff --git a/ydb/public/sdk/python/examples/time-series-serverless/response.py b/ydb/public/sdk/python/examples/time-series-serverless/response.py index c9c2047784b..0052adb1758 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/response.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/response.py @@ -1,31 +1,31 @@ -import json - - -class Response: - status = 200 - headers = { +import json + + +class Response: + status = 200 + headers = { "Content-type": "application/json", - } - body = {} - - def as_dict(self): - return dict( + } + body = {} + + def as_dict(self): + return dict( status=self.status, headers=self.headers, body=json.dumps(self.body) - ) - - -class Ok(Response): - pass - - -class ErrorResponse(Response): - def __init__(self, message: str): + ) + + +class Ok(Response): + pass + + +class ErrorResponse(Response): + def __init__(self, message: str): self.body["message"] = message - - -class Conflict(ErrorResponse): - status = 409 - - -class BadRequest(ErrorResponse): - status = 400 + + +class Conflict(ErrorResponse): + status = 409 + + +class BadRequest(ErrorResponse): + status = 400 diff --git a/ydb/public/sdk/python/examples/time-series-serverless/time_series.py b/ydb/public/sdk/python/examples/time-series-serverless/time_series.py index 013122740f6..b95ccbbe2f3 100644 --- a/ydb/public/sdk/python/examples/time-series-serverless/time_series.py +++ b/ydb/public/sdk/python/examples/time-series-serverless/time_series.py @@ -1,54 +1,54 @@ -from typing import Dict -import random - -import ydb - -from database import ydb_client -from entry import Entry -from exception import ConnectionFailure, ValidationError -from parameters import Parameters -from response import Response, Conflict, BadRequest, Ok - - -def generate_time_series(parameters: Parameters): - column_types = ydb.BulkUpsertColumns() +from typing import Dict +import random + +import ydb + +from database import ydb_client +from entry import Entry +from exception import ConnectionFailure, ValidationError +from parameters import Parameters +from response import Response, Conflict, BadRequest, Ok + + +def generate_time_series(parameters: Parameters): + column_types = ydb.BulkUpsertColumns() column_types.add_column("timestamp", ydb.PrimitiveType.Timestamp) column_types.add_column("value", ydb.PrimitiveType.Double) - - rows = [ - Entry(t, random.normalvariate(parameters.mean, parameters.sigma)) - for t in range(parameters.start_us, parameters.end_us, parameters.interval_us) - ] - - ydb_client.bulk_upsert(rows, column_types) - - -def do_handle(event: Dict, _) -> Response: + + rows = [ + Entry(t, random.normalvariate(parameters.mean, parameters.sigma)) + for t in range(parameters.start_us, parameters.end_us, parameters.interval_us) + ] + + ydb_client.bulk_upsert(rows, column_types) + + +def do_handle(event: Dict, _) -> Response: if "queryStringParameters" not in event: return BadRequest("Incorrect function call: non HTTP request") - - try: + + try: query_string = event["queryStringParameters"] mean = query_string["mean"] sigma = query_string["sigma"] start_ms = query_string["start"] end_ms = query_string["end"] interval_ms = query_string["interval"] - except KeyError: + except KeyError: return BadRequest("Incorrect function call: required parameters missing") - - try: - parameters = Parameters.from_strings(start_ms, end_ms, interval_ms, mean, sigma) - except ValidationError as e: + + try: + parameters = Parameters.from_strings(start_ms, end_ms, interval_ms, mean, sigma) + except ValidationError as e: return BadRequest(f"Incorrect function call: {e.reason}") - - try: - generate_time_series(parameters) - except ConnectionFailure as e: + + try: + generate_time_series(parameters) + except ConnectionFailure as e: return Conflict(f"Failed to connect to YDB: {e.reason}") - - return Ok() - - -def handler(event, context): - return do_handle(event, context).as_dict() + + return Ok() + + +def handler(event, context): + return do_handle(event, context).as_dict() |