diff options
author | udovichenko-r <udovichenko-r@yandex-team.com> | 2025-04-25 19:52:57 +0300 |
---|---|---|
committer | udovichenko-r <udovichenko-r@yandex-team.com> | 2025-04-25 20:39:51 +0300 |
commit | 4b2b4df612d73bbecbb12bd5c5b548dd28857295 (patch) | |
tree | 0c7c2c93e54f841c6f370ec4c068f18c4fa7ad9b | |
parent | 484ea754d4f2dda5d1f683ad0301945724955a03 (diff) | |
download | ydb-4b2b4df612d73bbecbb12bd5c5b548dd28857295.tar.gz |
Use modules md5 for operation hash
commit_hash:479beaff03a33251903e09d1196e18e030d4029c
-rw-r--r-- | yql/essentials/core/expr_nodes/yql_expr_nodes.json | 3 | ||||
-rw-r--r-- | yql/essentials/core/type_ann/type_ann_core.cpp | 25 | ||||
-rw-r--r-- | yt/yql/providers/yt/lib/hash/ya.make | 2 | ||||
-rw-r--r-- | yt/yql/providers/yt/lib/hash/yql_op_hash.cpp | 33 | ||||
-rw-r--r-- | yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp | 2 |
5 files changed, 56 insertions, 9 deletions
diff --git a/yql/essentials/core/expr_nodes/yql_expr_nodes.json b/yql/essentials/core/expr_nodes/yql_expr_nodes.json index e0b08030e94..740fae8474f 100644 --- a/yql/essentials/core/expr_nodes/yql_expr_nodes.json +++ b/yql/essentials/core/expr_nodes/yql_expr_nodes.json @@ -1460,7 +1460,8 @@ {"Index": 1, "Name": "FunctionName", "Type": "TCoAtom"}, {"Index": 2, "Name": "FunctionType", "Type": "TExprBase"}, {"Index": 3, "Name": "Script", "Type": "TExprBase"}, - {"Index": 4, "Name": "Options", "Type": "TCoNameValueTupleList", "Optional": true} + {"Index": 4, "Name": "Options", "Type": "TCoNameValueTupleList", "Optional": true}, + {"Index": 5, "Name": "FileAlias", "Type": "TCoAtom", "Optional": true} ] }, { diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index e97b1690c4a..e0518348d7c 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -7841,7 +7841,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> if (!EnsureMinArgsCount(*input, 4, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } - if (!EnsureMaxArgsCount(*input, 5, ctx.Expr)) { + if (!EnsureMaxArgsCount(*input, 6, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -7900,6 +7900,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> bool isCustomPython = NKikimr::NMiniKQL::IsCustomPython(scriptType); auto canonizedModuleName = isCustomPython ? moduleName : NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType); bool foundModule = false; + TStringBuf fileAlias = ""_sb; // resolve script udf from external resources (files / urls) // (main usage of CustomPython) @@ -7910,7 +7911,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - foundModule = ctx.Types.UdfModules.find(canonizedModuleName) != ctx.Types.UdfModules.end(); + if (auto udfInfo = ctx.Types.UdfModules.FindPtr(canonizedModuleName)) { + foundModule = true; + fileAlias = udfInfo->FileAlias; + } } // fallback for preinstalled CustomPython case @@ -7930,7 +7934,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - if (input->ChildrenSize() == 5) { + if (input->ChildrenSize() > 4) { if (!EnsureTuple(*input->Child(4), ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -7970,6 +7974,21 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } } + if (input->ChildrenSize() > 5) { + if (!EnsureAtom(*input->Child(5), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + } else if (fileAlias) { + auto children = input->ChildrenList(); + if (children.size() < 5) { + children.push_back(ctx.Expr.NewList(input->Pos(), {})); + } + children.push_back(ctx.Expr.NewAtom(input->Pos(), fileAlias)); + YQL_ENSURE(children.size() == 6); + output = ctx.Expr.ChangeChildren(*input, std::move(children)); + return IGraphTransformer::TStatus::Repeat; + } + input->SetTypeAnn(callableType); return IGraphTransformer::TStatus::Ok; } diff --git a/yt/yql/providers/yt/lib/hash/ya.make b/yt/yql/providers/yt/lib/hash/ya.make index 508131391f2..b444d8a0fc4 100644 --- a/yt/yql/providers/yt/lib/hash/ya.make +++ b/yt/yql/providers/yt/lib/hash/ya.make @@ -14,4 +14,6 @@ PEERDIR( yql/essentials/core/expr_nodes ) +YQL_LAST_ABI_VERSION() + END() diff --git a/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp b/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp index 0fe82bd4e20..2f6e3240b53 100644 --- a/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp +++ b/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp @@ -3,6 +3,7 @@ #include <yql/essentials/core/yql_type_annotation.h> #include <yql/essentials/core/expr_nodes/yql_expr_nodes.h> +#include <yql/essentials/minikql/mkql_program_builder.h> #include <yql/essentials/utils/log/log.h> #include <yql/essentials/utils/yql_panic.h> @@ -81,10 +82,34 @@ TString TNodeHashCalculator::GetHashImpl(const TExprNode& node, TArgIndex& argIn isHashable = false; } else { - if (TCoUdf::Match(&node) && node.ChildrenSize() > TCoUdf::idx_FileAlias && !node.Child(TCoUdf::idx_FileAlias)->Content().empty()) { - // an udf from imported file, use hash of file - auto alias = node.Child(TCoUdf::idx_FileAlias)->Content(); - UpdateFileHash(builder, alias); + if (TCoUdf::Match(&node)) { + if (node.ChildrenSize() > TCoUdf::idx_FileAlias && !node.Child(TCoUdf::idx_FileAlias)->Content().empty()) { + // an udf from imported file, use hash of file + auto alias = node.Child(TCoUdf::idx_FileAlias)->Content(); + UpdateFileHash(builder, alias); + } else { + // preinstalled + TStringBuf moduleName, funcName; + YQL_ENSURE(SplitUdfName(node.Head().Content(), moduleName, funcName)); + const auto res = Types.UdfResolver->GetSystemModulePath(moduleName); + YQL_ENSURE(res, "Expected either file alias or system module"); + builder << moduleName << res->Md5; + } + } else if (TCoScriptUdf::Match(&node)) { + if (node.ChildrenSize() > TCoScriptUdf::idx_FileAlias && !node.Child(TCoScriptUdf::idx_FileAlias)->Content().empty()) { + // an udf from imported file, use hash of file + auto alias = node.Child(TCoScriptUdf::idx_FileAlias)->Content(); + UpdateFileHash(builder, alias); + } else { + auto moduleName = node.Head().Content(); + auto scriptType = NKikimr::NMiniKQL::CanonizeScriptType(NKikimr::NMiniKQL::ScriptTypeFromStr(moduleName)); + if (!NKikimr::NMiniKQL::IsCustomPython(scriptType)) { + moduleName = NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType); + } + const auto res = Types.UdfResolver->GetSystemModulePath(moduleName); + YQL_ENSURE(res, "Expected either file alias or system module"); + builder << moduleName << res->Md5; + } } else if (node.Content() == "FilePath" || node.Content() == "FileContent") { auto alias = node.Child(0)->Content(); UpdateFileHash(builder, alias); diff --git a/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp b/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp index 74905bb7b32..72865fcce3c 100644 --- a/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp +++ b/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp @@ -710,7 +710,7 @@ private: TString operationHash; if (const auto queryCacheMode = config->QueryCacheMode.Get().GetOrElse(EQueryCacheMode::Disable); queryCacheMode != EQueryCacheMode::Disable) { if (!hasNonDeterministicFunctions) { - operationHash = TYtNodeHashCalculator(State_, cluster, config).GetHash(*input); + operationHash = TYtNodeHashCalculator(State_, cluster, config).GetHash(*optimizedNode); } YQL_CLOG(DEBUG, ProviderYt) << "Operation hash: " << HexEncode(operationHash).Quote() << ", cache mode: " << queryCacheMode; } |