aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorudovichenko-r <udovichenko-r@yandex-team.com>2025-04-25 19:52:57 +0300
committerudovichenko-r <udovichenko-r@yandex-team.com>2025-04-25 20:39:51 +0300
commit4b2b4df612d73bbecbb12bd5c5b548dd28857295 (patch)
tree0c7c2c93e54f841c6f370ec4c068f18c4fa7ad9b
parent484ea754d4f2dda5d1f683ad0301945724955a03 (diff)
downloadydb-4b2b4df612d73bbecbb12bd5c5b548dd28857295.tar.gz
Use modules md5 for operation hash
commit_hash:479beaff03a33251903e09d1196e18e030d4029c
-rw-r--r--yql/essentials/core/expr_nodes/yql_expr_nodes.json3
-rw-r--r--yql/essentials/core/type_ann/type_ann_core.cpp25
-rw-r--r--yt/yql/providers/yt/lib/hash/ya.make2
-rw-r--r--yt/yql/providers/yt/lib/hash/yql_op_hash.cpp33
-rw-r--r--yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp2
5 files changed, 56 insertions, 9 deletions
diff --git a/yql/essentials/core/expr_nodes/yql_expr_nodes.json b/yql/essentials/core/expr_nodes/yql_expr_nodes.json
index e0b08030e94..740fae8474f 100644
--- a/yql/essentials/core/expr_nodes/yql_expr_nodes.json
+++ b/yql/essentials/core/expr_nodes/yql_expr_nodes.json
@@ -1460,7 +1460,8 @@
{"Index": 1, "Name": "FunctionName", "Type": "TCoAtom"},
{"Index": 2, "Name": "FunctionType", "Type": "TExprBase"},
{"Index": 3, "Name": "Script", "Type": "TExprBase"},
- {"Index": 4, "Name": "Options", "Type": "TCoNameValueTupleList", "Optional": true}
+ {"Index": 4, "Name": "Options", "Type": "TCoNameValueTupleList", "Optional": true},
+ {"Index": 5, "Name": "FileAlias", "Type": "TCoAtom", "Optional": true}
]
},
{
diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp
index e97b1690c4a..e0518348d7c 100644
--- a/yql/essentials/core/type_ann/type_ann_core.cpp
+++ b/yql/essentials/core/type_ann/type_ann_core.cpp
@@ -7841,7 +7841,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
if (!EnsureMinArgsCount(*input, 4, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
- if (!EnsureMaxArgsCount(*input, 5, ctx.Expr)) {
+ if (!EnsureMaxArgsCount(*input, 6, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
@@ -7900,6 +7900,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
bool isCustomPython = NKikimr::NMiniKQL::IsCustomPython(scriptType);
auto canonizedModuleName = isCustomPython ? moduleName : NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType);
bool foundModule = false;
+ TStringBuf fileAlias = ""_sb;
// resolve script udf from external resources (files / urls)
// (main usage of CustomPython)
@@ -7910,7 +7911,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
return IGraphTransformer::TStatus::Error;
}
- foundModule = ctx.Types.UdfModules.find(canonizedModuleName) != ctx.Types.UdfModules.end();
+ if (auto udfInfo = ctx.Types.UdfModules.FindPtr(canonizedModuleName)) {
+ foundModule = true;
+ fileAlias = udfInfo->FileAlias;
+ }
}
// fallback for preinstalled CustomPython case
@@ -7930,7 +7934,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
return IGraphTransformer::TStatus::Error;
}
- if (input->ChildrenSize() == 5) {
+ if (input->ChildrenSize() > 4) {
if (!EnsureTuple(*input->Child(4), ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
@@ -7970,6 +7974,21 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
}
}
+ if (input->ChildrenSize() > 5) {
+ if (!EnsureAtom(*input->Child(5), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ } else if (fileAlias) {
+ auto children = input->ChildrenList();
+ if (children.size() < 5) {
+ children.push_back(ctx.Expr.NewList(input->Pos(), {}));
+ }
+ children.push_back(ctx.Expr.NewAtom(input->Pos(), fileAlias));
+ YQL_ENSURE(children.size() == 6);
+ output = ctx.Expr.ChangeChildren(*input, std::move(children));
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
input->SetTypeAnn(callableType);
return IGraphTransformer::TStatus::Ok;
}
diff --git a/yt/yql/providers/yt/lib/hash/ya.make b/yt/yql/providers/yt/lib/hash/ya.make
index 508131391f2..b444d8a0fc4 100644
--- a/yt/yql/providers/yt/lib/hash/ya.make
+++ b/yt/yql/providers/yt/lib/hash/ya.make
@@ -14,4 +14,6 @@ PEERDIR(
yql/essentials/core/expr_nodes
)
+YQL_LAST_ABI_VERSION()
+
END()
diff --git a/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp b/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp
index 0fe82bd4e20..2f6e3240b53 100644
--- a/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp
+++ b/yt/yql/providers/yt/lib/hash/yql_op_hash.cpp
@@ -3,6 +3,7 @@
#include <yql/essentials/core/yql_type_annotation.h>
#include <yql/essentials/core/expr_nodes/yql_expr_nodes.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
#include <yql/essentials/utils/log/log.h>
#include <yql/essentials/utils/yql_panic.h>
@@ -81,10 +82,34 @@ TString TNodeHashCalculator::GetHashImpl(const TExprNode& node, TArgIndex& argIn
isHashable = false;
}
else {
- if (TCoUdf::Match(&node) && node.ChildrenSize() > TCoUdf::idx_FileAlias && !node.Child(TCoUdf::idx_FileAlias)->Content().empty()) {
- // an udf from imported file, use hash of file
- auto alias = node.Child(TCoUdf::idx_FileAlias)->Content();
- UpdateFileHash(builder, alias);
+ if (TCoUdf::Match(&node)) {
+ if (node.ChildrenSize() > TCoUdf::idx_FileAlias && !node.Child(TCoUdf::idx_FileAlias)->Content().empty()) {
+ // an udf from imported file, use hash of file
+ auto alias = node.Child(TCoUdf::idx_FileAlias)->Content();
+ UpdateFileHash(builder, alias);
+ } else {
+ // preinstalled
+ TStringBuf moduleName, funcName;
+ YQL_ENSURE(SplitUdfName(node.Head().Content(), moduleName, funcName));
+ const auto res = Types.UdfResolver->GetSystemModulePath(moduleName);
+ YQL_ENSURE(res, "Expected either file alias or system module");
+ builder << moduleName << res->Md5;
+ }
+ } else if (TCoScriptUdf::Match(&node)) {
+ if (node.ChildrenSize() > TCoScriptUdf::idx_FileAlias && !node.Child(TCoScriptUdf::idx_FileAlias)->Content().empty()) {
+ // an udf from imported file, use hash of file
+ auto alias = node.Child(TCoScriptUdf::idx_FileAlias)->Content();
+ UpdateFileHash(builder, alias);
+ } else {
+ auto moduleName = node.Head().Content();
+ auto scriptType = NKikimr::NMiniKQL::CanonizeScriptType(NKikimr::NMiniKQL::ScriptTypeFromStr(moduleName));
+ if (!NKikimr::NMiniKQL::IsCustomPython(scriptType)) {
+ moduleName = NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType);
+ }
+ const auto res = Types.UdfResolver->GetSystemModulePath(moduleName);
+ YQL_ENSURE(res, "Expected either file alias or system module");
+ builder << moduleName << res->Md5;
+ }
} else if (node.Content() == "FilePath" || node.Content() == "FileContent") {
auto alias = node.Child(0)->Content();
UpdateFileHash(builder, alias);
diff --git a/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp b/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp
index 74905bb7b32..72865fcce3c 100644
--- a/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp
+++ b/yt/yql/providers/yt/provider/yql_yt_datasink_exec.cpp
@@ -710,7 +710,7 @@ private:
TString operationHash;
if (const auto queryCacheMode = config->QueryCacheMode.Get().GetOrElse(EQueryCacheMode::Disable); queryCacheMode != EQueryCacheMode::Disable) {
if (!hasNonDeterministicFunctions) {
- operationHash = TYtNodeHashCalculator(State_, cluster, config).GetHash(*input);
+ operationHash = TYtNodeHashCalculator(State_, cluster, config).GetHash(*optimizedNode);
}
YQL_CLOG(DEBUG, ProviderYt) << "Operation hash: " << HexEncode(operationHash).Quote() << ", cache mode: " << queryCacheMode;
}