diff options
author | atarasov5 <[email protected]> | 2025-03-24 12:44:03 +0300 |
---|---|---|
committer | atarasov5 <[email protected]> | 2025-03-24 12:57:18 +0300 |
commit | 39ea455c99db12684cf4ba10185e6961851231b8 (patch) | |
tree | 612b3077beb5127b48dd91b1ad13f3f0d23ac9b2 /yql/essentials/minikql/computation | |
parent | 71f6767025c7a8ac0fe7c9c45556faf1d7f7391c (diff) |
YQL-19520: msan + codegen = <3
commit_hash:9a814af3b8bd51c53939aa6ffde2e981ad8ea150
Diffstat (limited to 'yql/essentials/minikql/computation')
8 files changed, 258 insertions, 20 deletions
diff --git a/yql/essentials/minikql/computation/mkql_computation_node_codegen.cpp b/yql/essentials/minikql/computation/mkql_computation_node_codegen.cpp index 1b4984d1dc0..72d1c5abfe1 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_codegen.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node_codegen.cpp @@ -259,7 +259,7 @@ Value* GenEqualsFunction<false>(NUdf::EDataSlot slot, Value* lv, Value* rv, TCod const auto& info = NUdf::GetDataTypeInfo(slot); if ((info.Features & NUdf::EDataTypeFeatures::CommonType) && (info.Features & NUdf::EDataTypeFeatures::StringType || NUdf::EDataSlot::Uuid == slot || NUdf::EDataSlot::DyNumber == slot)) { - return CallBinaryUnboxedValueFunction(&MyEquteStrings, Type::getInt1Ty(context), lv, rv, ctx.Codegen, block); + return CallBinaryUnboxedValueFunction<&MyEquteStrings>(Type::getInt1Ty(context), lv, rv, ctx.Codegen, block); } const auto lhs = GetterFor(slot, lv, context, block); @@ -338,7 +338,7 @@ Value* GenCompareFunction<false>(NUdf::EDataSlot slot, Value* lv, Value* rv, TCo const auto& info = NUdf::GetDataTypeInfo(slot); if ((info.Features & NUdf::EDataTypeFeatures::CommonType) && (info.Features & NUdf::EDataTypeFeatures::StringType || NUdf::EDataSlot::Uuid == slot || NUdf::EDataSlot::DyNumber == slot)) { - return CallBinaryUnboxedValueFunction(&MyCompareStrings, Type::getInt32Ty(context), lv, rv, ctx.Codegen, block); + return CallBinaryUnboxedValueFunction<&MyCompareStrings>(Type::getInt32Ty(context), lv, rv, ctx.Codegen, block); } const bool extra = info.Features & (NUdf::EDataTypeFeatures::FloatType | NUdf::EDataTypeFeatures::TzDateType); @@ -507,7 +507,7 @@ Value* GenHashFunction<false>(NUdf::EDataSlot slot, Value* value, TCodegenContex const auto& info = NUdf::GetDataTypeInfo(slot); if ((info.Features & NUdf::EDataTypeFeatures::CommonType) && (info.Features & NUdf::EDataTypeFeatures::StringType || NUdf::EDataSlot::Uuid == slot || NUdf::EDataSlot::DyNumber == slot)) { - return 
CallUnaryUnboxedValueFunction(&MyHashString, Type::getInt64Ty(context), value, ctx.Codegen, block); + return CallUnaryUnboxedValueFunction<&MyHashString>(Type::getInt64Ty(context), value, ctx.Codegen, block); } const auto val = GetterFor(slot, value, context, block); @@ -1865,7 +1865,7 @@ Value* MakeVariant(Value* item, Value* variant, const TCodegenContext& ctx, Basi block = boxed; const auto factory = ctx.GetFactory(); - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&THolderFactory::CreateBoxedVariantHolder)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&THolderFactory::CreateBoxedVariantHolder>()); const auto signature = FunctionType::get(item->getType(), {factory->getType(), item->getType(), variant->getType()}, false); const auto creator = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(signature), "creator", block); @@ -1943,7 +1943,7 @@ ICodegeneratorInlineWideNode::TGenerateResult GetNodeValues(IComputationWideFlow Value* GenNewArray(const TCodegenContext& ctx, Value* size, Value* items, BasicBlock* block) { auto& context = ctx.Codegen.GetContext(); const auto fact = ctx.GetFactory(); - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&THolderFactory::CreateDirectArrayHolder)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&THolderFactory::CreateDirectArrayHolder>()); const auto valueType = Type::getInt128Ty(context); const auto funType = FunctionType::get(valueType, {fact->getType(), size->getType(), items->getType()}, false); const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funType), "function", block); @@ -1957,7 +1957,7 @@ Value* GetMemoryUsed(ui64 limit, const TCodegenContext& ctx, BasicBlock* block) auto& context = ctx.Codegen.GetContext(); const auto fact = ctx.GetFactory(); - const auto func = ConstantInt::get(Type::getInt64Ty(context), 
GetMethodPtr(&THolderFactory::GetMemoryUsed)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&THolderFactory::GetMemoryUsed>()); const auto funType = FunctionType::get(Type::getInt64Ty(context), {fact->getType()}, false); const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funType), "get_used", block); return CallInst::Create(funType, funcPtr, {fact}, "mem_used", block); @@ -1987,7 +1987,7 @@ Value* CheckAdjustedMemLimit(ui64 limit, Value* init, const TCodegenContext& ctx BranchInst::Create(call, skip, now, block); block = call; - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&TComputationContext::UpdateUsageAdjustor)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&TComputationContext::UpdateUsageAdjustor>()); const auto funType = FunctionType::get(Type::getVoidTy(context), {ctx.Ctx->getType(), Type::getInt64Ty(context)}, false); const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funType), "update", block); CallInst::Create(funType, funcPtr, {ctx.Ctx, ConstantInt::get(init->getType(), limit)}, "", block); diff --git a/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt b/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt index 8835b3b6518..8baf51d82c1 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt +++ b/yql/essentials/minikql/computation/mkql_computation_node_codegen.h.txt @@ -217,16 +217,16 @@ Value* CallBoxedValueVirtualMethod(Type* returnType, Value* value, NYql::NCodege Value* CallUnaryUnboxedValueFunctionImpl(uintptr_t methodPtr, Type* result, Value* arg, NYql::NCodegen::ICodegen& codegen, BasicBlock* block); -template<typename Method> -Value* CallUnaryUnboxedValueFunction(Method method, Type* result, Value* arg, NYql::NCodegen::ICodegen& codegen, BasicBlock* block) { - return 
CallUnaryUnboxedValueFunctionImpl(GetMethodPtr(method), result, arg, codegen, block); +template<auto Method> +Value* CallUnaryUnboxedValueFunction(Type* result, Value* arg, NYql::NCodegen::ICodegen& codegen, BasicBlock* block) { + return CallUnaryUnboxedValueFunctionImpl(GetMethodPtr<Method>(), result, arg, codegen, block); } Value* CallBinaryUnboxedValueFunctionImpl(uintptr_t methodPtr, Type* result, Value* left, Value* right, NYql::NCodegen::ICodegen& codegen, BasicBlock* block); -template<typename Method> -Value* CallBinaryUnboxedValueFunction(Method method, Type* result, Value* left, Value* right, NYql::NCodegen::ICodegen& codegen, BasicBlock* block) { - return CallBinaryUnboxedValueFunctionImpl(GetMethodPtr(method), result, left, right, codegen, block); +template<auto Method> +Value* CallBinaryUnboxedValueFunction(Type* result, Value* left, Value* right, NYql::NCodegen::ICodegen& codegen, BasicBlock* block) { + return CallBinaryUnboxedValueFunctionImpl(GetMethodPtr<Method>(), result, left, right, codegen, block); } void AddRefBoxed(Value* value, const TCodegenContext& ctx, BasicBlock*& block); @@ -589,10 +589,11 @@ protected: : TBase(mutables, kind) {} + public: Value* DoGenerateGetValue(const TCodegenContext& ctx, BasicBlock*& block) const { static_assert(std::is_same<std::invoke_result_t<decltype(&TDerived::DoCalculate), TDerived, TComputationContext&>, NUdf::TUnboxedValuePod>(), "DoCalculate must return pod!"); - return DoGenerateGetValueImpl(GetMethodPtr(&TDerived::DoCalculate), uintptr_t(this), ctx, block); + return DoGenerateGetValueImpl(GetMethodPtr<&TDerived::DoCalculate>(), uintptr_t(this), ctx, block); } }; diff --git a/yql/essentials/minikql/computation/mkql_computation_node_codegen_common.h b/yql/essentials/minikql/computation/mkql_computation_node_codegen_common.h index c6dd729eac5..ab58cdf89d0 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_codegen_common.h +++ 
b/yql/essentials/minikql/computation/mkql_computation_node_codegen_common.h @@ -5,4 +5,6 @@ #include <yql/essentials/minikql/codegen/codegen.h> #include <yql/essentials/utils/method_index.h> +#include <yql/essentials/minikql/computation/mkql_method_address_helper.h> + #include <type_traits> diff --git a/yql/essentials/minikql/computation/mkql_computation_node_holders_codegen.cpp b/yql/essentials/minikql/computation/mkql_computation_node_holders_codegen.cpp index cead70dec0a..72f21b0c0aa 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_holders_codegen.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node_holders_codegen.cpp @@ -123,7 +123,7 @@ Value* TContainerCacheOnContext::GenNewArray(ui64 sz, Value* items, const TCodeg const auto fact = ctx.GetFactory(); - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&THolderFactory::CreateDirectArrayHolder)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&THolderFactory::CreateDirectArrayHolder>()); const auto size = ConstantInt::get(Type::getInt64Ty(context), sz); const auto funType = FunctionType::get(valueType, {fact->getType(), size->getType(), items->getType()}, false); @@ -178,7 +178,7 @@ public: auto& context = ctx.Codegen.GetContext(); const auto valueType = Type::getInt128Ty(context); const auto factory = ctx.GetFactory(); - const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&THolderFactory::GetEmptyContainerLazy)); + const auto func = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr<&THolderFactory::GetEmptyContainerLazy>()); const auto funType = FunctionType::get(valueType, {factory->getType()}, false); const auto funcPtr = CastInst::Create(Instruction::IntToPtr, func, PointerType::getUnqual(funType), "function", block); diff --git a/yql/essentials/minikql/computation/mkql_method_address_helper.h b/yql/essentials/minikql/computation/mkql_method_address_helper.h new file mode 100644 index 
00000000000..058e4098976 --- /dev/null +++ b/yql/essentials/minikql/computation/mkql_method_address_helper.h @@ -0,0 +1,165 @@ +#include <yql/essentials/public/udf/udf_value.h> + +#if defined(_msan_enabled_) && defined(__linux__) + #define SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN 1 +#else + #define SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN 0 +#endif + +namespace NYql { + +// Concept that checks if a type is a pointer to a free function. +template <typename T> +concept FunctionPointer = std::is_pointer_v<T> && + std::is_function_v<std::remove_pointer_t<T>>; + +// Concept that checks if a type is a pointer-to-member function. +template <typename T> +concept MethodPointer = std::is_member_function_pointer_v<T>; + +// When compiling with msan you have to replace all NUdf::TUnboxedValuePod with __int128_t. +// See YQL-19520#67da4c599dd9e93523567aff for details. +// These helpers solve the problem by replacing each NUdf::TUnboxedValuePod with __int128_t in the signature of the passed method. +// For example: +// 1. You have a function +// NUdf::TUnboxedValuePod Func(OtherType a, NUdf::TUnboxedValuePod b) { +// ... +// } +// 2. You call GetMethodPtr<&Func>() +// 3. 
You receive a pointer to a function that does something like this: +// __int128_t FuncWrapper(OtherType a, __int128_t b) { +// NUdf::TUnboxedValuePod realB; +// memcpy(&realB, &b, sizeof(b)); +// NUdf::TUnboxedValuePod result = Func(std::move(a), realB); +// __int128_t fakeResult; +// memcpy(&fakeResult, &result, sizeof(fakeResult)); +// return fakeResult; +// } + +#if SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN +template <FunctionPointer Method> +inline uintptr_t GetMethodPtrNumber(Method method) { + uintptr_t ptr; + std::memcpy(&ptr, &method, sizeof(uintptr_t)); + return ptr; +} + +template <typename T> +struct TReplaceUnboxedValuePodWithUInt128 { + using TType = T; +}; + +template <> +struct TReplaceUnboxedValuePodWithUInt128<NUdf::TUnboxedValuePod> { + using TType = __int128_t; +}; + +template <typename T> +using TReplaceUnboxedValuePodWithUInt128_t = + typename TReplaceUnboxedValuePodWithUInt128<T>::TType; + +template <typename TR, typename... TArgs> +struct TFunctionWrapper { + template <FunctionPointer auto function> + static TReplaceUnboxedValuePodWithUInt128_t<TR> Wrapper(TReplaceUnboxedValuePodWithUInt128_t<TArgs>... wargs) { + // Call the original function with converted parameters. 
+ if constexpr (std::is_same_v<TR, void>) { + function(ConvertArg<TArgs>(TReplaceUnboxedValuePodWithUInt128_t<TArgs>(std::move(wargs)))...); + return; + } else { + return ConvertReturn<TR>(function(ConvertArg<TArgs>(TReplaceUnboxedValuePodWithUInt128_t<TArgs>(std::move(wargs)))...)); + } + } + +private: + template <typename T> + static T ConvertArg(TReplaceUnboxedValuePodWithUInt128_t<T> arg Y_LIFETIME_BOUND) { + if constexpr (std::is_same_v<std::remove_const_t<T>, NUdf::TUnboxedValuePod>) { + NUdf::TUnboxedValuePod tmp; + std::memcpy(&tmp, &arg, sizeof(T)); + return tmp; + } else { + return std::forward<TReplaceUnboxedValuePodWithUInt128_t<T>>(arg); + } + } + + template <typename T> + static TReplaceUnboxedValuePodWithUInt128_t<T> ConvertReturn(T arg Y_LIFETIME_BOUND) { + if constexpr (std::is_same_v<std::remove_const_t<T>, NUdf::TUnboxedValuePod>) { + __int128_t tmp; + std::memcpy(&tmp, &arg, sizeof(T)); + return tmp; + } else { + return std::forward<T>(arg); + } + } +}; + +template <FunctionPointer auto func, typename TR, typename... TArgs> +inline auto DoGetFreeFunctionPtrInternal() { + return &(TFunctionWrapper<TR, TArgs...>::template Wrapper<func>); +} + +template <FunctionPointer auto func> +inline auto DoGetFreeFunctionPtr() { + return []<typename TR, typename... TArgs>(TR (*fptr)(TArgs...)) { + Y_UNUSED(fptr, "For type deducing only."); + return DoGetFreeFunctionPtrInternal<func, TR, TArgs...>(); + }(func); +} + +template <FunctionPointer auto func> +inline auto GetMethodPtr() { + return GetMethodPtrNumber(DoGetFreeFunctionPtr<func>()); +} + +template <MethodPointer auto func, typename TR, typename TM, typename... TArgs> +inline TR Adapter(TM obj, TArgs&&... args) { + return (obj->*func)(std::forward<TArgs>(args)...); +} + +template <MethodPointer auto func, typename TR, typename TM, typename... 
TArgs> +inline auto GetMethodPtrImpl() { + return DoGetFreeFunctionPtrInternal<&Adapter<func, TR, TM, TArgs...>, TR, TM, TArgs...>(); +} + +template <typename T> +struct is_const_member_function_pointer: std::false_type {}; + +template <typename TR, typename TM, typename... TArgs> +struct is_const_member_function_pointer<TR (TM::*)(TArgs...) const>: std::true_type {}; + +template <MethodPointer auto func> +inline auto DoGetMethodPtr() { + // Just a template helper to deduce TR, TM and TArgs... from func. + if constexpr (is_const_member_function_pointer<decltype(func)>::value) { + return []<typename TR, typename TM, typename... TArgs>(TR (TM::*fptr)(TArgs...) const) { + Y_UNUSED(fptr); + return GetMethodPtrImpl<func, TR, TM*, TArgs...>(); + }(func); + } else { + return []<typename TR, typename TM, typename... TArgs>(TR (TM::*fptr)(TArgs...)) { + Y_UNUSED(fptr); + return GetMethodPtrImpl<func, TR, TM*, TArgs...>(); + }(func); + } +} + +template <MethodPointer auto func> +inline uintptr_t GetMethodPtr() { + return GetMethodPtrNumber(DoGetMethodPtr<func>()); +} +#else // SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN + +template <MethodPointer auto func> +inline uintptr_t GetMethodPtr() { + return GetMethodPtr(func); +} + +template <FunctionPointer auto func> +inline uintptr_t GetMethodPtr() { + return GetMethodPtr(func); +} +#endif // SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN + +} // namespace NYql diff --git a/yql/essentials/minikql/computation/mkql_method_address_helper_ut.cpp b/yql/essentials/minikql/computation/mkql_method_address_helper_ut.cpp new file mode 100644 index 00000000000..baef6a006d9 --- /dev/null +++ b/yql/essentials/minikql/computation/mkql_method_address_helper_ut.cpp @@ -0,0 +1,69 @@ +#include <yql/essentials/minikql/computation/mkql_method_address_helper.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <yql/essentials/public/udf/udf_value.h> + +#if SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN +using namespace NYql; +namespace { +// Test 
class with methods to test method pointers +class TTestClass { +public: + int MethodWithUnboxedValuePod(NUdf::TUnboxedValuePod val) { + Y_UNUSED(val); + CallCount_++; + return 123; + } + + NUdf::TUnboxedValuePod& ConstMethodWithUnboxedValuePod(NUdf::TUnboxedValuePod val) const { + Y_UNUSED(val); + CallCount_++; + return UnboxedValuePod_; + } + + size_t CallCount() const { + return CallCount_; + } + +private: + mutable size_t CallCount_ = 0; + mutable NUdf::TUnboxedValuePod UnboxedValuePod_; +}; + +NUdf::TUnboxedValuePod FunctionWithUnboxedValuePod(NUdf::TUnboxedValuePod val, NUdf::TUnboxedValuePod& val2, int a, const int& b, int* c, int&& d) { + Y_UNUSED(val2, a, b, c, d); + return val; +} +} // namespace + +Y_UNIT_TEST_SUITE(TestMethodConvertion) { + +Y_UNIT_TEST(TestFreeFunction) { + __int128_t (*actualMethod)(__int128_t, NUdf::TUnboxedValuePod&, int a, const int& b, int* c, int&& d) = DoGetFreeFunctionPtr<&FunctionWithUnboxedValuePod>(); + Y_UNUSED(actualMethod); + auto address = GetMethodPtr<&FunctionWithUnboxedValuePod>(); + NUdf::TUnboxedValuePod a; + UNIT_ASSERT_EQUAL(reinterpret_cast<decltype(actualMethod)>(address)(13, a, 1, 2, nullptr, 3), 13); +} + +Y_UNIT_TEST(TestConstMethod) { + TTestClass testClass; + NUdf::TUnboxedValuePod& (*actualMethod)(TTestClass*, __int128_t) = DoGetMethodPtr<&TTestClass::ConstMethodWithUnboxedValuePod>(); + Y_UNUSED(actualMethod(&testClass, 123)); + UNIT_ASSERT_EQUAL(testClass.CallCount(), 1); +} + +Y_UNIT_TEST(TestNonConstMethod) { + TTestClass testClass; + int (*actualMethod)(TTestClass*, __int128_t) = DoGetMethodPtr<&TTestClass::MethodWithUnboxedValuePod>(); + Y_UNUSED(actualMethod); + actualMethod(&testClass, 123); + UNIT_ASSERT_EQUAL(testClass.CallCount(), 1); + auto address = GetMethodPtr<&TTestClass::MethodWithUnboxedValuePod>(); + UNIT_ASSERT(reinterpret_cast<decltype(actualMethod)>(address)(&testClass, 123)); + UNIT_ASSERT_EQUAL(testClass.CallCount(), 2); +} + +} // Y_UNIT_TEST_SUITE(TestMethodConvertion) + +#endif // 
SHOULD_WRAP_ALL_UNBOXED_VALUES_FOR_CODEGEN diff --git a/yql/essentials/minikql/computation/mkql_simple_codegen.h b/yql/essentials/minikql/computation/mkql_simple_codegen.h index ab8438b1762..1294abe06c5 100644 --- a/yql/essentials/minikql/computation/mkql_simple_codegen.h +++ b/yql/essentials/minikql/computation/mkql_simple_codegen.h @@ -91,9 +91,9 @@ protected: : TBase(mutables, source, StateKind) , TLLVMBase(source, inWidth, outWidth, { .ThisPtr = reinterpret_cast<uintptr_t>(this), - .InitStateMethPtr = GetMethodPtr(&TDerived::InitState), - .PrepareInputMethPtr = GetMethodPtr(&TDerived::PrepareInput), - .DoProcessMethPtr = GetMethodPtr(&TDerived::DoProcess) + .InitStateMethPtr = GetMethodPtr<&TDerived::InitState>(), + .PrepareInputMethPtr = GetMethodPtr<&TDerived::PrepareInput>(), + .DoProcessMethPtr = GetMethodPtr<&TDerived::DoProcess>() }) {} #ifndef MKQL_DISABLE_CODEGEN @@ -123,4 +123,4 @@ public: }; } -}
\ No newline at end of file +} diff --git a/yql/essentials/minikql/computation/ut/ya.make.inc b/yql/essentials/minikql/computation/ut/ya.make.inc index 1f7ae7d6dc2..969083f969e 100644 --- a/yql/essentials/minikql/computation/ut/ya.make.inc +++ b/yql/essentials/minikql/computation/ut/ya.make.inc @@ -24,6 +24,7 @@ SRCS( mkql_value_builder_ut.cpp presort_ut.cpp mkql_vector_spiller_adapter_ut.cpp + mkql_method_address_helper_ut.cpp ) PEERDIR( |