diff options
author | aneporada <aneporada@ydb.tech> | 2023-07-27 21:26:12 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2023-07-27 21:26:12 +0300 |
commit | 5ea97cfd8a8f61d96636778ed64de3cb003e1589 (patch) | |
tree | e3340f838ec0c80c049b15ae610173c50beb6269 | |
parent | 95ef237389033d9554531589df9c3dcbed67514d (diff) | |
download | ydb-5ea97cfd8a8f61d96636778ed64de3cb003e1589.tar.gz |
Add IBlockItemHasher
16 files changed, 205 insertions, 5 deletions
diff --git a/ydb/library/yql/minikql/mkql_type_builder.cpp b/ydb/library/yql/minikql/mkql_type_builder.cpp index c6dd847f913..e4ec0473d0c 100644 --- a/ydb/library/yql/minikql/mkql_type_builder.cpp +++ b/ydb/library/yql/minikql/mkql_type_builder.cpp @@ -5,6 +5,7 @@ #include <ydb/library/yql/public/udf/udf_type_ops.h> #include <ydb/library/yql/public/udf/arrow/block_item_comparator.h> +#include <ydb/library/yql/public/udf/arrow/block_item_hasher.h> #include <library/cpp/containers/stack_vector/stack_vec.h> #include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h> @@ -671,6 +672,8 @@ public: {} ui64 Hash(NUdf::TUnboxedValuePod value) const override { + // keep hash computation in sync with + // ydb/library/yql/public/udf/arrow/block_item_hasher.h: TBlockItemHasherBase::Hash() if (!value) { return 0; } @@ -753,6 +756,8 @@ private: class TVectorHash : public NUdf::IHash { public: ui64 Hash(NUdf::TUnboxedValuePod value) const override { + // keep hash computation in sync with + // ydb/library/yql/public/udf/arrow/block_item_hasher.h: TTupleBlockItemHasher::DoHash() ui64 result = 0ULL; auto elements = value.GetElements(); if (elements) { @@ -2422,9 +2427,29 @@ struct TComparatorTraits { } }; +struct THasherTraits { + using TResult = NUdf::IBlockItemHasher; + template <bool Nullable> + using TTuple = NUdf::TTupleBlockItemHasher<Nullable>; + template <typename T, bool Nullable> + using TFixedSize = NUdf::TFixedSizeBlockItemHasher<T, Nullable>; + template <typename TStringType, bool Nullable> + using TStrings = NUdf::TStringBlockItemHasher<TStringType, Nullable>; + using TExtOptional = NUdf::TExternalOptionalBlockItemHasher; + + static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) { + Y_UNUSED(pgBuilder); + return std::unique_ptr<TResult>(MakePgItemHasher(desc.TypeId).Release()); + } +}; + NUdf::IBlockItemComparator::TPtr TBlockTypeHelper::MakeComparator(NUdf::TType* type) const { return NUdf::MakeBlockReaderImpl<TComparatorTraits>(TTypeInfoHelper(), type, nullptr).release(); } +NUdf::IBlockItemHasher::TPtr TBlockTypeHelper::MakeHasher(NUdf::TType* type) const { + return NUdf::MakeBlockReaderImpl<THasherTraits>(TTypeInfoHelper(), type, nullptr).release(); +} + } // namespace NMiniKQL } // namespace Nkikimr diff --git a/ydb/library/yql/minikql/mkql_type_builder.h b/ydb/library/yql/minikql/mkql_type_builder.h index 2043c5b3fcb..8806c014ab8 100644 --- a/ydb/library/yql/minikql/mkql_type_builder.h +++ b/ydb/library/yql/minikql/mkql_type_builder.h @@ -16,6 +16,7 @@ namespace NMiniKQL { class TBlockTypeHelper : public NUdf::IBlockTypeHelper { public: NUdf::IBlockItemComparator::TPtr MakeComparator(NUdf::TType* type) const final; + NUdf::IBlockItemHasher::TPtr MakeHasher(NUdf::TType* type) const final; }; constexpr size_t MaxBlockSizeInBytes = 1_MB; diff --git a/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp b/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp index 1848969b54c..f4a2f3fc28a 100644 --- a/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp +++ b/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp @@ -2863,7 +2863,7 @@ void PgDestroyContext(const std::string_view& contextType, void* ctx) { } template <bool PassByValue, bool IsArray> -class TPgHash : public NUdf::IHash { +class TPgHash : public NUdf::IHash, public NUdf::TBlockItemHasherBase<TPgHash<PassByValue, IsArray>, true> { public: TPgHash(const NYql::NPg::TTypeDesc& typeDesc) : TypeDesc(typeDesc) @@ -2904,6 +2904,21 @@ public: return DatumGetUInt32(x); } + ui64 DoHash(NUdf::TBlockItem value) const { + LOCAL_FCINFO(callInfo, 1); + Zero(*callInfo); + callInfo->flinfo = const_cast<FmgrInfo*>(&FInfoHash); // don't copy becase of IHash isn't threadsafe + callInfo->nargs = 1; + callInfo->fncollation = DEFAULT_COLLATION_OID; + callInfo->isnull = false; + callInfo->args[0] = { PassByValue ? + ScalarDatumFromItem(value) : + PointerDatumFromItem(value), false }; + + auto x = FInfoHash.fn_addr(callInfo); + Y_ENSURE(!callInfo->isnull); + return DatumGetUInt32(x); + } private: const NYql::NPg::TTypeDesc TypeDesc; @@ -2921,6 +2936,17 @@ NUdf::IHash::TPtr MakePgHash(const NMiniKQL::TPgType* type) { } } +NUdf::IBlockItemHasher::TPtr MakePgItemHasher(ui32 typeId) { + const auto& typeDesc = NYql::NPg::LookupType(typeId); + if (typeDesc.PassByValue) { + return new TPgHash<true, false>(typeDesc); + } else if (typeDesc.TypeId == typeDesc.ArrayTypeId) { + return new TPgHash<false, true>(typeDesc); + } else { + return new TPgHash<false, false>(typeDesc); + } +} + template <bool PassByValue, bool IsArray> class TPgCompare : public NUdf::ICompare, public NUdf::TBlockItemComparatorBase<TPgCompare<PassByValue, IsArray>, true> { public: diff --git a/ydb/library/yql/parser/pg_wrapper/interface/compare.h b/ydb/library/yql/parser/pg_wrapper/interface/compare.h index a3870c5fce6..f91d6b026df 100644 --- a/ydb/library/yql/parser/pg_wrapper/interface/compare.h +++ b/ydb/library/yql/parser/pg_wrapper/interface/compare.h @@ -2,6 +2,7 @@ #include <ydb/library/yql/public/udf/udf_type_builder.h> #include <ydb/library/yql/public/udf/arrow/block_item_comparator.h> +#include <ydb/library/yql/public/udf/arrow/block_item_hasher.h> namespace NKikimr { namespace NMiniKQL { @@ -12,6 +13,7 @@ NUdf::IHash::TPtr MakePgHash(const TPgType* type); NUdf::ICompare::TPtr MakePgCompare(const TPgType* type); NUdf::IEquate::TPtr MakePgEquate(const TPgType* type); NUdf::IBlockItemComparator::TPtr MakePgItemComparator(ui32 typeId); +NUdf::IBlockItemHasher::TPtr MakePgItemHasher(ui32 typeId); } // namespace NMiniKQL } // namespace NKikimr diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt index 1b41c0f06e9..322d3440a53 100644 --- a/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt @@ -24,5 +24,7 @@ target_sources(public-udf-arrow PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp ) diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt index 59ba7f29325..0e855decf8d 100644 --- a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt @@ -25,5 +25,7 @@ target_sources(public-udf-arrow PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp ) diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt index 59ba7f29325..0e855decf8d 100644 --- a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt @@ -25,5 +25,7 @@ target_sources(public-udf-arrow PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp ) diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt index 1b41c0f06e9..322d3440a53 100644 --- a/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt @@ -24,5 +24,7 @@ target_sources(public-udf-arrow PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp ) diff --git a/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp b/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp new file mode 100644 index 00000000000..37faaa49592 --- /dev/null +++ b/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp @@ -0,0 +1 @@ +#include "block_item_hasher.h" diff --git a/ydb/library/yql/public/udf/arrow/block_item_hasher.h b/ydb/library/yql/public/udf/arrow/block_item_hasher.h new file mode 100644 index 00000000000..4c3d89e998e --- /dev/null +++ b/ydb/library/yql/public/udf/arrow/block_item_hasher.h @@ -0,0 +1,96 @@ +#pragma once + +#include "block_item.h" + +#include <ydb/library/yql/public/udf/udf_ptr.h> +#include <ydb/library/yql/public/udf/udf_type_inspection.h> +#include <ydb/library/yql/public/udf/udf_type_ops.h> +#include <ydb/library/yql/public/udf/udf_type_size_check.h> + +namespace NYql::NUdf { + +// ABI stable +class IBlockItemHasher { +public: + using TPtr = TUniquePtr<IBlockItemHasher>; + + virtual ~IBlockItemHasher() = default; + virtual ui64 Hash(TBlockItem value) const = 0; +}; + +UDF_ASSERT_TYPE_SIZE(IBlockItemHasher, 8); + +template <typename TDerived, bool Nullable> +class TBlockItemHasherBase : public IBlockItemHasher { +public: + const TDerived* Derived() const { + return static_cast<const TDerived*>(this); + } + + ui64 Hash(TBlockItem value) const final { + // keep hash computation in sync with + // ydb/library/yql/minikql/mkql_type_builder.cpp: THash<NMiniKQL::TType::EKind::Optional>::Hash() + if constexpr (Nullable) { + if (!value) { + return 0; + } + return CombineHashes(ui64(1), Derived()->DoHash(value)); + } else { + return Derived()->DoHash(value); + } + } +}; + +template <typename T, bool Nullable> +class TFixedSizeBlockItemHasher : public TBlockItemHasherBase<TFixedSizeBlockItemHasher<T, Nullable>, Nullable> { +public: + ui64 DoHash(TBlockItem value) const { + return GetValueHash<TDataType<T>::Slot>(NUdf::TUnboxedValuePod(value.As<T>())); + } +}; + +template <typename TStringType, bool Nullable> +class TStringBlockItemHasher : public TBlockItemHasherBase<TStringBlockItemHasher<TStringType, Nullable>, Nullable> { +public: + ui64 DoHash(TBlockItem value) const { + return GetStringHash(value.AsStringRef()); + } +}; + +template <bool Nullable> +class TTupleBlockItemHasher : public TBlockItemHasherBase<TTupleBlockItemHasher<Nullable>, Nullable> { +public: + TTupleBlockItemHasher(TVector<std::unique_ptr<IBlockItemHasher>>&& children) + : Children_(std::move(children)) + {} + + ui64 DoHash(TBlockItem value) const { + // keep hash computation in sync with + // ydb/library/yql/minikql/mkql_type_builder.cpp: TVectorHash::Hash() + ui64 result = 0ULL; + auto elements = value.GetElements(); + for (ui32 i = 0; i < Children_.size(); ++i) { + result = CombineHashes(result, Children_[i]->Hash(elements[i])); + } + return result; + } + +private: + const TVector<std::unique_ptr<IBlockItemHasher>> Children_; +}; + +class TExternalOptionalBlockItemHasher : public TBlockItemHasherBase<TExternalOptionalBlockItemHasher, true> { +public: + TExternalOptionalBlockItemHasher(std::unique_ptr<IBlockItemHasher>&& inner) + : Inner_(std::move(inner)) + {} + + ui64 DoHash(TBlockItem value) const { + return Inner_->Hash(value.GetOptionalValue()); + } + +private: + const std::unique_ptr<IBlockItemHasher> Inner_; +}; + +} diff --git a/ydb/library/yql/public/udf/arrow/block_type_helper.cpp b/ydb/library/yql/public/udf/arrow/block_type_helper.cpp new file mode 100644 index 00000000000..362d9faf254 --- /dev/null +++ b/ydb/library/yql/public/udf/arrow/block_type_helper.cpp @@ -0,0 +1,9 @@ +#include "block_type_helper.h" + +namespace NYql { +namespace NUdf { + +IBlockTypeHelper::IBlockTypeHelper() {} + +} // namespace NUdf +} // namespace NYql diff --git a/ydb/library/yql/public/udf/arrow/block_type_helper.h b/ydb/library/yql/public/udf/arrow/block_type_helper.h index 9eb7fc9bf9d..d02168ac864 100644 --- a/ydb/library/yql/public/udf/arrow/block_type_helper.h +++ b/ydb/library/yql/public/udf/arrow/block_type_helper.h @@ -1,17 +1,38 @@ #pragma once #include "block_item_comparator.h" +#include "block_item_hasher.h" #include <ydb/library/yql/public/udf/udf_type_size_check.h> +#include <ydb/library/yql/public/udf/udf_version.h> namespace NYql { namespace NUdf { // ABI stable -class IBlockTypeHelper { +class IBlockTypeHelper1 { public: - virtual ~IBlockTypeHelper() = default; + virtual ~IBlockTypeHelper1() = default; virtual IBlockItemComparator::TPtr MakeComparator(TType* type) const = 0; }; +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 34) +class IBlockTypeHelper2 : public IBlockTypeHelper1 { +public: + virtual IBlockItemHasher::TPtr MakeHasher(TType *type) const = 0; +}; +#endif + +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 34) +class IBlockTypeHelper : public IBlockTypeHelper2 { +public: + IBlockTypeHelper(); +}; +#else +class IBlockTypeHelper : public IBlockTypeHelper1 { +public: + IBlockTypeHelper(); +}; +#endif + UDF_ASSERT_TYPE_SIZE(IBlockTypeHelper, 8); } diff --git a/ydb/library/yql/public/udf/arrow/ya.make b/ydb/library/yql/public/udf/arrow/ya.make index f334f8eea84..27e7476cd3f 100644 --- a/ydb/library/yql/public/udf/arrow/ya.make +++ b/ydb/library/yql/public/udf/arrow/ya.make @@ -7,7 +7,9 @@ SRCS( util.cpp block_reader.cpp block_item.cpp + block_item_hasher.cpp block_item_comparator.cpp + block_type_helper.cpp ) PEERDIR( diff --git a/ydb/library/yql/public/udf/udf_type_ops.h b/ydb/library/yql/public/udf/udf_type_ops.h index 4b2446c8fd9..21b79fffa32 100644 --- a/ydb/library/yql/public/udf/udf_type_ops.h +++ b/ydb/library/yql/public/udf/udf_type_ops.h @@ -50,8 +50,12 @@ inline THashType GetFloatHash(const TUnboxedValuePod& value) { return std::isunordered(x, x) ? ~0ULL : std::hash<T>()(x); } +inline THashType GetStringHash(TStringBuf value) { + return THash<TStringBuf>{}(value); +} + inline THashType GetStringHash(const TUnboxedValuePod& value) { - return THash<TStringBuf>{}(value.AsStringRef()); + return GetStringHash(value.AsStringRef()); } template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr> diff --git a/ydb/library/yql/public/udf/udf_version.h b/ydb/library/yql/public/udf/udf_version.h index e1a33d560f6..d66cd123480 100644 --- a/ydb/library/yql/public/udf/udf_version.h +++ b/ydb/library/yql/public/udf/udf_version.h @@ -7,7 +7,7 @@ namespace NYql { namespace NUdf { #define CURRENT_UDF_ABI_VERSION_MAJOR 2 -#define CURRENT_UDF_ABI_VERSION_MINOR 33 +#define CURRENT_UDF_ABI_VERSION_MINOR 34 #define CURRENT_UDF_ABI_VERSION_PATCH 0 #ifdef USE_CURRENT_UDF_ABI_VERSION diff --git a/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp b/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp index c38b3d73791..93872431c6a 100644 --- a/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp +++ b/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp @@ -231,6 +231,11 @@ NUdf::IBlockItemComparator::TPtr MakePgItemComparator(ui32 typeId) { throw yexception() << "PG types are not supported"; } +NUdf::IBlockItemHasher::TPtr MakePgItemHasher(ui32 typeId) { + Y_UNUSED(typeId); + throw yexception() << "PG types are not supported"; +} + void RegisterPgBlockAggs(THashMap<TString, std::unique_ptr<IBlockAggregatorFactory>>& registry) { Y_UNUSED(registry); } |