aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoraneporada <aneporada@ydb.tech>2023-07-27 21:26:12 +0300
committeraneporada <aneporada@ydb.tech>2023-07-27 21:26:12 +0300
commit5ea97cfd8a8f61d96636778ed64de3cb003e1589 (patch)
treee3340f838ec0c80c049b15ae610173c50beb6269
parent95ef237389033d9554531589df9c3dcbed67514d (diff)
downloadydb-5ea97cfd8a8f61d96636778ed64de3cb003e1589.tar.gz
Add IBlockItemHasher
-rw-r--r--ydb/library/yql/minikql/mkql_type_builder.cpp25
-rw-r--r--ydb/library/yql/minikql/mkql_type_builder.h1
-rw-r--r--ydb/library/yql/parser/pg_wrapper/comp_factory.cpp28
-rw-r--r--ydb/library/yql/parser/pg_wrapper/interface/compare.h2
-rw-r--r--ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt2
-rw-r--r--ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt2
-rw-r--r--ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt2
-rw-r--r--ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt2
-rw-r--r--ydb/library/yql/public/udf/arrow/block_item_hasher.cpp1
-rw-r--r--ydb/library/yql/public/udf/arrow/block_item_hasher.h96
-rw-r--r--ydb/library/yql/public/udf/arrow/block_type_helper.cpp9
-rw-r--r--ydb/library/yql/public/udf/arrow/block_type_helper.h25
-rw-r--r--ydb/library/yql/public/udf/arrow/ya.make2
-rw-r--r--ydb/library/yql/public/udf/udf_type_ops.h6
-rw-r--r--ydb/library/yql/public/udf/udf_version.h2
-rw-r--r--ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp5
16 files changed, 205 insertions, 5 deletions
diff --git a/ydb/library/yql/minikql/mkql_type_builder.cpp b/ydb/library/yql/minikql/mkql_type_builder.cpp
index c6dd847f913..e4ec0473d0c 100644
--- a/ydb/library/yql/minikql/mkql_type_builder.cpp
+++ b/ydb/library/yql/minikql/mkql_type_builder.cpp
@@ -5,6 +5,7 @@
#include <ydb/library/yql/public/udf/udf_type_ops.h>
#include <ydb/library/yql/public/udf/arrow/block_item_comparator.h>
+#include <ydb/library/yql/public/udf/arrow/block_item_hasher.h>
#include <library/cpp/containers/stack_vector/stack_vec.h>
#include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h>
@@ -671,6 +672,8 @@ public:
{}
ui64 Hash(NUdf::TUnboxedValuePod value) const override {
+ // keep hash computation in sync with
+ // ydb/library/yql/public/udf/arrow/block_item_hasher.h: TBlockItemHasherBase::Hash()
if (!value) {
return 0;
}
@@ -753,6 +756,8 @@ private:
class TVectorHash : public NUdf::IHash {
public:
ui64 Hash(NUdf::TUnboxedValuePod value) const override {
+ // keep hash computation in sync with
+ // ydb/library/yql/public/udf/arrow/block_item_hasher.h: TTupleBlockItemHasher::DoHash()
ui64 result = 0ULL;
auto elements = value.GetElements();
if (elements) {
@@ -2422,9 +2427,29 @@ struct TComparatorTraits {
}
};
+struct THasherTraits { // Traits bundle passed to NUdf::MakeBlockReaderImpl (see TBlockTypeHelper::MakeHasher) naming the hasher class for each kind of type.
+ using TResult = NUdf::IBlockItemHasher; // Common interface of every hasher built through these traits.
+ template <bool Nullable>
+ using TTuple = NUdf::TTupleBlockItemHasher<Nullable>; // Hasher for tuple items.
+ template <typename T, bool Nullable>
+ using TFixedSize = NUdf::TFixedSizeBlockItemHasher<T, Nullable>; // Hasher for fixed-size values of type T.
+ template <typename TStringType, bool Nullable>
+ using TStrings = NUdf::TStringBlockItemHasher<TStringType, Nullable>; // Hasher for string-like values.
+ using TExtOptional = NUdf::TExternalOptionalBlockItemHasher; // Hasher that wraps an inner hasher for an optional value.
+
+ static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) { // Builds the hasher for a PG type, selected by desc.TypeId.
+ Y_UNUSED(pgBuilder); // pgBuilder is not used by this traits implementation.
+ return std::unique_ptr<TResult>(MakePgItemHasher(desc.TypeId).Release()); // Release() hands ownership from the TPtr over to std::unique_ptr.
+ }
+};
+
NUdf::IBlockItemComparator::TPtr TBlockTypeHelper::MakeComparator(NUdf::TType* type) const { // Builds a block item comparator for the given type.
return NUdf::MakeBlockReaderImpl<TComparatorTraits>(TTypeInfoHelper(), type, nullptr).release(); // nullptr is passed as the last argument; release() hands the raw pointer to the returned TPtr.
}
+NUdf::IBlockItemHasher::TPtr TBlockTypeHelper::MakeHasher(NUdf::TType* type) const { // Builds a block item hasher for the given type, using THasherTraits to pick the hasher classes.
+ return NUdf::MakeBlockReaderImpl<THasherTraits>(TTypeInfoHelper(), type, nullptr).release(); // nullptr is passed as the last argument; release() hands the raw pointer to the returned TPtr.
+}
+
} // namespace NMiniKQL
} // namespace NKikimr
diff --git a/ydb/library/yql/minikql/mkql_type_builder.h b/ydb/library/yql/minikql/mkql_type_builder.h
index 2043c5b3fcb..8806c014ab8 100644
--- a/ydb/library/yql/minikql/mkql_type_builder.h
+++ b/ydb/library/yql/minikql/mkql_type_builder.h
@@ -16,6 +16,7 @@ namespace NMiniKQL {
class TBlockTypeHelper : public NUdf::IBlockTypeHelper { // MiniKQL implementation of the NUdf::IBlockTypeHelper interface.
public:
NUdf::IBlockItemComparator::TPtr MakeComparator(NUdf::TType* type) const final; // Creates a comparator for block items of the given type.
+ NUdf::IBlockItemHasher::TPtr MakeHasher(NUdf::TType* type) const final; // Creates a hasher for block items of the given type.
};
constexpr size_t MaxBlockSizeInBytes = 1_MB;
diff --git a/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp b/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp
index 1848969b54c..f4a2f3fc28a 100644
--- a/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp
+++ b/ydb/library/yql/parser/pg_wrapper/comp_factory.cpp
@@ -2863,7 +2863,7 @@ void PgDestroyContext(const std::string_view& contextType, void* ctx) {
}
template <bool PassByValue, bool IsArray>
-class TPgHash : public NUdf::IHash {
+class TPgHash : public NUdf::IHash, public NUdf::TBlockItemHasherBase<TPgHash<PassByValue, IsArray>, true> {
public:
TPgHash(const NYql::NPg::TTypeDesc& typeDesc)
: TypeDesc(typeDesc)
@@ -2904,6 +2904,21 @@ public:
return DatumGetUInt32(x);
}
+ ui64 DoHash(NUdf::TBlockItem value) const { // Hashes a block item by calling the PG hash function stored in FInfoHash; invoked through TBlockItemHasherBase::Hash().
+ LOCAL_FCINFO(callInfo, 1); // Declares call info sized for one argument.
+ Zero(*callInfo);
+ callInfo->flinfo = const_cast<FmgrInfo*>(&FInfoHash); // don't copy because IHash isn't threadsafe
+ callInfo->nargs = 1;
+ callInfo->fncollation = DEFAULT_COLLATION_OID;
+ callInfo->isnull = false;
+ callInfo->args[0] = { PassByValue ? // The template flag picks the datum representation at compile time.
+ ScalarDatumFromItem(value) :
+ PointerDatumFromItem(value), false }; // Second field is false - presumably the argument's is-null flag; confirm against the PG headers.
+
+ auto x = FInfoHash.fn_addr(callInfo);
+ Y_ENSURE(!callInfo->isnull); // The hash function must not report a null result.
+ return DatumGetUInt32(x); // The 32-bit result is returned as ui64.
+ }
private:
const NYql::NPg::TTypeDesc TypeDesc;
@@ -2921,6 +2936,17 @@ NUdf::IHash::TPtr MakePgHash(const NMiniKQL::TPgType* type) {
}
}
+NUdf::IBlockItemHasher::TPtr MakePgItemHasher(ui32 typeId) { // Creates a block item hasher for the PG type with the given id.
+ const auto& typeDesc = NYql::NPg::LookupType(typeId); // Type description drives the choice of TPgHash instantiation below.
+ if (typeDesc.PassByValue) {
+ return new TPgHash<true, false>(typeDesc); // Pass-by-value type.
+ } else if (typeDesc.TypeId == typeDesc.ArrayTypeId) {
+ return new TPgHash<false, true>(typeDesc); // Type id equals its own array type id, so it is treated as an array.
+ } else {
+ return new TPgHash<false, false>(typeDesc); // Any other pass-by-reference type.
+ }
+}
+
template <bool PassByValue, bool IsArray>
class TPgCompare : public NUdf::ICompare, public NUdf::TBlockItemComparatorBase<TPgCompare<PassByValue, IsArray>, true> {
public:
diff --git a/ydb/library/yql/parser/pg_wrapper/interface/compare.h b/ydb/library/yql/parser/pg_wrapper/interface/compare.h
index a3870c5fce6..f91d6b026df 100644
--- a/ydb/library/yql/parser/pg_wrapper/interface/compare.h
+++ b/ydb/library/yql/parser/pg_wrapper/interface/compare.h
@@ -2,6 +2,7 @@
#include <ydb/library/yql/public/udf/udf_type_builder.h>
#include <ydb/library/yql/public/udf/arrow/block_item_comparator.h>
+#include <ydb/library/yql/public/udf/arrow/block_item_hasher.h>
namespace NKikimr {
namespace NMiniKQL {
@@ -12,6 +13,7 @@ NUdf::IHash::TPtr MakePgHash(const TPgType* type);
NUdf::ICompare::TPtr MakePgCompare(const TPgType* type);
NUdf::IEquate::TPtr MakePgEquate(const TPgType* type);
NUdf::IBlockItemComparator::TPtr MakePgItemComparator(ui32 typeId);
+NUdf::IBlockItemHasher::TPtr MakePgItemHasher(ui32 typeId);
} // namespace NMiniKQL
} // namespace NKikimr
diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt
index 1b41c0f06e9..322d3440a53 100644
--- a/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/public/udf/arrow/CMakeLists.darwin-x86_64.txt
@@ -24,5 +24,7 @@ target_sources(public-udf-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp
)
diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt
index 59ba7f29325..0e855decf8d 100644
--- a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-aarch64.txt
@@ -25,5 +25,7 @@ target_sources(public-udf-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp
)
diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt
index 59ba7f29325..0e855decf8d 100644
--- a/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/public/udf/arrow/CMakeLists.linux-x86_64.txt
@@ -25,5 +25,7 @@ target_sources(public-udf-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp
)
diff --git a/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt
index 1b41c0f06e9..322d3440a53 100644
--- a/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/public/udf/arrow/CMakeLists.windows-x86_64.txt
@@ -24,5 +24,7 @@ target_sources(public-udf-arrow PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/util.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_reader.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_item_comparator.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/udf/arrow/block_type_helper.cpp
)
diff --git a/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp b/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp
new file mode 100644
index 00000000000..37faaa49592
--- /dev/null
+++ b/ydb/library/yql/public/udf/arrow/block_item_hasher.cpp
@@ -0,0 +1 @@
+#include "block_item_hasher.h"
diff --git a/ydb/library/yql/public/udf/arrow/block_item_hasher.h b/ydb/library/yql/public/udf/arrow/block_item_hasher.h
new file mode 100644
index 00000000000..4c3d89e998e
--- /dev/null
+++ b/ydb/library/yql/public/udf/arrow/block_item_hasher.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include "block_item.h"
+
+#include <ydb/library/yql/public/udf/udf_ptr.h>
+#include <ydb/library/yql/public/udf/udf_type_inspection.h>
+#include <ydb/library/yql/public/udf/udf_type_ops.h>
+#include <ydb/library/yql/public/udf/udf_type_size_check.h>
+
+namespace NYql::NUdf {
+
+// ABI stable
+class IBlockItemHasher { // Interface for computing a 64-bit hash of a TBlockItem.
+public:
+ using TPtr = TUniquePtr<IBlockItemHasher>; // Owning pointer type used by factories that return hashers.
+
+ virtual ~IBlockItemHasher() = default;
+ virtual ui64 Hash(TBlockItem value) const = 0; // Returns the hash of value.
+};
+
+UDF_ASSERT_TYPE_SIZE(IBlockItemHasher, 8);
+
+template <typename TDerived, bool Nullable>
+class TBlockItemHasherBase : public IBlockItemHasher { // Base that implements Hash() by delegating to TDerived::DoHash(), with empty-value handling when Nullable.
+public:
+ const TDerived* Derived() const { // Returns this object viewed as the derived hasher type.
+ return static_cast<const TDerived*>(this);
+ }
+
+ ui64 Hash(TBlockItem value) const final { // Returns 0 for an empty value when Nullable; otherwise a hash built from DoHash().
+ // keep hash computation in sync with
+ // ydb/library/yql/minikql/mkql_type_builder.cpp: THash<NMiniKQL::TType::EKind::Optional>::Hash()
+ if constexpr (Nullable) {
+ if (!value) {
+ return 0; // Empty value hashes to zero.
+ }
+ return CombineHashes(ui64(1), Derived()->DoHash(value)); // Present value: the derived hash is combined with the constant 1.
+ } else {
+ return Derived()->DoHash(value); // Non-nullable: the derived hash is returned unchanged.
+ }
+ }
+};
+
+template <typename T, bool Nullable>
+class TFixedSizeBlockItemHasher : public TBlockItemHasherBase<TFixedSizeBlockItemHasher<T, Nullable>, Nullable> { // Hasher for items holding a fixed-size value of type T.
+public:
+ ui64 DoHash(TBlockItem value) const { // Hashes the item's value read as T.
+ return GetValueHash<TDataType<T>::Slot>(NUdf::TUnboxedValuePod(value.As<T>())); // Wraps the value in TUnboxedValuePod so GetValueHash for T's data slot can be reused.
+ }
+};
+
+template <typename TStringType, bool Nullable>
+class TStringBlockItemHasher : public TBlockItemHasherBase<TStringBlockItemHasher<TStringType, Nullable>, Nullable> { // Hasher for items holding string data.
+public:
+ ui64 DoHash(TBlockItem value) const { // Hashes the item's string contents.
+ return GetStringHash(value.AsStringRef()); // TStringType is not used in the computation.
+ }
+};
+
+template <bool Nullable>
+class TTupleBlockItemHasher : public TBlockItemHasherBase<TTupleBlockItemHasher<Nullable>, Nullable> { // Hashes a tuple item by folding together the hashes of its elements.
+public:
+ explicit TTupleBlockItemHasher(TVector<std::unique_ptr<IBlockItemHasher>>&& children) // explicit: a vector of hashers must never convert to a hasher implicitly. Takes ownership of the child hashers.
+ : Children_(std::move(children))
+ {}
+
+ ui64 DoHash(TBlockItem value) const { // Returns the combined hash of the tuple's elements; when Nullable, empty values are handled by TBlockItemHasherBase::Hash().
+ // keep hash computation in sync with
+ // ydb/library/yql/minikql/mkql_type_builder.cpp: TVectorHash::Hash()
+ ui64 result = 0ULL; // Starting value of the fold.
+ auto elements = value.GetElements();
+ for (ui32 i = 0; i < Children_.size(); ++i) { // Element i is hashed by Children_[i].
+ result = CombineHashes(result, Children_[i]->Hash(elements[i]));
+ }
+ return result;
+ }
+
+private:
+ const TVector<std::unique_ptr<IBlockItemHasher>> Children_; // One hasher per tuple element, indexed like the elements.
+};
+
+class TExternalOptionalBlockItemHasher : public TBlockItemHasherBase<TExternalOptionalBlockItemHasher, true> { // Hasher for an optional item whose payload is hashed by an inner hasher; always nullable.
+public:
+ explicit TExternalOptionalBlockItemHasher(std::unique_ptr<IBlockItemHasher>&& inner) // explicit: an owning pointer to a hasher must never convert to this wrapper implicitly. Takes ownership of inner.
+ : Inner_(std::move(inner))
+ {}
+
+ ui64 DoHash(TBlockItem value) const { // Hashes the wrapped value; empty outer values are handled by TBlockItemHasherBase::Hash().
+ return Inner_->Hash(value.GetOptionalValue()); // Uses the inner hasher's Hash(), not DoHash().
+ }
+
+private:
+ const std::unique_ptr<IBlockItemHasher> Inner_; // Hasher for the wrapped value.
+};
+
+}
diff --git a/ydb/library/yql/public/udf/arrow/block_type_helper.cpp b/ydb/library/yql/public/udf/arrow/block_type_helper.cpp
new file mode 100644
index 00000000000..362d9faf254
--- /dev/null
+++ b/ydb/library/yql/public/udf/arrow/block_type_helper.cpp
@@ -0,0 +1,9 @@
+#include "block_type_helper.h"
+
+namespace NYql {
+namespace NUdf {
+
+IBlockTypeHelper::IBlockTypeHelper() {} // Out-of-line definition of the constructor declared in block_type_helper.h; the body is empty.
+
+} // namespace NUdf
+} // namespace NYql
diff --git a/ydb/library/yql/public/udf/arrow/block_type_helper.h b/ydb/library/yql/public/udf/arrow/block_type_helper.h
index 9eb7fc9bf9d..d02168ac864 100644
--- a/ydb/library/yql/public/udf/arrow/block_type_helper.h
+++ b/ydb/library/yql/public/udf/arrow/block_type_helper.h
@@ -1,17 +1,38 @@
#pragma once
#include "block_item_comparator.h"
+#include "block_item_hasher.h"
#include <ydb/library/yql/public/udf/udf_type_size_check.h>
+#include <ydb/library/yql/public/udf/udf_version.h>
namespace NYql {
namespace NUdf {
// ABI stable
-class IBlockTypeHelper {
+class IBlockTypeHelper1 {
public:
- virtual ~IBlockTypeHelper() = default;
+ virtual ~IBlockTypeHelper1() = default;
virtual IBlockItemComparator::TPtr MakeComparator(TType* type) const = 0;
};
+#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 34) // Declared only when building against ABI version 2.34 or newer.
+class IBlockTypeHelper2 : public IBlockTypeHelper1 { // Extends IBlockTypeHelper1 with hasher creation.
+public:
+ virtual IBlockItemHasher::TPtr MakeHasher(TType *type) const = 0; // Creates a hasher for block items of the given type.
+};
+#endif
+
+#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 34) // Picks the base interface according to the ABI version being built against.
+class IBlockTypeHelper : public IBlockTypeHelper2 { // ABI 2.34 and newer: includes MakeHasher via IBlockTypeHelper2.
+public:
+ IBlockTypeHelper(); // Declared here only; the definition is not in this header.
+};
+#else
+class IBlockTypeHelper : public IBlockTypeHelper1 { // Older ABI: comparator creation only.
+public:
+ IBlockTypeHelper(); // Declared here only; the definition is not in this header.
+};
+#endif
+
UDF_ASSERT_TYPE_SIZE(IBlockTypeHelper, 8);
}
diff --git a/ydb/library/yql/public/udf/arrow/ya.make b/ydb/library/yql/public/udf/arrow/ya.make
index f334f8eea84..27e7476cd3f 100644
--- a/ydb/library/yql/public/udf/arrow/ya.make
+++ b/ydb/library/yql/public/udf/arrow/ya.make
@@ -7,7 +7,9 @@ SRCS(
util.cpp
block_reader.cpp
block_item.cpp
+ block_item_hasher.cpp
block_item_comparator.cpp
+ block_type_helper.cpp
)
PEERDIR(
diff --git a/ydb/library/yql/public/udf/udf_type_ops.h b/ydb/library/yql/public/udf/udf_type_ops.h
index 4b2446c8fd9..21b79fffa32 100644
--- a/ydb/library/yql/public/udf/udf_type_ops.h
+++ b/ydb/library/yql/public/udf/udf_type_ops.h
@@ -50,8 +50,12 @@ inline THashType GetFloatHash(const TUnboxedValuePod& value) {
return std::isunordered(x, x) ? ~0ULL : std::hash<T>()(x);
}
+inline THashType GetStringHash(TStringBuf value) { // Hashes a string buffer directly, without requiring a TUnboxedValuePod.
+ return THash<TStringBuf>{}(value);
+}
+
inline THashType GetStringHash(const TUnboxedValuePod& value) {
- return THash<TStringBuf>{}(value.AsStringRef());
+ return GetStringHash(value.AsStringRef());
}
template <typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
diff --git a/ydb/library/yql/public/udf/udf_version.h b/ydb/library/yql/public/udf/udf_version.h
index e1a33d560f6..d66cd123480 100644
--- a/ydb/library/yql/public/udf/udf_version.h
+++ b/ydb/library/yql/public/udf/udf_version.h
@@ -7,7 +7,7 @@ namespace NYql {
namespace NUdf {
#define CURRENT_UDF_ABI_VERSION_MAJOR 2
-#define CURRENT_UDF_ABI_VERSION_MINOR 33
+#define CURRENT_UDF_ABI_VERSION_MINOR 34
#define CURRENT_UDF_ABI_VERSION_PATCH 0
#ifdef USE_CURRENT_UDF_ABI_VERSION
diff --git a/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp b/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp
index c38b3d73791..93872431c6a 100644
--- a/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp
+++ b/ydb/library/yql/sql/pg_dummy/pg_sql_dummy.cpp
@@ -231,6 +231,11 @@ NUdf::IBlockItemComparator::TPtr MakePgItemComparator(ui32 typeId) {
throw yexception() << "PG types are not supported";
}
+NUdf::IBlockItemHasher::TPtr MakePgItemHasher(ui32 typeId) { // Dummy implementation: always throws instead of creating a hasher.
+ Y_UNUSED(typeId); // The argument is intentionally ignored.
+ throw yexception() << "PG types are not supported";
+}
+
void RegisterPgBlockAggs(THashMap<TString, std::unique_ptr<IBlockAggregatorFactory>>& registry) {
Y_UNUSED(registry);
}