diff options
author | udovichenko-r <rvu@ydb.tech> | 2022-11-18 15:17:58 +0300 |
---|---|---|
committer | udovichenko-r <rvu@ydb.tech> | 2022-11-18 15:17:58 +0300 |
commit | ded42ea745c481774d4ffa30a44d31057b6ce03b (patch) | |
tree | 6997984d1e02c154a553d98e5eff21397e50ecf4 | |
parent | 7506642977946e2bd9538cf66daa3bf0d07f00b0 (diff) | |
download | ydb-ded42ea745c481774d4ffa30a44d31057b6ce03b.tar.gz |
[mkql] Optimize HashedDict for optional data keys
5 files changed, 873 insertions, 218 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp index f743fb39cd1..70242223d02 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp @@ -13,6 +13,8 @@ #include <algorithm> #include <unordered_map> +#include <optional> +#include <vector> namespace NKikimr { namespace NMiniKQL { @@ -137,7 +139,7 @@ public: } }; -template<typename T> +template<typename T, bool OptionalKey> class THashedSingleFixedMultiMapAccumulator { using TMapType = std::unordered_map< T, @@ -149,6 +151,7 @@ class THashedSingleFixedMultiMapAccumulator { TComputationContext& Ctx; const TKeyTypes& KeyTypes; TMapType Map; + TUnboxedValueVector NullPayloads; public: THashedSingleFixedMultiMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, @@ -167,6 +170,12 @@ public: void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload) { + if constexpr (OptionalKey) { + if (!key) { + NullPayloads.emplace_back(std::move(payload)); + return; + } + } const auto ins = Map.emplace(key.Get<T>(), 1U); if (ins.second) ins.first->second.front() = std::move(payload); @@ -179,11 +188,10 @@ public: const auto filler = [this](TValuesDictHashMap& targetMap) { targetMap.reserve(Map.size()); + auto itemFactory = [](const NUdf::TUnboxedValuePod& value) { + return value; + }; for (auto& pair : Map) { - auto itemFactory = [](const NUdf::TUnboxedValuePod& value) { - return value; - }; - ui64 start = 0; ui64 finish = pair.second.size(); auto payloadList = CreateOwningVectorListAdapter(std::move(pair.second), itemFactory, @@ -192,18 +200,28 @@ public: targetMap.emplace(NUdf::TUnboxedValuePod(pair.first), std::move(payloadList)); } + if constexpr (OptionalKey) { + if (!NullPayloads.empty()) { + auto payloadList = CreateOwningVectorListAdapter(std::move(NullPayloads), itemFactory, + /*start*/ 0, /*finish*/ NullPayloads.size(), /*reversed*/ false, + Ctx.HolderFactory.GetMemInfo()); + + targetMap.emplace(NUdf::TUnboxedValuePod(), std::move(payloadList)); + } + } }; return Ctx.HolderFactory.CreateDirectHashedDictHolder(filler, KeyTypes, false, true, nullptr, nullptr, nullptr); } }; -template<typename T> +template<typename T, bool OptionalKey> class THashedSingleFixedMapAccumulator { using TMapType = TValuesDictHashSingleFixedMap<T>; TComputationContext& Ctx; TMapType Map; + std::optional<NUdf::TUnboxedValue> NullPayload; public: THashedSingleFixedMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, @@ -223,12 +241,18 @@ public: void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload) { + if constexpr (OptionalKey) { + if (!key) { + NullPayload.emplace(std::move(payload)); + return; + } + } Map.emplace(key.Get<T>(), std::move(payload)); } NUdf::TUnboxedValue Build() { - return Ctx.HolderFactory.CreateDirectHashedSingleFixedMapHolder<T>(std::move(Map)); + return Ctx.HolderFactory.CreateDirectHashedSingleFixedMapHolder<T, OptionalKey>(std::move(Map), std::move(NullPayload)); } }; @@ -277,12 +301,13 @@ public: } }; -template <typename T> +template <typename T, bool OptionalKey> class THashedSingleFixedSetAccumulator { using TSetType = TValuesDictHashSingleFixedSet<T>; TComputationContext& Ctx; TSetType Set; + bool HasNull = false; public: THashedSingleFixedSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, @@ -301,22 +326,29 @@ public: void Add(NUdf::TUnboxedValue&& key) { + if constexpr (OptionalKey) { + if (!key) { + HasNull = true; + return; + } + } Set.emplace(key.Get<T>()); } NUdf::TUnboxedValue Build() { - return Ctx.HolderFactory.CreateDirectHashedSingleFixedSetHolder<T>(std::move(Set)); + return Ctx.HolderFactory.CreateDirectHashedSingleFixedSetHolder<T, OptionalKey>(std::move(Set), HasNull); } }; -template <typename T> +template <typename T, bool OptionalKey> class THashedSingleFixedCompactSetAccumulator { using TSetType = TValuesDictHashSingleFixedCompactSet<T>; TComputationContext& Ctx; TPagedArena Pool; TSetType Set; + bool HasNull = false; public: THashedSingleFixedCompactSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, @@ -335,12 +367,18 @@ public: void Add(NUdf::TUnboxedValue&& key) { + if constexpr (OptionalKey) { + if (!key) { + HasNull = true; + return; + } + } Set.Insert(key.Get<T>()); } NUdf::TUnboxedValue Build() { - return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactSetHolder<T>(std::move(Set)); + return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactSetHolder<T, OptionalKey>(std::move(Set), HasNull); } }; @@ -454,17 +492,18 @@ public: } }; -template <typename T, bool Multi> +template <typename T, bool OptionalKey, bool Multi> class THashedSingleFixedCompactMapAccumulator; -template <typename T> -class THashedSingleFixedCompactMapAccumulator<T, false> { +template <typename T, bool OptionalKey> +class THashedSingleFixedCompactMapAccumulator<T, OptionalKey, false> { using TMapType = TValuesDictHashSingleFixedCompactMap<T>; TComputationContext& Ctx; TPagedArena Pool; TMapType Map; - TType *PayloadType; + std::optional<ui64> NullPayload; + TType* PayloadType; TValuePacker PayloadPacker; public: @@ -485,23 +524,30 @@ public: void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload) { + if constexpr (OptionalKey) { + if (!key) { + NullPayload = AddSmallValue(Pool, PayloadPacker.Pack(payload)); + return; + } + } Map.InsertNew(key.Get<T>(), AddSmallValue(Pool, PayloadPacker.Pack(payload))); } NUdf::TUnboxedValue Build() { - return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMapHolder(std::move(Map), std::move(Pool), PayloadType, &Ctx); + return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMapHolder<T, OptionalKey>(std::move(Map), std::move(NullPayload), std::move(Pool), PayloadType, &Ctx); } }; -template <typename T> -class THashedSingleFixedCompactMapAccumulator<T, true> { +template <typename T, bool OptionalKey> +class THashedSingleFixedCompactMapAccumulator<T, OptionalKey, true> { using TMapType = TValuesDictHashSingleFixedCompactMultiMap<T>; TComputationContext& Ctx; TPagedArena Pool; TMapType Map; - TType *PayloadType; + std::vector<ui64> NullPayloads; + TType* PayloadType; TValuePacker PayloadPacker; public: @@ -522,12 +568,18 @@ public: void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload) { + if constexpr (OptionalKey) { + if (!key) { + NullPayloads.push_back(AddSmallValue(Pool, PayloadPacker.Pack(payload))); + return; + } + } Map.Insert(key.Get<T>(), AddSmallValue(Pool, PayloadPacker.Pack(payload))); } NUdf::TUnboxedValue Build() { - return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMultiMapHolder(std::move(Map), std::move(Pool), PayloadType, &Ctx); + return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMultiMapHolder<T, OptionalKey>(std::move(Map), std::move(NullPayloads), std::move(Pool), PayloadType, &Ctx); } }; @@ -1881,15 +1933,23 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati const bool isCompact = AS_VALUE(TDataLiteral, callable.GetInput(callable.GetInputsCount() - 2U))->AsValue().Get<bool>(); const auto payloadSelectorNode = callable.GetInput(callable.GetInputsCount() - 4U); + const bool isOptional = keyType->IsOptional(); + const auto unwrappedKeyType = isOptional ? AS_TYPE(TOptionalType, keyType)->GetItemType() : keyType; + if (!multi && payloadType->IsVoid()) { if (isCompact) { - if (keyType->IsData()) { + if (unwrappedKeyType->IsData()) { #define USE_HASHED_SINGLE_FIXED_COMPACT_SET(xType, xLayoutType) \ case NUdf::TDataType<xType>::Id: \ - return WrapToSet< \ - THashedSingleFixedCompactSetAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables); + if (isOptional) { \ + return WrapToSet< \ + THashedSingleFixedCompactSetAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } else { \ + return WrapToSet< \ + THashedSingleFixedCompactSetAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } - switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) { + switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) { KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_COMPACT_SET) } #undef USE_HASHED_SINGLE_FIXED_COMPACT_SET @@ -1898,13 +1958,18 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati return WrapToSet<THashedCompactSetAccumulator>(callable, ctx.NodeLocator, ctx.Mutables); } - if (keyType->IsData()) { + if (unwrappedKeyType->IsData()) { #define USE_HASHED_SINGLE_FIXED_SET(xType, xLayoutType) \ case NUdf::TDataType<xType>::Id: \ - return WrapToSet< \ - THashedSingleFixedSetAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables); + if (isOptional) { \ + return WrapToSet< \ + THashedSingleFixedSetAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } else { \ + return WrapToSet< \ + THashedSingleFixedSetAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } - switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) { + switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) { KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_SET) } #undef USE_HASHED_SINGLE_FIXED_SET @@ -1913,18 +1978,28 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati } if (isCompact) { - if (keyType->IsData()) { + if (unwrappedKeyType->IsData()) { #define USE_HASHED_SINGLE_FIXED_COMPACT_MAP(xType, xLayoutType) \ case NUdf::TDataType<xType>::Id: \ if (multi) { \ - return WrapToMap< \ - THashedSingleFixedCompactMapAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + if (isOptional) { \ + return WrapToMap< \ + THashedSingleFixedCompactMapAccumulator<xLayoutType, true, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } else { \ + return WrapToMap< \ + THashedSingleFixedCompactMapAccumulator<xLayoutType, false, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } \ } else { \ - return WrapToMap< \ - THashedSingleFixedCompactMapAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + if (isOptional) { \ + return WrapToMap< \ + THashedSingleFixedCompactMapAccumulator<xLayoutType, true, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } else { \ + return WrapToMap< \ + THashedSingleFixedCompactMapAccumulator<xLayoutType, false, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } \ } - switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) { + switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) { KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_COMPACT_MAP) } #undef USE_HASHED_SINGLE_FIXED_COMPACT_MAP @@ -1937,18 +2012,28 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati } } - if (keyType->IsData()) { + if (unwrappedKeyType->IsData()) { #define USE_HASHED_SINGLE_FIXED_MAP(xType, xLayoutType) \ case NUdf::TDataType<xType>::Id: \ if (multi) { \ - return WrapToMap< \ - THashedSingleFixedMultiMapAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables); \ + if (isOptional) { \ + return WrapToMap< \ + THashedSingleFixedMultiMapAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } else { \ + return WrapToMap< \ + THashedSingleFixedMultiMapAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } \ } else { \ - return WrapToMap< \ - THashedSingleFixedMapAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables); \ + if (isOptional) { \ + return WrapToMap< \ + THashedSingleFixedMapAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } else { \ + return WrapToMap< \ + THashedSingleFixedMapAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \ + } \ } - switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) { + switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) { KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_MAP) } #undef USE_HASHED_SINGLE_FIXED_MAP diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp index 5a5cd7e2de5..75bf0fe352a 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp @@ -5,10 +5,10 @@ #include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> #include <util/random/shuffle.h> +#include <map> +#include <optional> - -namespace NKikimr { -namespace NMiniKQL { +namespace NKikimr::NMiniKQL { static const TStringBuf data[] = { "13d49d4db08e57d645fe4d44bbed4738f386af6e9e742cf186961063feb9919b", @@ -214,7 +214,336 @@ Y_UNIT_TEST_SUITE(TMiniKQLToDictTest) { } } #endif -} + template <bool LLVM> + static void TestDictWithDataKeyImpl(bool optionalKey, bool multi, bool compact, bool withNull, bool withData) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + TType* keyType = pb.NewDataType(NUdf::EDataSlot::Int32, optionalKey); + TType* valueType = pb.NewDataType(NUdf::EDataSlot::Int32, false); + TType* tupleType = pb.NewTupleType({keyType, valueType}); + TVector<TRuntimeNode> items; + TVector<TRuntimeNode> keys; + if (withNull) { + UNIT_ASSERT(optionalKey); + keys.push_back(pb.NewEmptyOptional(keyType)); + for (size_t k = 0; k < 1 + multi; ++k) { + items.push_back(pb.NewTuple(tupleType, {keys.back(), pb.NewDataLiteral((i32)items.size())})); + } + } + if (withData) { + for (i32 i = 0; i < 2; ++i) { + auto key = pb.NewDataLiteral(i); + if (optionalKey) { + key = pb.NewOptional(key); + } + keys.push_back(key); + for (size_t k = 0; k < 1 + multi; ++k) { + items.push_back(pb.NewTuple(tupleType, {key, pb.NewDataLiteral((i32)items.size())})); + } + } + } + auto list = pb.NewList(tupleType, items); + auto keyList = pb.NewList(keyType, keys); + auto dict = pb.ToHashedDict(list, multi, [&](TRuntimeNode tuple) { return pb.Nth(tuple, 0); }, [&pb](TRuntimeNode tuple) { return pb.Nth(tuple, 1); }, compact); + + auto compareLists = [&](bool itemIsTuple, TRuntimeNode list1, TRuntimeNode list2) { + return pb.And({ + pb.Equals( + pb.Length(list1), + pb.Length(list2) + ), + pb.Not( + pb.Exists( + pb.Head( + pb.SkipWhile( + pb.Zip({list1, list2}), + [&](TRuntimeNode pair) { + if (itemIsTuple) { + return pb.And({ + pb.AggrEquals(pb.Nth(pb.Nth(pair, 0), 0), pb.Nth(pb.Nth(pair, 1), 0)), + pb.AggrEquals(pb.Nth(pb.Nth(pair, 0), 1), pb.Nth(pb.Nth(pair, 1), 1)), + }); + } else { + return pb.AggrEquals(pb.Nth(pair, 0), pb.Nth(pair, 1)); + } + } + ) + ) + ) + ) + }); + }; + + TVector<TRuntimeNode> results; + + // Check Dict has items + results.push_back(pb.AggrEquals( + pb.HasItems(dict), + pb.NewDataLiteral(withNull || withData) + )); + + // Check Dict length + results.push_back(pb.AggrEquals( + pb.Length(dict), + pb.NewDataLiteral((ui64)keys.size()) + )); + + // Check Dict Contains + results.push_back(pb.AllOf( + pb.Map(list, [&](TRuntimeNode tuple) { + return pb.Contains(dict, pb.Nth(tuple, 0)); + }), + [&](TRuntimeNode item) { return item; } + )); + + // Check Dict Lookup + results.push_back(compareLists(false, + pb.Sort( + pb.FlatMap( + pb.Map( + keyList, + [&](TRuntimeNode key) { + return pb.Unwrap(pb.Lookup(dict, key), pb.NewDataLiteral<NUdf::EDataSlot::String>("Lookup failed"), "", 0, 0); + } + ), + [&](TRuntimeNode item) { + return multi ? item : pb.NewOptional(item); + } + ), + pb.NewDataLiteral(true), + [&](TRuntimeNode item) { return item; } + ), + pb.Sort( + pb.Map(list, [&](TRuntimeNode tuple) { + return pb.Nth(tuple, 1); + }), + pb.NewDataLiteral(true), + [&](TRuntimeNode item) { return item; } + ) + )); + + // Check Dict items iterator + results.push_back(compareLists(true, + pb.Sort( + pb.FlatMap( + pb.DictItems(dict), + [&](TRuntimeNode pair) { + if (multi) { + return pb.Map( + pb.Nth(pair, 1), + [&](TRuntimeNode p) { + return pb.NewTuple({pb.Nth(pair, 0), p}); + } + ); + } else { + return pb.NewOptional(pair); + } + } + ), + pb.NewTuple({pb.NewDataLiteral(true), pb.NewDataLiteral(true)}), + [&](TRuntimeNode item) { return item; } + ), + list + )); + + // Check Dict payloads iterator + results.push_back(compareLists(false, + pb.Sort( + pb.FlatMap( + pb.DictPayloads(dict), + [&](TRuntimeNode item) { + return multi ? item : pb.NewOptional(item); + } + ), + pb.NewDataLiteral(true), + [&](TRuntimeNode item) { return item; } + ), + pb.Map( + list, + [&](TRuntimeNode item) { + return pb.Nth(item, 1); + } + ) + )); + + auto graph = setup.BuildGraph(pb.NewTuple(results)); + NUdf::TUnboxedValue res = graph->GetValue(); + + UNIT_ASSERT_C(res.GetElement(0).Get<bool>(), "Dict HasItems fail"); + UNIT_ASSERT_C(res.GetElement(1).Get<bool>(), "Dict Length fail"); + UNIT_ASSERT_C(res.GetElement(2).Get<bool>(), "Dict Contains fail"); + UNIT_ASSERT_C(res.GetElement(3).Get<bool>(), "Dict Lookup fail"); + UNIT_ASSERT_C(res.GetElement(4).Get<bool>(), "DictItems fail"); + UNIT_ASSERT_C(res.GetElement(5).Get<bool>(), "DictPayloads fail"); + } + + Y_UNIT_TEST_LLVM(TestDictWithDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictCompactWithDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictMultiWithDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictCompactMultiWithDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictWithOptionalDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/true, /*withData*/false); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/true, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictCompactWithOptionalDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/true, /*withData*/false); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/true, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictMultiWithOptionalDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/true, /*withData*/false); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/true, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict + } + + Y_UNIT_TEST_LLVM(TestDictCompactMultiWithOptionalDataKey) { + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/true, /*withData*/false); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/true, /*withData*/true); + TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict + } + + template <bool LLVM> + static void TestSetWithDataKeyImpl(bool optionalKey, bool compact, bool withNull, bool withData) { + TSetup<LLVM> setup; + TProgramBuilder& pb = *setup.PgmBuilder; + + TType* keyType = pb.NewDataType(NUdf::EDataSlot::Int32, optionalKey); + TVector<TRuntimeNode> keys; + if (withNull) { + UNIT_ASSERT(optionalKey); + keys.push_back(pb.NewEmptyOptional(keyType)); + } + if (withData) { + for (i32 i = 0; i < 2; ++i) { + auto key = pb.NewDataLiteral(i); + if (optionalKey) { + key = pb.NewOptional(key); + } + keys.push_back(key); + } + } + auto keyList = pb.NewList(keyType, keys); + auto set = pb.ToHashedDict(keyList, false, [&](TRuntimeNode key) { return key; }, [&pb](TRuntimeNode) { return pb.NewVoid(); }, compact); + + auto compareLists = [&](TRuntimeNode list1, TRuntimeNode list2) { + return pb.And({ + pb.Equals( + pb.Length(list1), + pb.Length(list2) + ), + pb.Not( + pb.Exists( + pb.Head( + pb.SkipWhile( + pb.Zip({list1, list2}), + [&](TRuntimeNode pair) { + return pb.AggrEquals(pb.Nth(pair, 0), pb.Nth(pair, 1)); + } + ) + ) + ) + ) + }); + }; + + TVector<TRuntimeNode> results; + + // Check Set has items + results.push_back(pb.AggrEquals( + pb.HasItems(set), + pb.NewDataLiteral(withNull || withData) + )); + + // Check Set length + results.push_back(pb.AggrEquals( + pb.Length(set), + pb.NewDataLiteral((ui64)keys.size()) + )); + + // Check Set Contains + results.push_back(pb.AllOf( + pb.Map(keyList, [&](TRuntimeNode key) { + return pb.Contains(set, key); + }), + [&](TRuntimeNode item) { return item; } + )); + + // Check Set Lookup + results.push_back(pb.AllOf( + pb.Map(keyList, [&](TRuntimeNode key) { + return pb.Exists(pb.Lookup(set, key)); + }), + [&](TRuntimeNode item) { return item; } + )); + + // Check Set items iterator + results.push_back(compareLists( + pb.Sort( + pb.DictKeys(set), + pb.NewDataLiteral(true), + [&](TRuntimeNode item) { return item; } + ), + keyList + )); + + auto graph = setup.BuildGraph(pb.NewTuple(results)); + NUdf::TUnboxedValue res = graph->GetValue(); + + UNIT_ASSERT_C(res.GetElement(0).Get<bool>(), "Set HasItems fail"); + UNIT_ASSERT_C(res.GetElement(1).Get<bool>(), "Set Length fail"); + UNIT_ASSERT_C(res.GetElement(2).Get<bool>(), "Set Contains fail"); + UNIT_ASSERT_C(res.GetElement(3).Get<bool>(), "Set Lookup fail"); + UNIT_ASSERT_C(res.GetElement(4).Get<bool>(), "Set DictKeys fail"); + } + + Y_UNIT_TEST_LLVM(TestSetWithDataKey) { + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/false, /*withNull*/false, /*withData*/true); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty set + } + + Y_UNIT_TEST_LLVM(TestSetCompactWithDataKey) { + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/true, /*withNull*/false, /*withData*/true); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty set + } + + Y_UNIT_TEST_LLVM(TestSetWithOptionalDataKey) { + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/false, /*withData*/true); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/true, /*withData*/false); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/true, /*withData*/true); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty set + } + + Y_UNIT_TEST_LLVM(TestSetCompactWithOptionalDataKey) { + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/false, /*withData*/true); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/true, /*withData*/false); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/true, /*withData*/true); + TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty set + } } -} + + +} // namespace diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp index a546e94dedc..ff8be740309 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp @@ -1199,44 +1199,54 @@ private: mutable std::optional<TValuePacker> Packer; }; -template <typename T> -class THashedSingleFixedSetHolder : public TComputationValue<THashedSingleFixedSetHolder<T>> { +template <typename T, bool OptionalKey> +class THashedSingleFixedSetHolder : public TComputationValue<THashedSingleFixedSetHolder<T, OptionalKey>> { public: using TSetType = TValuesDictHashSingleFixedSet<T>; class TIterator : public TComputationValue<TIterator> { public: + enum class EState { + AtStart, + AtNull, + Iterator + }; TIterator(const THashedSingleFixedSetHolder* parent) : TComputationValue<TIterator>(parent->GetMemInfo()) , Parent(const_cast<THashedSingleFixedSetHolder*>(parent)) , Iterator(Parent->Set.begin()) , End(Parent->Set.end()) - , AtStart(true) + , State(EState::AtStart) { } private: - bool Skip() override { - if (AtStart) { - AtStart = false; - } - else { + bool Skip() final { + switch (State) { + case EState::AtStart: + State = OptionalKey && Parent->HasNull ? EState::AtNull : EState::Iterator; + break; + case EState::AtNull: + State = EState::Iterator; + break; + case EState::Iterator: if (Iterator == End) return false; ++Iterator; + break; } - return Iterator != End; + return EState::AtNull == State || Iterator != End; } - bool Next(NUdf::TUnboxedValue& key) override { + bool Next(NUdf::TUnboxedValue& key) final { if (!Skip()) return false; - key = NUdf::TUnboxedValuePod(*Iterator); + key = EState::AtNull == State ? NUdf::TUnboxedValuePod() : NUdf::TUnboxedValuePod(*Iterator); return true; } - bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final { if (!Next(key)) return false; payload = NUdf::TUnboxedValuePod::Void(); @@ -1246,96 +1256,112 @@ public: const NUdf::TRefCountedPtr<THashedSingleFixedSetHolder> Parent; typename TSetType::const_iterator Iterator; typename TSetType::const_iterator End; - bool AtStart; + EState State; }; - THashedSingleFixedSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set) + THashedSingleFixedSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set, bool hasNull) : TComputationValue<THashedSingleFixedSetHolder>(memInfo) , Set(std::move(set)) + , HasNull(hasNull) { + MKQL_ENSURE(OptionalKey || !HasNull, "Null value is not allowed for non-optional key type"); } private: - bool Contains(const NUdf::TUnboxedValuePod& key) const override { + bool Contains(const NUdf::TUnboxedValuePod& key) const final { + if constexpr (OptionalKey) { + if (!key) { + return HasNull; + } + } return Set.find(key.Get<T>()) != Set.cend(); } - NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { - const auto it = Set.find(key.Get<T>()); - if (it == Set.cend()) { - return NUdf::TUnboxedValuePod(); - } - return NUdf::TUnboxedValuePod::Void(); + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final { + if (Contains(key)) + return NUdf::TUnboxedValuePod::Void(); + return NUdf::TUnboxedValuePod(); } - NUdf::TUnboxedValue GetKeysIterator() const override { + NUdf::TUnboxedValue GetKeysIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - NUdf::TUnboxedValue GetDictIterator() const override { + NUdf::TUnboxedValue GetDictIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - NUdf::TUnboxedValue GetPayloadsIterator() const override { + NUdf::TUnboxedValue GetPayloadsIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - NUdf::TUnboxedValue GetListIterator() const override { + NUdf::TUnboxedValue GetListIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - ui64 GetDictLength() const override { - return Set.size(); + ui64 GetDictLength() const final { + return Set.size() + ui64(OptionalKey && HasNull); } - bool HasDictItems() const override { - return !Set.empty(); + bool HasDictItems() const final { + return !Set.empty() || (OptionalKey && HasNull); } - bool IsSortedDict() const override { + bool IsSortedDict() const final { return false; } const TSetType Set; + const bool HasNull; }; -template <typename T> -class THashedSingleFixedCompactSetHolder : public TComputationValue<THashedSingleFixedCompactSetHolder<T>> { +template <typename T, bool OptionalKey> +class THashedSingleFixedCompactSetHolder : public TComputationValue<THashedSingleFixedCompactSetHolder<T, OptionalKey>> { public: using TSetType = TValuesDictHashSingleFixedCompactSet<T>; class TIterator : public TComputationValue<TIterator> { public: + enum class EState { + AtStart, + AtNull, + Iterator + }; TIterator(const THashedSingleFixedCompactSetHolder* parent) : TComputationValue<TIterator>(parent->GetMemInfo()) , Parent(const_cast<THashedSingleFixedCompactSetHolder*>(parent)) , Iterator(Parent->Set.Iterate()) - , AtStart(true) + , State(EState::AtStart) { } private: - bool Skip() override { - if (AtStart) { - AtStart = false; - } - else { + bool Skip() final { + switch (State) { + case EState::AtStart: + State = OptionalKey && Parent->HasNull ? EState::AtNull : EState::Iterator; + break; + case EState::AtNull: + State = EState::Iterator; + break; + case EState::Iterator: if (!Iterator.Ok()) return false; ++Iterator; + break; } - return Iterator.Ok(); + return EState::AtNull == State || Iterator.Ok(); } - bool Next(NUdf::TUnboxedValue& key) override { + bool Next(NUdf::TUnboxedValue& key) final { if (!Skip()) return false; - key = NUdf::TUnboxedValuePod(*Iterator); + key = EState::AtNull == State ? NUdf::TUnboxedValuePod() : NUdf::TUnboxedValuePod(*Iterator); return true; } - bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final { if (!Next(key)) return false; payload = NUdf::TUnboxedValuePod::Void(); @@ -1344,55 +1370,63 @@ public: const NUdf::TRefCountedPtr<THashedSingleFixedCompactSetHolder> Parent; typename TSetType::TIterator Iterator; - bool AtStart; + EState State; }; - THashedSingleFixedCompactSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set) + THashedSingleFixedCompactSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set, bool hasNull) : TComputationValue<THashedSingleFixedCompactSetHolder>(memInfo) , Set(std::move(set)) + , HasNull(hasNull) { + MKQL_ENSURE(OptionalKey || !HasNull, "Null value is not allowed for non-optional key type"); } private: - bool Contains(const NUdf::TUnboxedValuePod& key) const override { + bool Contains(const NUdf::TUnboxedValuePod& key) const final { + if constexpr (OptionalKey) { + if (!key) { + return HasNull; + } + } return Set.Has(key.Get<T>()); } - NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { - if (Set.Has(key.Get<T>())) + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final { + if (Contains(key)) return NUdf::TUnboxedValuePod::Void(); return NUdf::TUnboxedValuePod(); } - NUdf::TUnboxedValue GetKeysIterator() const override { + NUdf::TUnboxedValue GetKeysIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - NUdf::TUnboxedValue GetDictIterator() const override { + NUdf::TUnboxedValue GetDictIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - NUdf::TUnboxedValue GetPayloadsIterator() const override { + NUdf::TUnboxedValue GetPayloadsIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - NUdf::TUnboxedValue GetListIterator() const override { + NUdf::TUnboxedValue GetListIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(this)); } - ui64 GetDictLength() const override { - return Set.Size(); + ui64 GetDictLength() const final { + return Set.Size() + ui64(OptionalKey && HasNull); } - bool HasDictItems() const override { - return !Set.Empty(); + bool HasDictItems() const final { + return !Set.Empty() || (OptionalKey && HasNull); } - bool IsSortedDict() const override { + bool IsSortedDict() const final { return false; } const TSetType Set; + const bool HasNull; }; class THashedCompactSetHolder : public TComputationValue<THashedCompactSetHolder> { @@ -1947,209 +1981,273 @@ private: std::optional<TValuePacker> Packer; }; -template <typename T> -class THashedSingleFixedMapHolder : public TComputationValue<THashedSingleFixedMapHolder<T>> { +template <typename T, bool OptionalKey> +class THashedSingleFixedMapHolder : public TComputationValue<THashedSingleFixedMapHolder<T, OptionalKey>> { public: using TMapType = TValuesDictHashSingleFixedMap<T>; template <bool NoSwap> class TIterator : public TComputationValue<TIterator<NoSwap>> { public: + enum class EState { + AtStart, + AtNull, + Iterator + }; TIterator(const THashedSingleFixedMapHolder* parent) : TComputationValue<TIterator<NoSwap>>(parent->GetMemInfo()) , Parent(const_cast<THashedSingleFixedMapHolder*>(parent)) , Iterator(Parent->Map.begin()) , End(Parent->Map.end()) - , AtStart(true) + , State(EState::AtStart) { } private: - bool Skip() override { - if (AtStart) { - AtStart = false; - } - else { + bool Skip() final { + switch (State) { + case EState::AtStart: + State = OptionalKey && Parent->NullPayload.has_value() ? EState::AtNull : EState::Iterator; + break; + case EState::AtNull: + State = EState::Iterator; + break; + case EState::Iterator: if (Iterator == End) { return false; } ++Iterator; + break; } - return Iterator != End; + return EState::AtNull == State || Iterator != End; } - bool Next(NUdf::TUnboxedValue& key) override { + bool Next(NUdf::TUnboxedValue& key) final { if (!Skip()) return false; - key = NoSwap ? NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first)) : Iterator->second; + key = NoSwap + ? (EState::AtNull == State ? NUdf::TUnboxedValue() : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first))) + : (EState::AtNull == State ? *Parent->NullPayload : Iterator->second); return true; } - bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final { if (!Next(key)) return false; - payload = NoSwap ? Iterator->second : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first)); + payload = NoSwap + ? (EState::AtNull == State ? *Parent->NullPayload : Iterator->second) + : (EState::AtNull == State ? NUdf::TUnboxedValue() : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first))); return true; } const NUdf::TRefCountedPtr<THashedSingleFixedMapHolder> Parent; typename TMapType::const_iterator Iterator; typename TMapType::const_iterator End; - bool AtStart; + EState State; }; - THashedSingleFixedMapHolder(TMemoryUsageInfo* memInfo, TValuesDictHashSingleFixedMap<T>&& map) + THashedSingleFixedMapHolder(TMemoryUsageInfo* memInfo, TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) : TComputationValue<THashedSingleFixedMapHolder>(memInfo) , Map(std::move(map)) + , NullPayload(std::move(nullPayload)) { } private: - bool Contains(const NUdf::TUnboxedValuePod& key) const override { + bool Contains(const NUdf::TUnboxedValuePod& key) const final { + if constexpr (OptionalKey) { + if (!key) { + return NullPayload.has_value(); + } + } return Map.find(key.Get<T>()) != Map.end(); } - NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final { + if constexpr (OptionalKey) { + if (!key) { + return NullPayload.has_value() ? NullPayload->MakeOptional() : NUdf::TUnboxedValuePod(); + } + } const auto it = Map.find(key.Get<T>()); if (it == Map.end()) return NUdf::TUnboxedValuePod(); return it->second.MakeOptional(); } - NUdf::TUnboxedValue GetKeysIterator() const override { + NUdf::TUnboxedValue GetKeysIterator() const final { return NUdf::TUnboxedValuePod(new TIterator<true>(this)); } - NUdf::TUnboxedValue GetDictIterator() const override { + NUdf::TUnboxedValue GetDictIterator() const final { return NUdf::TUnboxedValuePod(new TIterator<true>(this)); } - NUdf::TUnboxedValue GetPayloadsIterator() const override { + NUdf::TUnboxedValue GetPayloadsIterator() const final { return NUdf::TUnboxedValuePod(new TIterator<false>(this)); } - ui64 GetDictLength() const override { - return Map.size(); + ui64 GetDictLength() const final { + return Map.size() + ui64(OptionalKey && NullPayload.has_value()); } - bool HasDictItems() const override { - return !Map.empty(); + bool HasDictItems() const final { + return !Map.empty() || (OptionalKey && NullPayload.has_value()); } - bool IsSortedDict() const override { + bool IsSortedDict() const final { return false; } const TMapType Map; + const std::optional<NUdf::TUnboxedValue> NullPayload; }; -template <typename T> -class THashedSingleFixedCompactMapHolder : public TComputationValue<THashedSingleFixedCompactMapHolder<T>> { +template <typename T, bool OptionalKey> +class THashedSingleFixedCompactMapHolder : public TComputationValue<THashedSingleFixedCompactMapHolder<T, OptionalKey>> { public: using TMapType = TValuesDictHashSingleFixedCompactMap<T>; template <bool NoSwap> class TIterator : public TComputationValue<TIterator<NoSwap>> { public: + enum class EState { + AtStart, + AtNull, + Iterator + }; TIterator(const THashedSingleFixedCompactMapHolder* parent) : TComputationValue<TIterator<NoSwap>>(parent->GetMemInfo()) , Parent(const_cast<THashedSingleFixedCompactMapHolder*>(parent)) , Iterator(Parent->Map.Iterate()) - , AtStart(true) + , State(EState::AtStart) { } private: - bool Skip() override { - if (AtStart) { - AtStart = false; - } - else if (Iterator.Ok()) { - ++Iterator; + bool Skip() final { + switch (State) { + case EState::AtStart: + State = OptionalKey && Parent->NullPayload.has_value() ? EState::AtNull : EState::Iterator; + break; + case EState::AtNull: + State = EState::Iterator; + break; + case EState::Iterator: + if (Iterator.Ok()) + ++Iterator; + break; } - return Iterator.Ok(); + return EState::AtNull == State || Iterator.Ok(); } - bool Next(NUdf::TUnboxedValue& key) override { + bool Next(NUdf::TUnboxedValue& key) final { if (!Skip()) return false; - key = NoSwap ? - NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first)): - Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory); + + key = NoSwap + ? (EState::AtNull == State + ? NUdf::TUnboxedValue() + : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first)) + ) + : (EState::AtNull == State + ? Parent->PayloadPacker.Unpack(GetSmallValue(*Parent->NullPayload), Parent->Ctx->HolderFactory) + : Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory) + ); return true; } - bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final { if (!Next(key)) return false; - payload = NoSwap ? - Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory): - NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first)); + payload = NoSwap + ? (EState::AtNull == State + ? Parent->PayloadPacker.Unpack(GetSmallValue(*Parent->NullPayload), Parent->Ctx->HolderFactory) + : Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory) + ) + : (EState::AtNull == State + ? NUdf::TUnboxedValue() + : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first)) + ); return true; } const NUdf::TRefCountedPtr<THashedSingleFixedCompactMapHolder> Parent; typename TMapType::TIterator Iterator; - bool AtStart; + EState State; }; - THashedSingleFixedCompactMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, TPagedArena&& pool, + THashedSingleFixedCompactMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, TComputationContext* ctx) : TComputationValue<THashedSingleFixedCompactMapHolder>(memInfo) , Pool(std::move(pool)) , Map(std::move(map)) + , NullPayload(std::move(nullPayload)) , PayloadPacker(false, payloadType) , Ctx(ctx) { } private: - bool Contains(const NUdf::TUnboxedValuePod& key) const override { + bool Contains(const NUdf::TUnboxedValuePod& key) const final { + if constexpr (OptionalKey) { + if (!key) { + return NullPayload.has_value(); + } + } return Map.Has(key.Get<T>()); } - NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final { + if constexpr (OptionalKey) { + if (!key) { + return NullPayload.has_value() + ? PayloadPacker.Unpack(GetSmallValue(*NullPayload), Ctx->HolderFactory).Release().MakeOptional() + : NUdf::TUnboxedValuePod(); + } + } auto it = Map.Find(key.Get<T>()); if (!it.Ok()) return NUdf::TUnboxedValuePod(); return PayloadPacker.Unpack(GetSmallValue(it.Get().second), Ctx->HolderFactory).Release().MakeOptional(); } - NUdf::TUnboxedValue GetKeysIterator() const override { + NUdf::TUnboxedValue GetKeysIterator() const final { return NUdf::TUnboxedValuePod(new TIterator<true>(this)); } - NUdf::TUnboxedValue GetDictIterator() const override { + NUdf::TUnboxedValue GetDictIterator() const final { return NUdf::TUnboxedValuePod(new TIterator<true>(this)); } - NUdf::TUnboxedValue GetPayloadsIterator() const override { + NUdf::TUnboxedValue GetPayloadsIterator() const final { return NUdf::TUnboxedValuePod(new TIterator<false>(this)); } - ui64 GetDictLength() const override { - return Map.Size(); + ui64 GetDictLength() const final { + return Map.Size() + ui64(OptionalKey && NullPayload.has_value()); } - bool HasDictItems() const override { - return !Map.Empty(); + bool HasDictItems() const final { + return !Map.Empty() || (OptionalKey && NullPayload.has_value()); } - bool IsSortedDict() const override { + bool IsSortedDict() const final { return false; } private: TPagedArena Pool; const TMapType Map; + const std::optional<ui64> NullPayload; mutable TValuePacker PayloadPacker; TComputationContext* Ctx; }; -template <typename T> -class THashedSingleFixedCompactMultiMapHolder : public TComputationValue<THashedSingleFixedCompactMultiMapHolder<T>> { +template <typename T, bool OptionalKey> +class THashedSingleFixedCompactMultiMapHolder : public TComputationValue<THashedSingleFixedCompactMultiMapHolder<T, OptionalKey>> { public: using TMapType = TValuesDictHashSingleFixedCompactMultiMap<T>; using TMapIterator = typename TMapType::TIterator; @@ -2166,7 +2264,7 @@ public: } private: - bool Next(NUdf::TUnboxedValue& value) override { + bool Next(NUdf::TUnboxedValue& value) final { if (!Iterator.Ok()) { return false; } @@ -2176,7 +2274,7 @@ public: return true; } - bool Skip() override { + bool Skip() final { if (!Iterator.Ok()) { return false; } @@ -2197,11 +2295,11 @@ public: Y_ASSERT(From.Ok()); } - bool HasFastListLength() const override { + bool HasFastListLength() const final { return true; } - ui64 GetListLength() const override { + ui64 GetListLength() const final { if (!Length) { Length = Parent->Map.Count(From.GetKey()); } @@ -2209,11 +2307,11 @@ public: return *Length; } - bool HasListItems() const override { + bool HasListItems() const final { return true; } - NUdf::TUnboxedValue GetListIterator() const override { + NUdf::TUnboxedValue GetListIterator() const final { return NUdf::TUnboxedValuePod(new TIterator(Parent.Get(), From)); } @@ -2221,6 +2319,70 @@ public: TMapIterator From; }; + class TNullPayloadList: public TCustomListValue { + public: + class TIterator : public TComputationValue<TIterator> { + public: + TIterator(const THashedSingleFixedCompactMultiMapHolder* parent) + : TComputationValue<TIterator>(parent->GetMemInfo()) + , Parent(const_cast<THashedSingleFixedCompactMultiMapHolder*>(parent)) + , Iterator(Parent->NullPayloads.cbegin()) + { + } + + private: + bool Next(NUdf::TUnboxedValue& value) final { + if (Iterator == Parent->NullPayloads.cend()) { + return false; + } + + value = Parent->PayloadPacker.Unpack(GetSmallValue(*Iterator), Parent->Ctx->HolderFactory); + ++Iterator; + return true; + } + + bool Skip() final { + if (Iterator == Parent->NullPayloads.cend()) { + return false; + } + + ++Iterator; + return true; + } + + const NUdf::TRefCountedPtr<THashedSingleFixedCompactMultiMapHolder> Parent; + typename std::vector<ui64>::const_iterator Iterator; + }; + + TNullPayloadList(TMemoryUsageInfo* memInfo, const THashedSingleFixedCompactMultiMapHolder* parent) + : TCustomListValue(memInfo) + , Parent(const_cast<THashedSingleFixedCompactMultiMapHolder*>(parent)) + { + } + + bool HasFastListLength() const final { + return true; + } + + ui64 GetListLength() const final { + if (!Length) { + Length = Parent->NullPayloads.size(); + } + + return *Length; + } + + bool HasListItems() const final { + return true; + } + + NUdf::TUnboxedValue GetListIterator() const final { + return NUdf::TUnboxedValuePod(new TIterator(Parent.Get())); + } + + const NUdf::TRefCountedPtr<THashedSingleFixedCompactMultiMapHolder> Parent; + }; + template <bool NoSwap> class TIterator : public TComputationValue<TIterator<NoSwap>> { public: @@ -2228,11 +2390,19 @@ public: : TComputationValue<TIterator<NoSwap>>(parent->GetMemInfo()) , Parent(const_cast<THashedSingleFixedCompactMultiMapHolder*>(parent)) , Iterator(parent->Map.Iterate()) + , AtNull(OptionalKey && !parent->NullPayloads.empty()) { } private: bool Next(NUdf::TUnboxedValue& key) override { + if (AtNull) { + AtNull = false; + key = NoSwap + ? NUdf::TUnboxedValuePod() + : Parent->Ctx->HolderFactory.template Create<TNullPayloadList>(Parent.Get()); + return true; + } if (!Iterator.Ok()) { return false; } @@ -2245,6 +2415,17 @@ public: } bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override { + if (AtNull) { + AtNull = false; + if (NoSwap) { + key = NUdf::TUnboxedValuePod(); + payload = Parent->Ctx->HolderFactory.template Create<TNullPayloadList>(Parent.Get()); + } else { + payload = NUdf::TUnboxedValuePod(); + key = Parent->Ctx->HolderFactory.template Create<TNullPayloadList>(Parent.Get()); + } + return true; + } if (!Iterator.Ok()) { return false; } @@ -2261,6 +2442,10 @@ public: } bool Skip() override { + if (AtNull) { + AtNull = false; + return true; + } if (!Iterator.Ok()) { return false; } @@ -2271,13 +2456,15 @@ public: const NUdf::TRefCountedPtr<THashedSingleFixedCompactMultiMapHolder> Parent; TMapIterator Iterator; + bool AtNull; }; - THashedSingleFixedCompactMultiMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, TPagedArena&& pool, + THashedSingleFixedCompactMultiMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, TComputationContext* ctx) : TComputationValue<THashedSingleFixedCompactMultiMapHolder>(memInfo) , Pool(std::move(pool)) , Map(std::move(map)) + , NullPayloads(std::move(nullPayloads)) , PayloadPacker(false, payloadType) , Ctx(ctx) { @@ -2285,10 +2472,22 @@ public: private: bool Contains(const NUdf::TUnboxedValuePod& key) const override { + if constexpr (OptionalKey) { + if (!key) { + return !NullPayloads.empty(); + } + } return Map.Has(key.Get<T>()); } NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { + if constexpr (OptionalKey) { + if (!key) { + return NullPayloads.empty() + ? NUdf::TUnboxedValuePod() + : Ctx->HolderFactory.Create<TNullPayloadList>(this); + } + } const auto it = Map.Find(key.Get<T>()); if (!it.Ok()) return NUdf::TUnboxedValuePod(); @@ -2308,11 +2507,11 @@ private: } ui64 GetDictLength() const override { - return Map.UniqSize(); + return Map.UniqSize() + ui64(OptionalKey && !NullPayloads.empty()); } bool HasDictItems() const override { - return !Map.Empty(); + return !Map.Empty() || (OptionalKey && !NullPayloads.empty()); } bool IsSortedDict() const override { @@ -2322,6 +2521,7 @@ private: private: TPagedArena Pool; const TMapType Map; + const std::vector<ui64> NullPayloads; mutable TValuePacker PayloadPacker; TComputationContext* Ctx; }; @@ -3316,36 +3516,50 @@ NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSetHolder( filler, types, isTuple, eagerFill, encodedType, hash, equate, *this)); } -template <typename T> +template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder( - TValuesDictHashSingleFixedSet<T>&& set) const { - return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedSetHolder<T>>(CurrentAllocState, &MemInfo, std::move(set))); + TValuesDictHashSingleFixedSet<T>&& set, bool hasNull) const { + return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedSetHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, std::move(set), hasNull)); } -#define DEFINE_HASHED_SINGLE_FIXED_SET(xType) \ - template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder<xType> \ - (TValuesDictHashSingleFixedSet<xType>&& set) const; +#define DEFINE_HASHED_SINGLE_FIXED_SET_OPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder<xType, true> \ + (TValuesDictHashSingleFixedSet<xType>&& set, bool hasNull) const; + +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_SET_OPT) +#undef DEFINE_HASHED_SINGLE_FIXED_SET_OPT -KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_SET) -#undef DEFINE_HASHED_SINGLE_FIXED_SET +#define DEFINE_HASHED_SINGLE_FIXED_SET_NONOPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder<xType, false> \ + (TValuesDictHashSingleFixedSet<xType>&& set, bool hasNull) const; -template <typename T> +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_SET_NONOPT) +#undef DEFINE_HASHED_SINGLE_FIXED_SET_NONOPT + +template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder( - TValuesDictHashSingleFixedCompactSet<T>&& set) const { - return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactSetHolder<T>>(CurrentAllocState, &MemInfo, std::move(set))); + TValuesDictHashSingleFixedCompactSet<T>&& set, bool hasNull) const { + return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactSetHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, std::move(set), hasNull)); } -#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET(xType) \ - template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder<xType> \ - (TValuesDictHashSingleFixedCompactSet<xType>&& set) const; +#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_OPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder<xType, true> \ + (TValuesDictHashSingleFixedCompactSet<xType>&& set, bool hasNull) const; + +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_OPT) +#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_OPT -KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET) -#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET +#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_NONOPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder<xType, false> \ + (TValuesDictHashSingleFixedCompactSet<xType>&& set, bool hasNull) const; -template <typename T> +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_NONOPT) +#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_NONOPT + +template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder( - TValuesDictHashSingleFixedMap<T>&& map) const { - return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedMapHolder<T>>(CurrentAllocState, &MemInfo, std::move(map))); + TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const { + return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedMapHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, std::move(map), std::move(nullPayload))); } NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedCompactSetHolder( @@ -3365,18 +3579,20 @@ NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedCompactMultiMapHolder( return NUdf::TUnboxedValuePod(AllocateOn<THashedCompactMultiMapHolder>(CurrentAllocState, &MemInfo, std::move(map), std::move(pool), keyType, payloadType, ctx)); } -template <typename T> +template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder( - TValuesDictHashSingleFixedCompactMap<T>&& map, TPagedArena&& pool, TType* payloadType, + TValuesDictHashSingleFixedCompactMap<T>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, TComputationContext* ctx) const { - return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMapHolder<T>>(CurrentAllocState, &MemInfo, std::move(map), std::move(pool), payloadType, ctx)); + return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMapHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, + std::move(map), std::move(nullPayload), std::move(pool), payloadType, ctx)); } -template <typename T> +template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder( - TValuesDictHashSingleFixedCompactMultiMap<T>&& map, TPagedArena&& pool, TType* payloadType, + TValuesDictHashSingleFixedCompactMultiMap<T>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, TComputationContext* ctx) const { - return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMultiMapHolder<T>>(CurrentAllocState, &MemInfo, std::move(map), std::move(pool), payloadType, ctx)); + return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMultiMapHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, + std::move(map), std::move(nullPayloads), std::move(pool), payloadType, ctx)); } NUdf::IDictValueBuilder::TPtr THolderFactory::NewDict( @@ -3395,28 +3611,51 @@ NUdf::IDictValueBuilder::TPtr THolderFactory::NewDict( useIHash ? MakeCompareImpl(keyType) : nullptr); } -#define DEFINE_HASHED_SINGLE_FIXED_MAP(xType) \ - template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder<xType> \ - (TValuesDictHashSingleFixedMap<xType>&& map) const; +#define DEFINE_HASHED_SINGLE_FIXED_MAP_OPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder<xType, true> \ + (TValuesDictHashSingleFixedMap<xType>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const; + +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_MAP_OPT) +#undef DEFINE_HASHED_SINGLE_FIXED_MAP_OPT + +#define DEFINE_HASHED_SINGLE_FIXED_MAP_NONOPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder<xType, false> \ + (TValuesDictHashSingleFixedMap<xType>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const; + +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_MAP_NONOPT) +#undef DEFINE_HASHED_SINGLE_FIXED_MAP_NONOPT + +#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_OPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder<xType, true> \ + (TValuesDictHashSingleFixedCompactMap<xType>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, \ + TComputationContext* ctx) const; + +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_OPT) +#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_OPT + +#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_NONOPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder<xType, false> \ + (TValuesDictHashSingleFixedCompactMap<xType>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, \ + TComputationContext* ctx) const; -KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_MAP) -#undef DEFINE_HASHED_SINGLE_FIXED_MAP +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_NONOPT) +#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_NONOPT -#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP(xType) \ - template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder<xType> \ - (TValuesDictHashSingleFixedCompactMap<xType>&& map, TPagedArena&& pool, TType* payloadType, \ +#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_OPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder<xType, true> \ + (TValuesDictHashSingleFixedCompactMultiMap<xType>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, \ TComputationContext* ctx) const; -KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP) -#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_OPT) +#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_OPT -#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP(xType) \ - template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder<xType> \ - (TValuesDictHashSingleFixedCompactMultiMap<xType>&& map, TPagedArena&& pool, TType* payloadType, \ +#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_NONOPT(xType) \ + template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder<xType, false> \ + (TValuesDictHashSingleFixedCompactMultiMap<xType>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, \ TComputationContext* ctx) const; -KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP) -#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP +KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_NONOPT) +#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_NONOPT ////////////////////////////////////////////////////////////////////////////// // TNodeFactory diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h index 85681918c70..59813f771e2 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h @@ -18,6 +18,8 @@ #include <functional> #include <unordered_map> #include <unordered_set> +#include <optional> +#include <vector> #ifndef MKQL_DISABLE_CODEGEN namespace llvm { @@ -460,14 +462,14 @@ public: NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate) const; - template <typename T> - NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedSetHolder(TValuesDictHashSingleFixedSet<T>&& set) const; + template <typename T, bool OptionalKey> + NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedSetHolder(TValuesDictHashSingleFixedSet<T>&& set, bool hasNull) const; - template <typename T> - NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactSetHolder(TValuesDictHashSingleFixedCompactSet<T>&& set) const; + template <typename T, bool OptionalKey> + NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactSetHolder(TValuesDictHashSingleFixedCompactSet<T>&& set, bool hasNull) const; - template <typename T> - NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedMapHolder(TValuesDictHashSingleFixedMap<T>&& map) const; + template <typename T, bool OptionalKey> + NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedMapHolder(TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const; NUdf::TUnboxedValuePod CreateDirectHashedCompactSetHolder( TValuesDictHashCompactSet&& set, TPagedArena&& pool, TType* keyType, @@ -481,14 +483,14 @@ public: TValuesDictHashCompactMultiMap&& map, TPagedArena&& pool, TType* keyType, TType* payloadType, TComputationContext* ctx) const; - template <typename T> + template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactMapHolder( - TValuesDictHashSingleFixedCompactMap<T>&& map, TPagedArena&& pool, TType* payloadType, + TValuesDictHashSingleFixedCompactMap<T>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, TComputationContext* ctx) const; - template <typename T> + template <typename T, bool OptionalKey> NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactMultiMapHolder( - TValuesDictHashSingleFixedCompactMultiMap<T>&& map, TPagedArena&& pool, TType* payloadType, + TValuesDictHashSingleFixedCompactMultiMap<T>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, TComputationContext* ctx) const; NUdf::IDictValueBuilder::TPtr NewDict( diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp index 55e41c32327..07a99c994cd 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp @@ -261,7 +261,7 @@ protected: map[4] = NUdf::TUnboxedValuePod::Embedded("4"); map[10] = NUdf::TUnboxedValuePod::Embedded("10"); map[1] = NUdf::TUnboxedValuePod::Embedded("1"); - const NUdf::TUnboxedValue value = HolderFactory.CreateDirectHashedSingleFixedMapHolder<ui32>(std::move(map)); + const NUdf::TUnboxedValue value = HolderFactory.CreateDirectHashedSingleFixedMapHolder<ui32, false>(std::move(map), std::nullopt); const auto uValue = TestPackUnpack(dictType, value, "Type:Dict"); |