aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author udovichenko-r <rvu@ydb.tech> 2022-11-18 15:17:58 +0300
committer udovichenko-r <rvu@ydb.tech> 2022-11-18 15:17:58 +0300
commit ded42ea745c481774d4ffa30a44d31057b6ce03b (patch)
tree 6997984d1e02c154a553d98e5eff21397e50ecf4
parent 7506642977946e2bd9538cf66daa3bf0d07f00b0 (diff)
download ydb-ded42ea745c481774d4ffa30a44d31057b6ce03b.tar.gz
[mkql] Optimize HashedDict for optional data keys
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp 165
-rw-r--r-- ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp 339
-rw-r--r-- ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp 563
-rw-r--r-- ydb/library/yql/minikql/computation/mkql_computation_node_holders.h 22
-rw-r--r-- ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp 2
5 files changed, 873 insertions, 218 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp
index f743fb39cd1..70242223d02 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp
@@ -13,6 +13,8 @@
#include <algorithm>
#include <unordered_map>
+#include <optional>
+#include <vector>
namespace NKikimr {
namespace NMiniKQL {
@@ -137,7 +139,7 @@ public:
}
};
-template<typename T>
+template<typename T, bool OptionalKey>
class THashedSingleFixedMultiMapAccumulator {
using TMapType = std::unordered_map<
T,
@@ -149,6 +151,7 @@ class THashedSingleFixedMultiMapAccumulator {
TComputationContext& Ctx;
const TKeyTypes& KeyTypes;
TMapType Map;
+ TUnboxedValueVector NullPayloads;
public:
THashedSingleFixedMultiMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded,
@@ -167,6 +170,12 @@ public:
void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload)
{
+ if constexpr (OptionalKey) {
+ if (!key) {
+ NullPayloads.emplace_back(std::move(payload));
+ return;
+ }
+ }
const auto ins = Map.emplace(key.Get<T>(), 1U);
if (ins.second)
ins.first->second.front() = std::move(payload);
@@ -179,11 +188,10 @@ public:
const auto filler = [this](TValuesDictHashMap& targetMap) {
targetMap.reserve(Map.size());
+ auto itemFactory = [](const NUdf::TUnboxedValuePod& value) {
+ return value;
+ };
for (auto& pair : Map) {
- auto itemFactory = [](const NUdf::TUnboxedValuePod& value) {
- return value;
- };
-
ui64 start = 0;
ui64 finish = pair.second.size();
auto payloadList = CreateOwningVectorListAdapter(std::move(pair.second), itemFactory,
@@ -192,18 +200,28 @@ public:
targetMap.emplace(NUdf::TUnboxedValuePod(pair.first), std::move(payloadList));
}
+ if constexpr (OptionalKey) { // payloads added under a null key are kept in NullPayloads, not in Map
+ if (const ui64 nullCount = NullPayloads.size()) { // read the size BEFORE the vector is moved from; argument evaluation order is unspecified
+ auto payloadList = CreateOwningVectorListAdapter(std::move(NullPayloads), itemFactory,
+ /*start*/ 0, /*finish*/ nullCount, /*reversed*/ false,
+ Ctx.HolderFactory.GetMemInfo());
+
+ targetMap.emplace(NUdf::TUnboxedValuePod(), std::move(payloadList)); // an empty pod is used as the null key
+ }
+ }
};
return Ctx.HolderFactory.CreateDirectHashedDictHolder(filler, KeyTypes, false, true, nullptr, nullptr, nullptr);
}
};
-template<typename T>
+template<typename T, bool OptionalKey>
class THashedSingleFixedMapAccumulator {
using TMapType = TValuesDictHashSingleFixedMap<T>;
TComputationContext& Ctx;
TMapType Map;
+ std::optional<NUdf::TUnboxedValue> NullPayload;
public:
THashedSingleFixedMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded,
@@ -223,12 +241,18 @@ public:
void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload)
{
+ if constexpr (OptionalKey) { // compiled only for optional key types
+ if (!key) { // null key: its payload is kept outside Map, in NullPayload
+ if (!NullPayload) NullPayload.emplace(std::move(payload)); // first null-key payload wins; assumes Map.emplace below also keeps an existing entry - TODO confirm
+ return;
+ }
+ }
Map.emplace(key.Get<T>(), std::move(payload));
}
NUdf::TUnboxedValue Build()
{
- return Ctx.HolderFactory.CreateDirectHashedSingleFixedMapHolder<T>(std::move(Map));
+ return Ctx.HolderFactory.CreateDirectHashedSingleFixedMapHolder<T, OptionalKey>(std::move(Map), std::move(NullPayload)); // passes on the keyed entries plus the optional payload collected for the null key
}
};
@@ -277,12 +301,13 @@ public:
}
};
-template <typename T>
+template <typename T, bool OptionalKey>
class THashedSingleFixedSetAccumulator {
using TSetType = TValuesDictHashSingleFixedSet<T>;
TComputationContext& Ctx;
TSetType Set;
+ bool HasNull = false;
public:
THashedSingleFixedSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded,
@@ -301,22 +326,29 @@ public:
void Add(NUdf::TUnboxedValue&& key)
{
+ if constexpr (OptionalKey) { // compiled only for optional key types
+ if (!key) { // a null key is remembered as a flag instead of being stored in Set
+ HasNull = true;
+ return;
+ }
+ }
Set.emplace(key.Get<T>());
}
NUdf::TUnboxedValue Build()
{
- return Ctx.HolderFactory.CreateDirectHashedSingleFixedSetHolder<T>(std::move(Set));
+ return Ctx.HolderFactory.CreateDirectHashedSingleFixedSetHolder<T, OptionalKey>(std::move(Set), HasNull); // HasNull is true only if Add() saw a null key
}
};
-template <typename T>
+template <typename T, bool OptionalKey>
class THashedSingleFixedCompactSetAccumulator {
using TSetType = TValuesDictHashSingleFixedCompactSet<T>;
TComputationContext& Ctx;
TPagedArena Pool;
TSetType Set;
+ bool HasNull = false;
public:
THashedSingleFixedCompactSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded,
@@ -335,12 +367,18 @@ public:
void Add(NUdf::TUnboxedValue&& key)
{
+ if constexpr (OptionalKey) { // compiled only for optional key types
+ if (!key) { // a null key is remembered as a flag instead of being inserted into Set
+ HasNull = true;
+ return;
+ }
+ }
Set.Insert(key.Get<T>());
}
NUdf::TUnboxedValue Build()
{
- return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactSetHolder<T>(std::move(Set));
+ return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactSetHolder<T, OptionalKey>(std::move(Set), HasNull); // HasNull is true only if Add() saw a null key
}
};
@@ -454,17 +492,18 @@ public:
}
};
-template <typename T, bool Multi>
+template <typename T, bool OptionalKey, bool Multi>
class THashedSingleFixedCompactMapAccumulator;
-template <typename T>
-class THashedSingleFixedCompactMapAccumulator<T, false> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedCompactMapAccumulator<T, OptionalKey, false> {
using TMapType = TValuesDictHashSingleFixedCompactMap<T>;
TComputationContext& Ctx;
TPagedArena Pool;
TMapType Map;
- TType *PayloadType;
+ std::optional<ui64> NullPayload;
+ TType* PayloadType;
TValuePacker PayloadPacker;
public:
@@ -485,23 +524,30 @@ public:
void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload)
{
+ if constexpr (OptionalKey) { // compiled only for optional key types
+ if (!key) { // null key: its packed payload handle is kept outside Map, in NullPayload
+ if (!NullPayload) NullPayload = AddSmallValue(Pool, PayloadPacker.Pack(payload)); // first null-key payload wins and later ones are not packed; presumably matches Map.InsertNew below - TODO confirm
+ return;
+ }
+ }
Map.InsertNew(key.Get<T>(), AddSmallValue(Pool, PayloadPacker.Pack(payload)));
}
NUdf::TUnboxedValue Build()
{
- return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMapHolder(std::move(Map), std::move(Pool), PayloadType, &Ctx);
+ return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMapHolder<T, OptionalKey>(std::move(Map), std::move(NullPayload), std::move(Pool), PayloadType, &Ctx); // NullPayload holds a value only if Add() saw a null key
}
};
-template <typename T>
-class THashedSingleFixedCompactMapAccumulator<T, true> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedCompactMapAccumulator<T, OptionalKey, true> {
using TMapType = TValuesDictHashSingleFixedCompactMultiMap<T>;
TComputationContext& Ctx;
TPagedArena Pool;
TMapType Map;
- TType *PayloadType;
+ std::vector<ui64> NullPayloads;
+ TType* PayloadType;
TValuePacker PayloadPacker;
public:
@@ -522,12 +568,18 @@ public:
void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload)
{
+ if constexpr (OptionalKey) { // compiled only for optional key types
+ if (!key) { // null key: every packed payload handle is appended to NullPayloads instead of Map
+ NullPayloads.push_back(AddSmallValue(Pool, PayloadPacker.Pack(payload)));
+ return;
+ }
+ }
Map.Insert(key.Get<T>(), AddSmallValue(Pool, PayloadPacker.Pack(payload)));
}
NUdf::TUnboxedValue Build()
{
- return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMultiMapHolder(std::move(Map), std::move(Pool), PayloadType, &Ctx);
+ return Ctx.HolderFactory.CreateDirectHashedSingleFixedCompactMultiMapHolder<T, OptionalKey>(std::move(Map), std::move(NullPayloads), std::move(Pool), PayloadType, &Ctx); // NullPayloads carries the handles collected for the null key (empty if none was added)
}
};
@@ -1881,15 +1933,23 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati
const bool isCompact = AS_VALUE(TDataLiteral, callable.GetInput(callable.GetInputsCount() - 2U))->AsValue().Get<bool>();
const auto payloadSelectorNode = callable.GetInput(callable.GetInputsCount() - 4U);
+ const bool isOptional = keyType->IsOptional();
+ const auto unwrappedKeyType = isOptional ? AS_TYPE(TOptionalType, keyType)->GetItemType() : keyType;
+
if (!multi && payloadType->IsVoid()) {
if (isCompact) {
- if (keyType->IsData()) {
+ if (unwrappedKeyType->IsData()) {
#define USE_HASHED_SINGLE_FIXED_COMPACT_SET(xType, xLayoutType) \
case NUdf::TDataType<xType>::Id: \
- return WrapToSet< \
- THashedSingleFixedCompactSetAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables);
+ if (isOptional) { \
+ return WrapToSet< \
+ THashedSingleFixedCompactSetAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } else { \
+ return WrapToSet< \
+ THashedSingleFixedCompactSetAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ }
- switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) {
+ switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) {
KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_COMPACT_SET)
}
#undef USE_HASHED_SINGLE_FIXED_COMPACT_SET
@@ -1898,13 +1958,18 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati
return WrapToSet<THashedCompactSetAccumulator>(callable, ctx.NodeLocator, ctx.Mutables);
}
- if (keyType->IsData()) {
+ if (unwrappedKeyType->IsData()) {
#define USE_HASHED_SINGLE_FIXED_SET(xType, xLayoutType) \
case NUdf::TDataType<xType>::Id: \
- return WrapToSet< \
- THashedSingleFixedSetAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables);
+ if (isOptional) { \
+ return WrapToSet< \
+ THashedSingleFixedSetAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } else { \
+ return WrapToSet< \
+ THashedSingleFixedSetAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ }
- switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) {
+ switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) {
KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_SET)
}
#undef USE_HASHED_SINGLE_FIXED_SET
@@ -1913,18 +1978,28 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati
}
if (isCompact) {
- if (keyType->IsData()) {
+ if (unwrappedKeyType->IsData()) {
#define USE_HASHED_SINGLE_FIXED_COMPACT_MAP(xType, xLayoutType) \
case NUdf::TDataType<xType>::Id: \
if (multi) { \
- return WrapToMap< \
- THashedSingleFixedCompactMapAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ if (isOptional) { \
+ return WrapToMap< \
+ THashedSingleFixedCompactMapAccumulator<xLayoutType, true, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } else { \
+ return WrapToMap< \
+ THashedSingleFixedCompactMapAccumulator<xLayoutType, false, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } \
} else { \
- return WrapToMap< \
- THashedSingleFixedCompactMapAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ if (isOptional) { \
+ return WrapToMap< \
+ THashedSingleFixedCompactMapAccumulator<xLayoutType, true, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } else { \
+ return WrapToMap< \
+ THashedSingleFixedCompactMapAccumulator<xLayoutType, false, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } \
}
- switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) {
+ switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) {
KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_COMPACT_MAP)
}
#undef USE_HASHED_SINGLE_FIXED_COMPACT_MAP
@@ -1937,18 +2012,28 @@ IComputationNode* WrapToHashedDictInternal(TCallable& callable, const TComputati
}
}
- if (keyType->IsData()) {
+ if (unwrappedKeyType->IsData()) {
#define USE_HASHED_SINGLE_FIXED_MAP(xType, xLayoutType) \
case NUdf::TDataType<xType>::Id: \
if (multi) { \
- return WrapToMap< \
- THashedSingleFixedMultiMapAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ if (isOptional) { \
+ return WrapToMap< \
+ THashedSingleFixedMultiMapAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } else { \
+ return WrapToMap< \
+ THashedSingleFixedMultiMapAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } \
} else { \
- return WrapToMap< \
- THashedSingleFixedMapAccumulator<xLayoutType>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ if (isOptional) { \
+ return WrapToMap< \
+ THashedSingleFixedMapAccumulator<xLayoutType, true>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } else { \
+ return WrapToMap< \
+ THashedSingleFixedMapAccumulator<xLayoutType, false>>(callable, ctx.NodeLocator, ctx.Mutables); \
+ } \
}
- switch (AS_TYPE(TDataType, keyType)->GetSchemeType()) {
+ switch (AS_TYPE(TDataType, unwrappedKeyType)->GetSchemeType()) {
KNOWN_FIXED_VALUE_TYPES(USE_HASHED_SINGLE_FIXED_MAP)
}
#undef USE_HASHED_SINGLE_FIXED_MAP
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp
index 5a5cd7e2de5..75bf0fe352a 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp
@@ -5,10 +5,10 @@
#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
#include <util/random/shuffle.h>
+#include <map>
+#include <optional>
-
-namespace NKikimr {
-namespace NMiniKQL {
+namespace NKikimr::NMiniKQL {
static const TStringBuf data[] = {
"13d49d4db08e57d645fe4d44bbed4738f386af6e9e742cf186961063feb9919b",
@@ -214,7 +214,336 @@ Y_UNIT_TEST_SUITE(TMiniKQLToDictTest) {
}
}
#endif
-}
+ template <bool LLVM>
+ static void TestDictWithDataKeyImpl(bool optionalKey, bool multi, bool compact, bool withNull, bool withData) { // builds a hashed dict for the given flag combination and checks six dict operations on it
+ TSetup<LLVM> setup;
+ TProgramBuilder& pb = *setup.PgmBuilder;
+ TType* keyType = pb.NewDataType(NUdf::EDataSlot::Int32, optionalKey);
+ TType* valueType = pb.NewDataType(NUdf::EDataSlot::Int32, false);
+ TType* tupleType = pb.NewTupleType({keyType, valueType});
+ TVector<TRuntimeNode> items; // (key, payload) tuples fed to ToHashedDict
+ TVector<TRuntimeNode> keys; // distinct keys, one entry per key
+ if (withNull) {
+ UNIT_ASSERT(optionalKey); // a null key only makes sense for an optional key type
+ keys.push_back(pb.NewEmptyOptional(keyType));
+ for (size_t k = 0; k < 1 + multi; ++k) { // two items per key in multi mode, one otherwise
+ items.push_back(pb.NewTuple(tupleType, {keys.back(), pb.NewDataLiteral((i32)items.size())})); // payload is the running item index
+ }
+ }
+ if (withData) {
+ for (i32 i = 0; i < 2; ++i) {
+ auto key = pb.NewDataLiteral(i);
+ if (optionalKey) {
+ key = pb.NewOptional(key);
+ }
+ keys.push_back(key);
+ for (size_t k = 0; k < 1 + multi; ++k) { // two items per key in multi mode, one otherwise
+ items.push_back(pb.NewTuple(tupleType, {key, pb.NewDataLiteral((i32)items.size())})); // payload is the running item index
+ }
+ }
+ }
+ auto list = pb.NewList(tupleType, items);
+ auto keyList = pb.NewList(keyType, keys);
+ auto dict = pb.ToHashedDict(list, multi, [&](TRuntimeNode tuple) { return pb.Nth(tuple, 0); }, [&pb](TRuntimeNode tuple) { return pb.Nth(tuple, 1); }, compact);
+
+ auto compareLists = [&](bool itemIsTuple, TRuntimeNode list1, TRuntimeNode list2) { // true when both lists have equal length and no zipped pair differs
+ return pb.And({
+ pb.Equals(
+ pb.Length(list1),
+ pb.Length(list2)
+ ),
+ pb.Not(
+ pb.Exists(
+ pb.Head(
+ pb.SkipWhile(
+ pb.Zip({list1, list2}),
+ [&](TRuntimeNode pair) {
+ if (itemIsTuple) {
+ return pb.And({
+ pb.AggrEquals(pb.Nth(pb.Nth(pair, 0), 0), pb.Nth(pb.Nth(pair, 1), 0)),
+ pb.AggrEquals(pb.Nth(pb.Nth(pair, 0), 1), pb.Nth(pb.Nth(pair, 1), 1)),
+ });
+ } else {
+ return pb.AggrEquals(pb.Nth(pair, 0), pb.Nth(pair, 1));
+ }
+ }
+ )
+ )
+ )
+ )
+ });
+ };
+
+ TVector<TRuntimeNode> results; // one boolean per check, in the order asserted at the end
+
+ // Check Dict has items
+ results.push_back(pb.AggrEquals(
+ pb.HasItems(dict),
+ pb.NewDataLiteral(withNull || withData)
+ ));
+
+ // Check Dict length
+ results.push_back(pb.AggrEquals(
+ pb.Length(dict),
+ pb.NewDataLiteral((ui64)keys.size())
+ ));
+
+ // Check Dict Contains
+ results.push_back(pb.AllOf(
+ pb.Map(list, [&](TRuntimeNode tuple) {
+ return pb.Contains(dict, pb.Nth(tuple, 0));
+ }),
+ [&](TRuntimeNode item) { return item; }
+ ));
+
+ // Check Dict Lookup
+ results.push_back(compareLists(false,
+ pb.Sort(
+ pb.FlatMap(
+ pb.Map(
+ keyList,
+ [&](TRuntimeNode key) {
+ return pb.Unwrap(pb.Lookup(dict, key), pb.NewDataLiteral<NUdf::EDataSlot::String>("Lookup failed"), "", 0, 0);
+ }
+ ),
+ [&](TRuntimeNode item) {
+ return multi ? item : pb.NewOptional(item); // multi lookups are flattened as-is; single payloads are wrapped in an optional for FlatMap
+ }
+ ),
+ pb.NewDataLiteral(true),
+ [&](TRuntimeNode item) { return item; }
+ ),
+ pb.Sort(
+ pb.Map(list, [&](TRuntimeNode tuple) {
+ return pb.Nth(tuple, 1);
+ }),
+ pb.NewDataLiteral(true),
+ [&](TRuntimeNode item) { return item; }
+ )
+ ));
+
+ // Check Dict items iterator
+ results.push_back(compareLists(true,
+ pb.Sort(
+ pb.FlatMap(
+ pb.DictItems(dict),
+ [&](TRuntimeNode pair) {
+ if (multi) {
+ return pb.Map(
+ pb.Nth(pair, 1),
+ [&](TRuntimeNode p) {
+ return pb.NewTuple({pb.Nth(pair, 0), p});
+ }
+ );
+ } else {
+ return pb.NewOptional(pair);
+ }
+ }
+ ),
+ pb.NewTuple({pb.NewDataLiteral(true), pb.NewDataLiteral(true)}),
+ [&](TRuntimeNode item) { return item; }
+ ),
+ list
+ ));
+
+ // Check Dict payloads iterator
+ results.push_back(compareLists(false,
+ pb.Sort(
+ pb.FlatMap(
+ pb.DictPayloads(dict),
+ [&](TRuntimeNode item) {
+ return multi ? item : pb.NewOptional(item);
+ }
+ ),
+ pb.NewDataLiteral(true),
+ [&](TRuntimeNode item) { return item; }
+ ),
+ pb.Map(
+ list,
+ [&](TRuntimeNode item) {
+ return pb.Nth(item, 1);
+ }
+ )
+ ));
+
+ auto graph = setup.BuildGraph(pb.NewTuple(results));
+ NUdf::TUnboxedValue res = graph->GetValue();
+
+ UNIT_ASSERT_C(res.GetElement(0).Get<bool>(), "Dict HasItems fail");
+ UNIT_ASSERT_C(res.GetElement(1).Get<bool>(), "Dict Length fail");
+ UNIT_ASSERT_C(res.GetElement(2).Get<bool>(), "Dict Contains fail");
+ UNIT_ASSERT_C(res.GetElement(3).Get<bool>(), "Dict Lookup fail");
+ UNIT_ASSERT_C(res.GetElement(4).Get<bool>(), "DictItems fail");
+ UNIT_ASSERT_C(res.GetElement(5).Get<bool>(), "DictPayloads fail");
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictWithDataKey) { // non-optional key, single-valued, non-compact
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictCompactWithDataKey) { // non-optional key, single-valued, compact
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictMultiWithDataKey) { // non-optional key, multi-valued, non-compact
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictCompactMultiWithDataKey) { // non-optional key, multi-valued, compact
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictWithOptionalDataKey) { // optional key: data only, null only, null plus data, empty
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/true, /*withData*/false);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/true, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictCompactWithOptionalDataKey) { // same four combinations, compact
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/true, /*withData*/false);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/true, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictMultiWithOptionalDataKey) { // same four combinations, multi-valued
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/true, /*withData*/false);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/true, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ Y_UNIT_TEST_LLVM(TestDictCompactMultiWithOptionalDataKey) { // same four combinations, multi-valued and compact
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/true, /*withData*/false);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/true, /*withData*/true);
+ TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
+ }
+
+ template <bool LLVM>
+ static void TestSetWithDataKeyImpl(bool optionalKey, bool compact, bool withNull, bool withData) { // builds a hashed set (Void payload) for the given flag combination and checks five operations on it
+ TSetup<LLVM> setup;
+ TProgramBuilder& pb = *setup.PgmBuilder;
+
+ TType* keyType = pb.NewDataType(NUdf::EDataSlot::Int32, optionalKey);
+ TVector<TRuntimeNode> keys;
+ if (withNull) {
+ UNIT_ASSERT(optionalKey); // a null key only makes sense for an optional key type
+ keys.push_back(pb.NewEmptyOptional(keyType));
+ }
+ if (withData) {
+ for (i32 i = 0; i < 2; ++i) {
+ auto key = pb.NewDataLiteral(i);
+ if (optionalKey) {
+ key = pb.NewOptional(key);
+ }
+ keys.push_back(key);
+ }
+ }
+ auto keyList = pb.NewList(keyType, keys);
+ auto set = pb.ToHashedDict(keyList, false, [&](TRuntimeNode key) { return key; }, [&pb](TRuntimeNode) { return pb.NewVoid(); }, compact); // Void payload with multi=false makes this a set
+
+ auto compareLists = [&](TRuntimeNode list1, TRuntimeNode list2) { // true when both lists have equal length and no zipped pair differs
+ return pb.And({
+ pb.Equals(
+ pb.Length(list1),
+ pb.Length(list2)
+ ),
+ pb.Not(
+ pb.Exists(
+ pb.Head(
+ pb.SkipWhile(
+ pb.Zip({list1, list2}),
+ [&](TRuntimeNode pair) {
+ return pb.AggrEquals(pb.Nth(pair, 0), pb.Nth(pair, 1));
+ }
+ )
+ )
+ )
+ )
+ });
+ };
+
+ TVector<TRuntimeNode> results; // one boolean per check, in the order asserted at the end
+
+ // Check Set has items
+ results.push_back(pb.AggrEquals(
+ pb.HasItems(set),
+ pb.NewDataLiteral(withNull || withData)
+ ));
+
+ // Check Set length
+ results.push_back(pb.AggrEquals(
+ pb.Length(set),
+ pb.NewDataLiteral((ui64)keys.size())
+ ));
+
+ // Check Set Contains
+ results.push_back(pb.AllOf(
+ pb.Map(keyList, [&](TRuntimeNode key) {
+ return pb.Contains(set, key);
+ }),
+ [&](TRuntimeNode item) { return item; }
+ ));
+
+ // Check Set Lookup
+ results.push_back(pb.AllOf(
+ pb.Map(keyList, [&](TRuntimeNode key) {
+ return pb.Exists(pb.Lookup(set, key));
+ }),
+ [&](TRuntimeNode item) { return item; }
+ ));
+
+ // Check Set items iterator
+ results.push_back(compareLists(
+ pb.Sort(
+ pb.DictKeys(set),
+ pb.NewDataLiteral(true),
+ [&](TRuntimeNode item) { return item; }
+ ),
+ keyList
+ ));
+
+ auto graph = setup.BuildGraph(pb.NewTuple(results));
+ NUdf::TUnboxedValue res = graph->GetValue();
+
+ UNIT_ASSERT_C(res.GetElement(0).Get<bool>(), "Set HasItems fail");
+ UNIT_ASSERT_C(res.GetElement(1).Get<bool>(), "Set Length fail");
+ UNIT_ASSERT_C(res.GetElement(2).Get<bool>(), "Set Contains fail");
+ UNIT_ASSERT_C(res.GetElement(3).Get<bool>(), "Set Lookup fail");
+ UNIT_ASSERT_C(res.GetElement(4).Get<bool>(), "Set DictKeys fail");
+ }
+
+ Y_UNIT_TEST_LLVM(TestSetWithDataKey) { // non-optional key, non-compact
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/false, /*withNull*/false, /*withData*/true);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty set
+ }
+
+ Y_UNIT_TEST_LLVM(TestSetCompactWithDataKey) { // non-optional key, compact
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/true, /*withNull*/false, /*withData*/true);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty set
+ }
+
+ Y_UNIT_TEST_LLVM(TestSetWithOptionalDataKey) { // optional key: data only, null only, null plus data, empty
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/false, /*withData*/true);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/true, /*withData*/false);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/true, /*withData*/true);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty set
+ }
+
+ Y_UNIT_TEST_LLVM(TestSetCompactWithOptionalDataKey) { // same four combinations, compact
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/false, /*withData*/true);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/true, /*withData*/false);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/true, /*withData*/true);
+ TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty set
+ }
}
-}
+
+
+} // namespace NKikimr::NMiniKQL
diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp
index a546e94dedc..ff8be740309 100644
--- a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp
+++ b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp
@@ -1199,44 +1199,54 @@ private:
mutable std::optional<TValuePacker> Packer;
};
-template <typename T>
-class THashedSingleFixedSetHolder : public TComputationValue<THashedSingleFixedSetHolder<T>> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedSetHolder : public TComputationValue<THashedSingleFixedSetHolder<T, OptionalKey>> {
public:
using TSetType = TValuesDictHashSingleFixedSet<T>;
class TIterator : public TComputationValue<TIterator> {
public:
+ enum class EState {
+ AtStart,
+ AtNull,
+ Iterator
+ };
TIterator(const THashedSingleFixedSetHolder* parent)
: TComputationValue<TIterator>(parent->GetMemInfo())
, Parent(const_cast<THashedSingleFixedSetHolder*>(parent))
, Iterator(Parent->Set.begin())
, End(Parent->Set.end())
- , AtStart(true)
+ , State(EState::AtStart)
{
}
private:
- bool Skip() override {
- if (AtStart) {
- AtStart = false;
- }
- else {
+ bool Skip() final { // advances to the next position; returns false once there is nothing left
+ switch (State) {
+ case EState::AtStart:
+ State = OptionalKey && Parent->HasNull ? EState::AtNull : EState::Iterator; // the null key, when present, is visited before the Set elements
+ break;
+ case EState::AtNull:
+ State = EState::Iterator; // Iterator still points at Set.begin(), so it is not advanced here
+ break;
+ case EState::Iterator:
if (Iterator == End)
return false;
++Iterator;
+ break;
}
- return Iterator != End;
+ return EState::AtNull == State || Iterator != End; // positioned on the null key or on a valid Set element
}
- bool Next(NUdf::TUnboxedValue& key) override {
+ bool Next(NUdf::TUnboxedValue& key) final { // advances and writes the current key; false when exhausted
if (!Skip())
return false;
- key = NUdf::TUnboxedValuePod(*Iterator);
+ key = EState::AtNull == State ? NUdf::TUnboxedValuePod() : NUdf::TUnboxedValuePod(*Iterator); // an empty pod stands for the null key
return true;
}
- bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+ bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final {
if (!Next(key))
return false;
payload = NUdf::TUnboxedValuePod::Void();
@@ -1246,96 +1256,112 @@ public:
const NUdf::TRefCountedPtr<THashedSingleFixedSetHolder> Parent;
typename TSetType::const_iterator Iterator;
typename TSetType::const_iterator End;
- bool AtStart;
+ EState State;
};
- THashedSingleFixedSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set)
+ THashedSingleFixedSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set, bool hasNull) // hasNull: whether the null key belongs to the set
: TComputationValue<THashedSingleFixedSetHolder>(memInfo)
, Set(std::move(set))
+ , HasNull(hasNull)
{
+ MKQL_ENSURE(OptionalKey || !HasNull, "Null value is not allowed for non-optional key type"); // hasNull may be true only when the key type is optional
}
private:
- bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+ bool Contains(const NUdf::TUnboxedValuePod& key) const final { // membership test; a null key is answered from the HasNull flag
+ if constexpr (OptionalKey) {
+ if (!key) { // null key is never stored in Set
+ return HasNull;
+ }
+ }
return Set.find(key.Get<T>()) != Set.cend();
}
- NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
- const auto it = Set.find(key.Get<T>());
- if (it == Set.cend()) {
- return NUdf::TUnboxedValuePod();
- }
- return NUdf::TUnboxedValuePod::Void();
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final { // Void when the key is present, an empty pod otherwise
+ if (Contains(key)) // reuses Contains so the null key is handled in one place
+ return NUdf::TUnboxedValuePod::Void();
+ return NUdf::TUnboxedValuePod();
}
- NUdf::TUnboxedValue GetKeysIterator() const override {
+ NUdf::TUnboxedValue GetKeysIterator() const final { // all four iterator getters below return the same TIterator type
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- NUdf::TUnboxedValue GetDictIterator() const override {
+ NUdf::TUnboxedValue GetDictIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- NUdf::TUnboxedValue GetPayloadsIterator() const override {
+ NUdf::TUnboxedValue GetPayloadsIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- NUdf::TUnboxedValue GetListIterator() const override {
+ NUdf::TUnboxedValue GetListIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- ui64 GetDictLength() const override {
- return Set.size();
+ ui64 GetDictLength() const final {
+ return Set.size() + ui64(OptionalKey && HasNull); // the null key is counted as one extra element
}
- bool HasDictItems() const override {
- return !Set.empty();
+ bool HasDictItems() const final {
+ return !Set.empty() || (OptionalKey && HasNull); // a set holding only the null key is not empty
}
- bool IsSortedDict() const override {
+ bool IsSortedDict() const final { // always reports an unsorted dict
return false;
}
const TSetType Set;
+ const bool HasNull;
};
-template <typename T>
-class THashedSingleFixedCompactSetHolder : public TComputationValue<THashedSingleFixedCompactSetHolder<T>> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedCompactSetHolder : public TComputationValue<THashedSingleFixedCompactSetHolder<T, OptionalKey>> {
public:
using TSetType = TValuesDictHashSingleFixedCompactSet<T>;
class TIterator : public TComputationValue<TIterator> {
public:
+ enum class EState {
+ AtStart,
+ AtNull,
+ Iterator
+ };
TIterator(const THashedSingleFixedCompactSetHolder* parent)
: TComputationValue<TIterator>(parent->GetMemInfo())
, Parent(const_cast<THashedSingleFixedCompactSetHolder*>(parent))
, Iterator(Parent->Set.Iterate())
- , AtStart(true)
+ , State(EState::AtStart)
{
}
private:
- bool Skip() override {
- if (AtStart) {
- AtStart = false;
- }
- else {
+ bool Skip() final { // advances to the next position; returns false once there is nothing left
+ switch (State) {
+ case EState::AtStart:
+ State = OptionalKey && Parent->HasNull ? EState::AtNull : EState::Iterator; // the null key, when present, is visited before the Set elements
+ break;
+ case EState::AtNull:
+ State = EState::Iterator; // Iterator has not been advanced yet, so it is left untouched here
+ break;
+ case EState::Iterator:
if (!Iterator.Ok())
return false;
++Iterator;
+ break;
}
- return Iterator.Ok();
+ return EState::AtNull == State || Iterator.Ok(); // positioned on the null key or on a valid Set element
}
- bool Next(NUdf::TUnboxedValue& key) override {
+ bool Next(NUdf::TUnboxedValue& key) final { // advances and writes the current key; false when exhausted
if (!Skip())
return false;
- key = NUdf::TUnboxedValuePod(*Iterator);
+ key = EState::AtNull == State ? NUdf::TUnboxedValuePod() : NUdf::TUnboxedValuePod(*Iterator); // an empty pod stands for the null key
return true;
}
- bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+ bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final {
if (!Next(key))
return false;
payload = NUdf::TUnboxedValuePod::Void();
@@ -1344,55 +1370,63 @@ public:
const NUdf::TRefCountedPtr<THashedSingleFixedCompactSetHolder> Parent;
typename TSetType::TIterator Iterator;
- bool AtStart;
+ EState State;
};
- THashedSingleFixedCompactSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set)
+ THashedSingleFixedCompactSetHolder(TMemoryUsageInfo* memInfo, TSetType&& set, bool hasNull)
: TComputationValue<THashedSingleFixedCompactSetHolder>(memInfo)
, Set(std::move(set))
+ , HasNull(hasNull)
{
+ MKQL_ENSURE(OptionalKey || !HasNull, "Null value is not allowed for non-optional key type");
}
private:
- bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+ bool Contains(const NUdf::TUnboxedValuePod& key) const final {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return HasNull;
+ }
+ }
return Set.Has(key.Get<T>());
}
- NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
- if (Set.Has(key.Get<T>()))
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final {
+ if (Contains(key))
return NUdf::TUnboxedValuePod::Void();
return NUdf::TUnboxedValuePod();
}
- NUdf::TUnboxedValue GetKeysIterator() const override {
+ NUdf::TUnboxedValue GetKeysIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- NUdf::TUnboxedValue GetDictIterator() const override {
+ NUdf::TUnboxedValue GetDictIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- NUdf::TUnboxedValue GetPayloadsIterator() const override {
+ NUdf::TUnboxedValue GetPayloadsIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- NUdf::TUnboxedValue GetListIterator() const override {
+ NUdf::TUnboxedValue GetListIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(this));
}
- ui64 GetDictLength() const override {
- return Set.Size();
+ ui64 GetDictLength() const final {
+ return Set.Size() + ui64(OptionalKey && HasNull);
}
- bool HasDictItems() const override {
- return !Set.Empty();
+ bool HasDictItems() const final {
+ return !Set.Empty() || (OptionalKey && HasNull);
}
- bool IsSortedDict() const override {
+ bool IsSortedDict() const final {
return false;
}
const TSetType Set;
+ const bool HasNull;
};
class THashedCompactSetHolder : public TComputationValue<THashedCompactSetHolder> {
@@ -1947,209 +1981,273 @@ private:
std::optional<TValuePacker> Packer;
};
-template <typename T>
-class THashedSingleFixedMapHolder : public TComputationValue<THashedSingleFixedMapHolder<T>> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedMapHolder : public TComputationValue<THashedSingleFixedMapHolder<T, OptionalKey>> {
public:
using TMapType = TValuesDictHashSingleFixedMap<T>;
template <bool NoSwap>
class TIterator : public TComputationValue<TIterator<NoSwap>> {
public:
+ enum class EState {
+ AtStart,
+ AtNull,
+ Iterator
+ };
TIterator(const THashedSingleFixedMapHolder* parent)
: TComputationValue<TIterator<NoSwap>>(parent->GetMemInfo())
, Parent(const_cast<THashedSingleFixedMapHolder*>(parent))
, Iterator(Parent->Map.begin())
, End(Parent->Map.end())
- , AtStart(true)
+ , State(EState::AtStart)
{
}
private:
- bool Skip() override {
- if (AtStart) {
- AtStart = false;
- }
- else {
+ bool Skip() final {
+ switch (State) {
+ case EState::AtStart:
+ State = OptionalKey && Parent->NullPayload.has_value() ? EState::AtNull : EState::Iterator;
+ break;
+ case EState::AtNull:
+ State = EState::Iterator;
+ break;
+ case EState::Iterator:
if (Iterator == End) {
return false;
}
++Iterator;
+ break;
}
- return Iterator != End;
+ return EState::AtNull == State || Iterator != End;
}
- bool Next(NUdf::TUnboxedValue& key) override {
+ bool Next(NUdf::TUnboxedValue& key) final {
if (!Skip())
return false;
- key = NoSwap ? NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first)) : Iterator->second;
+ key = NoSwap
+ ? (EState::AtNull == State ? NUdf::TUnboxedValue() : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first)))
+ : (EState::AtNull == State ? *Parent->NullPayload : Iterator->second);
return true;
}
- bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+ bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final {
if (!Next(key))
return false;
- payload = NoSwap ? Iterator->second : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first));
+ payload = NoSwap
+ ? (EState::AtNull == State ? *Parent->NullPayload : Iterator->second)
+ : (EState::AtNull == State ? NUdf::TUnboxedValue() : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator->first)));
return true;
}
const NUdf::TRefCountedPtr<THashedSingleFixedMapHolder> Parent;
typename TMapType::const_iterator Iterator;
typename TMapType::const_iterator End;
- bool AtStart;
+ EState State;
};
- THashedSingleFixedMapHolder(TMemoryUsageInfo* memInfo, TValuesDictHashSingleFixedMap<T>&& map)
+ THashedSingleFixedMapHolder(TMemoryUsageInfo* memInfo, TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload)
: TComputationValue<THashedSingleFixedMapHolder>(memInfo)
, Map(std::move(map))
+ , NullPayload(std::move(nullPayload))
{
}
private:
- bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+ bool Contains(const NUdf::TUnboxedValuePod& key) const final {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return NullPayload.has_value();
+ }
+ }
return Map.find(key.Get<T>()) != Map.end();
}
- NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return NullPayload.has_value() ? NullPayload->MakeOptional() : NUdf::TUnboxedValuePod();
+ }
+ }
const auto it = Map.find(key.Get<T>());
if (it == Map.end())
return NUdf::TUnboxedValuePod();
return it->second.MakeOptional();
}
- NUdf::TUnboxedValue GetKeysIterator() const override {
+ NUdf::TUnboxedValue GetKeysIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator<true>(this));
}
- NUdf::TUnboxedValue GetDictIterator() const override {
+ NUdf::TUnboxedValue GetDictIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator<true>(this));
}
- NUdf::TUnboxedValue GetPayloadsIterator() const override {
+ NUdf::TUnboxedValue GetPayloadsIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator<false>(this));
}
- ui64 GetDictLength() const override {
- return Map.size();
+ ui64 GetDictLength() const final {
+ return Map.size() + ui64(OptionalKey && NullPayload.has_value());
}
- bool HasDictItems() const override {
- return !Map.empty();
+ bool HasDictItems() const final {
+ return !Map.empty() || (OptionalKey && NullPayload.has_value());
}
- bool IsSortedDict() const override {
+ bool IsSortedDict() const final {
return false;
}
const TMapType Map;
+ const std::optional<NUdf::TUnboxedValue> NullPayload;
};
-template <typename T>
-class THashedSingleFixedCompactMapHolder : public TComputationValue<THashedSingleFixedCompactMapHolder<T>> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedCompactMapHolder : public TComputationValue<THashedSingleFixedCompactMapHolder<T, OptionalKey>> {
public:
using TMapType = TValuesDictHashSingleFixedCompactMap<T>;
template <bool NoSwap>
class TIterator : public TComputationValue<TIterator<NoSwap>> {
public:
+ enum class EState {
+ AtStart,
+ AtNull,
+ Iterator
+ };
TIterator(const THashedSingleFixedCompactMapHolder* parent)
: TComputationValue<TIterator<NoSwap>>(parent->GetMemInfo())
, Parent(const_cast<THashedSingleFixedCompactMapHolder*>(parent))
, Iterator(Parent->Map.Iterate())
- , AtStart(true)
+ , State(EState::AtStart)
{
}
private:
- bool Skip() override {
- if (AtStart) {
- AtStart = false;
- }
- else if (Iterator.Ok()) {
- ++Iterator;
+ bool Skip() final {
+ switch (State) {
+ case EState::AtStart:
+ State = OptionalKey && Parent->NullPayload.has_value() ? EState::AtNull : EState::Iterator;
+ break;
+ case EState::AtNull:
+ State = EState::Iterator;
+ break;
+ case EState::Iterator:
+ if (Iterator.Ok())
+ ++Iterator;
+ break;
}
- return Iterator.Ok();
+ return EState::AtNull == State || Iterator.Ok();
}
- bool Next(NUdf::TUnboxedValue& key) override {
+ bool Next(NUdf::TUnboxedValue& key) final {
if (!Skip())
return false;
- key = NoSwap ?
- NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first)):
- Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory);
+
+ key = NoSwap
+ ? (EState::AtNull == State
+ ? NUdf::TUnboxedValue()
+ : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first))
+ )
+ : (EState::AtNull == State
+ ? Parent->PayloadPacker.Unpack(GetSmallValue(*Parent->NullPayload), Parent->Ctx->HolderFactory)
+ : Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory)
+ );
return true;
}
- bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+ bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final {
if (!Next(key))
return false;
- payload = NoSwap ?
- Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory):
- NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first));
+ payload = NoSwap
+ ? (EState::AtNull == State
+ ? Parent->PayloadPacker.Unpack(GetSmallValue(*Parent->NullPayload), Parent->Ctx->HolderFactory)
+ : Parent->PayloadPacker.Unpack(GetSmallValue(Iterator.Get().second), Parent->Ctx->HolderFactory)
+ )
+ : (EState::AtNull == State
+ ? NUdf::TUnboxedValue()
+ : NUdf::TUnboxedValue(NUdf::TUnboxedValuePod(Iterator.Get().first))
+ );
return true;
}
const NUdf::TRefCountedPtr<THashedSingleFixedCompactMapHolder> Parent;
typename TMapType::TIterator Iterator;
- bool AtStart;
+ EState State;
};
- THashedSingleFixedCompactMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, TPagedArena&& pool,
+ THashedSingleFixedCompactMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool,
TType* payloadType, TComputationContext* ctx)
: TComputationValue<THashedSingleFixedCompactMapHolder>(memInfo)
, Pool(std::move(pool))
, Map(std::move(map))
+ , NullPayload(std::move(nullPayload))
, PayloadPacker(false, payloadType)
, Ctx(ctx)
{
}
private:
- bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+ bool Contains(const NUdf::TUnboxedValuePod& key) const final {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return NullPayload.has_value();
+ }
+ }
return Map.Has(key.Get<T>());
}
- NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+ NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return NullPayload.has_value()
+ ? PayloadPacker.Unpack(GetSmallValue(*NullPayload), Ctx->HolderFactory).Release().MakeOptional()
+ : NUdf::TUnboxedValuePod();
+ }
+ }
auto it = Map.Find(key.Get<T>());
if (!it.Ok())
return NUdf::TUnboxedValuePod();
return PayloadPacker.Unpack(GetSmallValue(it.Get().second), Ctx->HolderFactory).Release().MakeOptional();
}
- NUdf::TUnboxedValue GetKeysIterator() const override {
+ NUdf::TUnboxedValue GetKeysIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator<true>(this));
}
- NUdf::TUnboxedValue GetDictIterator() const override {
+ NUdf::TUnboxedValue GetDictIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator<true>(this));
}
- NUdf::TUnboxedValue GetPayloadsIterator() const override {
+ NUdf::TUnboxedValue GetPayloadsIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator<false>(this));
}
- ui64 GetDictLength() const override {
- return Map.Size();
+ ui64 GetDictLength() const final {
+ return Map.Size() + ui64(OptionalKey && NullPayload.has_value());
}
- bool HasDictItems() const override {
- return !Map.Empty();
+ bool HasDictItems() const final {
+ return !Map.Empty() || (OptionalKey && NullPayload.has_value());
}
- bool IsSortedDict() const override {
+ bool IsSortedDict() const final {
return false;
}
private:
TPagedArena Pool;
const TMapType Map;
+ const std::optional<ui64> NullPayload;
mutable TValuePacker PayloadPacker;
TComputationContext* Ctx;
};
-template <typename T>
-class THashedSingleFixedCompactMultiMapHolder : public TComputationValue<THashedSingleFixedCompactMultiMapHolder<T>> {
+template <typename T, bool OptionalKey>
+class THashedSingleFixedCompactMultiMapHolder : public TComputationValue<THashedSingleFixedCompactMultiMapHolder<T, OptionalKey>> {
public:
using TMapType = TValuesDictHashSingleFixedCompactMultiMap<T>;
using TMapIterator = typename TMapType::TIterator;
@@ -2166,7 +2264,7 @@ public:
}
private:
- bool Next(NUdf::TUnboxedValue& value) override {
+ bool Next(NUdf::TUnboxedValue& value) final {
if (!Iterator.Ok()) {
return false;
}
@@ -2176,7 +2274,7 @@ public:
return true;
}
- bool Skip() override {
+ bool Skip() final {
if (!Iterator.Ok()) {
return false;
}
@@ -2197,11 +2295,11 @@ public:
Y_ASSERT(From.Ok());
}
- bool HasFastListLength() const override {
+ bool HasFastListLength() const final {
return true;
}
- ui64 GetListLength() const override {
+ ui64 GetListLength() const final {
if (!Length) {
Length = Parent->Map.Count(From.GetKey());
}
@@ -2209,11 +2307,11 @@ public:
return *Length;
}
- bool HasListItems() const override {
+ bool HasListItems() const final {
return true;
}
- NUdf::TUnboxedValue GetListIterator() const override {
+ NUdf::TUnboxedValue GetListIterator() const final {
return NUdf::TUnboxedValuePod(new TIterator(Parent.Get(), From));
}
@@ -2221,6 +2319,70 @@ public:
TMapIterator From;
};
+ class TNullPayloadList: public TCustomListValue {
+ public:
+ class TIterator : public TComputationValue<TIterator> {
+ public:
+ TIterator(const THashedSingleFixedCompactMultiMapHolder* parent)
+ : TComputationValue<TIterator>(parent->GetMemInfo())
+ , Parent(const_cast<THashedSingleFixedCompactMultiMapHolder*>(parent))
+ , Iterator(Parent->NullPayloads.cbegin())
+ {
+ }
+
+ private:
+ bool Next(NUdf::TUnboxedValue& value) final {
+ if (Iterator == Parent->NullPayloads.cend()) {
+ return false;
+ }
+
+ value = Parent->PayloadPacker.Unpack(GetSmallValue(*Iterator), Parent->Ctx->HolderFactory);
+ ++Iterator;
+ return true;
+ }
+
+ bool Skip() final {
+ if (Iterator == Parent->NullPayloads.cend()) {
+ return false;
+ }
+
+ ++Iterator;
+ return true;
+ }
+
+ const NUdf::TRefCountedPtr<THashedSingleFixedCompactMultiMapHolder> Parent;
+ typename std::vector<ui64>::const_iterator Iterator;
+ };
+
+ TNullPayloadList(TMemoryUsageInfo* memInfo, const THashedSingleFixedCompactMultiMapHolder* parent)
+ : TCustomListValue(memInfo)
+ , Parent(const_cast<THashedSingleFixedCompactMultiMapHolder*>(parent))
+ {
+ }
+
+ bool HasFastListLength() const final {
+ return true;
+ }
+
+ ui64 GetListLength() const final {
+ if (!Length) {
+ Length = Parent->NullPayloads.size();
+ }
+
+ return *Length;
+ }
+
+ bool HasListItems() const final {
+ return true;
+ }
+
+ NUdf::TUnboxedValue GetListIterator() const final {
+ return NUdf::TUnboxedValuePod(new TIterator(Parent.Get()));
+ }
+
+ const NUdf::TRefCountedPtr<THashedSingleFixedCompactMultiMapHolder> Parent;
+ };
+
template <bool NoSwap>
class TIterator : public TComputationValue<TIterator<NoSwap>> {
public:
@@ -2228,11 +2390,19 @@ public:
: TComputationValue<TIterator<NoSwap>>(parent->GetMemInfo())
, Parent(const_cast<THashedSingleFixedCompactMultiMapHolder*>(parent))
, Iterator(parent->Map.Iterate())
+ , AtNull(OptionalKey && !parent->NullPayloads.empty())
{
}
private:
bool Next(NUdf::TUnboxedValue& key) override {
+ if (AtNull) {
+ AtNull = false;
+ key = NoSwap
+ ? NUdf::TUnboxedValuePod()
+ : Parent->Ctx->HolderFactory.template Create<TNullPayloadList>(Parent.Get());
+ return true;
+ }
if (!Iterator.Ok()) {
return false;
}
@@ -2245,6 +2415,17 @@ public:
}
bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) override {
+ if (AtNull) {
+ AtNull = false;
+ if (NoSwap) {
+ key = NUdf::TUnboxedValuePod();
+ payload = Parent->Ctx->HolderFactory.template Create<TNullPayloadList>(Parent.Get());
+ } else {
+ payload = NUdf::TUnboxedValuePod();
+ key = Parent->Ctx->HolderFactory.template Create<TNullPayloadList>(Parent.Get());
+ }
+ return true;
+ }
if (!Iterator.Ok()) {
return false;
}
@@ -2261,6 +2442,10 @@ public:
}
bool Skip() override {
+ if (AtNull) {
+ AtNull = false;
+ return true;
+ }
if (!Iterator.Ok()) {
return false;
}
@@ -2271,13 +2456,15 @@ public:
const NUdf::TRefCountedPtr<THashedSingleFixedCompactMultiMapHolder> Parent;
TMapIterator Iterator;
+ bool AtNull;
};
- THashedSingleFixedCompactMultiMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, TPagedArena&& pool,
+ THashedSingleFixedCompactMultiMapHolder(TMemoryUsageInfo* memInfo, TMapType&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool,
TType* payloadType, TComputationContext* ctx)
: TComputationValue<THashedSingleFixedCompactMultiMapHolder>(memInfo)
, Pool(std::move(pool))
, Map(std::move(map))
+ , NullPayloads(std::move(nullPayloads))
, PayloadPacker(false, payloadType)
, Ctx(ctx)
{
@@ -2285,10 +2472,22 @@ public:
private:
bool Contains(const NUdf::TUnboxedValuePod& key) const override {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return !NullPayloads.empty();
+ }
+ }
return Map.Has(key.Get<T>());
}
NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override {
+ if constexpr (OptionalKey) {
+ if (!key) {
+ return NullPayloads.empty()
+ ? NUdf::TUnboxedValuePod()
+ : Ctx->HolderFactory.Create<TNullPayloadList>(this);
+ }
+ }
const auto it = Map.Find(key.Get<T>());
if (!it.Ok())
return NUdf::TUnboxedValuePod();
@@ -2308,11 +2507,11 @@ private:
}
ui64 GetDictLength() const override {
- return Map.UniqSize();
+ return Map.UniqSize() + ui64(OptionalKey && !NullPayloads.empty());
}
bool HasDictItems() const override {
- return !Map.Empty();
+ return !Map.Empty() || (OptionalKey && !NullPayloads.empty());
}
bool IsSortedDict() const override {
@@ -2322,6 +2521,7 @@ private:
private:
TPagedArena Pool;
const TMapType Map;
+ const std::vector<ui64> NullPayloads;
mutable TValuePacker PayloadPacker;
TComputationContext* Ctx;
};
@@ -3316,36 +3516,50 @@ NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSetHolder(
filler, types, isTuple, eagerFill, encodedType, hash, equate, *this));
}
-template <typename T>
+template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder(
- TValuesDictHashSingleFixedSet<T>&& set) const {
- return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedSetHolder<T>>(CurrentAllocState, &MemInfo, std::move(set)));
+ TValuesDictHashSingleFixedSet<T>&& set, bool hasNull) const {
+ return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedSetHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, std::move(set), hasNull));
}
-#define DEFINE_HASHED_SINGLE_FIXED_SET(xType) \
- template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder<xType> \
- (TValuesDictHashSingleFixedSet<xType>&& set) const;
+#define DEFINE_HASHED_SINGLE_FIXED_SET_OPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder<xType, true> \
+ (TValuesDictHashSingleFixedSet<xType>&& set, bool hasNull) const;
+
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_SET_OPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_SET_OPT
-KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_SET)
-#undef DEFINE_HASHED_SINGLE_FIXED_SET
+#define DEFINE_HASHED_SINGLE_FIXED_SET_NONOPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedSetHolder<xType, false> \
+ (TValuesDictHashSingleFixedSet<xType>&& set, bool hasNull) const;
-template <typename T>
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_SET_NONOPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_SET_NONOPT
+
+template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder(
- TValuesDictHashSingleFixedCompactSet<T>&& set) const {
- return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactSetHolder<T>>(CurrentAllocState, &MemInfo, std::move(set)));
+ TValuesDictHashSingleFixedCompactSet<T>&& set, bool hasNull) const {
+ return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactSetHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, std::move(set), hasNull));
}
-#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET(xType) \
- template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder<xType> \
- (TValuesDictHashSingleFixedCompactSet<xType>&& set) const;
+#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_OPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder<xType, true> \
+ (TValuesDictHashSingleFixedCompactSet<xType>&& set, bool hasNull) const;
+
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_OPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_OPT
-KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET)
-#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET
+#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_NONOPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactSetHolder<xType, false> \
+ (TValuesDictHashSingleFixedCompactSet<xType>&& set, bool hasNull) const;
-template <typename T>
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_NONOPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_SET_NONOPT
+
+template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder(
- TValuesDictHashSingleFixedMap<T>&& map) const {
- return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedMapHolder<T>>(CurrentAllocState, &MemInfo, std::move(map)));
+ TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const {
+ return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedMapHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo, std::move(map), std::move(nullPayload)));
}
NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedCompactSetHolder(
@@ -3365,18 +3579,20 @@ NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedCompactMultiMapHolder(
return NUdf::TUnboxedValuePod(AllocateOn<THashedCompactMultiMapHolder>(CurrentAllocState, &MemInfo, std::move(map), std::move(pool), keyType, payloadType, ctx));
}
-template <typename T>
+template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder(
- TValuesDictHashSingleFixedCompactMap<T>&& map, TPagedArena&& pool, TType* payloadType,
+ TValuesDictHashSingleFixedCompactMap<T>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType,
TComputationContext* ctx) const {
- return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMapHolder<T>>(CurrentAllocState, &MemInfo, std::move(map), std::move(pool), payloadType, ctx));
+ return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMapHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo,
+ std::move(map), std::move(nullPayload), std::move(pool), payloadType, ctx));
}
-template <typename T>
+template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder(
- TValuesDictHashSingleFixedCompactMultiMap<T>&& map, TPagedArena&& pool, TType* payloadType,
+ TValuesDictHashSingleFixedCompactMultiMap<T>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType,
TComputationContext* ctx) const {
- return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMultiMapHolder<T>>(CurrentAllocState, &MemInfo, std::move(map), std::move(pool), payloadType, ctx));
+ return NUdf::TUnboxedValuePod(AllocateOn<THashedSingleFixedCompactMultiMapHolder<T, OptionalKey>>(CurrentAllocState, &MemInfo,
+ std::move(map), std::move(nullPayloads), std::move(pool), payloadType, ctx));
}
NUdf::IDictValueBuilder::TPtr THolderFactory::NewDict(
@@ -3395,28 +3611,51 @@ NUdf::IDictValueBuilder::TPtr THolderFactory::NewDict(
useIHash ? MakeCompareImpl(keyType) : nullptr);
}
-#define DEFINE_HASHED_SINGLE_FIXED_MAP(xType) \
- template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder<xType> \
- (TValuesDictHashSingleFixedMap<xType>&& map) const;
+#define DEFINE_HASHED_SINGLE_FIXED_MAP_OPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder<xType, true> \
+ (TValuesDictHashSingleFixedMap<xType>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const;
+
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_MAP_OPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_MAP_OPT
+
+#define DEFINE_HASHED_SINGLE_FIXED_MAP_NONOPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedMapHolder<xType, false> \
+ (TValuesDictHashSingleFixedMap<xType>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const;
+
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_MAP_NONOPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_MAP_NONOPT
+
+#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_OPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder<xType, true> \
+ (TValuesDictHashSingleFixedCompactMap<xType>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, \
+ TComputationContext* ctx) const;
+
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_OPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_OPT
+
+#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_NONOPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder<xType, false> \
+ (TValuesDictHashSingleFixedCompactMap<xType>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType, \
+ TComputationContext* ctx) const;
-KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_MAP)
-#undef DEFINE_HASHED_SINGLE_FIXED_MAP
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_NONOPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP_NONOPT
-#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP(xType) \
- template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMapHolder<xType> \
- (TValuesDictHashSingleFixedCompactMap<xType>&& map, TPagedArena&& pool, TType* payloadType, \
+#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_OPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder<xType, true> \
+ (TValuesDictHashSingleFixedCompactMultiMap<xType>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, \
TComputationContext* ctx) const;
-KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP)
-#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MAP
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_OPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_OPT
-#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP(xType) \
- template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder<xType> \
- (TValuesDictHashSingleFixedCompactMultiMap<xType>&& map, TPagedArena&& pool, TType* payloadType, \
+#define DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_NONOPT(xType) \
+ template NUdf::TUnboxedValuePod THolderFactory::CreateDirectHashedSingleFixedCompactMultiMapHolder<xType, false> \
+ (TValuesDictHashSingleFixedCompactMultiMap<xType>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType, \
TComputationContext* ctx) const;
-KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP)
-#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP
+KNOWN_PRIMITIVE_VALUE_TYPES(DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_NONOPT)
+#undef DEFINE_HASHED_SINGLE_FIXED_COMPACT_MULTI_MAP_NONOPT
//////////////////////////////////////////////////////////////////////////////
// TNodeFactory
diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h
index 85681918c70..59813f771e2 100644
--- a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h
+++ b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h
@@ -18,6 +18,8 @@
#include <functional>
#include <unordered_map>
#include <unordered_set>
+#include <optional>
+#include <vector>
#ifndef MKQL_DISABLE_CODEGEN
namespace llvm {
@@ -460,14 +462,14 @@ public:
NUdf::IHash::TPtr hash,
NUdf::IEquate::TPtr equate) const;
- template <typename T>
- NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedSetHolder(TValuesDictHashSingleFixedSet<T>&& set) const;
+ template <typename T, bool OptionalKey>
+ NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedSetHolder(TValuesDictHashSingleFixedSet<T>&& set, bool hasNull) const;
- template <typename T>
- NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactSetHolder(TValuesDictHashSingleFixedCompactSet<T>&& set) const;
+ template <typename T, bool OptionalKey>
+ NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactSetHolder(TValuesDictHashSingleFixedCompactSet<T>&& set, bool hasNull) const;
- template <typename T>
- NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedMapHolder(TValuesDictHashSingleFixedMap<T>&& map) const;
+ template <typename T, bool OptionalKey>
+ NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedMapHolder(TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const;
NUdf::TUnboxedValuePod CreateDirectHashedCompactSetHolder(
TValuesDictHashCompactSet&& set, TPagedArena&& pool, TType* keyType,
@@ -481,14 +483,14 @@ public:
TValuesDictHashCompactMultiMap&& map, TPagedArena&& pool, TType* keyType, TType* payloadType,
TComputationContext* ctx) const;
- template <typename T>
+ template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactMapHolder(
- TValuesDictHashSingleFixedCompactMap<T>&& map, TPagedArena&& pool, TType* payloadType,
+ TValuesDictHashSingleFixedCompactMap<T>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType,
TComputationContext* ctx) const;
- template <typename T>
+ template <typename T, bool OptionalKey>
NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactMultiMapHolder(
- TValuesDictHashSingleFixedCompactMultiMap<T>&& map, TPagedArena&& pool, TType* payloadType,
+ TValuesDictHashSingleFixedCompactMultiMap<T>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType,
TComputationContext* ctx) const;
NUdf::IDictValueBuilder::TPtr NewDict(
diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp
index 55e41c32327..07a99c994cd 100644
--- a/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp
+++ b/ydb/library/yql/minikql/computation/mkql_computation_node_pack_ut.cpp
@@ -261,7 +261,7 @@ protected:
map[4] = NUdf::TUnboxedValuePod::Embedded("4");
map[10] = NUdf::TUnboxedValuePod::Embedded("10");
map[1] = NUdf::TUnboxedValuePod::Embedded("1");
- const NUdf::TUnboxedValue value = HolderFactory.CreateDirectHashedSingleFixedMapHolder<ui32>(std::move(map));
+ const NUdf::TUnboxedValue value = HolderFactory.CreateDirectHashedSingleFixedMapHolder<ui32, false>(std::move(map), std::nullopt);
const auto uValue = TestPackUnpack(dictType, value, "Type:Dict");