diff options
author | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-01-02 19:18:39 +0300 |
---|---|---|
committer | ivanmorozov <ivanmorozov@yandex-team.com> | 2023-01-02 19:18:39 +0300 |
commit | 3b9d6412ef442facb9ef6eb1e7aa279d4279145a (patch) | |
tree | c435fe2f9383d4d03917dbab3dab7ffdc8014425 | |
parent | 30dcdc9a5b8c97ce1c47bc4f569b1f540044ac5b (diff) | |
download | ydb-3b9d6412ef442facb9ef6eb1e7aa279d4279145a.tar.gz |
made squeezetodict through transparent structures
5 files changed, 160 insertions, 197 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp index 70242223d0..8f579b5a47 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp @@ -24,12 +24,7 @@ using NYql::EnsureDynamicCast; namespace { class THashedMultiMapAccumulator { - using TMapType = std::unordered_map< - NUdf::TUnboxedValue, - TUnboxedValueVector, - NYql::TVaryingHash<NUdf::TUnboxedValue, TValueHasher>, - TValueEqual, - TMKQLAllocator<std::pair<const NUdf::TUnboxedValue, TUnboxedValueVector>>>; + using TMapType = TValuesDictHashMap; TComputationContext& Ctx; TType* KeyType; @@ -62,30 +57,17 @@ public: key = MakeString(Packer->Pack(key)); } - const auto ins = Map.emplace(std::move(key), 1U); - if (ins.second) - ins.first->second.front() = std::move(payload); - else - ins.first->second.emplace_back(std::move(payload)); + auto it = Map.find(key); + if (it == Map.end()) { + it = Map.emplace(std::move(key), Ctx.HolderFactory.NewVectorHolder()).first; + } + it->second.Push(std::move(payload)); } NUdf::TUnboxedValue Build() { const auto filler = [this](TValuesDictHashMap& targetMap) { - targetMap.reserve(Map.size()); - - for (auto& pair : Map) { - auto itemFactory = [](const NUdf::TUnboxedValuePod& value) { - return value; - }; - - ui64 start = 0; - ui64 finish = pair.second.size(); - auto payloadList = CreateOwningVectorListAdapter(std::move(pair.second), itemFactory, - start, finish, false, Ctx.HolderFactory.GetMemInfo()); - - targetMap.emplace(pair.first, std::move(payloadList)); - } + targetMap = std::move(Map); }; return Ctx.HolderFactory.CreateDirectHashedDictHolder(filler, KeyTypes, IsTuple, true, Packer ? KeyType : nullptr, Hash, Equate); @@ -141,23 +123,17 @@ public: template<typename T, bool OptionalKey> class THashedSingleFixedMultiMapAccumulator { - using TMapType = std::unordered_map< - T, - TUnboxedValueVector, - NYql::TVaryingHash<T, TMyHash<T>>, - TMyEquals<T>, - TMKQLAllocator<std::pair<const T, TUnboxedValueVector>>>; + using TMapType = TValuesDictHashSingleFixedMap<T>; TComputationContext& Ctx; const TKeyTypes& KeyTypes; TMapType Map; TUnboxedValueVector NullPayloads; - + NUdf::TUnboxedValue CurrentEmptyVectorForInsert; public: THashedSingleFixedMultiMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) - : Ctx(ctx), KeyTypes(keyTypes), Map(0, TMyHash<T>(), TMyEquals<T>()) - { + : Ctx(ctx), KeyTypes(keyTypes), Map(0, TMyHash<T>(), TMyEquals<T>()) { Y_UNUSED(keyType); Y_UNUSED(payloadType); Y_UNUSED(isTuple); @@ -166,52 +142,29 @@ public: Y_UNUSED(equate); Y_UNUSED(hash); Map.reserve(itemsCountHint); + CurrentEmptyVectorForInsert = Ctx.HolderFactory.NewVectorHolder(); } - void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload) - { + void Add(NUdf::TUnboxedValue&& key, NUdf::TUnboxedValue&& payload) { if constexpr (OptionalKey) { if (!key) { NullPayloads.emplace_back(std::move(payload)); return; } } - const auto ins = Map.emplace(key.Get<T>(), 1U); - if (ins.second) - ins.first->second.front() = std::move(payload); - else - ins.first->second.emplace_back(std::move(payload)); + auto insertInfo = Map.emplace(key.Get<T>(), CurrentEmptyVectorForInsert); + if (insertInfo.second) { + CurrentEmptyVectorForInsert = Ctx.HolderFactory.NewVectorHolder(); + } + insertInfo.first->second.Push(payload.Release()); } - NUdf::TUnboxedValue Build() - { - const auto filler = [this](TValuesDictHashMap& targetMap) { - targetMap.reserve(Map.size()); - - auto itemFactory = [](const NUdf::TUnboxedValuePod& value) { - return value; - }; - for (auto& pair : Map) { - ui64 start = 0; - ui64 finish = pair.second.size(); - auto payloadList = CreateOwningVectorListAdapter(std::move(pair.second), itemFactory, - start, finish, false, - Ctx.HolderFactory.GetMemInfo()); - - targetMap.emplace(NUdf::TUnboxedValuePod(pair.first), std::move(payloadList)); - } - if constexpr (OptionalKey) { - if (!NullPayloads.empty()) { - auto payloadList = CreateOwningVectorListAdapter(std::move(NullPayloads), itemFactory, - /*start*/ 0, /*finish*/ NullPayloads.size(), /*reversed*/ false, - Ctx.HolderFactory.GetMemInfo()); - - targetMap.emplace(NUdf::TUnboxedValuePod(), std::move(payloadList)); - } - } - }; - - return Ctx.HolderFactory.CreateDirectHashedDictHolder(filler, KeyTypes, false, true, nullptr, nullptr, nullptr); + NUdf::TUnboxedValue Build() { + std::optional<NUdf::TUnboxedValue> nullPayload; + if (NullPayloads.size()) { + nullPayload = Ctx.HolderFactory.VectorAsVectorHolder(std::move(NullPayloads)); + } + return Ctx.HolderFactory.CreateDirectHashedSingleFixedMapHolder<T, OptionalKey>(std::move(Map), std::move(nullPayload)); } }; diff --git a/ydb/library/yql/public/udf/udf_value.h b/ydb/library/yql/public/udf/udf_value.h index c192c81dee..41649061c9 100644 --- a/ydb/library/yql/public/udf/udf_value.h +++ b/ydb/library/yql/public/udf/udf_value.h @@ -678,6 +678,9 @@ public: inline ui64 GetEstimatedListLength() const; inline TUnboxedValue GetListIterator() const; inline bool HasListItems() const; +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 11) + inline void Push(const TUnboxedValuePod& value) const; +#endif // Dict accessors inline ui64 GetDictLength() const; diff --git a/ydb/library/yql/public/udf/udf_value_inl.h b/ydb/library/yql/public/udf/udf_value_inl.h index bbe323cbd9..2e4d4d2e31 100644 --- a/ydb/library/yql/public/udf/udf_value_inl.h +++ b/ydb/library/yql/public/udf/udf_value_inl.h @@ -384,6 +384,13 @@ inline bool TUnboxedValuePod::HasFastListLength() const { return TBoxedValueAccessor::HasFastListLength(*Raw.Boxed.Value); } +#if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 11) +inline void TUnboxedValuePod::Push(const TUnboxedValuePod& value) const { + UDF_VERIFY(IsBoxed(), "Value is not a list"); + return TBoxedValueAccessor::Push(*Raw.Boxed.Value, value); +} +#endif + inline ui64 TUnboxedValuePod::GetListLength() const { UDF_VERIFY(IsBoxed(), "Value is not a list"); diff --git a/ydb/tests/functional/suite_tests/canondata/test_postgres.TestPGSQL.test_sql_suite_results-window.test_/query_7 b/ydb/tests/functional/suite_tests/canondata/test_postgres.TestPGSQL.test_sql_suite_results-window.test_/query_7 index 856bc49955..f526bde5c9 100644 --- a/ydb/tests/functional/suite_tests/canondata/test_postgres.TestPGSQL.test_sql_suite_results-window.test_/query_7 +++ b/ydb/tests/functional/suite_tests/canondata/test_postgres.TestPGSQL.test_sql_suite_results-window.test_/query_7 @@ -1,17 +1,25 @@ [ { - "column0": 5000, + "column0": 6000, "column1": 1 }, { - "column0": 9800, + "column0": 11200, "column1": 2 }, { - "column0": 14600, + "column0": 16400, "column1": 2 }, { + "column0": 20900, + "column1": 4 + }, + { + "column0": 25100, + "column1": 5 + }, + { "column0": 3900, "column1": 1 }, @@ -20,23 +28,15 @@ "column1": 2 }, { - "column0": 6000, + "column0": 5000, "column1": 1 }, { - "column0": 11200, + "column0": 9800, "column1": 2 }, { - "column0": 16400, + "column0": 14600, "column1": 2 - }, - { - "column0": 20900, - "column1": 4 - }, - { - "column0": 25100, - "column1": 5 } ]
\ No newline at end of file diff --git a/ydb/tests/functional/suite_tests/canondata/test_stream_query.TestStreamQuery.test_sql_suite_results-window.test_/window.test.out b/ydb/tests/functional/suite_tests/canondata/test_stream_query.TestStreamQuery.test_sql_suite_results-window.test_/window.test.out index a119b3b90e..2cd8a42b47 100644 --- a/ydb/tests/functional/suite_tests/canondata/test_stream_query.TestStreamQuery.test_sql_suite_results-window.test_/window.test.out +++ b/ydb/tests/functional/suite_tests/canondata/test_stream_query.TestStreamQuery.test_sql_suite_results-window.test_/window.test.out @@ -7,8 +7,8 @@ SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PA --------------+-------+--------+--------- b'develop' | 7 | 4200 | 25100 b'develop' | 9 | 4500 | 25100 - b'develop' | 11 | 5200 | 25100 b'develop' | 10 | 5200 | 25100 + b'develop' | 11 | 5200 | 25100 b'develop' | 8 | 6000 | 25100 b'personnel' | 5 | 3500 | 7400 b'personnel' | 2 | 3900 | 7400 @@ -25,16 +25,16 @@ SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITI depname | empno | salary | column3 --------------+-------+--------+--------- - b'sales' | 3 | 4800 | 1 - b'sales' | 4 | 4800 | 1 - b'sales' | 1 | 5000 | 3 - b'personnel' | 5 | 3500 | 1 - b'personnel' | 2 | 3900 | 2 b'develop' | 7 | 4200 | 1 b'develop' | 9 | 4500 | 2 b'develop' | 10 | 5200 | 3 b'develop' | 11 | 5200 | 3 b'develop' | 8 | 6000 | 5 + b'personnel' | 5 | 3500 | 1 + b'personnel' | 2 | 3900 | 2 + b'sales' | 3 | 4800 | 1 + b'sales' | 4 | 4800 | 1 + b'sales' | 1 | 5000 | 3 (10 rows) @@ -76,16 +76,16 @@ SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PA depname | empno | salary | column3 --------------+-------+--------+--------- - b'sales' | 1 | 5000 | 14600 - b'sales' | 3 | 4800 | 14600 - b'sales' | 4 | 4800 | 14600 - b'personnel' | 2 | 3900 | 7400 - b'personnel' | 5 | 3500 | 7400 b'develop' | 7 | 4200 | 25100 b'develop' | 8 | 6000 | 25100 b'develop' | 9 | 4500 | 25100 b'develop' | 10 | 5200 | 25100 b'develop' | 11 | 5200 | 25100 + b'personnel' | 2 | 3900 | 7400 + b'personnel' | 5 | 3500 | 7400 + b'sales' | 1 | 5000 | 14600 + b'sales' | 3 | 4800 | 14600 + b'sales' | 4 | 4800 | 14600 (10 rows) @@ -96,15 +96,15 @@ SELECT depname, empno, salary, rank() OVER w AS r FROM empsalary WINDOW w AS (PA depname | empno | salary | r --------------+-------+--------+--- - b'sales' | 3 | 4800 | 1 - b'sales' | 4 | 4800 | 1 b'develop' | 7 | 4200 | 1 + b'sales' | 4 | 4800 | 1 + b'sales' | 3 | 4800 | 1 b'personnel' | 5 | 3500 | 1 b'personnel' | 2 | 3900 | 2 b'develop' | 9 | 4500 | 2 - b'sales' | 1 | 5000 | 3 - b'develop' | 10 | 5200 | 3 b'develop' | 11 | 5200 | 3 + b'develop' | 10 | 5200 | 3 + b'sales' | 1 | 5000 | 3 b'develop' | 8 | 6000 | 5 (10 rows) @@ -144,16 +144,16 @@ SELECT sum(four) OVER w AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW sum_1 | ten | four -------+-----+------ - 3 | 1 | 3 - 4 | 1 | 1 - 5 | 1 | 1 - 3 | 3 | 3 + 1 | 9 | 1 + 0 | 4 | 0 + 1 | 7 | 1 0 | 0 | 0 0 | 0 | 0 2 | 0 | 2 - 1 | 7 | 1 - 0 | 4 | 0 - 1 | 9 | 1 + 3 | 3 | 3 + 3 | 1 | 3 + 4 | 1 | 1 + 5 | 1 | 1 (10 rows) @@ -184,16 +184,16 @@ SELECT rank() OVER w AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW w rank_1 | ten | four --------+-----+------ - 1 | 1 | 3 - 2 | 3 | 3 - 1 | 0 | 0 - 1 | 0 | 0 - 3 | 4 | 0 + 1 | 0 | 2 1 | 1 | 1 1 | 1 | 1 3 | 7 | 1 4 | 9 | 1 - 1 | 0 | 2 + 1 | 0 | 0 + 1 | 0 | 0 + 3 | 4 | 0 + 1 | 1 | 3 + 2 | 3 | 3 (10 rows) @@ -204,16 +204,16 @@ SELECT dense_rank() OVER w, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW w AS column0 | ten | four ---------+-----+------ - 1 | 1 | 3 - 2 | 3 | 3 - 1 | 0 | 0 - 1 | 0 | 0 - 2 | 4 | 0 + 1 | 0 | 2 1 | 1 | 1 1 | 1 | 1 2 | 7 | 1 3 | 9 | 1 - 1 | 0 | 2 + 1 | 0 | 0 + 1 | 0 | 0 + 2 | 4 | 0 + 1 | 1 | 3 + 2 | 3 | 3 (10 rows) @@ -224,16 +224,16 @@ SELECT lag(ten) OVER w, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW w AS (PAR column0 | ten | four ---------+-----+------ - None | 1 | 3 - 1 | 3 | 3 - None | 0 | 0 - 0 | 0 | 0 - 0 | 4 | 0 + None | 0 | 2 None | 1 | 1 1 | 1 | 1 1 | 7 | 1 7 | 9 | 1 - None | 0 | 2 + None | 0 | 0 + 0 | 0 | 0 + 0 | 4 | 0 + None | 1 | 3 + 1 | 3 | 3 (10 rows) @@ -244,16 +244,16 @@ SELECT lead(ten) OVER w, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW w AS (PA column0 | ten | four ---------+-----+------ - 3 | 1 | 3 - None | 3 | 3 - 0 | 0 | 0 - 4 | 0 | 0 - None | 4 | 0 + None | 0 | 2 1 | 1 | 1 7 | 1 | 1 9 | 7 | 1 None | 9 | 1 - None | 0 | 2 + 0 | 0 | 0 + 4 | 0 | 0 + None | 4 | 0 + 3 | 1 | 3 + None | 3 | 3 (10 rows) @@ -264,16 +264,16 @@ SELECT lead(ten * 2, 1) OVER w, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW w column0 | ten | four ---------+-----+------ - 6 | 1 | 3 - None | 3 | 3 - 0 | 0 | 0 - 8 | 0 | 0 - None | 4 | 0 + None | 0 | 2 2 | 1 | 1 14 | 1 | 1 18 | 7 | 1 None | 9 | 1 - None | 0 | 2 + 0 | 0 | 0 + 8 | 0 | 0 + None | 4 | 0 + 6 | 1 | 3 + None | 3 | 3 (10 rows) @@ -284,16 +284,16 @@ SELECT first_value(ten) OVER w, ten, four FROM tenk1 WHERE unique2 < 10 WINDOW w column0 | ten | four ---------+-----+------ - 1 | 1 | 3 - 1 | 3 | 3 - 0 | 0 | 0 - 0 | 0 | 0 - 0 | 4 | 0 + 0 | 0 | 2 1 | 1 | 1 1 | 1 | 1 1 | 7 | 1 1 | 9 | 1 - 0 | 0 | 2 + 0 | 0 | 0 + 0 | 0 | 0 + 0 | 4 | 0 + 1 | 1 | 3 + 1 | 3 | 3 (10 rows) @@ -348,16 +348,16 @@ FROM tenk1 GROUP BY ten, two WINDOW w AS (PARTITION BY two ORDER BY ten); ten | two | gsum | wsum -----+-----+-------+-------- - 0 | 0 | 45000 | 45000 - 2 | 0 | 47000 | 92000 - 4 | 0 | 49000 | 141000 - 6 | 0 | 51000 | 192000 - 8 | 0 | 53000 | 245000 1 | 1 | 46000 | 46000 3 | 1 | 48000 | 94000 5 | 1 | 50000 | 144000 7 | 1 | 52000 | 196000 9 | 1 | 54000 | 250000 + 0 | 0 | 45000 | 45000 + 2 | 0 | 47000 | 92000 + 4 | 0 | 49000 | 141000 + 6 | 0 | 51000 | 192000 + 8 | 0 | 53000 | 245000 (10 rows) @@ -368,12 +368,12 @@ SELECT count(*) OVER w, four FROM (SELECT * FROM tenk1 WHERE two = 1) WHERE uniq column0 | four ---------+------ - 2 | 3 - 2 | 3 4 | 1 4 | 1 4 | 1 4 | 1 + 2 | 3 + 2 | 3 (6 rows) @@ -386,16 +386,16 @@ WINDOW w AS (PARTITION BY four ORDER BY ten); cntsum -------- + 51 + 22 + 24 + 82 92 - 136 21 22 87 - 22 - 24 - 82 92 - 51 + 136 (10 rows) @@ -422,16 +422,16 @@ SELECT avg(four) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS (PARTITION BY column0 --------- - 3.0 - 3.0 - 0.0 - 0.0 - 0.0 + 2.0 1.0 1.0 1.0 1.0 - 2.0 + 0.0 + 0.0 + 0.0 + 3.0 + 3.0 (10 rows) @@ -443,16 +443,16 @@ FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten); ten | two | gsum | wsum -----+-----+-------+-------- - 0 | 0 | 45000 | 45000 - 2 | 0 | 47000 | 92000 - 4 | 0 | 49000 | 141000 - 6 | 0 | 51000 | 192000 - 8 | 0 | 53000 | 245000 1 | 1 | 46000 | 46000 3 | 1 | 48000 | 94000 5 | 1 | 50000 | 144000 7 | 1 | 52000 | 196000 9 | 1 | 54000 | 250000 + 0 | 0 | 45000 | 45000 + 2 | 0 | 47000 | 92000 + 4 | 0 | 49000 | 141000 + 6 | 0 | 51000 | 192000 + 8 | 0 | 53000 | 245000 (10 rows) @@ -509,16 +509,16 @@ SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION B column0 | column1 ---------+--------- - 5000 | 1 - 9800 | 2 - 14600 | 2 - 3900 | 1 - 7400 | 2 6000 | 1 11200 | 2 16400 | 2 20900 | 4 25100 | 5 + 3900 | 1 + 7400 | 2 + 5000 | 1 + 9800 | 2 + 14600 | 2 (10 rows) @@ -600,13 +600,13 @@ select first_value(max(x)) over w, y column0 | y ---------+---- - 9999 | 12 - 9999 | 10 - 9999 | 8 - 9999 | 6 - 9999 | 4 - 9999 | 2 - 9999 | 0 + 9980 | 0 + 9980 | 2 + 9980 | 4 + 9980 | 6 + 9980 | 8 + 9980 | 10 + 9980 | 12 (7 rows) @@ -621,26 +621,26 @@ WINDOW w AS (partition by four order by ten); four | ten | column2 | column3 ------+-----+---------+--------- - 0 | 0 | 0 | 0 - 0 | 2 | 2 | 2 - 0 | 4 | 6 | 4 - 0 | 6 | 12 | 6 - 0 | 8 | 20 | 8 3 | 1 | 1 | 1 3 | 3 | 4 | 3 3 | 5 | 9 | 5 3 | 7 | 16 | 7 3 | 9 | 25 | 9 - 2 | 0 | 0 | 0 - 2 | 2 | 2 | 2 - 2 | 4 | 6 | 4 - 2 | 6 | 12 | 6 - 2 | 8 | 20 | 8 1 | 1 | 1 | 1 1 | 3 | 4 | 3 1 | 5 | 9 | 5 1 | 7 | 16 | 7 1 | 9 | 25 | 9 + 2 | 0 | 0 | 0 + 2 | 2 | 2 | 2 + 2 | 4 | 6 | 4 + 2 | 6 | 12 | 6 + 2 | 8 | 20 | 8 + 0 | 0 | 0 | 0 + 0 | 2 | 2 | 2 + 0 | 4 | 6 | 4 + 0 | 6 | 12 | 6 + 0 | 8 | 20 | 8 (20 rows) @@ -655,26 +655,26 @@ WINDOW w AS (partition by four order by ten/4 rows between unbounded preceding a four | two | column2 | column3 ------+-----+---------+--------- - 0 | 0 | 0 | 0 - 0 | 0 | 0 | 0 - 0 | 1 | 1 | 1 - 0 | 1 | 2 | 1 - 0 | 2 | 4 | 2 3 | 0 | 0 | 0 3 | 0 | 0 | 0 3 | 1 | 1 | 1 3 | 1 | 2 | 1 3 | 2 | 4 | 2 - 2 | 0 | 0 | 0 - 2 | 0 | 0 | 0 - 2 | 1 | 1 | 1 - 2 | 1 | 2 | 1 - 2 | 2 | 4 | 2 1 | 0 | 0 | 0 1 | 0 | 0 | 0 1 | 1 | 1 | 1 1 | 1 | 2 | 1 1 | 2 | 4 | 2 + 2 | 0 | 0 | 0 + 2 | 0 | 0 | 0 + 2 | 1 | 1 | 1 + 2 | 1 | 2 | 1 + 2 | 2 | 4 | 2 + 0 | 0 | 0 | 0 + 0 | 0 | 0 | 0 + 0 | 1 | 1 | 1 + 0 | 1 | 2 | 1 + 0 | 2 | 4 | 2 (20 rows) |