diff options
author | vvvv <vvvv@ydb.tech> | 2023-04-06 14:50:50 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-04-06 14:50:50 +0300 |
commit | 867680d32dbb79d3502e1e42d46bdc2fe6a3f2bf (patch) | |
tree | f506de8406df6d7c8d295f98fb854e267af6284c | |
parent | 1081f842b292b5ac2aad05bb887c47956bbcc45c (diff) | |
download | ydb-867680d32dbb79d3502e1e42d46bdc2fe6a3f2bf.tar.gz |
Relaxed dict type, support of Auto mode for Dicts (+internal sorting of structs)
17 files changed, 399 insertions, 92 deletions
diff --git a/ydb/library/yql/ast/yql_expr.cpp b/ydb/library/yql/ast/yql_expr.cpp index 02a31b07b1a..1ee199e1ce3 100644 --- a/ydb/library/yql/ast/yql_expr.cpp +++ b/ydb/library/yql/ast/yql_expr.cpp @@ -2931,12 +2931,16 @@ ui32 TVariantExprType::MakeFlags(const TTypeAnnotationNode* underlyingType) { bool TDictExprType::Validate(TPosition position, TExprContext& ctx) const { - if (!KeyType->IsHashable() || !KeyType->IsEquatable()) { - ctx.AddError(TIssue(position, TStringBuilder() << "Expected hashable and equatable type as dict key type, but got: " << *KeyType)); - return false; + if (KeyType->IsHashable() && KeyType->IsEquatable()) { + return true; } - return true; + if (KeyType->IsComparableInternal()) { + return true; + } + + ctx.AddError(TIssue(position, TStringBuilder() << "Expected hashable and equatable or internally comparable dict key type, but got: " << *KeyType)); + return false; } bool TDictExprType::Validate(TPositionHandle position, TExprContext& ctx) const { diff --git a/ydb/library/yql/ast/yql_expr.h b/ydb/library/yql/ast/yql_expr.h index 2681f89ce97..03e723045eb 100644 --- a/ydb/library/yql/ast/yql_expr.h +++ b/ydb/library/yql/ast/yql_expr.h @@ -132,6 +132,7 @@ enum ETypeAnnotationFlags : ui32 { TypeHasNestedOptional = 0x800, TypeNonPresortable = 0x1000, TypeHasDynamicSize = 0x2000, + TypeNonComparableInternal = 0x4000, }; const ui64 TypeHashMagic = 0x10000; @@ -213,6 +214,10 @@ public: return IsPersistable() && (GetFlags() & TypeNonComparable) == 0; } + bool IsComparableInternal() const { + return IsPersistable() && (GetFlags() & TypeNonComparableInternal) == 0; + } + bool HasNull() const { return (GetFlags() & TypeHasNull) != 0; } @@ -712,6 +717,7 @@ public: if (!(props & NUdf::CanCompare)) { ret |= TypeNonComparable; + ret |= TypeNonComparableInternal; } if (slot == NUdf::EDataSlot::Yson) { diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index dbc02b65318..2c414bbf089 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -5374,13 +5374,13 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { if (node->Head().IsCallable("AsList") && node->Child(2)->Child(1)->IsCallable("Void")) { TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; TMaybe<ui64> itemsCount; bool isCompact; - auto settingsError = ParseToDictSettings(*node, ctx, isMany, isHashed, itemsCount, isCompact); + auto settingsError = ParseToDictSettings(*node, ctx, type, isMany, itemsCount, isCompact); YQL_ENSURE(!settingsError); - if (!*isMany && *isHashed) { + if (!*isMany && *type != EDictType::Sorted) { YQL_CLOG(DEBUG, Core) << "ToDict without payload over list literal"; return ctx.Builder(node->Pos()) .Callable("DictFromKeys") diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index 9077e3b2198..a297de6cc70 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -641,7 +641,7 @@ TExprNode::TPtr PeepHoleDictFromKeysToDict(const TExprNode::TPtr& node, TExprCon .Build() .Settings() .Add().Build("One", TNodeFlags::Default) - .Add().Build("Hashed", TNodeFlags::Default) + .Add().Build("Auto", TNodeFlags::Default) .Build() .Done() .Ptr(); diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index e33ea28f98d..f5335777945 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -6644,10 +6644,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; TMaybe<ui64> itemsCount; bool isCompact; - TMaybe<TIssue> error = ParseToDictSettings(*input, ctx.Expr, isMany, isHashed, itemsCount, isCompact); + TMaybe<TIssue> error = ParseToDictSettings(*input, ctx.Expr, type, isMany, itemsCount, isCompact); if (error) { ctx.Expr.AddError(*error); return IGraphTransformer::TStatus::Error; @@ -6664,18 +6664,25 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - if (*isHashed) { + switch (*type) { + case EDictType::Hashed: { if (!keyType->IsEquatable() || !keyType->IsHashable()) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "Expected equatable and hashable key type for hashed dict, but got: " << *keyType)); return IGraphTransformer::TStatus::Error; } - } else { + break; + } + case EDictType::Sorted: { if (!keyType->IsComparable()) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "Expected comparable key type for sorted dict, but got: " << *keyType)); return IGraphTransformer::TStatus::Error; } + break; + } + case EDictType::Auto: + break; } if (isCompact) { @@ -6733,10 +6740,10 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; TMaybe<ui64> itemsCount; bool isCompact; - if (const auto error = ParseToDictSettings(*input, ctx.Expr, isMany, isHashed, itemsCount, isCompact)) { + if (const auto error = ParseToDictSettings(*input, ctx.Expr, type, isMany, itemsCount, isCompact)) { ctx.Expr.AddError(*error); return IGraphTransformer::TStatus::Error; } @@ -6752,10 +6759,25 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - if (!*isHashed && !keyType->IsComparable()) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() - << "Expected comparable key type for sorted dict, but got: " << *keyType)); - return IGraphTransformer::TStatus::Error; + switch (*type) { + case EDictType::Sorted: { + if (!keyType->IsComparable()) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() + << "Expected comparable key type for sorted dict, but got: " << *keyType)); + return IGraphTransformer::TStatus::Error; + } + break; + } + case EDictType::Hashed: { + if (!keyType->IsEquatable() || !keyType->IsHashable()) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() + << "Expected hashable and equatable key type for hashed dict, but got: " << *keyType)); + return IGraphTransformer::TStatus::Error; + } + break; + } + case EDictType::Auto: + break; } if (input->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Flow) { diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index 6c4ce003eee..c0c1bdfd1d6 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -445,7 +445,7 @@ TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& setti return ret; } -TMaybe<TIssue> ParseToDictSettings(const TExprNode& input, TExprContext& ctx, TMaybe<bool>& isMany, TMaybe<bool>& isHashed, TMaybe<ui64>& itemsCount, bool& isCompact) { +TMaybe<TIssue> ParseToDictSettings(const TExprNode& input, TExprContext& ctx, TMaybe<EDictType>& type, TMaybe<bool>& isMany, TMaybe<ui64>& itemsCount, bool& isCompact) { isCompact = false; auto settings = input.Child(3); if (settings->Type() != TExprNode::List) { @@ -461,10 +461,13 @@ TMaybe<TIssue> ParseToDictSettings(const TExprNode& input, TExprContext& ctx, TM isMany = true; } else if (child->Content() == "Sorted") { - isHashed = false; + type = EDictType::Sorted; } else if (child->Content() == "Hashed") { - isHashed = true; + type = EDictType::Hashed; + } + else if (child->Content() == "Auto") { + type = EDictType::Auto; } else if (child->Content() == "Compact") { isCompact = true; @@ -493,13 +496,26 @@ TMaybe<TIssue> ParseToDictSettings(const TExprNode& input, TExprContext& ctx, TM } - if (!isHashed || !isMany) { - return TIssue(ctx.GetPosition(input.Pos()), TStringBuilder() << "Both options must be specified: Sorted/Hashed and Many/One"); + if (!type || !isMany) { + return TIssue(ctx.GetPosition(input.Pos()), TStringBuilder() << "Both options must be specified: Sorted/Hashed/Auto and Many/One"); } return TMaybe<TIssue>(); } +EDictType SelectDictType(EDictType type, const TTypeAnnotationNode* keyType) { + if (type != EDictType::Auto) { + return type; + } + + if (keyType->IsHashable() && keyType->IsEquatable()) { + return EDictType::Hashed; + } + + YQL_ENSURE(keyType->IsComparableInternal()); + return EDictType::Sorted; +} + TExprNode::TPtr MakeSingleGroupRow(const TExprNode& aggregateNode, TExprNode::TPtr reduced, TExprContext& ctx) { auto pos = aggregateNode.Pos(); auto aggregatedColumns = aggregateNode.Child(2); diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h index 6863df79371..c71c35647ed 100644 --- a/ydb/library/yql/core/yql_opt_utils.h +++ b/ydb/library/yql/core/yql_opt_utils.h @@ -52,7 +52,14 @@ TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& setti TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx); TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx); -TMaybe<TIssue> ParseToDictSettings(const TExprNode& node, TExprContext& ctx, TMaybe<bool>& isMany, TMaybe<bool>& isHashed, TMaybe<ui64>& itemsCount, bool& isCompact); +enum class EDictType { + Hashed, + Sorted, + Auto, +}; + +TMaybe<TIssue> ParseToDictSettings(const TExprNode& node, TExprContext& ctx, TMaybe<EDictType>& type, TMaybe<bool>& isMany, TMaybe<ui64>& itemsCount, bool& isCompact); +EDictType SelectDictType(EDictType type, const TTypeAnnotationNode* keyType); using MemberUpdaterFunc = std::function<bool (TString& memberName, const TTypeAnnotationNode* TypeAnnotation)>; bool UpdateStructMembers(TExprContext& ctx, const TExprNode::TPtr& node, const TStringBuf& goal, TExprNode::TListType& members, diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp index c2f5d45b3a1..3cef11427ae 100644 --- a/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp +++ b/ydb/library/yql/minikql/comp_nodes/mkql_todict.cpp @@ -38,6 +38,8 @@ class THashedMultiMapAccumulator { TMapType Map; public: + static constexpr bool IsSorted = false; + THashedMultiMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyType(keyType), KeyTypes(keyTypes), IsTuple(isTuple), Map(0, TValueHasher(KeyTypes, isTuple, hash), @@ -89,6 +91,8 @@ class THashedMapAccumulator { TMapType Map; public: + static constexpr bool IsSorted = false; + THashedMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyType(keyType), KeyTypes(keyTypes), IsTuple(isTuple), Map(0, TValueHasher(KeyTypes, isTuple, hash), @@ -132,6 +136,8 @@ class THashedSingleFixedMultiMapAccumulator { TUnboxedValueVector NullPayloads; NUdf::TUnboxedValue CurrentEmptyVectorForInsert; public: + static constexpr bool IsSorted = false; + THashedSingleFixedMultiMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyTypes(keyTypes), Map(0, TMyHash<T>(), TMyEquals<T>()) { @@ -178,6 +184,8 @@ class THashedSingleFixedMapAccumulator { std::optional<NUdf::TUnboxedValue> NullPayload; public: + static constexpr bool IsSorted = false; + THashedSingleFixedMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Map(0, TMyHash<T>(), TMyEquals<T>()) @@ -223,6 +231,8 @@ class THashedSetAccumulator { NUdf::IEquate::TPtr Equate; public: + static constexpr bool IsSorted = false; + THashedSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyType(keyType), KeyTypes(keyTypes), IsTuple(isTuple), Set(0, TValueHasher(KeyTypes, isTuple, hash), @@ -264,6 +274,8 @@ class THashedSingleFixedSetAccumulator { bool HasNull = false; public: + static constexpr bool IsSorted = false; + THashedSingleFixedSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Set(0, TMyHash<T>(), TMyEquals<T>()) @@ -305,6 +317,8 @@ class THashedSingleFixedCompactSetAccumulator { bool HasNull = false; public: + static constexpr bool IsSorted = false; + THashedSingleFixedCompactSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Pool(&Ctx.HolderFactory.GetPagePool()), Set(Ctx.HolderFactory.GetPagePool(), itemsCountHint / COMPACT_HASH_MAX_LOAD_FACTOR) @@ -346,6 +360,8 @@ class THashedCompactSetAccumulator { TValuePacker KeyPacker; public: + static constexpr bool IsSorted = false; + THashedCompactSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Pool(&Ctx.HolderFactory.GetPagePool()), Set(Ctx.HolderFactory.GetPagePool(), itemsCountHint / COMPACT_HASH_MAX_LOAD_FACTOR, TSmallValueHash(), TSmallValueEqual()) @@ -385,6 +401,8 @@ class THashedCompactMapAccumulator<false> { TValuePacker KeyPacker, PayloadPacker; public: + static constexpr bool IsSorted = false; + THashedCompactMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Pool(&Ctx.HolderFactory.GetPagePool()), Map(Ctx.HolderFactory.GetPagePool(), itemsCountHint / COMPACT_HASH_MAX_LOAD_FACTOR) @@ -421,6 +439,8 @@ class THashedCompactMapAccumulator<true> { TValuePacker KeyPacker, PayloadPacker; public: + static constexpr bool IsSorted = false; + THashedCompactMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Pool(&Ctx.HolderFactory.GetPagePool()), Map(Ctx.HolderFactory.GetPagePool(), itemsCountHint / COMPACT_HASH_MAX_LOAD_FACTOR) @@ -461,6 +481,8 @@ class THashedSingleFixedCompactMapAccumulator<T, OptionalKey, false> { TValuePacker PayloadPacker; public: + static constexpr bool IsSorted = false; + THashedSingleFixedCompactMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Pool(&Ctx.HolderFactory.GetPagePool()), Map(Ctx.HolderFactory.GetPagePool(), itemsCountHint / COMPACT_HASH_MAX_LOAD_FACTOR) @@ -505,6 +527,8 @@ class THashedSingleFixedCompactMapAccumulator<T, OptionalKey, true> { TValuePacker PayloadPacker; public: + static constexpr bool IsSorted = false; + THashedSingleFixedCompactMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), Pool(&Ctx.HolderFactory.GetPagePool()), Map(Ctx.HolderFactory.GetPagePool(), itemsCountHint / COMPACT_HASH_MAX_LOAD_FACTOR) @@ -549,6 +573,8 @@ class TSortedSetAccumulator { TUnboxedValueVector Items; public: + static constexpr bool IsSorted = true; + TSortedSetAccumulator(TType* keyType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyType(keyType), KeyTypes(keyTypes), IsTuple(isTuple), Compare(compare), Equate(equate) @@ -599,6 +625,8 @@ class TSortedMapAccumulator<false> { TKeyPayloadPairVector Items; public: + static constexpr bool IsSorted = true; + TSortedMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyType(keyType), KeyTypes(keyTypes), IsTuple(isTuple), Compare(compare), Equate(equate) @@ -644,6 +672,8 @@ class TSortedMapAccumulator<true> { TKeyPayloadPairVector Items; public: + static constexpr bool IsSorted = true; + TSortedMapAccumulator(TType* keyType, TType* payloadType, const TKeyTypes& keyTypes, bool isTuple, bool encoded, NUdf::ICompare::TPtr compare, NUdf::IEquate::TPtr equate, NUdf::IHash::TPtr hash, TComputationContext& ctx, ui64 itemsCountHint) : Ctx(ctx), KeyType(keyType), KeyTypes(keyTypes), IsTuple(isTuple), Compare(compare), Equate(equate) @@ -766,9 +796,9 @@ public: if constexpr (IsStream) { return ctx.HolderFactory.Create<TStreamValue>(List->GetValue(ctx), Item, Key, TSetAccumulator(KeyType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TSetAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TSetAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, ItemsCountHint), ctx); } @@ -781,11 +811,10 @@ public: return ctx.HolderFactory.GetEmptyContainer(); } - bool UseICompare = UseIHash && std::is_same_v<TSetAccumulator, TSortedSetAccumulator>; TSetAccumulator accumulator(KeyType, KeyTypes, IsTuple, Encoded, - UseICompare ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TSetAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TSetAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, itemsCountHint); TThresher<false>::DoForEachItem(list, @@ -998,9 +1027,9 @@ public: private: void MakeState(TComputationContext& ctx, NUdf::TUnboxedValue& state) const { state = ctx.HolderFactory.Create<TState>(TSetAccumulator(KeyType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TSetAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TSetAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, ItemsCountHint)); } @@ -1190,9 +1219,9 @@ public: private: void MakeState(TComputationContext& ctx, NUdf::TUnboxedValue& state) const { state = ctx.HolderFactory.Create<TState>(TSetAccumulator(KeyType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TSetAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TSetAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, ItemsCountHint)); } @@ -1287,9 +1316,9 @@ public: if constexpr (IsStream) { return ctx.HolderFactory.Create<TStreamValue>(List->GetValue(ctx), Item, Key, Payload, TMapAccumulator(KeyType, PayloadType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TMapAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TMapAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, ItemsCountHint), ctx); } @@ -1304,9 +1333,9 @@ public: } TMapAccumulator accumulator(KeyType, PayloadType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TMapAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TMapAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, itemsCountHint); TThresher<false>::DoForEachItem(list, @@ -1500,9 +1529,9 @@ public: private: void MakeState(TComputationContext& ctx, NUdf::TUnboxedValue& state) const { state = ctx.HolderFactory.Create<TState>(TMapAccumulator(KeyType, PayloadType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TMapAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TMapAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, ItemsCountHint)); } @@ -1701,9 +1730,9 @@ public: private: void MakeState(TComputationContext& ctx, NUdf::TUnboxedValue& state) const { state = ctx.HolderFactory.Create<TState>(TMapAccumulator(KeyType, PayloadType, KeyTypes, IsTuple, Encoded, - UseIHash ? MakeCompareImpl(KeyType) : nullptr, + UseIHash && TMapAccumulator::IsSorted ? MakeCompareImpl(KeyType) : nullptr, UseIHash ? MakeEquateImpl(KeyType) : nullptr, - UseIHash ? MakeHashImpl(KeyType) : nullptr, + UseIHash && !TMapAccumulator::IsSorted ? MakeHashImpl(KeyType) : nullptr, ctx, ItemsCountHint)); } diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp index f8848fcd700..c8de496a533 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_graph.cpp @@ -421,8 +421,11 @@ private: items.push_back(std::make_pair(GetComputationNode(item.first.GetNode()), GetComputationNode(item.second.GetNode()))); } + bool isSorted = !CanHash(keyType); AddNode(node, NodeFactory->CreateDictNode(std::move(items), types, isTuple, encoded ? keyType : nullptr, - useIHash ? MakeHashImpl(keyType) : nullptr, useIHash ? MakeEquateImpl(keyType) : nullptr)); + useIHash && !isSorted ? MakeHashImpl(keyType) : nullptr, + useIHash ? MakeEquateImpl(keyType) : nullptr, + useIHash && isSorted ? MakeCompareImpl(keyType) : nullptr, isSorted)); } void Visit(TCallable& node) override { diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp index 8d46ca3df90..5e4088b6f35 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.cpp @@ -2729,7 +2729,8 @@ public: TDictNode(TComputationMutables& mutables, std::vector<std::pair<IComputationNode*, IComputationNode*>>&& itemNodes, const TKeyTypes& types, bool isTuple, TType* encodedType, - NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate) + NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate, + NUdf::ICompare::TPtr compare, bool isSorted) : TBaseComputation(mutables) , ItemNodes(std::move(itemNodes)) , Types(types) @@ -2737,6 +2738,8 @@ public: , EncodedType(encodedType) , Hash(hash) , Equate(equate) + , Compare(compare) + , IsSorted(isSorted) {} NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const { @@ -2751,20 +2754,29 @@ public: packer.emplace(true, EncodedType); } - THashedDictFiller filler = - [&items, &packer](TValuesDictHashMap& map) { - for (auto& value : items) { - auto key = std::move(value.first); - if (packer) { - key = MakeString(packer->Pack(key)); - } + if (IsSorted) { + const TSortedDictFiller filler = [&](TKeyPayloadPairVector& values) { + values = std::move(items); + }; - map.emplace(std::move(key), std::move(value.second)); - } - }; + return ctx.HolderFactory.CreateDirectSortedDictHolder(filler, Types, IsTuple, EDictSortMode::RequiresSorting, + true, EncodedType, Compare, Equate); + } else { + THashedDictFiller filler = + [&items, &packer](TValuesDictHashMap& map) { + for (auto& value : items) { + auto key = std::move(value.first); + if (packer) { + key = MakeString(packer->Pack(key)); + } + + map.emplace(std::move(key), std::move(value.second)); + } + }; - return ctx.HolderFactory.CreateDirectHashedDictHolder( - filler, Types, IsTuple, true, EncodedType, Hash, Equate); + return ctx.HolderFactory.CreateDirectHashedDictHolder( + filler, Types, IsTuple, true, EncodedType, Hash, Equate); + } } private: @@ -2781,6 +2793,8 @@ private: TType* EncodedType; NUdf::IHash::TPtr Hash; NUdf::IEquate::TPtr Equate; + NUdf::ICompare::TPtr Compare; + const bool IsSorted; }; class TVariantNode : public TMutableCodegeneratorNode<TVariantNode> { @@ -3904,13 +3918,13 @@ IComputationNode* TNodeFactory::CreateArrayNode(TComputationNodePtrVector&& valu IComputationNode* TNodeFactory::CreateDictNode( std::vector<std::pair<IComputationNode*, IComputationNode*>>&& items, const TKeyTypes& types, bool isTuple, TType* encodedType, - NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate) const + NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate, NUdf::ICompare::TPtr compare, bool isSorted) const { if (items.empty()) { return new TEmptyNode(Mutables); } - return new TDictNode(Mutables, std::move(items), types, isTuple, encodedType, hash, equate); + return new TDictNode(Mutables, std::move(items), types, isTuple, encodedType, hash, equate, compare, isSorted); } IComputationNode* TNodeFactory::CreateVariantNode(IComputationNode* item, ui32 index) const { diff --git a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h index 2e812910a23..eaf0bd90b47 100644 --- a/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h +++ b/ydb/library/yql/minikql/computation/mkql_computation_node_holders.h @@ -617,7 +617,7 @@ public: IComputationNode* CreateDictNode( std::vector<std::pair<IComputationNode*, IComputationNode*>>&& items, const TKeyTypes& types, bool isTuple, TType* encodedType, - NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate) const; + NUdf::IHash::TPtr hash, NUdf::IEquate::TPtr equate, NUdf::ICompare::TPtr compare, bool isSorted) const; IComputationNode* CreateVariantNode(IComputationNode* item, ui32 index) const; diff --git a/ydb/library/yql/minikql/mkql_type_builder.cpp b/ydb/library/yql/minikql/mkql_type_builder.cpp index 271e0fae050..3593af50337 100644 --- a/ydb/library/yql/minikql/mkql_type_builder.cpp +++ b/ydb/library/yql/minikql/mkql_type_builder.cpp @@ -1,6 +1,7 @@ #include "mkql_type_builder.h" #include "mkql_node_cast.h" #include "mkql_node_builder.h" +#include "mkql_alloc.h" #include <ydb/library/yql/public/udf/udf_type_ops.h> @@ -1168,18 +1169,56 @@ private: }; template <> +class TCompare<NMiniKQL::TType::EKind::Struct> final : public NUdf::ICompare { +public: + explicit TCompare(const NMiniKQL::TType* type) { + auto structType = static_cast<const NMiniKQL::TStructType*>(type); + auto count = structType->GetMembersCount(); + Compare_.reserve(count); + for (ui32 i = 0; i < count; ++i) { + Compare_.push_back(MakeCompareImpl(structType->GetMemberType(i))); + } + } + + bool Less(NUdf::TUnboxedValuePod lhs, NUdf::TUnboxedValuePod rhs) const override { + return Compare(lhs, rhs) < 0; + } + + int Compare(NUdf::TUnboxedValuePod lhs, NUdf::TUnboxedValuePod rhs) const override { + for (size_t i = 0; i < Compare_.size(); ++i) { + auto cmp = Compare_[i]->Compare( + static_cast<const NUdf::TUnboxedValuePod&>(lhs.GetElement(i)), + static_cast<const NUdf::TUnboxedValuePod&>(rhs.GetElement(i))); + if (cmp != 0) { + return cmp; + } + } + return 0; + } + +private: + std::vector<NUdf::ICompare::TPtr, NKikimr::NMiniKQL::TMKQLAllocator<NUdf::ICompare::TPtr>> Compare_; +}; + +template <> class TCompare<NMiniKQL::TType::EKind::Variant> final : public NUdf::ICompare { public: explicit TCompare(const NMiniKQL::TType* type) { auto variantType = static_cast<const NMiniKQL::TVariantType*>(type); if (variantType->GetUnderlyingType()->IsStruct()) { - throw TTypeNotSupported() << "Variant over struct is unordered"; - } - auto tupleType = static_cast<const NMiniKQL::TTupleType*>(variantType->GetUnderlyingType()); - ui32 count = tupleType->GetElementsCount(); - Compare_.reserve(count); - for (ui32 i = 0; i < count; ++i) { - Compare_.push_back(MakeCompareImpl(tupleType->GetElementType(i))); + auto structType = static_cast<const NMiniKQL::TStructType*>(variantType->GetUnderlyingType()); + ui32 count = structType->GetMembersCount(); + Compare_.reserve(count); + for (ui32 i = 0; i < count; ++i) { + Compare_.push_back(MakeCompareImpl(structType->GetMemberType(i))); + } + } else { + auto tupleType = static_cast<const NMiniKQL::TTupleType*>(variantType->GetUnderlyingType()); + ui32 count = tupleType->GetElementsCount(); + Compare_.reserve(count); + for (ui32 i = 0; i < count; ++i) { + Compare_.push_back(MakeCompareImpl(tupleType->GetElementType(i))); + } } } @@ -1269,6 +1308,90 @@ private: const NUdf::ICompare::TPtr Compare_; }; +template <> +class TCompare<NMiniKQL::TType::EKind::Dict> final : public NUdf::ICompare { +public: + explicit TCompare(const NMiniKQL::TType* type) + : CompareKey_(MakeCompareImpl(static_cast<const NMiniKQL::TDictType*>(type)->GetKeyType())) + , ComparePayload_(MakeCompareImpl(static_cast<const NMiniKQL::TDictType*>(type)->GetPayloadType())) + {} + + bool Less(NUdf::TUnboxedValuePod lhs, NUdf::TUnboxedValuePod rhs) const override { + return Compare(lhs, rhs) < 0; + } + + int Compare(NUdf::TUnboxedValuePod lhs, NUdf::TUnboxedValuePod rhs) const override { + auto lhsIter = lhs.GetDictIterator(); + auto rhsIter = rhs.GetDictIterator(); + + using TKP = std::pair<NUdf::TUnboxedValue, NUdf::TUnboxedValue>; + TVector<TKP, NMiniKQL::TMKQLAllocator<TKP>> lhsData, rhsData; + + lhsData.reserve(lhs.GetDictLength()); + rhsData.reserve(rhs.GetDictLength()); + + NUdf::TUnboxedValue key, payload; + while (lhsIter.NextPair(key, payload)) { + lhsData.emplace_back(std::make_pair(key, payload)); + } + + while (rhsIter.NextPair(key, payload)) { + rhsData.emplace_back(std::make_pair(key, payload)); + } + + if (!lhs.IsSortedDict()) { + Sort(lhsData.begin(), lhsData.end(), [&](const auto& x, const auto& y) { + return CompareKey_->Less(x.first, y.first); + }); + } + + if (!rhs.IsSortedDict()) { + Sort(rhsData.begin(), rhsData.end(), [&](const auto& x, const auto& y) { + return CompareKey_->Less(x.first, y.first); + }); + } + + auto lhsCurr = lhsData.begin(); + auto rhsCurr = rhsData.begin(); + for (;;) { + bool hasLeft = lhsCurr != lhsData.end(); + bool hasRight = rhsCurr != rhsData.end(); + if (!hasLeft || !hasRight) { + if (hasLeft == hasRight) { + return 0; + } + + return hasLeft ? 1 : -1; + } + + auto cmpKeys = CompareKey_->Compare( + static_cast<const NUdf::TUnboxedValuePod&>(lhsCurr->first), + static_cast<const NUdf::TUnboxedValuePod&>(rhsCurr->first) + ); + + if (cmpKeys) { + return cmpKeys; + } + + auto cmpPayloads = ComparePayload_->Compare( + static_cast<const NUdf::TUnboxedValuePod&>(lhsCurr->second), + static_cast<const NUdf::TUnboxedValuePod&>(rhsCurr->second) + ); + + if (cmpPayloads) { + return cmpPayloads; + } + + ++lhsCurr; + ++rhsCurr; + } + } + +private: + const NUdf::ICompare::TPtr CompareKey_; + const NUdf::ICompare::TPtr ComparePayload_; +}; + ////////////////////////////////////////////////////////////////////////////// // TBlockTypeBuilder ////////////////////////////////////////////////////////////////////////////// @@ -1974,6 +2097,79 @@ void TTypeInfoHelper::DoBlock(const NMiniKQL::TBlockType* tt, NUdf::ITypeVisitor } } +bool CanHash(const NMiniKQL::TType* type) { + switch (type->GetKind()) { + case NMiniKQL::TType::EKind::Data: { + auto slot = static_cast<const NMiniKQL::TDataType*>(type)->GetDataSlot(); + if (!slot) { + return false; + } + if (!(NUdf::GetDataTypeInfo(*slot).Features & NUdf::CanHash)) { + return false; + } + + return true; + } + case NMiniKQL::TType::EKind::Optional: { + auto optionalType = static_cast<const TOptionalType*>(type); + return CanHash(optionalType->GetItemType()); + } + + case NMiniKQL::TType::EKind::Tuple: { + auto tupleType = static_cast<const TTupleType*>(type); + for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) { + if (!CanHash(tupleType->GetElementType(i))) { + return false; + } + } + + return true; + } + + case NMiniKQL::TType::EKind::Struct: { + auto structType = static_cast<const TStructType*>(type); + for (ui32 i = 0; i < structType->GetMembersCount(); ++i) { + if (!CanHash(structType->GetMemberType(i))) { + return false; + } + } + + return true; + } + + case NMiniKQL::TType::EKind::List: { + auto listType = static_cast<const TListType*>(type); + return CanHash(listType->GetItemType()); + } + + case NMiniKQL::TType::EKind::Variant: { + auto variantType = static_cast<const TVariantType*>(type); + return CanHash(variantType->GetUnderlyingType()); + } + + case NMiniKQL::TType::EKind::Dict: { + auto dictType = static_cast<const TDictType*>(type); + return CanHash(dictType->GetKeyType()) && CanHash(dictType->GetPayloadType()); + }; + + case NMiniKQL::TType::EKind::Void: + case NMiniKQL::TType::EKind::Null: + case NMiniKQL::TType::EKind::EmptyList: + case NMiniKQL::TType::EKind::EmptyDict: + return true; + case NMiniKQL::TType::EKind::Pg: { + auto pgType = static_cast<const TPgType*>(type); + return NYql::NPg::LookupType(pgType->GetTypeId()).HashProcId != 0; + } + case NMiniKQL::TType::EKind::Tagged: { + auto taggedType = static_cast<const TTaggedType*>(type); + return CanHash(taggedType->GetBaseType()); + } + default: + return false; + } +} + NUdf::IHash::TPtr MakeHashImpl(const NMiniKQL::TType* type) { switch (type->GetKind()) { case NMiniKQL::TType::EKind::Data: { @@ -2048,6 +2244,8 @@ NUdf::ICompare::TPtr MakeCompareImpl(const NMiniKQL::TType* type) { return new TCompare<NMiniKQL::TType::EKind::Optional>(type); case NMiniKQL::TType::EKind::Tuple: return new TCompare<NMiniKQL::TType::EKind::Tuple>(type); + case NMiniKQL::TType::EKind::Struct: + return new TCompare<NMiniKQL::TType::EKind::Struct>(type); case NMiniKQL::TType::EKind::Void: case NMiniKQL::TType::EKind::Null: case NMiniKQL::TType::EKind::EmptyList: @@ -2058,6 +2256,8 @@ NUdf::ICompare::TPtr MakeCompareImpl(const NMiniKQL::TType* type) { } case NMiniKQL::TType::EKind::List: return new TCompare<NMiniKQL::TType::EKind::List>(type); + case NMiniKQL::TType::EKind::Dict: + return new TCompare<NMiniKQL::TType::EKind::Dict>(type); case NMiniKQL::TType::EKind::Pg: return MakePgCompare((const TPgType*)type); case NMiniKQL::TType::EKind::Tagged: { @@ -2065,7 +2265,7 @@ NUdf::ICompare::TPtr MakeCompareImpl(const NMiniKQL::TType* type) { return MakeCompareImpl(taggedType->GetBaseType()); } default: - throw TTypeNotSupported() << "Data, Pg, Optional, Variant over Tuple, Tuple or List is expected for comparing," + throw TTypeNotSupported() << "Data, Pg, Optional, Variant, Tuple, Struct, List or Dict are expected for comparing," << "but got: " << PrintNode(type); } } diff --git a/ydb/library/yql/minikql/mkql_type_builder.h b/ydb/library/yql/minikql/mkql_type_builder.h index 4861cecca70..580f3e4732d 100644 --- a/ydb/library/yql/minikql/mkql_type_builder.h +++ b/ydb/library/yql/minikql/mkql_type_builder.h @@ -210,6 +210,7 @@ private: static void DoBlock(const NMiniKQL::TBlockType* tt, NUdf::ITypeVisitor* v); }; +bool CanHash(const NMiniKQL::TType* type); NUdf::IHash::TPtr MakeHashImpl(const NMiniKQL::TType* type); NUdf::ICompare::TPtr MakeCompareImpl(const NMiniKQL::TType* type); NUdf::IEquate::TPtr MakeEquateImpl(const NMiniKQL::TType* type); diff --git a/ydb/library/yql/mount/lib/yql/core.yql b/ydb/library/yql/mount/lib/yql/core.yql index f12c459358b..f297cafde31 100644 --- a/ydb/library/yql/mount/lib/yql/core.yql +++ b/ydb/library/yql/mount/lib/yql/core.yql @@ -126,7 +126,7 @@ def signature(script, name): (MatchType seq 'EmptyDict (lambda '() (EmptyList)) (lambda '() seq))) ))) -(let ToSet (lambda '(list) (ToDict list (lambda '(x) x) (lambda '(x) (Void)) '('Hashed 'One)))) +(let ToSet (lambda '(list) (ToDict list (lambda '(x) x) (lambda '(x) (Void)) '('Auto 'One)))) (let SetIsDisjointImpl (lambda '(dict scan) (Not (HasItems (SkipWhile (Apply KeysImpl scan) (lambda '(item) (Not (Contains dict item)))))))) @@ -156,7 +156,7 @@ def signature(script, name): (let key (Nth p '0)) (let lookup (Lookup y key)) (return (Map lookup (lambda '(r) '(key (Apply f key (Nth p '1) r))))) - )))) (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Hashed 'One) + )))) (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Auto 'One) ) ))) @@ -166,7 +166,7 @@ def signature(script, name): (let SetDifferenceImpl (lambda '(x y) (ToDict (Filter (DictItems x) (lambda '(p) (Not (Contains y (Nth p '0))))) - (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Hashed 'One) ))) + (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Auto 'One) ))) (let SetUnionImpl (lambda '(f) (lambda '(x y) (ToDict (Extend @@ -181,7 +181,7 @@ def signature(script, name): (return (If (Exists lookup) (Null) '(key (Apply f key lookup (Just (Nth p '1)) )))) )))) ) - (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Hashed 'One) + (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Auto 'One) ) ))) @@ -198,7 +198,7 @@ def signature(script, name): (return (If (Exists lookup) (Null) '(key (Apply f key lookup (Just (Nth p '1)) )))) )))) ) - (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Hashed 'One) + (lambda '(z) (Nth z '0)) (lambda '(z) (Nth z '1)) '('Auto 'One) ) ))) @@ -236,7 +236,7 @@ def signature(script, name): '('('Data (Apply factory (StructType '('Data (ListItemType (DictPayloadType (TypeOf dict))))) (lambda '(x) (Member x 'Data))))) '('('compact)) ) -(lambda '(x) (Member x 'Key)) (lambda '(x) (Member x 'Data)) '('Hashed 'One) +(lambda '(x) (Member x 'Key)) (lambda '(x) (Member x 'Data)) '('Auto 'One) ))))) (let DictAggregate (lambda '(dict factory) (MatchType dict 'Null (lambda '() (Null)) (lambda '() (MatchType dict 'Optional diff --git a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp index dbb31c0556a..60dd3adeefe 100644 --- a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp +++ b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp @@ -1781,14 +1781,15 @@ TMkqlCommonCallableCompiler::TShared::TShared() { AddCallable("ToDict", [](const TExprNode& node, TMkqlBuildContext& ctx) { const auto list = MkqlBuildExpr(node.Head(), ctx); TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; TMaybe<ui64> itemsCount; bool isCompact; - if (const auto error = ParseToDictSettings(node, ctx.ExprCtx, isMany, isHashed, itemsCount, isCompact)) { + if (const auto error = ParseToDictSettings(node, ctx.ExprCtx, type, isMany, itemsCount, isCompact)) { ythrow TNodeException(node) << error->GetMessage(); } - const auto factory = *isHashed ? &TProgramBuilder::ToHashedDict : &TProgramBuilder::ToSortedDict; + *type = SelectDictType(*type, node.Child(1)->GetTypeAnn()); + const auto factory = *type == EDictType::Hashed ? &TProgramBuilder::ToHashedDict : &TProgramBuilder::ToSortedDict; return (ctx.ProgramBuilder.*factory)(list, *isMany, [&](TRuntimeNode item) { return MkqlBuildLambda(*node.Child(1), ctx, {item}); }, [&](TRuntimeNode item) { @@ -1799,14 +1800,15 @@ TMkqlCommonCallableCompiler::TShared::TShared() { AddCallable("SqueezeToDict", [](const TExprNode& node, TMkqlBuildContext& ctx) { const auto stream = MkqlBuildExpr(node.Head(), ctx); TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; TMaybe<ui64> itemsCount; bool isCompact; - if (const auto error = ParseToDictSettings(node, ctx.ExprCtx, isMany, isHashed, itemsCount, isCompact)) { + if (const auto error = ParseToDictSettings(node, ctx.ExprCtx, type, isMany, itemsCount, isCompact)) { ythrow TNodeException(node) << error->GetMessage(); } - const auto factory = *isHashed ? &TProgramBuilder::SqueezeToHashedDict : &TProgramBuilder::SqueezeToSortedDict; + *type = SelectDictType(*type, node.Child(1)->GetTypeAnn()); + const auto factory = *type == EDictType::Hashed ? &TProgramBuilder::SqueezeToHashedDict : &TProgramBuilder::SqueezeToSortedDict; return (ctx.ProgramBuilder.*factory)(stream, *isMany, [&](TRuntimeNode item) { return MkqlBuildLambda(*node.Child(1), ctx, {item}); }, [&](TRuntimeNode item) { @@ -1817,14 +1819,15 @@ TMkqlCommonCallableCompiler::TShared::TShared() { AddCallable("NarrowSqueezeToDict", [](const TExprNode& node, TMkqlBuildContext& ctx) { const auto stream = MkqlBuildExpr(node.Head(), ctx); TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; TMaybe<ui64> itemsCount; bool isCompact; - if (const auto error = ParseToDictSettings(node, ctx.ExprCtx, isMany, isHashed, itemsCount, isCompact)) { + if (const auto error = ParseToDictSettings(node, ctx.ExprCtx, type, isMany, itemsCount, isCompact)) { ythrow TNodeException(node) << error->GetMessage(); } - const auto factory = *isHashed ? &TProgramBuilder::NarrowSqueezeToHashedDict : &TProgramBuilder::NarrowSqueezeToSortedDict; + *type = SelectDictType(*type, node.Child(1)->GetTypeAnn()); + const auto factory = *type == EDictType::Hashed ? &TProgramBuilder::NarrowSqueezeToHashedDict : &TProgramBuilder::NarrowSqueezeToSortedDict; return (ctx.ProgramBuilder.*factory)(stream, *isMany, [&](TRuntimeNode::TList items) { return MkqlBuildLambda(*node.Child(1), ctx, items); }, [&](TRuntimeNode::TList items) { diff --git a/ydb/library/yql/providers/common/provider/yql_provider.cpp b/ydb/library/yql/providers/common/provider/yql_provider.cpp index f13a717f2d1..98fd3555dbc 100644 --- a/ydb/library/yql/providers/common/provider/yql_provider.cpp +++ b/ydb/library/yql/providers/common/provider/yql_provider.cpp @@ -974,10 +974,10 @@ double GetDataReplicationFactor(double factor, const TExprNode* node, const TExp // TODO: check MapJoinCore input unique using constraints if (const auto& lambda = node->Tail(); node->Head().IsCallable("SqueezeToDict") && lambda.Tail().IsCallable("MapJoinCore") && lambda.Tail().Child(1U) == &lambda.Head().Head()) { TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; bool isCompact = false; TMaybe<ui64> itemsCount; - ParseToDictSettings(node->Head(), ctx, isMany, isHashed, itemsCount, isCompact); + ParseToDictSettings(node->Head(), ctx, type, isMany, itemsCount, isCompact); if (isMany.GetOrElse(true)) { factor *= 5.0; } @@ -1002,10 +1002,10 @@ double GetDataReplicationFactor(double factor, const TExprNode* node, const TExp // TODO: check MapJoinCore input unique using constraints if (node->Child(1)->IsCallable("ToDict")) { TMaybe<bool> isMany; - TMaybe<bool> isHashed; + TMaybe<EDictType> type; bool isCompact = false; TMaybe<ui64> itemsCount; - ParseToDictSettings(*node->Child(1), ctx, isMany, isHashed, itemsCount, isCompact); + ParseToDictSettings(*node->Child(1), ctx, type, isMany, itemsCount, isCompact); if (isMany.GetOrElse(true)) { factor *= 5.0; } diff --git a/ydb/library/yql/sql/v1/builtin.cpp b/ydb/library/yql/sql/v1/builtin.cpp index 1626b9d302e..81550d0b4c1 100644 --- a/ydb/library/yql/sql/v1/builtin.cpp +++ b/ydb/library/yql/sql/v1/builtin.cpp @@ -2467,7 +2467,7 @@ private: TNodePtr Node; }; -template <bool Sorted> +template <bool Sorted, bool Hashed> class TYqlToDict final: public TCallNode { public: TYqlToDict(TPosition pos, const TString& mode, const TVector<TNodePtr>& args) @@ -2477,7 +2477,7 @@ public: private: TCallNode::TPtr DoClone() const override { - return new TYqlToDict<Sorted>(GetPos(), Mode, CloneContainer(Args)); + return new TYqlToDict<Sorted, Hashed>(GetPos(), Mode, CloneContainer(Args)); } bool DoInit(TContext& ctx, ISource* src) override { @@ -2487,7 +2487,7 @@ private: } Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0")))); Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1")))); - Args.push_back(Q(Y(Q(Sorted ? "Sorted" : "Hashed"), Q(Mode)))); + Args.push_back(Q(Y(Q(Sorted ? "Sorted" : Hashed ? "Hashed" : "Auto"), Q(Mode)))); return TCallNode::DoInit(ctx, src); } private: @@ -2846,10 +2846,12 @@ struct TBuiltinFuncData { {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictStrict", 0, -1)}, {"asset", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSet", 0, -1)}, {"assetstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSetStrict", 0, -1)}, - {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("One")}, - {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("Many")}, - {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("One")}, - {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("Many")}, + {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, false>>("One")}, + {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, false>>("Many")}, + {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true, false>>("One")}, + {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true, false>>("Many")}, + {"tohasheddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, true>>("One")}, + {"tohashedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false, true>>("Many")}, {"dictkeys", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeys", 1, 1) }, {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloads", 1, 1) }, {"dictitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictItems", 1, 1) }, |