diff options
author | a-romanov <Anton.Romanov@ydb.tech> | 2023-02-07 10:44:43 +0300 |
---|---|---|
committer | a-romanov <Anton.Romanov@ydb.tech> | 2023-02-07 10:44:43 +0300 |
commit | d5699fb35010a0df42c08f9ede5c769e5cb4964d (patch) | |
tree | cb5a05019a937cd6e73495cd7b9900d40588c192 | |
parent | b4040cb39102371b468bcd6c9e2c7a694f91be31 (diff) | |
download | ydb-d5699fb35010a0df42c08f9ede5c769e5cb4964d.tar.gz |
Unique & Distinct constraints.
18 files changed, 544 insertions, 490 deletions
diff --git a/ydb/library/yql/ast/yql_constraint.cpp b/ydb/library/yql/ast/yql_constraint.cpp index a943fbc3227..bf336bea76e 100644 --- a/ydb/library/yql/ast/yql_constraint.cpp +++ b/ydb/library/yql/ast/yql_constraint.cpp @@ -287,25 +287,6 @@ const TSortedConstraintNode::TFullSetType TSortedConstraintNode::GetAllSets() co return sets; } -bool TSortedConstraintNode::IsOrderBy(const TUniqueConstraintNode& unique) const { - auto columns = GetAllSets(); - const auto ordered = columns; - for (const auto& set : unique.GetAllSets()) { - if (std::all_of(set.cbegin(), set.cend(), [&ordered](const TPathType& path) { - return !path.empty() && std::any_of(ordered.cbegin(), ordered.cend(), [&path](const TSetType& s) { return s.contains(path); }); - })) { - std::for_each(set.cbegin(), set.cend(), [&columns](const TPathType& path) { - if (const auto it = std::find_if(columns.cbegin(), columns.cend(), [&path](const TSetType& s) { return s.contains(path); }); columns.cend() != it) - columns.erase(it); - }); - if (columns.empty()) - return true; - } - } - - return false; -} - const TSortedConstraintNode* TSortedConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) { if (constraints.empty()) { return nullptr; @@ -498,23 +479,25 @@ const TGroupByConstraintNode* TGroupByConstraintNode::MakeCommon(const std::vect ////////////////////////////////////////////////////////////////////////////////////////////////////////////// -namespace { - -TUniqueConstraintNode::TSetType ColumnsListToSet(const std::vector<std::string_view>& columns) { +template<bool Distinct> +typename TUniqueConstraintNodeBase<Distinct>::TSetType +TUniqueConstraintNodeBase<Distinct>::ColumnsListToSet(const std::vector<std::string_view>& columns) { YQL_ENSURE(!columns.empty()); - TUniqueConstraintNode::TSetType set; + TSetType set; set.reserve(columns.size()); - std::transform(columns.cbegin(), columns.cend(), std::back_inserter(set), [](const std::string_view& column) { return TConstraintNode::TPathType(1U, column); }); + std::transform(columns.cbegin(), columns.cend(), std::back_inserter(set), [](const std::string_view& column) { return TPathType(1U, column); }); std::sort(set.begin(), set.end()); return set; } -TUniqueConstraintNode::TFullSetType DedupSets(TUniqueConstraintNode::TFullSetType&& sets) { +template<bool Distinct> +typename TUniqueConstraintNodeBase<Distinct>::TFullSetType +TUniqueConstraintNodeBase<Distinct>::DedupSets(TFullSetType&& sets) { for (bool found = true; found && sets.size() > 1U;) { found = false; for (auto ot = sets.cbegin(); !found && sets.cend() != ot; ++ot) { for (auto it = sets.cbegin(); sets.cend() != it;) { - if (ot->size() < it->size() && std::all_of(ot->cbegin(), ot->cend(), [it](const TConstraintNode::TPathType& path) { return it->contains(path); })) { + if (ot->size() < it->size() && std::all_of(ot->cbegin(), ot->cend(), [it](const TPathType& path) { return it->contains(path); })) { it = sets.erase(it); found = true; } else @@ -526,9 +509,8 @@ TUniqueConstraintNode::TFullSetType DedupSets(TUniqueConstraintNode::TFullSetTyp return std::move(sets); } -} - -TUniqueConstraintNode::TUniqueConstraintNode(TExprContext& ctx, TFullSetType&& sets) +template<bool Distinct> +TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TExprContext& ctx, TFullSetType&& sets) : TConstraintNode(ctx, Name()), Sets_(DedupSets(std::move(sets))) { YQL_ENSURE(!Sets_.empty()); @@ -541,36 +523,41 @@ TUniqueConstraintNode::TUniqueConstraintNode(TExprContext& ctx, TFullSetType&& s } } -TUniqueConstraintNode::TUniqueConstraintNode(TExprContext& ctx, const std::vector<std::string_view>& columns) - : TUniqueConstraintNode(ctx, TFullSetType{ColumnsListToSet(columns)}) +template<bool Distinct> +TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TExprContext& ctx, const std::vector<std::string_view>& columns) + : TUniqueConstraintNodeBase(ctx, TFullSetType{ColumnsListToSet(columns)}) {} -TUniqueConstraintNode::TUniqueConstraintNode(TUniqueConstraintNode&& constr) = default; +template<bool Distinct> +TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TUniqueConstraintNodeBase&& constr) = default; -bool TUniqueConstraintNode::Equals(const TConstraintNode& node) const { +template<bool Distinct> +bool TUniqueConstraintNodeBase<Distinct>::Equals(const TConstraintNode& node) const { if (this == &node) { return true; } if (GetHash() != node.GetHash()) { return false; } - if (const auto c = dynamic_cast<const TUniqueConstraintNode*>(&node)) { + if (const auto c = dynamic_cast<const TUniqueConstraintNodeBase*>(&node)) { return Sets_ == c->Sets_; } return false; } -bool TUniqueConstraintNode::Includes(const TConstraintNode& node) const { +template<bool Distinct> +bool TUniqueConstraintNodeBase<Distinct>::Includes(const TConstraintNode& node) const { if (this == &node) { return true; } - if (const auto c = dynamic_cast<const TUniqueConstraintNode*>(&node)) { + if (const auto c = dynamic_cast<const TUniqueConstraintNodeBase*>(&node)) { return std::includes(Sets_.cbegin(), Sets_.cend(), c->Sets_.cbegin(), c->Sets_.cend()); } return false; } -void TUniqueConstraintNode::Out(IOutputStream& out) const { +template<bool Distinct> +void TUniqueConstraintNodeBase<Distinct>::Out(IOutputStream& out) const { TConstraintNode::Out(out); out.Write('('); @@ -589,7 +576,8 @@ void TUniqueConstraintNode::Out(IOutputStream& out) const { out.Write(')'); } -void TUniqueConstraintNode::ToJson(NJson::TJsonWriter& out) const { +template<bool Distinct> +void TUniqueConstraintNodeBase<Distinct>::ToJson(NJson::TJsonWriter& out) const { out.OpenArray(); for (const auto& set : Sets_) { out.OpenArray(); @@ -601,17 +589,18 @@ void TUniqueConstraintNode::ToJson(NJson::TJsonWriter& out) const { out.CloseArray(); } -const TUniqueConstraintNode* TUniqueConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) { +template<bool Distinct> +const TUniqueConstraintNodeBase<Distinct>* TUniqueConstraintNodeBase<Distinct>::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) { if (constraints.empty()) { return nullptr; } if (constraints.size() == 1) { - return constraints.front()->GetConstraint<TUniqueConstraintNode>(); + return constraints.front()->GetConstraint<TUniqueConstraintNodeBase>(); } TFullSetType sets; for (auto c: constraints) { - if (const auto uniq = c->GetConstraint<TUniqueConstraintNode>()) { + if (const auto uniq = c->GetConstraint<TUniqueConstraintNodeBase>()) { if (sets.empty()) sets = uniq->GetAllSets(); else { @@ -628,10 +617,31 @@ const TUniqueConstraintNode* TUniqueConstraintNode::MakeCommon(const std::vector } } - return sets.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets)); + return sets.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(sets)); } -bool TUniqueConstraintNode::HasEqualColumns(const std::vector<std::string_view>& columns) const { +template<bool Distinct> +bool TUniqueConstraintNodeBase<Distinct>::IsOrderBy(const TSortedConstraintNode& sorted) const { + auto columns = sorted.GetAllSets(); + const auto ordered = columns; + for (const auto& set : GetAllSets()) { + if (std::all_of(set.cbegin(), set.cend(), [&ordered](const TPathType& path) { + return !path.empty() && std::any_of(ordered.cbegin(), ordered.cend(), [&path](const TSetType& s) { return s.contains(path); }); + })) { + std::for_each(set.cbegin(), set.cend(), [&columns](const TPathType& path) { + if (const auto it = std::find_if(columns.cbegin(), columns.cend(), [&path](const TSetType& s) { return s.contains(path); }); columns.cend() != it) + columns.erase(it); + }); + if (columns.empty()) + return true; + } + } + + return false; +} + +template<bool Distinct> +bool TUniqueConstraintNodeBase<Distinct>::HasEqualColumns(const std::vector<std::string_view>& columns) const { if (columns.empty()) return false; @@ -651,7 +661,9 @@ bool TUniqueConstraintNode::HasEqualColumns(const std::vector<std::string_view>& return false; } -const TUniqueConstraintNode* TUniqueConstraintNode::FilterFields(TExprContext& ctx, const TPathFilter& predicate) const { +template<bool Distinct> +const TUniqueConstraintNodeBase<Distinct>* +TUniqueConstraintNodeBase<Distinct>::FilterFields(TExprContext& ctx, const TPathFilter& predicate) const { auto sets = Sets_; for (auto it = sets.cbegin(); sets.cend() != it;) { if (std::all_of(it->cbegin(), it->cend(), predicate)) @@ -659,10 +671,12 @@ const TUniqueConstraintNode* TUniqueConstraintNode::FilterFields(TExprContext& c else it = sets.erase(it); } - return sets.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets)); + return sets.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(sets)); } -const TUniqueConstraintNode* TUniqueConstraintNode::RenameFields(TExprContext& ctx, const TPathReduce& reduce) const { +template<bool Distinct> +const TUniqueConstraintNodeBase<Distinct>* +TUniqueConstraintNodeBase<Distinct>::RenameFields(TExprContext& ctx, const TPathReduce& reduce) const { TFullSetType sets; sets.reserve(Sets_.size()); for (const auto& set : Sets_) { @@ -683,20 +697,25 @@ const TUniqueConstraintNode* TUniqueConstraintNode::RenameFields(TExprContext& c if (set.size() == newSets.front().size()) sets.insert_unique(newSets.cbegin(), newSets.cend()); } - return sets.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets)); + return sets.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(sets)); } -bool TUniqueConstraintNode::IsApplicableToType(const TTypeAnnotationNode& type) const { +template<bool Distinct> +bool TUniqueConstraintNodeBase<Distinct>::IsApplicableToType(const TTypeAnnotationNode& type) const { const auto& itemType = GetSeqItemType(type); return std::all_of(Sets_.cbegin(), Sets_.cend(), [&itemType](const TSetType& set) { return std::all_of(set.cbegin(), set.cend(), std::bind(&GetSubTypeByPath, std::placeholders::_1, std::cref(itemType))); }); } -const TConstraintNode* TUniqueConstraintNode::OnlySimpleColumns(TExprContext& ctx) const { - return FilterFields(ctx, std::bind(std::equal_to<TPathType::size_type>(), std::bind(&TPathType::size, std::placeholders::_1), 1ULL)); +template<bool Distinct> +const TConstraintNode* TUniqueConstraintNodeBase<Distinct>::OnlySimpleColumns(TExprContext& ctx) const { + return FilterFields(ctx, std::bind(std::equal_to<typename TPathType::size_type>(), std::bind(&TPathType::size, std::placeholders::_1), 1ULL)); } +template class TUniqueConstraintNodeBase<false>; +template class TUniqueConstraintNodeBase<true>; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////// template<class TOriginalConstraintNode> @@ -1016,6 +1035,7 @@ bool TPartOfConstraintNode<TOriginalConstraintNode>::IsApplicableToType(const TT template class TPartOfConstraintNode<TSortedConstraintNode>; template class TPartOfConstraintNode<TUniqueConstraintNode>; +template class TPartOfConstraintNode<TDistinctConstraintNode>; ////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1677,6 +1697,11 @@ void Out<NYql::TUniqueConstraintNode>(IOutputStream& out, const NYql::TUniqueCon } template<> +void Out<NYql::TDistinctConstraintNode>(IOutputStream& out, const NYql::TDistinctConstraintNode& c) { + c.Out(out); +} + +template<> void Out<NYql::TPartOfSortedConstraintNode>(IOutputStream& out, const NYql::TPartOfSortedConstraintNode& c) { c.Out(out); } @@ -1687,6 +1712,11 @@ void Out<NYql::TPartOfUniqueConstraintNode>(IOutputStream& out, const NYql::TPar } template<> +void Out<NYql::TPartOfDistinctConstraintNode>(IOutputStream& out, const NYql::TPartOfDistinctConstraintNode& c) { + c.Out(out); +} + +template<> void Out<NYql::TPassthroughConstraintNode>(IOutputStream& out, const NYql::TPassthroughConstraintNode& c) { c.Out(out); } diff --git a/ydb/library/yql/ast/yql_constraint.h b/ydb/library/yql/ast/yql_constraint.h index c9c32cfa582..fd7be161e3e 100644 --- a/ydb/library/yql/ast/yql_constraint.h +++ b/ydb/library/yql/ast/yql_constraint.h @@ -173,40 +173,6 @@ protected: TSetType Columns_; }; -class TUniqueConstraintNode final: public TConstraintNode { -public: - using TSetType = NSorted::TSimpleSet<TPathType>; - using TFullSetType = NSorted::TSimpleSet<TSetType>; -protected: - friend struct TExprContext; - - TUniqueConstraintNode(TExprContext& ctx, const std::vector<std::string_view>& columns); - TUniqueConstraintNode(TExprContext& ctx, TFullSetType&& sets); - TUniqueConstraintNode(TUniqueConstraintNode&& constr); -public: - static constexpr std::string_view Name() { - return "Unique"; - } - - const TFullSetType& GetAllSets() const { return Sets_; } - - bool Equals(const TConstraintNode& node) const override; - bool Includes(const TConstraintNode& node) const override; - void Out(IOutputStream& out) const override; - void ToJson(NJson::TJsonWriter& out) const override; - - bool HasEqualColumns(const std::vector<std::string_view>& columns) const; - - static const TUniqueConstraintNode* MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx); - const TUniqueConstraintNode* FilterFields(TExprContext& ctx, const TPathFilter& predicate) const; - const TUniqueConstraintNode* RenameFields(TExprContext& ctx, const TPathReduce& reduce) const; - - bool IsApplicableToType(const TTypeAnnotationNode& type) const override; - const TConstraintNode* OnlySimpleColumns(TExprContext& ctx) const override; -private: - TFullSetType Sets_; -}; - class TSortedConstraintNode final: public TConstraintNode { public: using TSetType = NSorted::TSimpleSet<TPathType>; @@ -234,7 +200,6 @@ public: void ToJson(NJson::TJsonWriter& out) const override; bool IsPrefixOf(const TSortedConstraintNode& node) const; - bool IsOrderBy(const TUniqueConstraintNode& unique) const; const TSortedConstraintNode* CutPrefix(size_t newPrefixLength, TExprContext& ctx) const; @@ -250,6 +215,48 @@ protected: TContainerType Content_; }; +template<bool Distinct> +class TUniqueConstraintNodeBase final: public TConstraintNode { +public: + using TSetType = NSorted::TSimpleSet<TPathType>; + using TFullSetType = NSorted::TSimpleSet<TSetType>; +protected: + friend struct TExprContext; + + TUniqueConstraintNodeBase(TExprContext& ctx, const std::vector<std::string_view>& columns); + TUniqueConstraintNodeBase(TExprContext& ctx, TFullSetType&& sets); + TUniqueConstraintNodeBase(TUniqueConstraintNodeBase&& constr); +public: + static constexpr std::string_view Name() { + return Distinct ? "Distinct" : "Unique"; + } + + const TFullSetType& GetAllSets() const { return Sets_; } + + bool Equals(const TConstraintNode& node) const override; + bool Includes(const TConstraintNode& node) const override; + void Out(IOutputStream& out) const override; + void ToJson(NJson::TJsonWriter& out) const override; + + bool IsOrderBy(const TSortedConstraintNode& sorted) const; + bool HasEqualColumns(const std::vector<std::string_view>& columns) const; + + static const TUniqueConstraintNodeBase* MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx); + const TUniqueConstraintNodeBase* FilterFields(TExprContext& ctx, const TPathFilter& predicate) const; + const TUniqueConstraintNodeBase* RenameFields(TExprContext& ctx, const TPathReduce& reduce) const; + + bool IsApplicableToType(const TTypeAnnotationNode& type) const override; + const TConstraintNode* OnlySimpleColumns(TExprContext& ctx) const override; +private: + static TSetType ColumnsListToSet(const std::vector<std::string_view>& columns); + static TFullSetType DedupSets(TFullSetType&& sets); + + TFullSetType Sets_; +}; + +using TUniqueConstraintNode = TUniqueConstraintNodeBase<false>; +using TDistinctConstraintNode = TUniqueConstraintNodeBase<true>; + class TGroupByConstraintNode final: public TColumnSetConstraintNodeBase { protected: friend struct TExprContext; @@ -271,9 +278,10 @@ public: template<class TOriginalConstraintNode> class TPartOfConstraintNode : public TConstraintNode { public: + using TMainConstraint = TOriginalConstraintNode; using TPartType = NSorted::TSimpleMap<TPathType, TPathType>; using TReversePartType = NSorted::TSimpleMap<TPathType, NSorted::TSimpleSet<TPathType>>; - using TMapType = std::unordered_map<const TOriginalConstraintNode*, TPartType>; + using TMapType = std::unordered_map<const TMainConstraint*, TPartType>; private: friend struct TExprContext; @@ -296,11 +304,11 @@ public: static const TPartOfConstraintNode* MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx); - static TMapType GetCommonMapping(const TOriginalConstraintNode* complete, const TPartOfConstraintNode* incomplete = nullptr, const std::string_view& field = {}); + static TMapType GetCommonMapping(const TMainConstraint* complete, const TPartOfConstraintNode* incomplete = nullptr, const std::string_view& field = {}); static void UniqueMerge(TMapType& output, TMapType&& input); static TMapType ExtractField(const TMapType& mapping, const std::string_view& field); - static const TOriginalConstraintNode* MakeComplete(TExprContext& ctx, const TMapType& mapping, const TOriginalConstraintNode* original); + static const TMainConstraint* MakeComplete(TExprContext& ctx, const TMapType& mapping, const TMainConstraint* original); bool IsApplicableToType(const TTypeAnnotationNode& type) const override; private: @@ -309,6 +317,7 @@ private: using TPartOfSortedConstraintNode = TPartOfConstraintNode<TSortedConstraintNode>; using TPartOfUniqueConstraintNode = TPartOfConstraintNode<TUniqueConstraintNode>; +using TPartOfDistinctConstraintNode = TPartOfConstraintNode<TDistinctConstraintNode>; template<> constexpr std::string_view TPartOfSortedConstraintNode::Name() { @@ -320,6 +329,11 @@ constexpr std::string_view TPartOfUniqueConstraintNode::Name() { return "PartOfUnique"; } +template<> +constexpr std::string_view TPartOfDistinctConstraintNode::Name() { + return "PartOfDistinct"; +} + class TPassthroughConstraintNode final: public TConstraintNode { public: using TPartType = NSorted::TSimpleMap<TPathType, std::string_view>; diff --git a/ydb/library/yql/core/common_opt/yql_co_finalizers.cpp b/ydb/library/yql/core/common_opt/yql_co_finalizers.cpp index 3eb073b219f..355751a66b4 100644 --- a/ydb/library/yql/core/common_opt/yql_co_finalizers.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_finalizers.cpp @@ -185,7 +185,7 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) { return true; }; - map[TCoAssumeUnique::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) { + map[TCoAssumeUnique::CallableName()] = map[TCoAssumeDistinct::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) { SubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx, [] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap&, TExprContext& ctx) { return ApplyExtractMembersToAssumeUnique(input, members, ctx, " with multi-usage"); diff --git a/ydb/library/yql/core/common_opt/yql_co_flow1.cpp b/ydb/library/yql/core/common_opt/yql_co_flow1.cpp index 661fb3299f0..1e0c5a35a4a 100644 --- a/ydb/library/yql/core/common_opt/yql_co_flow1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_flow1.cpp @@ -1624,7 +1624,7 @@ TExprNode::TPtr OptimizeLookup(const TExprNode::TPtr& node, TExprContext& ctx, T } constexpr std::initializer_list<std::string_view> FlowPriority = { - "AssumeSorted", "AssumeUnique", + "AssumeSorted", "AssumeUnique", "AssumeDistinct", "Map", "OrderedMap", "MapNext", "Filter", "OrderedFilter", "FlatMap", "OrderedFlatMap", diff --git a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp index 5ab7cb9752c..3b868489c0a 100644 --- a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp @@ -1389,7 +1389,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { return node; } - if (self.Input().Maybe<TCoAssumeUnique>()) { + if (self.Input().Maybe<TCoAssumeUnique>() || self.Input().Maybe<TCoAssumeDistinct>()) { if (auto res = ApplyExtractMembersToAssumeUnique(self.Input().Ptr(), self.Members().Ptr(), ctx, {})) { return res; } diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 8e06f1f87f6..ff36f9de63c 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -138,25 +138,33 @@ TExprNode::TPtr KeepSortedConstraint(TExprNode::TPtr node, const TSortedConstrai .Build(); } -TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) { - auto res = KeepSortedConstraint(node, src.GetConstraint<TSortedConstraintNode>(), ctx); - if (const auto uniq = src.GetConstraint<TUniqueConstraintNode>()) { +template<bool Distinct> +TExprNode::TPtr KeepUniqueConstraint(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) { + if (const auto uniq = src.GetConstraint<TUniqueConstraintNodeBase<Distinct>>()) { TExprNode::TListType columns; for (const auto& set : uniq->GetAllSets()) for (const auto& path : set) if (!path.empty()) columns.emplace_back(ctx.NewAtom(node->Pos(), path.front())); - res = columns.empty() ? - ctx.NewCallable(node->Pos(), "AssumeUnique", {std::move(res)}): + const auto& name = std::conditional_t<Distinct, TCoAssumeDistinct, TCoAssumeUnique>::CallableName(); + return columns.empty() ? + ctx.NewCallable(node->Pos(), name, {std::move(node)}): ctx.Builder(node->Pos()) - .Callable("AssumeUnique") - .Add(0, std::move(res)) + .Callable(name) + .Add(0, std::move(node)) .List(1) .Add(std::move(columns)) .Seal() .Seal() .Build(); } + return node; +} + +TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) { + auto res = KeepSortedConstraint(node, src.GetConstraint<TSortedConstraintNode>(), ctx); + res = KeepUniqueConstraint<true>(std::move(res), src, ctx); + res = KeepUniqueConstraint<false>(std::move(res), src, ctx); return res; } @@ -536,7 +544,7 @@ TExprNode::TPtr HandleEmptyListInJoin(const TExprNode::TPtr& node, TExprContext& return node; } -TExprNode::TPtr UpdateJoinTreeUniqueRecursive(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, const TVector<const TUniqueConstraintNode*>& unique, TExprContext& ctx) { +TExprNode::TPtr UpdateJoinTreeUniqueRecursive(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, const TVector<const TDistinctConstraintNode*>& unique, TExprContext& ctx) { TExprNode::TPtr res = joinTree; TEquiJoinLinkSettings linkSettings = GetEquiJoinLinkSettings(*joinTree->Child(5)); @@ -602,12 +610,12 @@ TExprNode::TPtr UpdateJoinTreeUniqueRecursive(const TExprNode::TPtr& joinTree, c TExprNode::TPtr HandleUniqueListInJoin(const TExprNode::TPtr& node, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { - if (!typeCtx.IsConstraintCheckEnabled<TUniqueConstraintNode>()) { + if (!typeCtx.IsConstraintCheckEnabled<TDistinctConstraintNode>()) { return node; } TJoinLabels labels; - TVector<const TUniqueConstraintNode*> unique; + TVector<const TDistinctConstraintNode*> unique; unique.reserve(node->ChildrenSize() - 2); for (ui32 i = 0; i < node->ChildrenSize() - 2; ++i) { auto err = labels.Add(ctx, *node->Child(i)->Child(1), @@ -616,7 +624,7 @@ TExprNode::TPtr HandleUniqueListInJoin(const TExprNode::TPtr& node, TExprContext ctx.AddError(*err); return nullptr; } - unique.push_back(node->Child(i)->Head().GetConstraint<TUniqueConstraintNode>()); + unique.push_back(node->Child(i)->Head().GetConstraint<TDistinctConstraintNode>()); } auto joinTree = UpdateJoinTreeUniqueRecursive(node->ChildPtr(node->ChildrenSize() - 2), labels, unique, ctx); diff --git a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json index f865f8be7dd..44182d0962e 100644 --- a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json +++ b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json @@ -1685,6 +1685,14 @@ ] }, { + "Name": "TCoAssumeDistinct", + "Base": "TFreeArgCallable", + "Match": {"Type": "Callable", "Name": "AssumeDistinct"}, + "Children": [ + {"Index": 0, "Name": "Input", "Type": "TExprBase"} + ] + }, + { "Name": "TCoAssumeColumnOrder", "Base": "TCoInputBase", "Match": {"Type": "Callable", "Name": "AssumeColumnOrder"}, diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 66499ddeebf..6aefd6e05a5 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -11481,6 +11481,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["Sort"] = &SortWrapper; Functions["AssumeSorted"] = &SortWrapper; Functions["AssumeUnique"] = &AssumeUniqueWrapper; + Functions["AssumeDistinct"] = &AssumeUniqueWrapper; Functions["AssumeAllMembersNullableAtOnce"] = &AssumeAllMembersNullableAtOnceWrapper; Functions["AssumeStrict"] = &AssumeStrictWrapper; Functions["Top"] = &TopWrapper; diff --git a/ydb/library/yql/core/type_ann/type_ann_list.cpp b/ydb/library/yql/core/type_ann/type_ann_list.cpp index b229254f991..a9f91d81b9c 100644 --- a/ydb/library/yql/core/type_ann/type_ann_list.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_list.cpp @@ -4323,7 +4323,7 @@ namespace { } if (input->ChildrenSize() > 1U) { - if (!EnsureStructType(input->Head().Pos(), *inputItemType, ctx.Expr)) { + if (!EnsureStaticContainerType(input->Head().Pos(), *inputItemType, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -4332,11 +4332,13 @@ namespace { return status; } - const auto structType = inputItemType->Cast<TStructExprType>(); - for (const auto& x : input->Child(i)->Children()) { - YQL_ENSURE(x->IsAtom()); - if (!FindOrReportMissingMember(x->Content(), x->Pos(), *structType, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; + if (ETypeAnnotationKind::Struct == inputItemType->GetKind()) { + const auto structType = inputItemType->Cast<TStructExprType>(); + for (const auto& x : input->Child(i)->Children()) { + YQL_ENSURE(x->IsAtom()); + if (!FindOrReportMissingMember(x->Content(), x->Pos(), *structType, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } } } } diff --git a/ydb/library/yql/core/yql_expr_constraint.cpp b/ydb/library/yql/core/yql_expr_constraint.cpp index 20a97713508..5cbbf656b3e 100644 --- a/ydb/library/yql/core/yql_expr_constraint.cpp +++ b/ydb/library/yql/core/yql_expr_constraint.cpp @@ -80,11 +80,12 @@ public: : TCallableTransformerBase<TCallableConstraintTransformer>(types, instantOnly) , SubGraph(subGraph) { - Functions["Unordered"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; - Functions["UnorderedSubquery"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["Unordered"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["UnorderedSubquery"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["Sort"] = &TCallableConstraintTransformer::SortWrap; Functions["AssumeSorted"] = &TCallableConstraintTransformer::SortWrap; - Functions["AssumeUnique"] = &TCallableConstraintTransformer::AssumeUniqueWrap; + Functions["AssumeUnique"] = &TCallableConstraintTransformer::AssumeUniqueWrap<false>; + Functions["AssumeDistinct"] = &TCallableConstraintTransformer::AssumeUniqueWrap<true>; Functions["AssumeColumnOrder"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["AssumeAllMembersNullableAtOnce"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["Top"] = &TCallableConstraintTransformer::TopWrap<false>; @@ -105,10 +106,10 @@ public: Functions["ToStream"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["ToSequence"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["Collect"] = &TCallableConstraintTransformer::CopyAllFrom<0>; - Functions["FilterNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode>; - Functions["SkipNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode>; - Functions["FilterNullElements"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode>; - Functions["SkipNullElements"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode>; + Functions["FilterNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; + Functions["SkipNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; + Functions["FilterNullElements"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; + Functions["SkipNullElements"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; Functions["Right!"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["Cons!"] = &TCallableConstraintTransformer::CopyAllFrom<1>; Functions["ExtractMembers"] = &TCallableConstraintTransformer::ExtractMembersWrap; @@ -151,20 +152,20 @@ public: Functions["Limit"] = &TCallableConstraintTransformer::TakeWrap; Functions["Member"] = &TCallableConstraintTransformer::MemberWrap; Functions["AsStruct"] = &TCallableConstraintTransformer::AsStructWrap; - Functions["Just"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; - Functions["Unwrap"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; - Functions["ToList"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; - Functions["ToOptional"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["Just"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["Unwrap"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["ToList"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["ToOptional"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["Head"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["Last"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; - Functions["Reverse"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["Reverse"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["Replicate"] = &TCallableConstraintTransformer::FromFirst<TPassthroughConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["AddMember"] = &TCallableConstraintTransformer::AddMemberWrap; Functions["RemoveMember"] = &TCallableConstraintTransformer::RemoveMemberWrap; Functions["ForceRemoveMember"] = &TCallableConstraintTransformer::RemoveMemberWrap; Functions["ReplaceMember"] = &TCallableConstraintTransformer::ReplaceMemberWrap; Functions["AsList"] = &TCallableConstraintTransformer::AsListWrap; - Functions["OptionalIf"] = &TCallableConstraintTransformer::FromSecond<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TSortedConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["OptionalIf"] = &TCallableConstraintTransformer::FromSecond<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TSortedConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["ListIf"] = &TCallableConstraintTransformer::CopyAllFrom<1>; Functions["FlatListIf"] = &TCallableConstraintTransformer::CopyAllFrom<1>; Functions["FlatOptionalIf"] = &TCallableConstraintTransformer::CopyAllFrom<1>; @@ -180,7 +181,7 @@ public: Functions["If"] = &TCallableConstraintTransformer::IfWrap; Functions["Nothing"] = &TCallableConstraintTransformer::FromEmpty; Functions["IfPresent"] = &TCallableConstraintTransformer::IfPresentWrap; - Functions["Coalesce"] = &TCallableConstraintTransformer::CommonFromChildren<0, TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; + Functions["Coalesce"] = &TCallableConstraintTransformer::CommonFromChildren<0, TSortedConstraintNode, TPartOfSortedConstraintNode, TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>; Functions["CombineByKey"] = &TCallableConstraintTransformer::FromFinalLambda<TCoCombineByKey::idx_FinishHandlerLambda>; Functions["FinalizeByKey"] = &TCallableConstraintTransformer::FromFinalLambda<TCoFinalizeByKey::idx_FinishHandlerLambda>; Functions["CombineCore"] = &TCallableConstraintTransformer::FromFinalLambda<TCoCombineCore::idx_FinishHandler>; @@ -340,7 +341,7 @@ private: } } - return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TVarIndexConstraintNode>(input, output, ctx); + return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode, TVarIndexConstraintNode>(input, output, ctx); } TStatus SortWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { @@ -352,13 +353,14 @@ private: input->AddConstraint(sorted); } - return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TVarIndexConstraintNode>(input, output, ctx); + return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode, TVarIndexConstraintNode>(input, output, ctx); } + template<bool Distinct> TStatus AssumeUniqueWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { - TUniqueConstraintNode::TFullSetType sets; + typename TUniqueConstraintNodeBase<Distinct>::TFullSetType sets; for (auto i = 1U; i < input->ChildrenSize(); ++i) { - TUniqueConstraintNode::TSetType columns; + typename TUniqueConstraintNodeBase<Distinct>::TSetType columns; columns.reserve(input->Child(i)->ChildrenSize()); for (const auto& column: input->Child(i)->Children()) columns.insert_unique(TConstraintNode::TPathType(1U, column->Content())); @@ -366,10 +368,10 @@ private: } if (sets.empty()) - sets.insert_unique(TUniqueConstraintNode::TSetType{TConstraintNode::TPathType()}); + sets.insert_unique(typename TUniqueConstraintNodeBase<Distinct>::TSetType{TConstraintNode::TPathType()}); - input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets))); - return FromFirst<TPassthroughConstraintNode, TSortedConstraintNode, TEmptyConstraintNode, TVarIndexConstraintNode>(input, output, ctx); + input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNodeBase<Distinct>>(std::move(sets))); + return FromFirst<TPassthroughConstraintNode, TSortedConstraintNode, TUniqueConstraintNodeBase<!Distinct>, TEmptyConstraintNode, TVarIndexConstraintNode>(input, output, ctx); } template <bool UseSort> @@ -384,7 +386,7 @@ private: } } - return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TVarIndexConstraintNode>(input, output, ctx); + return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TDistinctConstraintNode, TVarIndexConstraintNode>(input, output, ctx); } template <bool CheckMembersType = false> @@ -554,9 +556,39 @@ private: } } + if (const auto part = input->Head().GetConstraint<TPartOfDistinctConstraintNode>()) { + if (const auto filtered = part->RenameFields(ctx, rename)) { + input->AddConstraint(filtered); + } + } + return FromFirst<TVarIndexConstraintNode>(input, output, ctx); } + template<class TConstraint> + static void FilterFromHead(const TExprNode& input, TConstraintSet& constraints, const TConstraintNode::TPathFilter& filter, TExprContext& ctx) { + if (const auto source = input.Head().GetConstraint<TConstraint>()) { + if (const auto filtered = source->FilterFields(ctx, filter)) { + constraints.AddConstraint(filtered); + } + } + } + + template<class TConstraint> + static void FilterFromHead(const TExprNode::TPtr& input, const TConstraintNode::TPathFilter& filter, TExprContext& ctx) { + if (const auto source = input->Head().GetConstraint<TConstraint>()) { + if (const auto filtered = source->FilterFields(ctx, filter)) { + input->AddConstraint(filtered); + } + } + } + + template<class TConstraint> + static void FilterFromHeadIfMissed(const TExprNode::TPtr& input, const TConstraintNode::TPathFilter& filter, TExprContext& ctx) { + if (!input->GetConstraint<TConstraint>()) + FilterFromHead<TConstraint>(input, filter, ctx); + } + TStatus ExtractMembersWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { const auto outItemType = GetSeqItemType(*input->GetTypeAnn()).Cast<TStructExprType>(); if (const auto passthrough = input->Head().GetConstraint<TPassthroughConstraintNode>()) { @@ -579,30 +611,12 @@ private: } const auto filter = [outItemType](const TConstraintNode::TPathType& path) { return !path.empty() && outItemType->FindItem(path.front()); }; - - if (const auto sort = input->Head().GetConstraint<TSortedConstraintNode>()) { - if (const auto filtered = sort->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } - - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } - - if (const auto uniq = input->Head().GetConstraint<TUniqueConstraintNode>()) { - if (const auto filtered = uniq->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } - - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } + FilterFromHead<TSortedConstraintNode>(input, filter, ctx); + FilterFromHead<TUniqueConstraintNode>(input, filter, ctx); + FilterFromHead<TDistinctConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfSortedConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfUniqueConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfDistinctConstraintNode>(input, filter, ctx); return FromFirst<TEmptyConstraintNode, TVarIndexConstraintNode>(input, output, ctx); } @@ -635,18 +649,9 @@ private: } const auto filter = [outStructType](const TConstraintNode::TPathType& path) { return !path.empty() && outStructType->FindItem(path.front()); }; - - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } - - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } + FilterFromHead<TPartOfSortedConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfUniqueConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfDistinctConstraintNode>(input, filter, ctx); } else if (outItemType->GetKind() == ETypeAnnotationKind::Variant) { if (auto multi = input->Head().GetConstraint<TMultiConstraintNode>()) { @@ -678,18 +683,9 @@ private: } const auto filter = [outStructType](const TConstraintNode::TPathType& path) { return !path.empty() && outStructType->FindItem(path.front()); }; - - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - constr.AddConstraint(filtered); - } - } - - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - constr.AddConstraint(filtered); - } - } + FilterFromHead<TPartOfSortedConstraintNode>(*input, constr, filter, ctx); + FilterFromHead<TPartOfUniqueConstraintNode>(*input, constr, filter, ctx); + FilterFromHead<TPartOfDistinctConstraintNode>(*input, constr, filter, ctx); } input->AddConstraint(ctx.MakeConstraint<TMultiConstraintNode>(std::move(multiItems))); } @@ -709,7 +705,7 @@ private: FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode>(input, output, ctx); } - return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); + return FromFirst<TPassthroughConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); } template<class TConstraint> @@ -804,6 +800,9 @@ private: if (auto mapping = TPartOfUniqueConstraintNode::GetCommonMapping(inputConstr->GetConstraint<TUniqueConstraintNode>(), inputConstr->GetConstraint<TPartOfUniqueConstraintNode>()); !mapping.empty()) { multiItems.back().second.AddConstraint(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(mapping))); } + if (auto mapping = TPartOfDistinctConstraintNode::GetCommonMapping(inputConstr->GetConstraint<TDistinctConstraintNode>(), inputConstr->GetConstraint<TPartOfDistinctConstraintNode>()); !mapping.empty()) { + multiItems.back().second.AddConstraint(ctx.MakeConstraint<TPartOfDistinctConstraintNode>(std::move(mapping))); + } if (const auto pass = inputConstr->GetConstraint<TPassthroughConstraintNode>()) { multiItems.back().second.AddConstraint(pass); continue; @@ -852,9 +851,12 @@ private: } } - if (auto mapping = TPartOfUniqueConstraintNode::GetCommonMapping(GetDetailedUnique(node.Head().GetConstraint<TUniqueConstraintNode>(), *node.Head().GetTypeAnn(), ctx), node.Head().GetConstraint<TPartOfUniqueConstraintNode>()); !mapping.empty()) { + if (auto mapping = TPartOfUniqueConstraintNode::GetCommonMapping(GetDetailed(node.Head().GetConstraint<TUniqueConstraintNode>(), *node.Head().GetTypeAnn(), ctx), node.Head().GetConstraint<TPartOfUniqueConstraintNode>()); !mapping.empty()) { constraints.emplace_back(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(mapping))); } + if (auto mapping = TPartOfDistinctConstraintNode::GetCommonMapping(GetDetailed(node.Head().GetConstraint<TDistinctConstraintNode>(), *node.Head().GetTypeAnn(), ctx), node.Head().GetConstraint<TPartOfDistinctConstraintNode>()); !mapping.empty()) { + constraints.emplace_back(ctx.MakeConstraint<TPartOfDistinctConstraintNode>(std::move(mapping))); + } if (const auto groupBy = node.Head().GetConstraint<TGroupByConstraintNode>()) { constraints.emplace_back(groupBy); } @@ -864,6 +866,52 @@ private: return constraints; } + template<class TConstraint, class TInput> + static void GetFromMapLambda(const TInput& input, const TConstraintSet& handler, TConstraintSet& output, TExprContext& ctx) { + if (const auto lambda = handler.GetConstraint<TConstraint>()) { + if (const auto original = input.template GetConstraint<typename TConstraint::TMainConstraint>()) { + if (const auto complete = TConstraint::MakeComplete(ctx, lambda->GetColumnMapping(), original)) { + output.AddConstraint(complete); + } + } + if (const auto part = input.template GetConstraint<TConstraint>()) { + auto mapping = lambda->GetColumnMapping(); + for (auto it = mapping.cbegin(); mapping.cend() != it;) { + if (part->GetColumnMapping().contains(it->first)) + ++it; + else + it = mapping.erase(it); + } + if (!mapping.empty()) { + output.AddConstraint(ctx.MakeConstraint<TConstraint>(std::move(mapping))); + } + } + } + } + + template<class TConstraint, bool WideOutput> + static void GetFromMapLambda(const TExprNode::TPtr& input, TExprContext& ctx) { + if (const auto lambda = GetConstraintFromLambda<TConstraint, WideOutput>(input->Tail(), ctx)) { + if (const auto original = GetDetailed(input->Head().GetConstraint<typename TConstraint::TMainConstraint>(), *input->Head().GetTypeAnn(), ctx)) { + if (const auto complete = TConstraint::MakeComplete(ctx, lambda->GetColumnMapping(), original)) { + input->AddConstraint(complete); + } + } + if (const auto part = input->Head().GetConstraint<TConstraint>()) { + auto mapping = lambda->GetColumnMapping(); + for (auto it = mapping.cbegin(); mapping.cend() != it;) { + if (part->GetColumnMapping().contains(it->first)) + ++it; + else + it = mapping.erase(it); + } + if (!mapping.empty()) { + input->AddConstraint(ctx.MakeConstraint<TConstraint>(std::move(mapping))); + } + } + } + } + template <bool Ordered, bool Flat, bool WideInput = false, bool WideOutput = false> TStatus MapWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { const auto inItemType = GetSeqItemType(input->Head().GetTypeAnn()); @@ -888,6 +936,14 @@ private: } } + if (const auto& mapping = TPartOfDistinctConstraintNode::GetCommonMapping(input->Head().GetConstraint<TDistinctConstraintNode>(), input->Head().GetConstraint<TPartOfDistinctConstraintNode>()); !mapping.empty()) { + for (ui32 i = 0U; i < argConstraints.size(); ++i) { + if (auto extracted = TPartOfDistinctConstraintNode::ExtractField(mapping, ctx.GetIndexAsString(i)); !extracted.empty()) { + argConstraints[i].emplace_back(ctx.MakeConstraint<TPartOfDistinctConstraintNode>(std::move(extracted))); + } + } + } + if (inItemType) { const auto multiType = inItemType->Cast<TMultiExprType>(); const auto inputPassthrough = input->Head().GetConstraint<TPassthroughConstraintNode>(); @@ -920,47 +976,12 @@ private: } } + GetFromMapLambda<TPartOfUniqueConstraintNode, WideOutput>(input, ctx); + GetFromMapLambda<TPartOfDistinctConstraintNode, WideOutput>(input, ctx); if constexpr (Ordered) { - if (const auto lambdaSorted = GetConstraintFromLambda<TPartOfSortedConstraintNode, WideOutput>(input->Tail(), ctx)) { - if (const auto sorted = input->Head().GetConstraint<TSortedConstraintNode>()) { - if (const auto complete = TPartOfSortedConstraintNode::MakeComplete(ctx, lambdaSorted->GetColumnMapping(), sorted)) { - input->AddConstraint(complete); - } - } - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - auto mapping = lambdaSorted->GetColumnMapping(); - for (auto it = mapping.cbegin(); mapping.cend() != it;) { - if (part->GetColumnMapping().contains(it->first)) - ++it; - else - it = mapping.erase(it); - } - if (!mapping.empty()) { - input->AddConstraint(ctx.MakeConstraint<TPartOfSortedConstraintNode>(std::move(mapping))); - } - } - } + GetFromMapLambda<TPartOfSortedConstraintNode, WideOutput>(input, ctx); } - if (const auto lambdaUnique = GetConstraintFromLambda<TPartOfUniqueConstraintNode, WideOutput>(input->Tail(), ctx)) { - if (const auto unique = GetDetailedUnique(input->Head().GetConstraint<TUniqueConstraintNode>(), *input->Head().GetTypeAnn(), ctx)) { - if (const auto complete = TPartOfUniqueConstraintNode::MakeComplete(ctx, lambdaUnique->GetColumnMapping(), unique)) { - input->AddConstraint(complete); - } - } - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - auto mapping = lambdaUnique->GetColumnMapping(); - for (auto it = mapping.cbegin(); mapping.cend() != it;) { - if (part->GetColumnMapping().contains(it->first)) - ++it; - else - it = mapping.erase(it); - } - if (!mapping.empty()) { - input->AddConstraint(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(mapping))); - } - } - } const auto lambdaVarIndex = GetConstraintFromLambda<TVarIndexConstraintNode, WideOutput>(input->Tail(), ctx); const auto lambdaMulti = GetConstraintFromLambda<TMultiConstraintNode, WideOutput>(input->Tail(), ctx); const bool multiInput = ETypeAnnotationKind::Variant == inItemType->GetKind(); @@ -1004,21 +1025,12 @@ private: remappedItems.back().second.AddConstraint(ctx.MakeConstraint<TPassthroughConstraintNode>(std::move(mapping))); } } - - if constexpr (Ordered) { - if (const auto sorted = input->Head().GetConstraint<TSortedConstraintNode>()) { - if (const auto outSorted = GetPassthroughSortedConstraint(*sorted, *lambdaPassthrough, ctx)) { - remappedItems.back().second.AddConstraint(outSorted); - } - } - } } - if (input->Head().GetConstraint<TUniqueConstraintNode>()) { - if (const auto lambdaUnique = item.second.template GetConstraint<TUniqueConstraintNode>()) { - remappedItems.back().second.AddConstraint(lambdaUnique); - } + GetFromMapLambda<TPartOfUniqueConstraintNode>(input->Head(), item.second, remappedItems.back().second, ctx); + GetFromMapLambda<TPartOfDistinctConstraintNode>(input->Head(), item.second, remappedItems.back().second, ctx); + if constexpr (Ordered) { + GetFromMapLambda<TPartOfSortedConstraintNode>(input->Head(), item.second, remappedItems.back().second, ctx); } - if (const auto empty = item.second.template GetConstraint<TEmptyConstraintNode>()) { remappedItems.pop_back(); } @@ -1039,19 +1051,11 @@ private: remappedItems.back().second.AddConstraint(ctx.MakeConstraint<TPassthroughConstraintNode>(std::move(mapping))); } } - - if constexpr (Ordered) { - if (const auto sorted = origConstr->template GetConstraint<TSortedConstraintNode>()) { - if (auto outSorted = GetPassthroughSortedConstraint(*sorted, *lambdaPassthrough, ctx)) { - remappedItems.back().second.AddConstraint(outSorted); - } - } - } } - if (origConstr->template GetConstraint<TUniqueConstraintNode>()) { - if (const auto lambdaUnique = item.second.template GetConstraint<TUniqueConstraintNode>()) { - remappedItems.back().second.AddConstraint(lambdaUnique); - } + GetFromMapLambda<TPartOfUniqueConstraintNode>(*origConstr, item.second, remappedItems.back().second, ctx); + GetFromMapLambda<TPartOfDistinctConstraintNode>(*origConstr, item.second, remappedItems.back().second, ctx); + if constexpr (Ordered) { + GetFromMapLambda<TPartOfSortedConstraintNode>(*origConstr, item.second, remappedItems.back().second, ctx); } if (const auto empty = item.second.template GetConstraint<TEmptyConstraintNode>()) { remappedItems.pop_back(); @@ -1106,14 +1110,11 @@ private: if (const auto lambdaEmpty = GetConstraintFromLambda<TEmptyConstraintNode, WideOutput>(input->Tail(), ctx)) { input->AddConstraint(lambdaEmpty); + const auto& filter = std::bind(&TConstraintNode::GetSubTypeByPath, std::placeholders::_1, std::cref(GetSeqItemType(*input->GetTypeAnn()))); + FilterFromHeadIfMissed<TUniqueConstraintNode>(input, filter, ctx); + FilterFromHeadIfMissed<TDistinctConstraintNode>(input, filter, ctx); if constexpr (Ordered) { - if (!input->GetConstraint<TSortedConstraintNode>()) { - if (const auto sorted = input->Head().GetConstraint<TSortedConstraintNode>()) { - if (const auto filtered = sorted->FilterFields(ctx, std::bind(&TConstraintNode::GetSubTypeByPath, std::placeholders::_1, std::cref(*inItemType)))) { - input->AddConstraint(filtered); - } - } - } + FilterFromHeadIfMissed<TSortedConstraintNode>(input, filter, ctx); } } } @@ -1151,9 +1152,15 @@ private: if (const auto unique = input->Head().GetConstraint<TUniqueConstraintNode>()) { input->AddConstraint(unique); } + if (const auto unique = input->Head().GetConstraint<TDistinctConstraintNode>()) { + input->AddConstraint(unique); + } if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { input->AddConstraint(part); } + if (const auto part = input->Head().GetConstraint<TPartOfDistinctConstraintNode>()) { + input->AddConstraint(part); + } } return CommonFromChildren<0, TPassthroughConstraintNode, TPartOfSortedConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); @@ -1168,6 +1175,12 @@ private: if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { input->AddConstraint(part); } + if (const auto unique = input->Head().GetConstraint<TDistinctConstraintNode>()) { + input->AddConstraint(unique); + } + if (const auto part = input->Head().GetConstraint<TPartOfDistinctConstraintNode>()) { + input->AddConstraint(part); + } if constexpr (Ordered) { if (const auto sorted = input->Head().GetConstraint<TSortedConstraintNode>()) { @@ -1261,6 +1274,11 @@ private: input->AddConstraint(extracted); } } + if (const auto part = structNode.GetConstraint<TPartOfDistinctConstraintNode>()) { + if (const auto extracted = part->ExtractField(ctx, memberName)) { + input->AddConstraint(extracted); + } + } } if (structNode.IsCallable("AsStruct")) { @@ -1280,6 +1298,7 @@ private: TPassthroughConstraintNode::TMapType passthrough; TPartOfSortedConstraintNode::TMapType sorted; TPartOfUniqueConstraintNode::TMapType uniques; + TPartOfDistinctConstraintNode::TMapType distincts; std::vector<const TConstraintSet*> structConstraints; for (auto i = 0U; i < input->ChildrenSize(); ++i) { @@ -1302,6 +1321,10 @@ private: TPartOfUniqueConstraintNode::UniqueMerge(uniques, part->GetColumnMapping(name)); } + if (const auto part = child->GetConstraint<TPartOfDistinctConstraintNode>()) { + TPartOfDistinctConstraintNode::UniqueMerge(distincts, part->GetColumnMapping(name)); + } + if (const auto& valueNode = SkipModifiers(child); TCoMember::Match(valueNode) || TCoNth::Match(valueNode)) { structConstraints.push_back(&valueNode->Head().GetConstraintSet()); } else if (valueNode->IsArgument() && ETypeAnnotationKind::Struct != valueNode->GetTypeAnn()->GetKind()) { @@ -1317,6 +1340,9 @@ private: if (!uniques.empty()) { input->AddConstraint(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(uniques))); } + if (!distincts.empty()) { + input->AddConstraint(ctx.MakeConstraint<TPartOfDistinctConstraintNode>(std::move(distincts))); + } if (const auto varIndex = TVarIndexConstraintNode::MakeCommon(structConstraints, ctx)) { input->AddConstraint(varIndex); } @@ -1328,6 +1354,7 @@ private: TPassthroughConstraintNode::TMapType passthrough; TPartOfSortedConstraintNode::TMapType sorted; TPartOfUniqueConstraintNode::TMapType uniques; + TPartOfDistinctConstraintNode::TMapType distincts; std::vector<const TConstraintSet*> structConstraints; for (const auto& child : input->Children()) { @@ -1347,6 +1374,9 @@ private: if (const auto part = child->Tail().GetConstraint<TPartOfUniqueConstraintNode>()) { TPartOfUniqueConstraintNode::UniqueMerge(uniques, part->GetColumnMapping(name)); } + if (const auto part = child->Tail().GetConstraint<TPartOfDistinctConstraintNode>()) { + TPartOfDistinctConstraintNode::UniqueMerge(distincts, part->GetColumnMapping(name)); + } if (const auto valueNode = SkipModifiers(&child->Tail()); TCoMember::Match(valueNode) || TCoNth::Match(valueNode)) { structConstraints.push_back(&valueNode->Head().GetConstraintSet()); @@ -1363,6 +1393,9 @@ private: if (!uniques.empty()) { input->AddConstraint(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(uniques))); } + if (!distincts.empty()) { + input->AddConstraint(ctx.MakeConstraint<TPartOfDistinctConstraintNode>(std::move(distincts))); + } if (const auto varIndex = TVarIndexConstraintNode::MakeCommon(structConstraints, ctx)) { input->AddConstraint(varIndex); } @@ -1370,6 +1403,37 @@ private: return TStatus::Ok; } + template<class TPartOfConstraint> + static void AddPartOf(const TExprNode::TPtr& input, TExprContext& ctx) { + typename TPartOfConstraint::TMapType map; + if (const auto part = input->Head().GetConstraint<TPartOfConstraint>()) { + map = part->GetColumnMapping(); + } + if (const auto part = input->Tail().GetConstraint<TPartOfConstraint>()) { + TPartOfConstraint::UniqueMerge(map, part->GetColumnMapping(input->Child(1)->Content())); + } + if (!map.empty()) { + input->AddConstraint(ctx.MakeConstraint<TPartOfConstraint>(std::move(map))); + } + } + + template<class TPartOfConstraint> + static void ReplacePartOf(const TExprNode::TPtr& input, TExprContext& ctx) { + typename TPartOfConstraint::TMapType sorted; + const auto& name = input->Child(1)->Content(); + if (const auto part = input->Head().GetConstraint<TPartOfConstraint>()) { + if (const auto filtered = part->FilterFields(ctx, [&name](const TConstraintNode::TPathType& path) { return !path.empty() && path.front() != name; })) { + sorted = filtered->GetColumnMapping(); + } + } + if (const auto part = input->Tail().GetConstraint<TPartOfConstraint>()) { + TPartOfConstraint::UniqueMerge(sorted, part->GetColumnMapping(name)); + } + if (!sorted.empty()) { + input->AddConstraint(ctx.MakeConstraint<TPartOfConstraint>(std::move(sorted))); + } + } + TStatus AddMemberWrap(const TExprNode::TPtr& input, TExprNode::TPtr& /*output*/, TExprContext& ctx) const { const auto& addStructNode = input->Head(); const auto& extraFieldNode = input->Tail(); @@ -1395,31 +1459,9 @@ private: input->AddConstraint(emptyConstraint); } - { - TPartOfSortedConstraintNode::TMapType sorted; - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - sorted = part->GetColumnMapping(); - } - if (const auto part = input->Tail().GetConstraint<TPartOfSortedConstraintNode>()) { - TPartOfSortedConstraintNode::UniqueMerge(sorted, part->GetColumnMapping(name)); - } - if (!sorted.empty()) { - input->AddConstraint(ctx.MakeConstraint<TPartOfSortedConstraintNode>(std::move(sorted))); - } - } - - { - TPartOfUniqueConstraintNode::TMapType uniques; - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - uniques = part->GetColumnMapping(); - } - if (const auto part = input->Tail().GetConstraint<TPartOfUniqueConstraintNode>()) { - TPartOfUniqueConstraintNode::UniqueMerge(uniques, part->GetColumnMapping(name)); - } - if (!uniques.empty()) { - input->AddConstraint(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(uniques))); - } - } + AddPartOf<TPartOfSortedConstraintNode>(input, ctx); + AddPartOf<TPartOfUniqueConstraintNode>(input, ctx); + AddPartOf<TPartOfDistinctConstraintNode>(input, ctx); TVector<const TConstraintSet*> structConstraints; structConstraints.push_back(&addStructNode.GetConstraintSet()); @@ -1459,16 +1501,9 @@ private: } const auto filter = [&name](const TConstraintNode::TPathType& path) { return !path.empty() && path.front() != name; }; - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, filter)) { - input->AddConstraint(filtered); - } - } + FilterFromHead<TPartOfSortedConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfUniqueConstraintNode>(input, filter, ctx); + FilterFromHead<TPartOfDistinctConstraintNode>(input, filter, ctx); return FromFirst<TVarIndexConstraintNode>(input, output, ctx); } @@ -1515,34 +1550,9 @@ private: } } - { - TPartOfSortedConstraintNode::TMapType sorted; - if (const auto part = input->Head().GetConstraint<TPartOfSortedConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, [&name](const TConstraintNode::TPathType& path) { return !path.empty() && path.front() != name; })) { - sorted = filtered->GetColumnMapping(); - } - } - if (const auto part = input->Tail().GetConstraint<TPartOfSortedConstraintNode>()) { - TPartOfSortedConstraintNode::UniqueMerge(sorted, part->GetColumnMapping(name)); - } - if (!sorted.empty()) { - input->AddConstraint(ctx.MakeConstraint<TPartOfSortedConstraintNode>(std::move(sorted))); - } - } - { - TPartOfUniqueConstraintNode::TMapType uniques; - if (const auto part = input->Head().GetConstraint<TPartOfUniqueConstraintNode>()) { - if (const auto filtered = part->FilterFields(ctx, [&name](const TConstraintNode::TPathType& path) { return !path.empty() && path.front() != name; })) { - uniques = filtered->GetColumnMapping(); - } - } - if (const auto part = input->Tail().GetConstraint<TPartOfUniqueConstraintNode>()) { - TPartOfUniqueConstraintNode::UniqueMerge(uniques, part->GetColumnMapping(name)); - } - if (!uniques.empty()) { - input->AddConstraint(ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(uniques))); - } - } + ReplacePartOf<TPartOfSortedConstraintNode>(input, ctx); + ReplacePartOf<TPartOfUniqueConstraintNode>(input, ctx); + ReplacePartOf<TPartOfDistinctConstraintNode>(input, ctx); if (const auto varIndex = TVarIndexConstraintNode::MakeCommon(structConstraints, ctx)) { input->AddConstraint(varIndex); @@ -1552,15 +1562,15 @@ private: } TStatus ListWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { - if (input->ChildrenSize() == 1) { + switch (input->ChildrenSize()) { + case 1: return FromEmpty(input, output, ctx); - } else if (input->ChildrenSize() == 2) { - return FromSecond<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); + case 2: + return FromSecond<TPassthroughConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); + default: + break; } - else if (input->ChildrenSize() > 2) { - return CommonFromChildren<1, TPassthroughConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); - } - return TStatus::Ok; + return CommonFromChildren<1, TPassthroughConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); } TStatus DictWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { @@ -1599,6 +1609,8 @@ private: , TPartOfSortedConstraintNode , TUniqueConstraintNode , TPartOfUniqueConstraintNode + , TDistinctConstraintNode + , TPartOfDistinctConstraintNode , TPassthroughConstraintNode , TEmptyConstraintNode , TVarIndexConstraintNode @@ -1632,6 +1644,8 @@ private: , TPartOfSortedConstraintNode , TUniqueConstraintNode , TPartOfUniqueConstraintNode + , TDistinctConstraintNode + , TPartOfDistinctConstraintNode , TPassthroughConstraintNode , TEmptyConstraintNode , TVarIndexConstraintNode @@ -2032,6 +2046,11 @@ private: input->AddConstraint(extracted); } } + if (const auto part = structNode.GetConstraint<TPartOfDistinctConstraintNode>()) { + if (const auto extracted = part->ExtractField(ctx, memberName)) { + input->AddConstraint(extracted); + } + } } if (input->Head().IsList()) { @@ -2057,7 +2076,9 @@ private: emptyInputs.push_back(i); } if (const auto err = labels.Add(ctx, input->Child(i)->Tail(), - GetSeqItemType(*list.GetTypeAnn()).Cast<TStructExprType>(), GetDetailedUnique(list.GetConstraint<TUniqueConstraintNode>(), *list.GetTypeAnn(), ctx))) { + GetSeqItemType(*list.GetTypeAnn()).Cast<TStructExprType>(), + GetDetailed(list.GetConstraint<TUniqueConstraintNode>(), *list.GetTypeAnn(), ctx), + GetDetailed(list.GetConstraint<TDistinctConstraintNode>(), *list.GetTypeAnn(), ctx))) { ctx.AddError(*err); return TStatus::Error; } @@ -2072,33 +2093,39 @@ private: } const TUniqueConstraintNode* unique = nullptr; - if (const auto status = EquiJoinUniq(input->Pos(), unique, labels, *joinTree, ctx); status != IGraphTransformer::TStatus::Ok) { + const TDistinctConstraintNode* distinct = nullptr; + if (const auto status = EquiJoinConstraints(input->Pos(), unique, distinct, labels, *joinTree, ctx); status != IGraphTransformer::TStatus::Ok) { return status; } - if (unique) { - if (const auto renames = LoadJoinRenameMap(input->Tail()); !renames.empty()) { - unique = unique->RenameFields(ctx, [&renames](const TConstraintNode::TPathType& path) -> std::vector<TConstraintNode::TPathType> { - if (path.empty()) - return {}; + if (const auto renames = LoadJoinRenameMap(input->Tail()); !renames.empty() && (unique || distinct)) { + const auto rename = [&renames](const TConstraintNode::TPathType& path) -> std::vector<TConstraintNode::TPathType> { + if (path.empty()) + return {}; - const auto it = renames.find(path.front()); - if (renames.cend() == it || it->second.empty()) - return {}; + const auto it = renames.find(path.front()); + if (renames.cend() == it || it->second.empty()) + return {}; - std::vector<TConstraintNode::TPathType> res(it->second.size()); - std::transform(it->second.cbegin(), it->second.cend(), res.begin(), [&path](const std::string_view& newName) { - auto newPath = path; - newPath.front() = newName; - return newPath; - }); - return res; + std::vector<TConstraintNode::TPathType> res(it->second.size()); + std::transform(it->second.cbegin(), it->second.cend(), res.begin(), [&path](const std::string_view& newName) { + auto newPath = path; + newPath.front() = newName; + return newPath; }); - } + return res; + }; + + if (unique) + unique = unique->RenameFields(ctx, rename); + if (distinct) + distinct = distinct->RenameFields(ctx, rename); } if (unique) input->AddConstraint(unique); + if (distinct) + input->AddConstraint(distinct); return TStatus::Ok; } @@ -2188,6 +2215,7 @@ private: sets.reserve(fields.size()); for (const auto& field: fields) sets.insert_unique(TUniqueConstraintNode::TSetType{TConstraintNode::TPathType(1U, field)}); + input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(TDistinctConstraintNode::TFullSetType(sets))); input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets))); } } @@ -2214,6 +2242,7 @@ private: } if (!uniqColumns.empty()) { input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); + input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(uniqColumns)); } } } @@ -2276,6 +2305,7 @@ private: sets.reserve(width); for (ui32 i = 0U; i < width; ++i) sets.insert_unique(TUniqueConstraintNode::TSetType{TConstraintNode::TPathType(1U, ctx.GetIndexAsString(i))}); + input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(TDistinctConstraintNode::TFullSetType(sets))); input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets))); } } else { @@ -2301,6 +2331,7 @@ private: } if (!uniqColumns.empty()) { input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); + input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(uniqColumns)); } } } @@ -2343,6 +2374,7 @@ private: } if (!uniqColumns.empty()) { input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); + input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(uniqColumns)); } } } @@ -2394,26 +2426,10 @@ private: } } - if (!partitionKeys.empty() && lambdaPassthrough) { - if (auto uniq = handlerLambda->GetConstraint<TUniqueConstraintNode>()) { - auto mapping = lambdaPassthrough->GetReverseMapping(); - std::vector<std::string_view> uniqColumns; - for (auto key: partitionKeys) { - auto range = mapping.equal_range(key); - if (range.first != range.second) { - for (auto i = range.first; i != range.second; ++i) { - uniqColumns.emplace_back(i->second); - } - } else { - uniqColumns.clear(); - break; - } - } - if (uniq->HasEqualColumns(uniqColumns)) { - input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); - } - } - } + if (const auto unique = handlerLambda->GetConstraint<TUniqueConstraintNode>()) + input->AddConstraint(unique); + if (const auto distinct = handlerLambda->GetConstraint<TDistinctConstraintNode>()) + input->AddConstraint(distinct); const bool multiInput = ETypeAnnotationKind::Variant == GetSeqItemType(*input->Head().GetTypeAnn()).GetKind(); const auto lambdaVarIndex = handlerLambda->GetConstraint<TVarIndexConstraintNode>(); @@ -2447,7 +2463,7 @@ private: if (lambdaMulti && !input->Head().GetConstraint<TEmptyConstraintNode>()) { TMultiConstraintNode::TMapType remappedItems; - for (auto& item: lambdaMulti->GetItems()) { + for (const auto& item: lambdaMulti->GetItems()) { remappedItems.push_back(std::make_pair(item.first, TConstraintSet{})); if (!multiInput) { // remapping one to many if (const auto lambdaPassthrough = item.second.template GetConstraint<TPassthroughConstraintNode>()) { @@ -2459,28 +2475,14 @@ private: remappedItems.back().second.AddConstraint(ctx.MakeConstraint<TPassthroughConstraintNode>(std::move(mapping))); } } - if (const auto lambdaUnique = item.second.template GetConstraint<TUniqueConstraintNode>()) { - auto mapping = lambdaPassthrough->GetReverseMapping(); - std::vector<std::string_view> uniqColumns; - for (auto key: partitionKeys) { - auto range = mapping.equal_range(key); - if (range.first != range.second) { - for (auto i = range.first; i != range.second; ++i) { - uniqColumns.emplace_back(i->second); - } - } else { - uniqColumns.clear(); - break; - } - } - if (lambdaUnique->HasEqualColumns(uniqColumns)) { - remappedItems.back().second.AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); - } - } } - - if (const auto empty = item.second.template GetConstraint<TEmptyConstraintNode>()) { + if (const auto empty = item.second.GetConstraint<TEmptyConstraintNode>()) remappedItems.pop_back(); + else { + if (const auto unique = item.second.GetConstraint<TUniqueConstraintNode>()) + remappedItems.back().second.AddConstraint(unique); + if (const auto distinct = item.second.GetConstraint<TDistinctConstraintNode>()) + remappedItems.back().second.AddConstraint(distinct); } } else if (lambdaVarIndex && multi) { @@ -2499,27 +2501,14 @@ private: remappedItems.back().second.AddConstraint(ctx.MakeConstraint<TPassthroughConstraintNode>(std::move(mapping))); } } - if (const auto lambdaUnique = item.second.template GetConstraint<TUniqueConstraintNode>()) { - auto mapping = lambdaPassthrough->GetReverseMapping(); - std::vector<std::string_view> uniqColumns; - for (auto key: partitionKeys) { - auto range = mapping.equal_range(key); - if (range.first != range.second) { - for (auto i = range.first; i != range.second; ++i) { - uniqColumns.emplace_back(i->second); - } - } else { - uniqColumns.clear(); - break; - } - } - if (lambdaUnique->HasEqualColumns(uniqColumns)) { - remappedItems.back().second.AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); - } - } } - if (const auto empty = item.second.template GetConstraint<TEmptyConstraintNode>()) { + if (const auto empty = item.second.template GetConstraint<TEmptyConstraintNode>()) remappedItems.pop_back(); + else { + if (const auto unique = item.second.GetConstraint<TUniqueConstraintNode>()) + remappedItems.back().second.AddConstraint(unique); + if (const auto distinct = item.second.GetConstraint<TDistinctConstraintNode>()) + remappedItems.back().second.AddConstraint(distinct); } } else { remappedItems.pop_back(); @@ -2677,7 +2666,8 @@ private: return fields; } - static const TUniqueConstraintNode* GetDetailedUnique(const TUniqueConstraintNode* unique, const TTypeAnnotationNode& type, TExprContext& ctx) { + template<bool Distinct> + static const TUniqueConstraintNodeBase<Distinct>* GetDetailed(const TUniqueConstraintNodeBase<Distinct>* unique, const TTypeAnnotationNode& type, TExprContext& ctx) { if (!unique) return nullptr; @@ -2685,7 +2675,12 @@ private: return unique; const auto& columns = GetAllItemTypeFields(type, ctx); - return columns.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNode>(columns); + return columns.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase<Distinct>>(columns); + } + + static const TSortedConstraintNode* GetDetailed(const TSortedConstraintNode* sorted, const TTypeAnnotationNode&, TExprContext&) { + // TODO:: get for tuple. + return sorted; } static const TStructExprType* GetNonEmptyStructItemType(const TTypeAnnotationNode& type) { @@ -2724,38 +2719,6 @@ private: return nullptr; } - static const TSortedConstraintNode* GetPassthroughSortedConstraint(const TSortedConstraintNode& inputSorted, - const TPassthroughConstraintNode& passthrough, TExprContext& ctx) - { - const auto& reverseMapping = passthrough.GetReverseMapping(); - const auto& content = inputSorted.GetContent(); - TSortedConstraintNode::TContainerType filtered; - for (auto i = 0U; i < content.size(); ++i) { - TSortedConstraintNode::TContainerType::value_type nextItem; - for (const auto& path : content[i].first) { - if (path.size() == 1U) { - auto range = reverseMapping.equal_range(path.front()); - if (range.first != range.second) { - for (auto it = range.first; it != range.second; ++it) { - nextItem.first.insert_unique(TConstraintNode::TPathType{it->second}); - } - } - } - } - if (nextItem.first.empty()) - break; - - nextItem.second = content[i].second; - ::Sort(nextItem.first); - filtered.emplace_back(std::move(nextItem)); - } - - if (!filtered.empty()) { - return ctx.MakeConstraint<TSortedConstraintNode>(std::move(filtered)); - } - return nullptr; - } - static const TExprNode* SkipModifiers(const TExprNode* valueNode) { if (TCoJust::Match(valueNode)) { return SkipModifiers(valueNode->Child(0)); @@ -2765,7 +2728,6 @@ private: } return valueNode; } - private: const bool SubGraph; std::unordered_map<std::string_view, THandler> Functions; @@ -2813,6 +2775,18 @@ TCallableConstraintTransformer::GetConstraintFromWideResultLambda<TPartOfUniqueC return uniques.empty() ? nullptr : ctx.MakeConstraint<TPartOfUniqueConstraintNode>(std::move(uniques)); } +template<> const TPartOfDistinctConstraintNode* +TCallableConstraintTransformer::GetConstraintFromWideResultLambda<TPartOfDistinctConstraintNode>(const TExprNode& lambda, TExprContext& ctx) { + TPartOfDistinctConstraintNode::TMapType uniques; + + for (auto i = 1U; i < lambda.ChildrenSize(); ++i) { + if (const auto part = lambda.Child(i)->GetConstraint<TPartOfDistinctConstraintNode>()) + TPartOfDistinctConstraintNode::UniqueMerge(uniques, part->GetColumnMapping(ctx.GetIndexAsString(i - 1U))); + } + + return uniques.empty() ? nullptr : ctx.MakeConstraint<TPartOfDistinctConstraintNode>(std::move(uniques)); +} + template<> const TVarIndexConstraintNode* TCallableConstraintTransformer::TCallableConstraintTransformer::GetConstraintFromWideResultLambda<TVarIndexConstraintNode>(const TExprNode& lambda, TExprContext& ctx) { TVector<const TConstraintSet*> structConstraints; diff --git a/ydb/library/yql/core/yql_expr_type_annotation.cpp b/ydb/library/yql/core/yql_expr_type_annotation.cpp index 7504f9680bd..27261aa35f0 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.cpp +++ b/ydb/library/yql/core/yql_expr_type_annotation.cpp @@ -2401,6 +2401,15 @@ bool EnsureStructType(TPositionHandle position, const TTypeAnnotationNode& type, return true; } +bool EnsureStaticContainerType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx) { + if (HasError(&type, ctx) || !(type.GetKind() == ETypeAnnotationKind::Struct || type.GetKind() == ETypeAnnotationKind::Tuple || type.GetKind() == ETypeAnnotationKind::Multi)) { + ctx.AddError(TIssue(ctx.GetPosition(position), TStringBuilder() << "Expected struct, tuple or multi type, but got: " << type)); + return false; + } + + return true; +} + bool EnsureTypeWithStructType(const TExprNode& node, TExprContext& ctx) { if (!EnsureType(node, ctx)) { return false; diff --git a/ydb/library/yql/core/yql_expr_type_annotation.h b/ydb/library/yql/core/yql_expr_type_annotation.h index 9f1bfffc66d..50e15aee53c 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.h +++ b/ydb/library/yql/core/yql_expr_type_annotation.h @@ -97,6 +97,7 @@ bool EnsureStringOrUtf8Type(const TExprNode& node, TExprContext& ctx); bool EnsureStringOrUtf8Type(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx); bool EnsureStructType(const TExprNode& node, TExprContext& ctx); bool EnsureStructType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx); +bool EnsureStaticContainerType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx); bool EnsureTypeWithStructType(const TExprNode& node, TExprContext& ctx); bool EnsureComposable(const TExprNode& node, TExprContext& ctx); bool EnsureComposableType(const TExprNode& node, TExprContext& ctx); diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp index 9b3b69ce45d..3b39cad7ecb 100644 --- a/ydb/library/yql/core/yql_join.cpp +++ b/ydb/library/yql/core/yql_join.cpp @@ -105,11 +105,11 @@ namespace { IGraphTransformer::TStatus ParseJoins(const TJoinLabels& labels, const TExprNode& joins, TVector<TJoinState>& joinsStates, THashSet<TStringBuf>& scope, - TGLobalJoinState& globalState, bool strictKeys, TExprContext& ctx, const TUniqueConstraintNode** internal = nullptr, const TUniqueConstraintNode** external = nullptr); + TGLobalJoinState& globalState, bool strictKeys, TExprContext& ctx, const TUniqueConstraintNode** unique = nullptr, const TDistinctConstraintNode** distinct = nullptr); IGraphTransformer::TStatus ParseJoinScope(const TJoinLabels& labels, const TExprNode& side, TVector<TJoinState>& joinsStates, THashSet<TStringBuf>& scope, - TGLobalJoinState& globalState, bool strictKeys, const TUniqueConstraintNode*& internal, const TUniqueConstraintNode*& external, TExprContext& ctx) { + TGLobalJoinState& globalState, bool strictKeys, const TUniqueConstraintNode*& unique, const TDistinctConstraintNode*& distinct, TExprContext& ctx) { if (side.IsAtom()) { const auto label = side.Content(); const auto input = labels.FindInput(label); @@ -123,14 +123,20 @@ namespace { scope.insert(x); } + const auto rename = [&](const TConstraintNode::TPathType& path) -> std::vector<TConstraintNode::TPathType> { + if (path.empty()) + return {}; + auto newPath = path; + newPath.front() = ctx.AppendString((*input)->FullName(newPath.front())); + return {std::move(newPath)}; + }; + if (const auto u = (*input)->Unique) { - internal = external = u->RenameFields(ctx, [&](const TConstraintNode::TPathType& path) -> std::vector<TConstraintNode::TPathType> { - if (path.empty()) - return {}; - auto newPath = path; - newPath.front() = ctx.AppendString((*input)->FullName(newPath.front())); - return {std::move(newPath)}; - }); + unique = u->RenameFields(ctx, rename); + } + + if (const auto d = (*input)->Distinct) { + distinct = d->RenameFields(ctx, rename); } return IGraphTransformer::TStatus::Ok; @@ -143,12 +149,12 @@ namespace { } ++globalState.NestedJoins; - return ParseJoins(labels, side, joinsStates, scope, globalState, strictKeys, ctx, &internal, &external); + return ParseJoins(labels, side, joinsStates, scope, globalState, strictKeys, ctx, &unique, &distinct); } IGraphTransformer::TStatus ParseJoins(const TJoinLabels& labels, const TExprNode& joins, TVector<TJoinState>& joinsStates, THashSet<TStringBuf>& scope, - TGLobalJoinState& globalState, bool strictKeys, TExprContext& ctx, const TUniqueConstraintNode** internal, const TUniqueConstraintNode** external) { + TGLobalJoinState& globalState, bool strictKeys, TExprContext& ctx, const TUniqueConstraintNode** unique, const TDistinctConstraintNode** distinct) { if (!EnsureTupleSize(joins, 6, ctx)) { return IGraphTransformer::TStatus::Error; } @@ -164,16 +170,16 @@ namespace { } THashSet<TStringBuf> myLeftScope; - const TUniqueConstraintNode* lIntUnique = nullptr; - const TUniqueConstraintNode* lExtUnique = nullptr; - if (const auto status = ParseJoinScope(labels, *joins.Child(1), joinsStates, myLeftScope, globalState, strictKeys, lIntUnique, lExtUnique, ctx); status.Level != IGraphTransformer::TStatus::Ok) { + const TUniqueConstraintNode* lUnique = nullptr; + const TDistinctConstraintNode* lDistinct = nullptr; + if (const auto status = ParseJoinScope(labels, *joins.Child(1), joinsStates, myLeftScope, globalState, strictKeys, lUnique, lDistinct, ctx); status.Level != IGraphTransformer::TStatus::Ok) { return status; } THashSet<TStringBuf> myRightScope; - const TUniqueConstraintNode* rIntUnique = nullptr; - const TUniqueConstraintNode* rExtUnique = nullptr; - if (const auto status = ParseJoinScope(labels, *joins.Child(2), joinsStates, myRightScope, globalState, strictKeys, rIntUnique, rExtUnique, ctx); status.Level != IGraphTransformer::TStatus::Ok) { + const TUniqueConstraintNode* rUnique = nullptr; + const TDistinctConstraintNode* rDistinct = nullptr; + if (const auto status = ParseJoinScope(labels, *joins.Child(2), joinsStates, myRightScope, globalState, strictKeys, rUnique, rDistinct, ctx); status.Level != IGraphTransformer::TStatus::Ok) { return status; } @@ -371,47 +377,47 @@ namespace { } } - const bool lOneRow = (lIntUnique && lIntUnique->HasEqualColumns(lCheck)) || (leftHints && (leftHints->contains("unique") || leftHints->contains("any"))); - const bool rOneRow = (rIntUnique && rIntUnique->HasEqualColumns(rCheck)) || (rightHints && (rightHints->contains("unique") || rightHints->contains("any"))); + const bool lOneRow = (lUnique && lUnique->HasEqualColumns(lCheck)) || (leftHints && (leftHints->contains("unique") || leftHints->contains("any"))); + const bool rOneRow = (rUnique && rUnique->HasEqualColumns(rCheck)) || (rightHints && (rightHints->contains("unique") || rightHints->contains("any"))); const bool bothOne = lOneRow && rOneRow; - if (internal) { + if (unique) { if (singleSide) { if (leftSide) - *internal = lIntUnique; + *unique = lUnique; else if (rightSide) - *internal = rIntUnique; + *unique = rUnique; } else if (joinType.IsAtom("Exclusion") || (lOneRow && rOneRow && joinType.IsAtom({"Inner", "Full", "Left", "Right"}))) { - if (lIntUnique && rIntUnique) { - auto sets = lIntUnique->GetAllSets(); - sets.insert(rIntUnique->GetAllSets().cbegin(), rIntUnique->GetAllSets().cend()); - *internal = ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets)); - } else if (lIntUnique) - *internal = lIntUnique; - else if (rIntUnique) - *internal = rIntUnique; + if (lUnique && rUnique) { + auto sets = lUnique->GetAllSets(); + sets.insert(rUnique->GetAllSets().cbegin(), rUnique->GetAllSets().cend()); + *unique = ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets)); + } else if (lUnique) + *unique = lUnique; + else if (rUnique) + *unique = rUnique; } } - if (external) { + if (distinct) { if (singleSide) { if (leftSide) - *external = lExtUnique; + *distinct = lDistinct; else if (rightSide) - *external = rExtUnique; + *distinct = rDistinct; } else { const bool useBoth = bothOne && joinType.IsAtom("Inner"); - const bool useLeft = lExtUnique && ((leftSide && rOneRow) || useBoth); - const bool useRight = rExtUnique && ((rightSide && lOneRow) || useBoth); + const bool useLeft = lDistinct && ((leftSide && rOneRow) || useBoth); + const bool useRight = rDistinct && ((rightSide && lOneRow) || useBoth); if (useLeft && !useRight) - *external = lExtUnique; + *distinct = lDistinct; else if (useRight && !useLeft) - *external = rExtUnique; + *distinct = rDistinct; else if (useLeft && useRight) { - auto sets = lExtUnique->GetAllSets(); - sets.insert(rExtUnique->GetAllSets().cbegin(), rExtUnique->GetAllSets().cend()); - *external = ctx.MakeConstraint<TUniqueConstraintNode>(std::move(sets)); + auto sets = lDistinct->GetAllSets(); + sets.insert(rDistinct->GetAllSets().cbegin(), rDistinct->GetAllSets().cend()); + *distinct = ctx.MakeConstraint<TDistinctConstraintNode>(std::move(sets)); } } } @@ -498,10 +504,11 @@ namespace { } } -TMaybe<TIssue> TJoinLabel::Parse(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique) { +TMaybe<TIssue> TJoinLabel::Parse(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique, const TDistinctConstraintNode* distinct) { Tables.clear(); InputType = structType; Unique = unique; + Distinct = distinct; if (auto atom = TMaybeNode<TCoAtom>(&node)) { if (auto err = ValidateLabel(ctx, atom.Cast())) { return err; @@ -641,11 +648,11 @@ TVector<TString> TJoinLabel::EnumerateAllMembers() const { return result; } -TMaybe<TIssue> TJoinLabels::Add(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique) { +TMaybe<TIssue> TJoinLabels::Add(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique, const TDistinctConstraintNode* distinct) { ui32 index = Inputs.size(); Inputs.emplace_back(); TJoinLabel& label = Inputs.back(); - if (auto err = label.Parse(ctx, node, structType, unique)) { + if (auto err = label.Parse(ctx, node, structType, unique, distinct)) { return err; } @@ -936,9 +943,10 @@ IGraphTransformer::TStatus EquiJoinAnnotation( return IGraphTransformer::TStatus::Ok; } -IGraphTransformer::TStatus EquiJoinUniq( +IGraphTransformer::TStatus EquiJoinConstraints( TPositionHandle positionHandle, const TUniqueConstraintNode*& unique, + const TDistinctConstraintNode*& distinct, const TJoinLabels& labels, const TExprNode& joins, TExprContext& ctx @@ -949,8 +957,7 @@ IGraphTransformer::TStatus EquiJoinUniq( TVector<TJoinState> joinsStates(labels.Inputs.size()); TGLobalJoinState globalState; THashSet<TStringBuf> scope; - const TUniqueConstraintNode* stub = nullptr; - if (const auto parseStatus = ParseJoins(labels, joins, joinsStates, scope, globalState, false, ctx, &stub, &unique); parseStatus.Level != IGraphTransformer::TStatus::Ok) { + if (const auto parseStatus = ParseJoins(labels, joins, joinsStates, scope, globalState, false, ctx, &unique, &distinct); parseStatus.Level != IGraphTransformer::TStatus::Ok) { return parseStatus; } return IGraphTransformer::TStatus::Ok; diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h index bade80b4f17..a1428456c0c 100644 --- a/ydb/library/yql/core/yql_join.h +++ b/ydb/library/yql/core/yql_join.h @@ -22,7 +22,7 @@ inline void SplitTableName(const TStringBuf& fullName, TStringBuf& table, TStrin } struct TJoinLabel { - TMaybe<TIssue> Parse(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique); + TMaybe<TIssue> Parse(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique, const TDistinctConstraintNode* distinct); TMaybe<TIssue> ValidateLabel(TExprContext& ctx, const NNodes::TCoAtom& label); TString FullName(const TStringBuf& column) const; TStringBuf ColumnName(const TStringBuf& column) const; @@ -37,10 +37,11 @@ struct TJoinLabel { const TStructExprType* InputType; TVector<TStringBuf> Tables; const TUniqueConstraintNode* Unique = nullptr; + const TDistinctConstraintNode* Distinct = nullptr; }; struct TJoinLabels { - TMaybe<TIssue> Add(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique = nullptr); + TMaybe<TIssue> Add(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique = nullptr, const TDistinctConstraintNode* distinct = nullptr); TMaybe<const TJoinLabel*> FindInput(const TStringBuf& table) const; TMaybe<ui32> FindInputIndex(const TStringBuf& table) const; TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& table, const TStringBuf& column) const; @@ -76,9 +77,10 @@ IGraphTransformer::TStatus EquiJoinAnnotation( TExprContext& ctx ); -IGraphTransformer::TStatus EquiJoinUniq( +IGraphTransformer::TStatus EquiJoinConstraints( TPositionHandle positionHandle, const TUniqueConstraintNode*& unique, + const TDistinctConstraintNode*& distinct, const TJoinLabels& labels, const TExprNode& joins, TExprContext& ctx diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index e00c7004585..38b3d7e715f 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -1351,7 +1351,8 @@ IGraphTransformer::TStatus LocalUnorderedOptimize(TExprNode::TPtr input, TExprNo TOptimizeExprSettings settings(typeCtx); settings.ProcessedNodes = &processedNodes; // Prevent optimizer to go deeper - static THashSet<TStringBuf> CALLABLE = {"AssumeUnique", + static THashSet<TStringBuf> CALLABLE = { + "AssumeUnique", "AssumeDistinct", "Map", "OrderedMap", "Filter", "OrderedFilter", "FlatMap", "OrderedFlatMap", diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h index cd5edca6f3c..db9ed559716 100644 --- a/ydb/library/yql/core/yql_opt_utils.h +++ b/ydb/library/yql/core/yql_opt_utils.h @@ -85,7 +85,7 @@ template <bool Bool> TExprNode::TPtr MakeBool(TPositionHandle position, TExprContext& ctx); TExprNode::TPtr MakeIdentityLambda(TPositionHandle position, TExprContext& ctx); -constexpr std::initializer_list<std::string_view> SkippableCallables = {"Unordered", "AssumeSorted", "AssumeUnique", "AssumeColumnOrder", "AssumeAllMembersNullableAtOnce"}; +constexpr std::initializer_list<std::string_view> SkippableCallables = {"Unordered", "AssumeSorted", "AssumeUnique", "AssumeDistinct", "AssumeColumnOrder", "AssumeAllMembersNullableAtOnce"}; const TExprNode& SkipCallables(const TExprNode& node, const std::initializer_list<std::string_view>& skipCallables); diff --git a/ydb/library/yql/core/yql_type_annotation.cpp b/ydb/library/yql/core/yql_type_annotation.cpp index 09cbfa7cf2c..be8c8f26866 100644 --- a/ydb/library/yql/core/yql_type_annotation.cpp +++ b/ydb/library/yql/core/yql_type_annotation.cpp @@ -40,11 +40,8 @@ bool TTypeAnnotationContext::DoInitialize(TExprContext& ctx) { Y_ENSURE(UserDataStorage); // Disable "in progress" constraints - //DisableConstraintCheck.emplace(TSortedConstraintNode::Name()); - //DisableConstraintCheck.emplace(TEmptyConstraintNode::Name()); DisableConstraintCheck.emplace(TUniqueConstraintNode::Name()); - //DisableConstraintCheck.emplace(TMultiConstraintNode::Name()); - //DisableConstraintCheck.emplace(TVarIndexConstraintNode::Name()); + DisableConstraintCheck.emplace(TDistinctConstraintNode::Name()); return true; } diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_datasink_constraints.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_datasink_constraints.cpp index e201a05d13c..dc4add62589 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_datasink_constraints.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_datasink_constraints.cpp @@ -83,7 +83,7 @@ public: TStatus HandleConnection(TExprBase input, TExprContext&) { const auto output = input.Cast<TDqConnection>().Output(); - TCopyConstraint<TUniqueConstraintNode, TEmptyConstraintNode>::Do(output.Ref(), input.Ptr()); + TCopyConstraint<TUniqueConstraintNode, TDistinctConstraintNode, TEmptyConstraintNode>::Do(output.Ref(), input.Ptr()); return TStatus::Ok; } @@ -95,7 +95,7 @@ public: ctx.AddError(TIssue(ctx.GetPosition(input.Pos()), "Expected sorted constraint on stage output.")); return TStatus::Error; } - TCopyConstraint<TUniqueConstraintNode, TEmptyConstraintNode>::Do(output.Ref(), input.Ptr()); + TCopyConstraint<TUniqueConstraintNode, TDistinctConstraintNode, TEmptyConstraintNode>::Do(output.Ref(), input.Ptr()); return TStatus::Ok; } |