diff options
author | a-romanov <Anton.Romanov@ydb.tech> | 2023-04-18 17:13:49 +0300 |
---|---|---|
committer | a-romanov <Anton.Romanov@ydb.tech> | 2023-04-18 17:13:49 +0300 |
commit | 6638fc866fe70c557e5c1a4c3c89eb2f55db0700 (patch) | |
tree | fcf36420a73ef95cb912752e6b6227328f710965 | |
parent | 81741a74f7f5ec660b057f8cc12469dd9fe89d99 (diff) | |
download | ydb-6638fc866fe70c557e5c1a4c3c89eb2f55db0700.tar.gz |
YQL-8971 YQL-15555 Drop GroupBy constraint.
-rw-r--r-- | ydb/library/yql/ast/yql_constraint.cpp | 165 | ||||
-rw-r--r-- | ydb/library/yql/ast/yql_constraint.h | 44 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_expr_constraint.cpp | 84 |
3 files changed, 8 insertions, 285 deletions
diff --git a/ydb/library/yql/ast/yql_constraint.cpp b/ydb/library/yql/ast/yql_constraint.cpp index f032bb7289..338e4268c7 100644 --- a/ydb/library/yql/ast/yql_constraint.cpp +++ b/ydb/library/yql/ast/yql_constraint.cpp @@ -126,105 +126,6 @@ bool TConstraintSet::FilterConstraints(const TPredicate& predicate) { ////////////////////////////////////////////////////////////////////////////////////////////////////////////// -TColumnSetConstraintNodeBase::TColumnSetConstraintNodeBase(TExprContext& ctx, TStringBuf name, const TSetType& columns) - : TConstraintNode(ctx, name) -{ - YQL_ENSURE(!columns.empty()); - for (auto& c: columns) { - Hash_ = MurmurHash<ui64>(c.data(), c.size(), Hash_); - Columns_.push_back(ctx.AppendString(c)); - } -} - -TColumnSetConstraintNodeBase::TColumnSetConstraintNodeBase(TExprContext& ctx, TStringBuf name, const std::vector<TStringBuf>& columns) - : TConstraintNode(ctx, name) -{ - YQL_ENSURE(!columns.empty()); - for (auto& c: columns) { - YQL_ENSURE(Columns_.insert_unique(ctx.AppendString(c)).second, "Duplicate Unique constraint column: " << c); - } - for (auto& c: Columns_) { - Hash_ = MurmurHash<ui64>(c.data(), c.size(), Hash_); - } -} - -TColumnSetConstraintNodeBase::TColumnSetConstraintNodeBase(TExprContext& ctx, TStringBuf name, const std::vector<TString>& columns) - : TConstraintNode(ctx, name) -{ - YQL_ENSURE(!columns.empty()); - for (auto& c: columns) { - YQL_ENSURE(Columns_.insert_unique(ctx.AppendString(c)).second, "Duplicate Unique constraint column: " << c); - } - for (auto& c: Columns_) { - Hash_ = MurmurHash<ui64>(c.data(), c.size(), Hash_); - } -} - -TColumnSetConstraintNodeBase::TColumnSetConstraintNodeBase(TColumnSetConstraintNodeBase&& constr) - : TConstraintNode(std::move(static_cast<TConstraintNode&>(constr))) - , Columns_(std::move(constr.Columns_)) -{ -} - -bool TColumnSetConstraintNodeBase::Equals(const TConstraintNode& node) const { - if (this == &node) { - return true; - } - if (GetHash() != node.GetHash()) { - return 
false; - } - if (GetName() != node.GetName()) { - return false; - } - if (auto c = dynamic_cast<const TColumnSetConstraintNodeBase*>(&node)) { - return Columns_ == c->Columns_; - } - return false; -} - -bool TColumnSetConstraintNodeBase::Includes(const TConstraintNode& node) const { - if (this == &node) { - return true; - } - if (GetName() != node.GetName()) { - return false; - } - if (auto c = dynamic_cast<const TColumnSetConstraintNodeBase*>(&node)) { - for (auto& col: c->Columns_) { - if (!Columns_.has(col)) { - return false; - } - } - return true; - } - return false; -} - -void TColumnSetConstraintNodeBase::Out(IOutputStream& out) const { - TConstraintNode::Out(out); - out.Write('('); - - bool first = true; - for (auto& col: Columns_) { - if (!first) { - out.Write(','); - } - out.Write(col); - first = false; - } - out.Write(')'); -} - -void TColumnSetConstraintNodeBase::ToJson(NJson::TJsonWriter& out) const { - out.OpenArray(); - for (const auto& column : Columns_) { - out.Write(column); - } - out.CloseArray(); -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - TSortedConstraintNode::TSortedConstraintNode(TExprContext& ctx, TContainerType&& content) : TConstraintNode(ctx, Name()), Content_(std::move(content)) { @@ -478,67 +379,6 @@ const TConstraintNode* TSortedConstraintNode::OnlySimpleColumns(TExprContext& ct ////////////////////////////////////////////////////////////////////////////////////////////////////////////// -TGroupByConstraintNode::TGroupByConstraintNode(TExprContext& ctx, const std::vector<TStringBuf>& columns) - : TColumnSetConstraintNodeBase(ctx, Name(), columns) -{ - YQL_ENSURE(!Columns_.empty()); -} - -TGroupByConstraintNode::TGroupByConstraintNode(TExprContext& ctx, const std::vector<TString>& columns) - : TColumnSetConstraintNodeBase(ctx, Name(), columns) -{ - YQL_ENSURE(!Columns_.empty()); -} - -TGroupByConstraintNode::TGroupByConstraintNode(TExprContext& ctx, const 
TGroupByConstraintNode& constr, size_t prefixLength) - : TColumnSetConstraintNodeBase(ctx, Name(), std::vector<TStringBuf>(constr.GetColumns().begin(), constr.GetColumns().begin() + Min<size_t>(prefixLength, constr.GetColumns().size()))) -{ - YQL_ENSURE(!Columns_.empty()); - YQL_ENSURE(Columns_.size() == prefixLength); -} - -TGroupByConstraintNode::TGroupByConstraintNode(TGroupByConstraintNode&& constr) - : TColumnSetConstraintNodeBase(std::move(static_cast<TColumnSetConstraintNodeBase&>(constr))) -{ -} - -size_t TGroupByConstraintNode::GetCommonPrefixLength(const TGroupByConstraintNode& node) const { - const size_t minSize = Min(Columns_.size(), node.Columns_.size()); - for (size_t i = 0; i < minSize; ++i) { - if (*(Columns_.begin() + i) != *(node.Columns_.begin() +i)) { - return i; - } - } - return minSize; -} - -const TGroupByConstraintNode* TGroupByConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) { - if (constraints.empty()) { - return nullptr; - } - - auto groupBy = constraints.front()->GetConstraint<TGroupByConstraintNode>(); - if (constraints.size() == 1 || !groupBy) { - return groupBy; - } - - size_t commonPrefixLength = groupBy->GetColumns().size(); - for (size_t i = 1; i < constraints.size() && commonPrefixLength > 0; ++i) { - if (auto nextGroupBy = constraints[i]->GetConstraint<TGroupByConstraintNode>()) { - commonPrefixLength = Min(commonPrefixLength, nextGroupBy->GetCommonPrefixLength(*groupBy)); - } else { - commonPrefixLength = 0; - } - } - if (commonPrefixLength) { - return ctx.MakeConstraint<TGroupByConstraintNode>(*groupBy, commonPrefixLength); - } - - return nullptr; -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - namespace { TChoppedConstraintNode::TFullSetType MakeFullSet(const TConstraintNode::TSetType& keys) { @@ -2135,11 +1975,6 @@ void Out<NYql::TChoppedConstraintNode>(IOutputStream& out, const NYql::TChoppedC } 
template<> -void Out<NYql::TGroupByConstraintNode>(IOutputStream& out, const NYql::TGroupByConstraintNode& c) { - c.Out(out); -} - -template<> void Out<NYql::TUniqueConstraintNode>(IOutputStream& out, const NYql::TUniqueConstraintNode& c) { c.Out(out); } diff --git a/ydb/library/yql/ast/yql_constraint.h b/ydb/library/yql/ast/yql_constraint.h index eebe5fb1d3..f189177d01 100644 --- a/ydb/library/yql/ast/yql_constraint.h +++ b/ydb/library/yql/ast/yql_constraint.h @@ -6,8 +6,6 @@ #include <library/cpp/containers/sorted_vector/sorted_vector.h> #include <library/cpp/json/json_writer.h> -#include <util/generic/strbuf.h> -#include <util/generic/string.h> #include <util/stream/output.h> #include <deque> @@ -151,30 +149,6 @@ private: ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class TColumnSetConstraintNodeBase: public TConstraintNode { -public: - using TSetType = NSorted::TSimpleSet<TStringBuf>; - -protected: - TColumnSetConstraintNodeBase(TExprContext& ctx, TStringBuf name, const TSetType& columns); - TColumnSetConstraintNodeBase(TExprContext& ctx, TStringBuf name, const std::vector<TStringBuf>& columns); - TColumnSetConstraintNodeBase(TExprContext& ctx, TStringBuf name, const std::vector<TString>& columns); - TColumnSetConstraintNodeBase(TColumnSetConstraintNodeBase&& constr); - -public: - const TSetType& GetColumns() const { - return Columns_; - } - - bool Equals(const TConstraintNode& node) const override; - bool Includes(const TConstraintNode& node) const override; - void Out(IOutputStream& out) const override; - void ToJson(NJson::TJsonWriter& out) const override; - -protected: - TSetType Columns_; -}; - class TSortedConstraintNode final: public TConstraintNode { public: using TContainerType = TSmallVec<std::pair<TSetType, bool>>; @@ -302,24 +276,6 @@ private: using TUniqueConstraintNode = TUniqueConstraintNodeBase<false>; using TDistinctConstraintNode = TUniqueConstraintNodeBase<true>; 
-class TGroupByConstraintNode final: public TColumnSetConstraintNodeBase { -protected: - friend struct TExprContext; - - TGroupByConstraintNode(TExprContext& ctx, const std::vector<TStringBuf>& columns); - TGroupByConstraintNode(TExprContext& ctx, const std::vector<TString>& columns); - TGroupByConstraintNode(TExprContext& ctx, const TGroupByConstraintNode& constr, size_t prefixLength); - TGroupByConstraintNode(TGroupByConstraintNode&& constr); - - size_t GetCommonPrefixLength(const TGroupByConstraintNode& node) const; -public: - static constexpr std::string_view Name() { - return "GroupBy"; - } - - static const TGroupByConstraintNode* MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx); -}; - template<class TOriginalConstraintNode> class TPartOfConstraintNode : public TConstraintNode { public: diff --git a/ydb/library/yql/core/yql_expr_constraint.cpp b/ydb/library/yql/core/yql_expr_constraint.cpp index b528fcecf4..075240f49e 100644 --- a/ydb/library/yql/core/yql_expr_constraint.cpp +++ b/ydb/library/yql/core/yql_expr_constraint.cpp @@ -188,7 +188,6 @@ public: Functions["PartitionByKey"] = &TCallableConstraintTransformer::ShuffleByKeysWrap<true>; Functions["PartitionsByKeys"] = &TCallableConstraintTransformer::ShuffleByKeysWrap<true>; Functions["ShuffleByKeys"] = &TCallableConstraintTransformer::ShuffleByKeysWrap<false>; - Functions["GroupByKey"] = &TCallableConstraintTransformer::GroupByKeyWrap; Functions["Switch"] = &TCallableConstraintTransformer::SwitchWrap; Functions["Visit"] = &TCallableConstraintTransformer::VisitWrap; Functions["VariantItem"] = &TCallableConstraintTransformer::VariantItemWrap; @@ -971,9 +970,6 @@ private: } if constexpr (Ordered) { - if (const auto groupBy = node.Head().GetConstraint<TGroupByConstraintNode>()) { - argsConstraints.front().emplace_back(groupBy); - } if (auto mapping = TPartOfSortedConstraintNode::GetCommonMapping(node.Head().GetConstraint<TSortedConstraintNode>(), 
node.Head().GetConstraint<TPartOfSortedConstraintNode>()); !mapping.empty()) { argsConstraints.front().emplace_back(ctx.MakeConstraint<TPartOfSortedConstraintNode>(std::move(mapping))); } @@ -1789,9 +1785,6 @@ private: } if constexpr (Ordered) { - if (const auto groupBy = node.Head().GetConstraint<TGroupByConstraintNode>()) { - argsConstraints.front().emplace_back(groupBy); - } if (auto mapping = TPartOfSortedConstraintNode::GetCommonMapping(node.Head().GetConstraint<TSortedConstraintNode>(), node.Head().GetConstraint<TPartOfSortedConstraintNode>()); !mapping.empty()) { argsConstraints.front().emplace_back(ctx.MakeConstraint<TPartOfSortedConstraintNode>(std::move(mapping))); } @@ -2588,22 +2581,13 @@ private: return TStatus::Ok; } - TStatus IsKeySwitchWrap(const TExprNode::TPtr& input, TExprNode::TPtr& /*output*/, TExprContext& /*ctx*/) const { - if (const auto status = UpdateLambdaConstraints(*input->Child(TCoIsKeySwitch::idx_ItemKeyExtractor)) - .Combine(UpdateLambdaConstraints(*input->Child(TCoIsKeySwitch::idx_StateKeyExtractor))); status != TStatus::Ok) { - return status; - } - - if (const auto groupBy = input->Head().GetConstraint<TGroupByConstraintNode>()) { - TVector<TStringBuf> keys; - ExtractKeys(*input->Child(2), keys); - if (!keys.empty()) { - if (AllOf(keys, [groupBy] (TStringBuf key) { return groupBy->GetColumns().find(key) != groupBy->GetColumns().end(); })) { - input->AddConstraint(groupBy); - } - } - } - return TStatus::Ok; + TStatus IsKeySwitchWrap(const TExprNode::TPtr& input, TExprNode::TPtr& /*output*/, TExprContext& ctx) const { + const TCoIsKeySwitch keySwitch(input); + TSmallVec<TConstraintNode::TListType> itemConstraints, stateConstraints; + itemConstraints.emplace_back(keySwitch.Item().Ref().GetAllConstraints()); + stateConstraints.emplace_back(keySwitch.State().Ref().GetAllConstraints()); + return UpdateLambdaConstraints(input->ChildRef(TCoIsKeySwitch::idx_ItemKeyExtractor), ctx, itemConstraints) + 
.Combine(UpdateLambdaConstraints(input->ChildRef(TCoIsKeySwitch::idx_StateKeyExtractor), ctx, stateConstraints)); } static const TExprNode& GetLiteralStructMember(const TExprNode& literal, const TExprNode& member) { @@ -2879,57 +2863,6 @@ private: return FromFirst<TEmptyConstraintNode>(input, output, ctx); } - TStatus GroupByKeyWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { - const TStructExprType* inItemType = GetNonEmptyStructItemType(*input->Head().GetTypeAnn()); - const TStructExprType* outItemType = GetNonEmptyStructItemType(*input->GetTypeAnn()); - if (inItemType && outItemType) { - const auto keySelector = input->Child(TCoGroupByKey::idx_KeySelectorLambda); - if (const auto status = UpdateLambdaConstraints(*keySelector); status != TStatus::Ok) { - return status; - } - TConstraintNode::TListType argConstraints; - if (const auto inputPassthrough = input->Head().GetConstraint<TPassthroughConstraintNode>()) - argConstraints.emplace_back(inputPassthrough); - if (const auto status = UpdateLambdaConstraints(input->ChildRef(TCoGroupByKey::idx_HandlerLambda), ctx, {TConstraintNode::TListType{}, argConstraints}); status != TStatus::Ok) { - return status; - } - - if (const auto handlerLambda = input->Child(TCoGroupByKey::idx_HandlerLambda); handlerLambda->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) { - TVector<TStringBuf> groupKeys; - ExtractKeys(*keySelector, groupKeys); - if (!groupKeys.empty()) { - if (const auto passthrough = handlerLambda->GetConstraint<TPassthroughConstraintNode>()) { - const auto mapping = passthrough->GetReverseMapping(); - std::vector<std::string_view> uniqColumns; - for (auto key: groupKeys) { - auto range = mapping.equal_range(key); - if (range.first != range.second) { - for (auto i = range.first; i != range.second; ++i) { - uniqColumns.emplace_back(i->second); - } - } else { - uniqColumns.clear(); - break; - } - } - if (!uniqColumns.empty()) { - 
input->AddConstraint(ctx.MakeConstraint<TUniqueConstraintNode>(uniqColumns)); - input->AddConstraint(ctx.MakeConstraint<TDistinctConstraintNode>(uniqColumns)); - } - } - } - } - } else { - auto status = UpdateAllChildLambdasConstraints(*input); - if (status != TStatus::Ok) { - return status; - } - } - - TApplyConstraintFromInput<TCoGroupByKey::idx_HandlerLambda, TMultiConstraintNode, TEmptyConstraintNode>::Do(input); - return FromFirst<TEmptyConstraintNode>(input, output, ctx); - } - template<bool Partitions> TStatus ShuffleByKeysWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { using TCoBase = std::conditional_t<Partitions, TCoPartitionByKeyBase, TCoShuffleByKeys>; @@ -2958,14 +2891,13 @@ private: argConstraints.emplace_back(filtered); if constexpr (Partitions) { - TVector<TStringBuf> partitionKeys; + TVector<TStringBuf> partitionKeys; // TODO: Replace on TSetType ExtractKeys(*input->Child(TCoBase::idx_KeySelectorLambda), partitionKeys); if (!partitionKeys.empty()) { TChoppedConstraintNode::TFullSetType sets; sets.reserve(partitionKeys.size()); std::transform(partitionKeys.cbegin(), partitionKeys.cend(), std::back_inserter(sets), [](const TStringBuf& column) { return TConstraintNode::TSetType{TConstraintNode::TPathType(1U, column)}; }); argConstraints.emplace_back(ctx.MakeConstraint<TChoppedConstraintNode>(std::move(sets))); - argConstraints.emplace_back(ctx.MakeConstraint<TGroupByConstraintNode>(std::move(partitionKeys))); } } |