#include "yql_aggregate_expander.h"
#include <yql/essentials/core/yql_expr_optimize.h>
#include <yql/essentials/core/yql_expr_type_annotation.h>
#include <yql/essentials/core/yql_opt_window.h>
#include <yql/essentials/core/yql_opt_utils.h>
#include <yql/essentials/core/yql_type_helpers.h>
#include <yql/essentials/utils/log/log.h>
namespace NYql {
// Expands the aggregate node and, when the "output_columns" setting is present,
// narrows the expanded result to those columns with an ExtractMembers callable.
//
// Returns the node produced by ExpandAggregateWithFullOutput() (which may be null),
// wrapped in ExtractMembers only if both the expansion succeeded and the setting exists.
TExprNode::TPtr TAggregateExpander::ExpandAggregate() {
    YQL_CLOG(DEBUG, Core) << "Expand " << Node->Content();
    auto result = ExpandAggregateWithFullOutput();
    if (result) {
        auto outputColumns = GetSetting(*Node->Child(NNodes::TCoAggregate::idx_Settings), "output_columns");
        if (outputColumns) {
            // Child 1 of the setting is passed as the member list to keep.
            result = Ctx.NewCallable(result->Pos(), "ExtractMembers", { result, outputColumns->ChildPtr(1) });
        }
    }
    // Returned on every path, including when the expansion produced nothing.
    return result;
}
// Expands the aggregate node without applying any "output_columns" projection.
// Caches the node's children in members, derives EffectiveCompact, prepares the row type
// and key extractor, and then dispatches on Suffix / UsePhases / UseBlocks to one of the
// generator helpers.
// NOTE(review): brace-only lines (and possibly other statements) appear to be missing
// from this excerpt; the comments below describe only the statements that are visible.
TExprNode::TPtr TAggregateExpander::ExpandAggregateWithFullOutput()
// Children read here: 0 = input, 1 = key columns, 2 = aggregated columns, 3 = settings.
Suffix = Node->Content();
AggList = Node->HeadPtr();
KeyColumns = Node->ChildPtr(1);
AggregatedColumns = Node->Child(2);
auto settings = Node->Child(3);
bool allTraitsCollected = CollectTraits();
YQL_ENSURE(!HasSetting(*settings, "hopping"), "Aggregate with hopping unsupported here.");
// An aggregated-column entry with three children carries a distinct field (see CollectColumnsSpecs).
HaveDistinct = AnyOf(AggregatedColumns->ChildrenList(),
[](const auto& child) { return child->ChildrenSize() == 3; });
EffectiveCompact = (HaveDistinct && CompactForDistinct && !UseBlocks) || ForceCompact || HasSetting(*settings, "compact");
// A trait whose merge lambda (child 5) ends in a "Void" callable switches compact mode on.
for (const auto& trait : Traits) {
auto mergeLambda = trait->Child(5);
if (mergeLambda->Tail().IsCallable("Void")) {
EffectiveCompact = true;
// "Finalize" is handled as a compact aggregate with an empty suffix; any other
// non-empty suffix turns compact mode off.
if (Suffix == "Finalize") {
EffectiveCompact = true;
Suffix = "";
} else if (Suffix != "") {
EffectiveCompact = false;
// Row layout starts from the input's item struct type; ProcessSessionSetting may
// append to RowItems before RowType is materialized.
OriginalRowType = GetSeqItemType(*Node->Head().GetTypeAnn()).Cast<TStructExprType>();
RowItems = OriginalRowType->GetItems();
ProcessSessionSetting(GetSetting(*settings, "session"));
RowType = Ctx.MakeType<TStructExprType>(RowItems);
TVector<const TTypeAnnotationNode*> keyItemTypes = GetKeyItemTypes();
bool needPickle = IsNeedPickle(keyItemTypes);
auto keyExtractor = GetKeyExtractor(needPickle);
// Phase-based generation applies only to a plain aggregate: empty suffix, no session, not compact.
if (Suffix == "" && !HaveSessionSetting && !EffectiveCompact && UsePhases) {
return GeneratePhases();
// Block variants are attempted first; a null result falls through to the code below.
if (UseBlocks) {
if (Suffix == "Combine") {
auto ret = TryGenerateBlockCombine();
if (ret) {
return ret;
if (Suffix == "MergeFinalize" || Suffix == "MergeManyFinalize") {
auto ret = TryGenerateBlockMergeFinalize();
if (ret) {
return ret;
// If CollectTraits had to expand any AggApply* trait, return a rebuilt aggregate instead.
if (!allTraitsCollected) {
return RebuildAggregate();
// Merge* suffixes run only the post-aggregation step directly over the input.
if (Suffix == "MergeState" || Suffix == "MergeFinalize" || Suffix == "MergeManyFinalize") {
return GeneratePostAggregate(AggList, keyExtractor);
TExprNode::TPtr preAgg = GeneratePartialAggregate(keyExtractor, keyItemTypes, needPickle);
// In compact mode, or when no partial aggregate was produced, the raw input is used instead.
if (EffectiveCompact || !preAgg) {
preAgg = std::move(AggList);
// Combine* suffixes stop after the partial step.
if (Suffix == "Combine" || Suffix == "CombineState") {
return preAgg;
return GeneratePostAggregate(preAgg, keyExtractor);
// Expands an AggApply-style node into aggregation traits.
// Names starting with "pg_" are resolved through NPg::LookupAggregation and
// ExpandPgAggregationTraits; all other names are looked up as "<name>_traits_factory"
// in the "/lib/yql/aggregate.yqls" module and instantiated with a list type and the extractor.
// NOTE(review): closing braces and the end of the ReplaceNodes initializer are not visible
// in this excerpt.
TExprNode::TPtr TAggregateExpander::ExpandAggApply(const TExprNode::TPtr& node)
auto name = node->Head().Content();
if (name.StartsWith("pg_")) {
// Strip the "pg_" prefix to get the function name used for the lookup.
auto func = name.SubStr(3);
auto itemType = node->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
TVector<ui32> argTypes;
bool needRetype = false;
auto status = ExtractPgTypesFromMultiLambda(node->ChildRef(2), argTypes, needRetype, Ctx);
YQL_ENSURE(status == IGraphTransformer::TStatus::Ok);
const NPg::TAggregateDesc* aggDescPtr;
// Callables whose name ends with "State" are looked up by state/result type ids,
// the others by argument types.
if (node->Content().EndsWith("State")) {
auto stateType = node->Child(2)->GetTypeAnn()->Cast<TPgExprType>()->GetId();
auto resultType = node->GetTypeAnn()->Cast<TPgExprType>()->GetId();
aggDescPtr = &NPg::LookupAggregation(TString(func), stateType, resultType);
} else {
aggDescPtr = &NPg::LookupAggregation(TString(func), argTypes);
return ExpandPgAggregationTraits(node->Pos(), *aggDescPtr, false, node->ChildPtr(2), argTypes, itemType, Ctx);
const TString modulePath = "/lib/yql/aggregate.yqls";
auto exportsPtr = TypesCtx.Modules->GetModule(modulePath);
YQL_ENSURE(exportsPtr, "Failed to get module " << modulePath);
const auto& exports = exportsPtr->Symbols();
const auto ex = exports.find(TString(name) + "_traits_factory");
YQL_ENSURE(exports.cend() != ex);
// The factory lambda lives in the module's expression context, so it is deep-copied into Ctx.
TNodeOnNodeOwnedMap deepClones;
auto lambda = Ctx.DeepCopy(*ex->second, exportsPtr->ExprCtx(), deepClones, true, false);
// The list item type comes from child 3 when the node has four children and child 3 is not
// a "Void" callable; otherwise from child 1.
auto listTypeNode = Ctx.NewCallable(node->Pos(), "ListType", { node->ChildPtr(node->ChildrenSize() == 4 && !node->Child(3)->IsCallable("Void") ? 3 : 1) });
auto extractor = node->ChildPtr(2);
// Substitute the factory lambda's first two arguments with the list type and the extractor.
auto traits = Ctx.ReplaceNodes(lambda->TailPtr(), {
{lambda->Head().Child(0), listTypeNode},
{lambda->Head().Child(1), extractor}
auto status = ExpandApplyNoRepeat(traits, traits, Ctx);
YQL_ENSURE(status != IGraphTransformer::TStatus::Error);
return traits;
bool TAggregateExpander::CollectTraits() {
bool allTraitsCollected = true;
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
if (trait->IsCallable({ "AggApply", "AggApplyState", "AggApplyManyState" })) {
trait = ExpandAggApply(trait);
allTraitsCollected = false;
return allTraitsCollected;
// Builds a copy of Node whose aggregated-columns list (child 2) has every AggApply /
// AggApplyState / AggApplyManyState trait replaced using the matching entry of Traits.
// NOTE(review): the callable that wraps the .Add(...) chain, the chain terminator and the
// closing braces are not visible in this excerpt.
TExprNode::TPtr TAggregateExpander::RebuildAggregate()
TExprNode::TListType newAggregatedColumnsItems = AggregatedColumns->ChildrenList();
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
if (trait->IsCallable("AggApply")) {
// Plain AggApply: substitute the expanded trait as-is (Traits[index] is moved from).
newAggregatedColumnsItems[index] = Ctx.ChangeChild(*(newAggregatedColumnsItems[index]), 1, std::move(Traits[index]));
} else if (trait->IsCallable("AggApplyState") || trait->IsCallable("AggApplyManyState")) {
// State variants: children 0 and 1 come from the original trait, children 3..7 from the
// expanded one. Child 2 is not set in the visible lines.
auto newTrait = Ctx.Builder(Node->Pos())
.Add(0, trait->ChildPtr(1))
.Add(1, trait->ChildPtr(2)) // extractor for state, not initial value itself
.Add(3, Traits[index]->ChildPtr(3))
.Add(4, Traits[index]->ChildPtr(4))
.Add(5, Traits[index]->ChildPtr(5))
.Add(6, Traits[index]->ChildPtr(6))
.Add(7, Traits[index]->ChildPtr(7))
newAggregatedColumnsItems[index] = Ctx.ChangeChild(*(newAggregatedColumnsItems[index]), 1, std::move(newTrait));
return Ctx.ChangeChild(*Node, 2, Ctx.NewList(Node->Pos(), std::move(newAggregatedColumnsItems)));
// Returns a node selected by HasContextFuncs(*AggregatedColumns).
// The visible part of the "true" branch references a "stream" argument and an "Agg" atom.
// NOTE(review): the builder start, the wrapping callables and the whole "false" branch are
// not visible in this excerpt — consult the full file before relying on this description.
TExprNode::TPtr TAggregateExpander::GetContextLambda()
return HasContextFuncs(*AggregatedColumns) ?
.Arg(0, "stream")
.Atom(1, "Agg")
.Build() :
// Applies the "session" setting of the aggregate, if any: records the session output column,
// removes it from KeyColumns, extracts session-window and sort parameters, and — when distinct
// aggregations are present — rewrites AggList and adds SessionStartMemberName as an extra key
// and row item. Without distinct aggregations it turns EffectiveCompact on.
// NOTE(review): as shown, the `if (!sessionSetting)` guard has no visible body or return before
// HaveSessionSetting is assigned and sessionSetting is dereferenced; lines appear to be missing
// here (as are builder calls and closing braces further down) — confirm against the full file.
void TAggregateExpander::ProcessSessionSetting(TExprNode::TPtr sessionSetting)
if (!sessionSetting) {
HaveSessionSetting = true;
// Child 1 of the setting holds (output column name, session window traits).
SessionOutputColumn = sessionSetting->Child(1)->Child(0)->Content();
// remove session column from other keys
TExprNodeList keyColumnsList = KeyColumns->ChildrenList();
EraseIf(keyColumnsList, [&](const auto& key) { return SessionOutputColumn == key->Content(); });
KeyColumns = Ctx.NewList(KeyColumns->Pos(), std::move(keyColumnsList));
SessionWindowParams.Traits = sessionSetting->Child(1)->ChildPtr(1);
ExtractSessionWindowParams(Node->Pos(), SessionWindowParams, Ctx);
ExtractSortKeyAndOrder(Node->Pos(), SessionWindowParams.SortTraits, SortParams, Ctx);
if (HaveDistinct) {
auto keySelector = BuildKeySelector(Node->Pos(), *OriginalRowType, KeyColumns, Ctx);
const auto sessionStartMemberLambda = AddSessionParamsMemberLambda(Node->Pos(), SessionStartMemberName, keySelector,
SessionWindowParams, Ctx);
// Rebuild AggList from the current input, the key selector and the sort order/key.
AggList = Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, keySelector)
.Add(2, SortParams.Order)
.Add(3, SortParams.Key)
.With(0, "partitionedStream")
// The session start member becomes an additional grouping key and row column.
auto keyColumnsList = KeyColumns->ChildrenList();
keyColumnsList.push_back(Ctx.NewAtom(Node->Pos(), SessionStartMemberName));
KeyColumns = Ctx.NewList(Node->Pos(), std::move(keyColumnsList));
RowItems.push_back(Ctx.MakeType<TItemExprType>(SessionStartMemberName, SessionWindowParams.KeyType));
// Sort parameters are reset to VoidNode once they have been used above.
SortParams.Key = SortParams.Order = VoidNode;
} else {
EffectiveCompact = true;
// Resolves the type of every key column against RowType.
//
// Returns the item types in the same order as KeyColumns. Fails via YQL_ENSURE when a
// key column is not a member of RowType.
TVector<const TTypeAnnotationNode*> TAggregateExpander::GetKeyItemTypes()
{
    TVector<const TTypeAnnotationNode*> keyItemTypes;
    for (const auto& keyColumn : KeyColumns->Children()) {
        auto index = RowType->FindItem(keyColumn->Content());
        YQL_ENSURE(index, "Unknown column: " << keyColumn->Content());
        auto type = RowType->GetItems()[*index]->GetItemType();
        // Record the resolved type; otherwise the result would always be empty.
        keyItemTypes.push_back(type);
    }
    return keyItemTypes;
}
bool TAggregateExpander::IsNeedPickle(const TVector<const TTypeAnnotationNode*>& keyItemTypes)
bool needPickle = false;
for (auto type : keyItemTypes) {
needPickle |= !IsDataOrOptionalOfData(type);
return needPickle;
// Builds the key extractor over an "item" argument.
// The key shape depends on the number of key columns: none -> a constant Uint32 "0",
// one -> Member(item, column), several -> a list of Member(item, column) entries.
// When needPickle is set, the extractor is wrapped by a second builder that applies it to "item".
// NOTE(review): the lambda/param lines, the wrapping callable for the pickled variant, builder
// terminators and closing braces are not visible in this excerpt.
TExprNode::TPtr TAggregateExpander::GetKeyExtractor(bool needPickle)
TExprNode::TPtr keyExtractor = Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (KeyColumns->ChildrenSize() == 0) {
return parent.Callable("Uint32").Atom(0, "0", TNodeFlags::Default).Seal();
else if (KeyColumns->ChildrenSize() == 1) {
return parent.Callable("Member").Arg(0, "item").Add(1, KeyColumns->HeadPtr()).Seal();
else {
auto listBuilder = parent.List();
ui32 pos = 0;
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Callable(pos++, "Member")
.Arg(0, "item")
.Add(1, KeyColumns->ChildPtr(i))
return listBuilder.Seal();
if (needPickle) {
keyExtractor = Ctx.Builder(Node->Pos())
.Apply(0, *keyExtractor)
.With(0, "item")
return keyExtractor;
// Walks the aggregated columns and classifies each one.
// An entry with three children supplies a distinct field (child 2), which is registered in
// Distinct2Columns; InitialColumnNames receives generated "_yql_agg_<N>" atoms.
// NOTE(review): the bodies of most branches are not visible in this excerpt (several `if`/`else`
// lines follow each other directly), so which branch performs the InitialColumnNames push
// cannot be determined from here.
void TAggregateExpander::CollectColumnsSpecs()
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
auto child = AggregatedColumns->Child(index);
if (const auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr) {
// emplace reports through ins.second whether this distinct field was seen for the first time.
const auto ins = Distinct2Columns.emplace(distinctField->Content(), TIdxSet());
if (ins.second) {
} else {
if (child->Head().IsAtom()) {
} else {
// Generated name is numbered by the current size of InitialColumnNames.
InitialColumnNames.push_back(Ctx.NewAtom(FinalColumnNames.back()->Pos(), "_yql_agg_" + ToString(InitialColumnNames.size()), TNodeFlags::Default));
// For every aggregated column, expands the type annotation of the trait's save lambda
// (child 3) into a type node and places it under an "OptionalType" callable.
// NOTE(review): the builder start, the outer callable and the statement that stores the result
// are not visible in this excerpt; presumably it fills NothingStates — confirm in the full file.
void TAggregateExpander::BuildNothingStates()
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
auto trait = Traits[index];
auto saveLambda = trait->Child(3);
auto saveLambdaType = saveLambda->GetTypeAnn();
auto typeNode = ExpandType(Node->Pos(), *saveLambdaType, Ctx);
.Callable(0, "OptionalType")
.Add(0, std::move(typeNode))
// Builds the partial (pre-)aggregation stage.
// Combines the non-distinct partial aggregate (if NonDistinctColumns is not empty) with one
// distinct grouper per entry of DistinctFields. When there are key columns but no aggregated
// columns and no session update, an additional combiner over the keys is added.
// Returns the resulting node; it stays null if none of the branches produced anything.
// NOTE(review): the callables wrapping the .Add(...) chains, the lambda bodies and the closing
// braces are not visible in this excerpt.
TExprNode::TPtr TAggregateExpander::GeneratePartialAggregate(const TExprNode::TPtr keyExtractor,
const TVector<const TTypeAnnotationNode*>& keyItemTypes, bool needPickle)
TExprNode::TPtr pickleTypeNode = nullptr;
if (needPickle) {
const TTypeAnnotationNode* pickleType = nullptr;
// Several keys are pickled as a tuple, a single key as its own type.
pickleType = KeyColumns->ChildrenSize() > 1 ? Ctx.MakeType<TTupleExprType>(keyItemTypes) : keyItemTypes[0];
pickleTypeNode = ExpandType(Node->Pos(), *pickleType, Ctx);
TExprNode::TPtr partialAgg = nullptr;
if (!NonDistinctColumns.empty()) {
partialAgg = GeneratePartialAggregateForNonDistinct(keyExtractor, pickleTypeNode);
for (ui32 index = 0; index < DistinctFields.size(); ++index) {
auto distinctField = DistinctFields[index];
// Pickling for the distinct grouper is disabled in compact mode.
bool needDistinctPickle = EffectiveCompact ? false : needPickle;
auto distinctGrouper = GenerateDistinctGrouper(distinctField, keyItemTypes, needDistinctPickle);
if (!partialAgg) {
partialAgg = std::move(distinctGrouper);
} else {
// Combine the aggregate built so far with this grouper as children 0 and 1.
partialAgg = Ctx.Builder(Node->Pos())
.Add(0, std::move(partialAgg))
.Add(1, std::move(distinctGrouper))
// If no aggregation functions then add additional combiner
if (AggregatedColumns->ChildrenSize() == 0 && KeyColumns->ChildrenSize() > 0 && !SessionWindowParams.Update) {
if (!partialAgg) {
partialAgg = AggList;
auto uniqCombineInit = ReturnKeyAsIsForCombineInit(pickleTypeNode);
auto uniqCombineUpdate = Ctx.Builder(Node->Pos())
// Return state as-is
auto uniqCombineSave = Ctx.Builder(Node->Pos())
.Arg(0, "state")
// Children: 0 input, 1 PreMap, 2 key extractor, 3 init, 4 update, 5 save.
partialAgg = Ctx.Builder(Node->Pos())
.Add(0, std::move(partialAgg))
.Add(1, PreMap)
.Add(2, keyExtractor)
.Add(3, std::move(uniqCombineInit))
.Add(4, std::move(uniqCombineUpdate))
.Add(5, std::move(uniqCombineSave))
return partialAgg;
// Returns a builder callback for trait i that emits the argument passed to the partial
// aggregation lambdas. Which node is emitted depends on whether Suffix is "CombineState" and,
// in that case, on `deserialize`.
// The callback captures i and deserialize by value and everything else by reference.
// NOTE(review): the callables emitted in each branch are not visible in this excerpt — only
// their "item" argument and an expanded type node remain; loadLambda is read but its use is
// not shown.
std::function<TExprNodeBuilder& (TExprNodeBuilder&)> TAggregateExpander::GetPartialAggArgExtractor(ui32 i, bool deserialize) {
return [&, i, deserialize](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
auto trait = Traits[i];
auto extractorLambda = trait->Child(1);
auto loadLambda = trait->Child(4);
if (Suffix == "CombineState") {
if (deserialize) {
.Arg(0, "item")
// Type node expanded from the type annotation of the extractor lambda's first argument.
.Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
} else {
.Arg(0, "item")
.Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
} else {
.Arg(0, "item")
.Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
return parent;
// Returns the node that extracts the state of aggregated column i for the final stage.
// For suffixes starting with "Merge" the trait's child 1 lambda is used: as-is unless the
// suffix starts with "MergeMany", in which case it is re-applied over an "item" argument
// (a trailing "Unwrap" is handled with ApplyPartial on the Unwrap's head).
// For other suffixes the state is read from "item" by column name: FinalColumnNames when the
// suffix is non-empty, InitialColumnNames otherwise.
// NOTE(review): lambda/callable wrapper lines, builder terminators and closing braces are not
// visible in this excerpt.
TExprNode::TPtr TAggregateExpander::GetFinalAggStateExtractor(ui32 i) {
auto trait = Traits[i];
if (Suffix.StartsWith("Merge")) {
auto lambda = trait->ChildPtr(1);
if (!Suffix.StartsWith("MergeMany")) {
return lambda;
if (lambda->Tail().IsCallable("Unwrap")) {
return Ctx.Builder(Node->Pos())
.ApplyPartial(lambda->HeadPtr(), lambda->Tail().HeadPtr())
.With(0, "item")
} else {
return Ctx.Builder(Node->Pos())
.Apply(0, *lambda)
.With(0, "item")
bool aggregateOnly = (Suffix != "");
const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
return Ctx.Builder(Node->Pos())
.Arg(0, "item")
.Add(1, columnNames[i])
// Converts `stream` into a wide block flow suitable for block aggregation.
// Visible steps: ToFlow -> MakeExpandMap over the row columns -> WideMap with an extractor
// lambda (one "field<i>" argument per row column) -> WideToBlocks.
// Out-parameters appended to here: keyIdxs (one atom per key column), and — per the signature —
// outputColumns and aggs; *streamIdxColumn receives an extractor-root index when `many` is set.
// Returns nullptr when `many` is set but the row has no "_yql_group_stream_index" column, or
// when the Arrow resolver does not report OK for the key types or a trait's types.
// NOTE(review): many statements (pushes into inputColumns / extractorRoots / allKeyTypes /
// allTypes / aggs / outputColumns, builder starts and closing braces) are not visible in this
// excerpt; the comments describe only what is shown.
TExprNode::TPtr TAggregateExpander::MakeInputBlocks(const TExprNode::TPtr& stream, TExprNode::TListType& keyIdxs,
TVector<TString>& outputColumns, TExprNode::TListType& aggs, bool overState, bool many, ui32* streamIdxColumn) {
TVector<TString> inputColumns;
auto flow = Ctx.NewCallable(Node->Pos(), "ToFlow", { stream });
for (ui32 i = 0; i < RowType->GetSize(); ++i) {
auto wideFlow = MakeExpandMap(Node->Pos(), inputColumns, flow, Ctx);
// Rebuild a struct row from the wide lambda arguments so row-based trait expressions can be remapped onto it.
TExprNode::TListType extractorArgs;
TExprNode::TListType newRowItems;
for (ui32 i = 0; i < RowType->GetSize(); ++i) {
extractorArgs.push_back(Ctx.NewArgument(Node->Pos(), "field" + ToString(i)));
newRowItems.push_back(Ctx.NewList(Node->Pos(), { Ctx.NewAtom(Node->Pos(), RowType->GetItems()[i]->GetName()), extractorArgs.back() }));
const TExprNode::TPtr newRow = Ctx.NewCallable(Node->Pos(), "AsStruct", std::move(newRowItems));
TExprNode::TListType extractorRoots;
TVector<const TTypeAnnotationNode*> allKeyTypes;
// Key columns: key index atoms are numbered by key position.
for (ui32 index = 0; index < KeyColumns->ChildrenSize(); ++index) {
auto keyName = KeyColumns->Child(index)->Content();
auto rowIndex = RowType->FindItem(keyName);
YQL_ENSURE(rowIndex, "Unknown column: " << keyName);
auto type = RowType->GetItems()[*rowIndex]->GetItemType();
keyIdxs.push_back(Ctx.NewAtom(Node->Pos(), ToString(index)));
if (many) {
auto rowIndex = RowType->FindItem("_yql_group_stream_index");
if (!rowIndex) {
return nullptr;
if (streamIdxColumn) {
*streamIdxColumn = extractorRoots.size();
auto outputStructType = GetSeqItemType(*Node->GetTypeAnn()).Cast<TStructExprType>();
// Resolver ERROR is treated as a hard failure; any other non-OK status makes the caller fall back.
auto resolveStatus = TypesCtx.ArrowResolver->AreTypesSupported(Ctx.GetPosition(Node->Pos()), allKeyTypes, Ctx);
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
if (resolveStatus != IArrowResolver::OK) {
return nullptr;
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
TVector<const TTypeAnnotationNode*> allTypes;
const TTypeAnnotationNode* originalType = nullptr;
if (overState && !trait->Child(3)->IsCallable("Void")) {
auto originalExtractorType = trait->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
originalType = GetOriginalResultType(trait->Pos(), many, originalExtractorType, Ctx);
// Child 2 of the trait holds the row argument followed by the argument roots.
ui32 argsCount = trait->Child(2)->ChildrenSize() - 1;
if (!overState && trait->Child(0)->Content() == "count_all") {
argsCount = 0;
auto rowArg = &trait->Child(2)->Head().Head();
const TNodeOnNodeOwnedMap remaps{ { rowArg, newRow } };
TVector<TExprNode::TPtr> roots;
// Re-express every argument root in terms of the rebuilt row.
for (ui32 i = 1; i < argsCount + 1; ++i) {
auto root = trait->Child(2)->ChildPtr(i);
auto status = RemapExpr(root, root, remaps, Ctx, TOptimizeExprSettings(&TypesCtx));
YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Error);
// Aggregation descriptor: "AggBlockApply" or "AggBlockApplyState" with the function name,
// (for state) the original result type or NullType, the argument types and the column indexes.
.Callable(0, TString("AggBlockApply") + (overState ? "State" : ""))
.Atom(0, trait->Child(0)->Content())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (overState) {
if (originalType) {
parent.Add(1, ExpandType(Node->Pos(), *originalType, Ctx));
} else {
.Callable(1, "NullType")
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 1; i < argsCount + 1; ++i) {
// Argument type positions shift by one when a state type was emitted at index 1.
parent.Add(i + (overState ? 1 : 0), ExpandType(Node->Pos(), *trait->Child(2)->Child(i)->GetTypeAnn(), Ctx));
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 1; i < argsCount + 1; ++i) {
parent.Atom(i, ToString(extractorRoots.size() + i - 1));
return parent;
for (auto root : roots) {
if (many) {
// With `many`, a root that is an "Unwrap" callable is replaced by its argument;
// other roots are wrapped (wrapper callable not visible here).
if (root->IsCallable("Unwrap")) {
root = root->HeadPtr();
} else {
root = Ctx.Builder(Node->Pos())
.Add(0, root)
auto outPos = outputStructType->FindItem(FinalColumnNames[index]->Content());
auto resolveStatus = TypesCtx.ArrowResolver->AreTypesSupported(Ctx.GetPosition(Node->Pos()), allTypes, Ctx);
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
if (resolveStatus != IArrowResolver::OK) {
return nullptr;
auto extractorLambda = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), std::move(extractorArgs)), std::move(extractorRoots));
auto mappedWideFlow = Ctx.NewCallable(Node->Pos(), "WideMap", { wideFlow, extractorLambda });
auto blocks = Ctx.NewCallable(Node->Pos(), "WideToBlocks", { mappedWideFlow });
return blocks;
// Tries to build the block-based combine stage.
// Uses BlockCombineHashed (under WideFromBlocks) when there are key columns and BlockCombineAll
// otherwise, then narrows the wide flow back to structs with MakeNarrowMap.
// Returns nullptr when no Arrow resolver is configured or MakeInputBlocks declines.
// For list-typed input the pipeline is built over a "stream" argument and wrapped in a lambda
// that is applied to AggList; otherwise the final flow is returned directly.
// NOTE(review): builder terminators, the callable that applies lambdaStream and closing braces
// are not visible in this excerpt.
TExprNode::TPtr TAggregateExpander::TryGenerateBlockCombineAllOrHashed() {
if (!TypesCtx.ArrowResolver) {
return nullptr;
const bool hashed = (KeyColumns->ChildrenSize() > 0);
const bool isInputList = (AggList->GetTypeAnn()->GetKind() == ETypeAnnotationKind::List);
TExprNode::TListType keyIdxs;
TVector<TString> outputColumns;
TExprNode::TListType aggs;
TExprNode::TPtr stream = nullptr;
if (isInputList) {
stream = Ctx.NewArgument(Node->Pos(), "stream");
} else {
stream = AggList;
// overState = false, many = false: combine works on raw input rows.
TExprNode::TPtr blocks = MakeInputBlocks(stream, keyIdxs, outputColumns, aggs, false, false);
if (!blocks) {
return nullptr;
TExprNode::TPtr aggWideFlow;
if (hashed) {
aggWideFlow = Ctx.Builder(Node->Pos())
.Callable(0, "WideFromBlocks")
.Callable(0, "BlockCombineHashed")
.Callable(0, "FromFlow")
.Add(0, blocks)
.Callable(1, "Void")
.Add(2, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
.Add(3, Ctx.NewList(Node->Pos(), std::move(aggs)))
} else {
aggWideFlow = Ctx.Builder(Node->Pos())
.Callable(0, "BlockCombineAll")
.Callable(0, "FromFlow")
.Add(0, blocks)
.Callable(1, "Void")
.Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
auto finalFlow = MakeNarrowMap(Node->Pos(), outputColumns, aggWideFlow, Ctx);
if (isInputList) {
auto root = Ctx.NewCallable(Node->Pos(), "FromFlow", { finalFlow });
auto lambdaStream = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), { stream }), std::move(root));
return Ctx.Builder(Node->Pos())
.Add(0, AggList)
.With(0, "stream")
} else {
return finalFlow;
// Builds the combiner for the non-distinct aggregated columns.
// Produces three lambdas — init, update and save — and assembles them with AggList, PreMap and
// the key extractor as children 0..5 of the resulting node.
// Column names come from FinalColumnNames for the "Combine"/"CombineState" suffixes and from
// InitialColumnNames otherwise; for "CombineState" the init/update lambdas are taken from trait
// children 4 and 5 instead of 1 and 2.
// NOTE(review): lambda/param/list wrapper lines, the name of the outer callable, builder
// terminators and closing braces are not visible in this excerpt.
TExprNode::TPtr TAggregateExpander::GeneratePartialAggregateForNonDistinct(const TExprNode::TPtr& keyExtractor, const TExprNode::TPtr& pickleTypeNode)
bool combineOnly = Suffix == "Combine" || Suffix == "CombineState";
const auto& columnNames = combineOnly ? FinalColumnNames : InitialColumnNames;
auto initLambdaIndex = (Suffix == "CombineState") ? 4 : 1;
auto updateLambdaIndex = (Suffix == "CombineState") ? 5 : 2;
// Init: one (column name, init(arg)) entry per non-distinct column.
auto combineInit = Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 ndx = 0;
for (ui32 i: NonDistinctColumns) {
auto trait = Traits[i];
auto initLambda = trait->Child(initLambdaIndex);
// A one-argument init lambda takes only the value; otherwise an extra atom with the
// column index is also supplied.
if (initLambda->Head().ChildrenSize() == 1) {
.Add(0, columnNames[i])
.Apply(1, *initLambda)
.Do(GetPartialAggArgExtractor(i, false))
} else {
.Add(0, columnNames[i])
.Apply(1, *initLambda)
.Do(GetPartialAggArgExtractor(i, false))
.Atom(0, ToString(i), TNodeFlags::Default)
return parent;
// Update: one (column name, update(arg, state member)) entry per non-distinct column.
auto combineUpdate = Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 ndx = 0;
for (ui32 i: NonDistinctColumns) {
auto trait = Traits[i];
auto updateLambda = trait->Child(updateLambdaIndex);
// A two-argument update lambda takes (value, state); otherwise the column index atom is added.
if (updateLambda->Head().ChildrenSize() == 2) {
.Add(0, columnNames[i])
.Apply(1, *updateLambda)
.Do(GetPartialAggArgExtractor(i, true))
.Arg(0, "state")
.Add(1, columnNames[i])
} else {
.Add(0, columnNames[i])
.Apply(1, *updateLambda)
.Do(GetPartialAggArgExtractor(i, true))
.Arg(0, "state")
.Add(1, columnNames[i])
.Atom(0, ToString(i), TNodeFlags::Default)
return parent;
// Save: an AsStruct with one entry per column name followed by one entry per key column.
auto combineSave = Ctx.Builder(Node->Pos())
.Callable(0, "AsStruct")
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 0; i < columnNames.size(); ++i) {
// Columns that are not in NonDistinctColumns get their NothingStates placeholder.
if (NonDistinctColumns.find(i) == NonDistinctColumns.end()) {
.Add(0, columnNames[i])
.Add(1, NothingStates[i])
} else {
auto trait = Traits[i];
auto saveLambda = trait->Child(3);
// When distinct fields exist, the saved state is wrapped in "Just".
if (!DistinctFields.empty()) {
.Add(0, columnNames[i])
.Callable(1, "Just")
.Apply(0, *saveLambda)
.Arg(0, "state")
.Add(1, columnNames[i])
} else {
.Add(0, columnNames[i])
.Apply(1, *saveLambda)
.Arg(0, "state")
.Add(1, columnNames[i])
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 pos = 0;
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
// Key entries are placed after the columnNames.size() aggregate entries.
auto listBuilder = parent.List(columnNames.size() + i);
listBuilder.Add(0, KeyColumns->ChildPtr(i));
// Several keys: take the Nth element of the (possibly unpickled) key; single key: the key itself.
if (KeyColumns->ChildrenSize() > 1) {
if (pickleTypeNode) {
.Callable(1, "Nth")
.Callable(0, "Unpickle")
.Add(0, pickleTypeNode)
.Arg(1, "key")
.Atom(1, ToString(pos), TNodeFlags::Default)
} else {
.Callable(1, "Nth")
.Arg(0, "key")
.Atom(1, ToString(pos), TNodeFlags::Default)
} else {
if (pickleTypeNode) {
listBuilder.Callable(1, "Unpickle")
.Add(0, pickleTypeNode)
.Arg(1, "key")
} else {
listBuilder.Arg(1, "key");
return parent;
// Children: 0 input, 1 PreMap, 2 key extractor, 3 init, 4 update, 5 save.
return Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, PreMap)
.Add(2, keyExtractor)
.Add(3, std::move(combineInit))
.Add(4, std::move(combineUpdate))
.Add(5, std::move(combineSave))
// Emits into `parent` one init entry per column index in `indicies`:
// (InitialColumnNames[i], initLambda applied to a value taken from "item" and distinctField).
// The trait's child 1 is used as the init lambda; when it takes more than one argument an atom
// with the column index is supplied as well.
// NOTE(review): the lines that start each builder chain (and presumably advance `ndx`), the
// callable wrapping ("item", distinctField) and closing braces are not visible in this excerpt.
void TAggregateExpander::GenerateInitForDistinct(TExprNodeBuilder& parent, ui32& ndx, const TIdxSet& indicies, const TExprNode::TPtr& distinctField) {
for (ui32 i: indicies) {
auto trait = Traits[i];
auto initLambda = trait->Child(1);
if (initLambda->Head().ChildrenSize() == 1) {
.Add(0, InitialColumnNames[i])
.Apply(1, *initLambda)
.Arg(0, "item")
.Add(1, distinctField)
} else {
.Add(0, InitialColumnNames[i])
.Apply(1, *initLambda)
.Arg(0, "item")
.Add(1, distinctField)
.Atom(0, ToString(i), TNodeFlags::Default)
// Builds the grouping pipeline for one DISTINCT field.
// Visible steps:
//   1. Resolve the field's type in RowType; if, after removing one level of Optional, it is
//      not a Data type, pickling is forced and values are handled as String.
//   2. Create the Set.Create / Set.AddValue / Set.WasChanged UDF nodes for that value type and
//      store them in UdfSetCreate / UdfAddValue / UdfWasChanged keyed by the field name.
//   3. Build a key extractor over (key columns..., distinct field), pickled when required.
//   4. Build init/update/save lambdas and a combiner over AggList, then a grouper on top of it
//      that yields, per group, Just(state) for the columns tied to this field and NothingStates
//      placeholders for all other columns, plus the key columns.
// Side effects: writes DistinctFieldNeedsPickle, UdfSetCreate, UdfAddValue, UdfWasChanged.
// NOTE(review): many builder lines (callable names, lambda/param/list wrappers, terminators)
// and closing braces are not visible in this excerpt; comments describe only what is shown.
TExprNode::TPtr TAggregateExpander::GenerateDistinctGrouper(const TExprNode::TPtr distinctField,
const TVector<const TTypeAnnotationNode*>& keyItemTypes, bool needDistinctPickle)
auto& indicies = Distinct2Columns[distinctField->Content()];
auto distinctIndex = RowType->FindItem(distinctField->Content());
YQL_ENSURE(distinctIndex, "Unknown field: " << distinctField->Content());
auto distinctType = RowType->GetItems()[*distinctIndex]->GetItemType();
TVector<const TTypeAnnotationNode*> distinctKeyItemTypes = keyItemTypes;
auto valueType = distinctType;
// Look through one Optional level before checking for a Data type.
if (distinctType->GetKind() == ETypeAnnotationKind::Optional) {
distinctType = distinctType->Cast<TOptionalExprType>()->GetItemType();
if (distinctType->GetKind() != ETypeAnnotationKind::Data) {
needDistinctPickle = true;
valueType = Ctx.MakeType<TDataExprType>(EDataSlot::String);
// Pickled values use a "String" type node; otherwise the value type is expanded as-is.
const auto expandedValueType = needDistinctPickle ?
.Atom(0, "String", TNodeFlags::Default)
: ExpandType(Node->Pos(), *valueType, Ctx);
DistinctFieldNeedsPickle[distinctField->Content()] = needDistinctPickle;
auto udfSetCreateValue = Ctx.Builder(Node->Pos())
.Atom(0, "Set.Create")
.Callable(1, "Void").Seal()
.Callable(2, "TupleType")
.Callable(0, "TupleType")
.Add(0, expandedValueType)
.Callable(1, "DataType")
.Atom(0, "Uint32", TNodeFlags::Default)
.Callable(1, "StructType").Seal()
.Add(2, expandedValueType)
UdfSetCreate[distinctField->Content()] = udfSetCreateValue;
// The set's resource type is derived from applying Set.Create to an instance of the value
// type and a Uint32 "0".
auto resourceType = Ctx.Builder(Node->Pos())
.Callable(0, "Apply")
.Add(0, udfSetCreateValue)
.Callable(1, "InstanceOf")
.Add(0, expandedValueType)
.Callable(2, "Uint32")
.Atom(0, "0", TNodeFlags::Default)
UdfAddValue[distinctField->Content()] = Ctx.Builder(Node->Pos())
.Atom(0, "Set.AddValue")
.Callable(1, "Void").Seal()
.Callable(2, "TupleType")
.Callable(0, "TupleType")
.Add(0, resourceType)
.Add(1, expandedValueType)
.Callable(1, "StructType").Seal()
.Add(2, expandedValueType)
UdfWasChanged[distinctField->Content()] = Ctx.Builder(Node->Pos())
.Atom(0, "Set.WasChanged")
.Callable(1, "Void").Seal()
.Callable(2, "TupleType")
.Callable(0, "TupleType")
.Add(0, resourceType)
.Callable(1, "StructType").Seal()
.Add(2, expandedValueType)
// Key for distinct grouping: the key columns followed by the distinct field, or just the
// distinct field when there are no key columns.
auto distinctKeyExtractor = Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (KeyColumns->ChildrenSize() != 0) {
auto listBuilder = parent.List();
ui32 pos = 0;
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Callable(pos++, "Member")
.Arg(0, "item")
.Add(1, KeyColumns->ChildPtr(i))
.Callable(pos, "Member")
.Arg(0, "item")
.Add(1, distinctField)
return listBuilder.Seal();
} else {
return parent
.Arg(0, "item")
.Add(1, distinctField)
const TTypeAnnotationNode* distinctPickleType = nullptr;
TExprNode::TPtr distinctPickleTypeNode;
if (needDistinctPickle) {
// With key columns the pickled key is a tuple of distinctKeyItemTypes, otherwise its first element.
distinctPickleType = KeyColumns->ChildrenSize() > 0 ? Ctx.MakeType<TTupleExprType>(distinctKeyItemTypes) : distinctKeyItemTypes.front();
distinctPickleTypeNode = ExpandType(Node->Pos(), *distinctPickleType, Ctx);
if (needDistinctPickle) {
distinctKeyExtractor = Ctx.Builder(Node->Pos())
.Apply(0, *distinctKeyExtractor).With(0, "item").Seal()
auto distinctCombineInit = Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 ndx = 0;
GenerateInitForDistinct(parent, ndx, indicies, distinctField);
return parent;
auto distinctCombineUpdate = Ctx.Builder(Node->Pos())
ui32 ndx = 0;
// Save: AsStruct of the saved states of this field's columns, followed by the key columns
// and the distinct field recovered from "key".
auto distinctCombineSave = Ctx.Builder(Node->Pos())
.Callable(0, "AsStruct")
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i: indicies) {
auto trait = Traits[i];
auto saveLambda = trait->Child(3);
.Add(0, InitialColumnNames[i])
.Apply(1, *saveLambda)
.Arg(0, "state")
.Add(1, InitialColumnNames[i])
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (KeyColumns->ChildrenSize() > 0) {
if (needDistinctPickle) {
ui32 pos = 0;
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Nth")
.Callable(0, "Unpickle")
.Add(0, distinctPickleTypeNode)
.Arg(1, "key")
.Atom(1, ToString(pos++), TNodeFlags::Default)
.Add(0, distinctField)
.Callable(1, "Nth")
.Callable(0, "Unpickle")
.Add(0, distinctPickleTypeNode)
.Arg(1, "key")
.Atom(1, ToString(pos++), TNodeFlags::Default)
} else {
ui32 pos = 0;
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Nth")
.Arg(0, "key")
.Atom(1, ToString(pos++), TNodeFlags::Default)
.Add(0, distinctField)
.Callable(1, "Nth")
.Arg(0, "key")
.Atom(1, ToString(pos++), TNodeFlags::Default)
} else {
if (needDistinctPickle) {
.Add(0, distinctField)
.Callable(1, "Unpickle")
.Add(0, distinctPickleTypeNode)
.Arg(1, "key")
} else {
.Add(0, distinctField)
.Arg(1, "key")
return parent;
// Children: 0 input, 1 PreMap, 2 distinct key extractor, 3 init, 4 update, 5 save.
auto distinctCombiner = Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, PreMap)
.Add(2, distinctKeyExtractor)
.Add(3, std::move(distinctCombineInit))
.Add(4, std::move(distinctCombineUpdate))
.Add(5, std::move(distinctCombineSave))
// Grouper over the combiner output, keyed by the same extractor, with a Condense1 over the groups.
auto distinctGrouper = Ctx.Builder(Node->Pos())
.Add(0, std::move(distinctCombiner))
.Add(1, distinctKeyExtractor)
.Callable(2, "Void").Seal()
.Callable(3, "Void").Seal()
.Callable(0, "Condense1")
.Arg(0, "groups")
.Arg(0, "item")
.Arg(1, "state")
.Add(2, distinctKeyExtractor)
.Add(3, distinctKeyExtractor)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 0; i < InitialColumnNames.size(); ++i) {
// Columns tied to this distinct field carry Just(state member); all others a placeholder.
if (indicies.find(i) != indicies.end()) {
.Add(0, InitialColumnNames[i])
.Callable(1, "Just")
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, InitialColumnNames[i])
} else {
.Add(0, InitialColumnNames[i])
.Add(1, NothingStates[i])
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (KeyColumns->ChildrenSize() > 0) {
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
// Key members follow the InitialColumnNames.size() aggregate entries.
parent.List(InitialColumnNames.size() + i)
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Member")
.Arg(0, "state")
.Add(1, KeyColumns->ChildPtr(i))
return parent;
return distinctGrouper;
// Builds a combiner init node that simply reproduces the key columns from "key":
// one (key column, value) list per key column. With several key columns the value is the Nth
// element of the key; with one it is the key itself. When pickleTypeNode is set, the key is
// passed through "Unpickle" with that type first.
// NOTE(review): lambda/param/callable wrapper lines, builder terminators and closing braces
// are not visible in this excerpt.
TExprNode::TPtr TAggregateExpander::ReturnKeyAsIsForCombineInit(const TExprNode::TPtr& pickleTypeNode)
return Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 pos = 0;
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
auto listBuilder = parent.List(i);
listBuilder.Add(0, KeyColumns->Child(i));
if (KeyColumns->ChildrenSize() > 1) {
if (pickleTypeNode) {
.Callable(1, "Nth")
.Callable(0, "Unpickle")
.Add(0, pickleTypeNode)
.Arg(1, "key")
.Atom(1, ToString(pos++), TNodeFlags::Default)
} else {
.Callable(1, "Nth")
.Arg(0, "key")
.Atom(1, ToString(pos++), TNodeFlags::Default)
} else {
if (pickleTypeNode) {
listBuilder.Callable(1, "Unpickle")
.Add(0, pickleTypeNode)
.Arg(1, "key")
} else {
listBuilder.Arg(1, "key");
return parent;
// Builds a lambda over a "stream" argument that applies preprocessLambda to each "item" and
// passes keyExtractor as child 2 of an enclosing callable; further sub-lambdas bind "item"
// and "state" arguments.
// NOTE(review): the callable names, lambda/param lines and the nodes being applied in the
// trailing .With(...) lines are not visible in this excerpt — consult the full file for the
// actual structure.
TExprNode::TPtr TAggregateExpander::BuildFinalizeByKeyLambda(const TExprNode::TPtr& preprocessLambda, const TExprNode::TPtr& keyExtractor) {
return Ctx.Builder(Node->Pos())
.Arg(0, "stream")
.Apply(0, preprocessLambda)
.With(0, "item")
.Add(2, keyExtractor)
.With(0, "item")
.With(0, "item")
.With(1, "state")
.With(0, "state")
TExprNode::TPtr TAggregateExpander::CountAggregateRewrite(const NNodes::TCoAggregate& node, TExprContext& ctx, bool useBlocks) {
auto keyColumns = node.Keys();
auto aggregatedColumns = node.Handlers();
if (keyColumns.Size() > 0 || aggregatedColumns.Size() != 1) {
return node.Ptr();
auto settings = node.Settings();
auto hoppingSetting = GetSetting(settings.Ref(), "hopping");
if (hoppingSetting) {
return node.Ptr();
if (GetSetting(settings.Ref(), "session")) {
// TODO: support
return node.Ptr();
auto aggregatedColumn = aggregatedColumns.Item(0);
const bool isDistinct = (aggregatedColumn.Ref().ChildrenSize() == 3);
auto traits = aggregatedColumn.Ref().Child(1);
auto outputColumn = aggregatedColumn.Ref().HeadPtr();
// validation of traits
const TTypeAnnotationNode* inputItemType;
bool onlyColumn = true;
bool onlyZero = true;
TExprNode::TPtr initVal;
if (traits->IsCallable("AggregationTraits")) {
inputItemType = traits->Head().GetTypeAnn()->Cast<TTypeExprType>()->GetType();
auto init = NNodes::TCoLambda(traits->Child(1));
TExprNode::TPtr updateVal;
if (init.Body().Ref().IsCallable("Uint64") &&
init.Body().Ref().Head().Content() == "1") {
onlyZero = false;
} else if (init.Body().Ref().IsCallable("Uint64") &&
init.Body().Ref().Head().Content() == "0") {
onlyColumn = false;
} else if (init.Body().Ref().IsCallable("AggrCountInit")) {
initVal = init.Body().Ref().HeadPtr();
onlyColumn = onlyColumn && init.Body().Ref().Child(0) == init.Args().Arg(0).Raw();
onlyZero = false;
} else {
return node.Ptr();
auto update = NNodes::TCoLambda(traits->Child(2));
auto inc = update.Body().Ptr();
if (inc->IsCallable("Inc") && inc->Child(0) == update.Args().Arg(1).Raw()) {
onlyZero = false;
} else if (inc->IsCallable("AggrCountUpdate") && inc->Child(1) == update.Args().Arg(1).Raw()) {
updateVal = inc->HeadPtr();
onlyColumn = onlyColumn && inc->Child(0) == update.Args().Arg(0).Raw();
onlyZero = false;
} else if (inc == update.Args().Arg(1).Raw()) {
onlyColumn = false;
} else {
return node.Ptr();
auto save = NNodes::TCoLambda(traits->Child(3));
if (save.Body().Raw() != save.Args().Arg(0).Raw()) {
return node.Ptr();
auto load = NNodes::TCoLambda(traits->Child(4));
if (load.Body().Raw() != load.Args().Arg(0).Raw()) {
return node.Ptr();
auto merge = NNodes::TCoLambda(traits->Child(5));
auto& plus = merge.Body().Ref();
if (!plus.IsCallable({ "+", "AggrAdd" }) ) {
return node.Ptr();
if (!(plus.Child(0) == merge.Args().Arg(0).Raw() &&
plus.Child(1) == merge.Args().Arg(1).Raw())) {
return node.Ptr();
auto finish = NNodes::TCoLambda(traits->Child(6));
if (finish.Body().Raw() != finish.Args().Arg(0).Raw()) {
return node.Ptr();
auto defVal = traits->Child(7);
if (!defVal->IsCallable("Uint64") || defVal->Head().Content() != "0") {
return node.Ptr();
if (!isDistinct) {
if (!onlyZero && !onlyColumn) {
if (!initVal || !updateVal || initVal != updateVal) {
return node.Ptr();
} else if (traits->IsCallable("AggApply")) {
if (traits->Head().Content() != "count_all" && traits->Head().Content() != "count") {
return node.Ptr();
inputItemType = traits->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
onlyZero = false;
onlyColumn = false;
if (&traits->Child(2)->Head().Head() == &traits->Child(2)->Tail()) {
onlyColumn = true;
if (!isDistinct) {
if (traits->Head().Content() == "count") {
initVal = traits->Child(2)->TailPtr();
if (initVal->GetTypeAnn()->IsOptionalOrNull()) {
if (IsDepended(traits->Child(2)->Tail(), traits->Child(2)->Head().Head())) {
return node.Ptr();
} else {
initVal = nullptr;
} else {
return node.Ptr();
const bool isOptionalColumn = inputItemType->GetKind() == ETypeAnnotationKind::Optional;
if (!isDistinct) {
auto length = ctx.Builder(node.Pos())
.Add(0, node.Input().Ptr())
if (onlyZero) {
length = ctx.Builder(node.Pos())
.Atom(0, "0", TNodeFlags::Default)
} else if (!onlyColumn && initVal) {
length = ctx.Builder(node.Pos())
.Callable(0, "Exists")
.Add(0, initVal)
.Add(1, std::move(length))
.Callable(2, "Uint64")
.Atom(0, "0", TNodeFlags::Default)
auto ret = ctx.Builder(node.Pos())
.Callable(0, "AsStruct")
.Add(0, std::move(outputColumn))
.Add(1, std::move(length))
return ret;
if (useBlocks || !onlyColumn) {
return node.Ptr();
auto removedOptionalType = inputItemType;
if (isOptionalColumn) {
removedOptionalType = removedOptionalType->Cast<TOptionalExprType>()->GetItemType();
const bool needPickle = removedOptionalType->GetKind() != ETypeAnnotationKind::Data;
auto pickleTypeNode = ExpandType(node.Pos(), *inputItemType, ctx);
auto distictColumn = aggregatedColumn.Ref().ChildPtr(2);
auto combine = ctx.Builder(node.Pos())
.Callable(0, "ExtractMembers")
.Add(0, node.Input().Ptr())
.Add(0, distictColumn)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (isOptionalColumn) {
.Callable(0, "Member")
.Arg(0, "row")
.Add(1, distictColumn)
} else {
.Callable(0, "Member")
.Arg(0, "row")
.Add(1, distictColumn)
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (needPickle) {
.Arg(0, "item")
} else {
return parent;
.Callable(0, "AsStruct")
.Atom(0, "value")
.Arg(1, "key")
auto groupByKey = ctx.Builder(node.Pos())
.Add(0, combine)
.Arg(0, "combineRow")
.Atom(1, "value")
.Callable(2, "Void")
.Callable(3, "Void")
.Arg(0, "groups")
auto ret = ctx.Builder(node.Pos())
.Callable(0, "AsStruct")
.Add(0, outputColumn)
.Callable(1, "Length")
.Add(0, std::move(groupByKey))
return ret;
// Builds the post-aggregation stage on top of `preAgg`.
//
// Two shapes are produced:
//  * when partitions-by-keys is off, finalize-by-keys is on and there is no session setting,
//    the stage is driven by the lambda returned from BuildFinalizeByKeyLambda;
//  * otherwise the stage is built around a Condense1 call that is fed by the
//    init / switch / merge / save phase generators of this class.
//
// `keyExtractor` is the lambda that extracts the grouping key from a row.
// Returns the built expression; key-less finalizing aggregates are additionally
// passed through MakeSingleGroupRow.
TExprNode::TPtr TAggregateExpander::GeneratePostAggregate(const TExprNode::TPtr& preAgg, const TExprNode::TPtr& keyExtractor)
auto preprocessLambda = GeneratePreprocessLambda(keyExtractor);
TExprNode::TPtr postAgg;
if (!UsePartitionsByKeys && UseFinalizeByKeys && !HaveSessionSetting) {
postAgg = Ctx.Builder(Node->Pos())
// NOTE(review): preAgg is a const reference, so std::move here still copies the pointer.
.Add(0, std::move(preAgg))
.Add(1, keyExtractor)
.Apply(BuildFinalizeByKeyLambda(preprocessLambda, keyExtractor))
.With(0, "stream")
} else {
auto condenseSwitch = GenerateCondenseSwitch(keyExtractor);
postAgg = Ctx.Builder(Node->Pos())
.Add(0, std::move(preAgg))
.Add(1, keyExtractor)
// Sort order and sort key come from the precomputed SortParams.
.Add(2, SortParams.Order)
.Add(3, SortParams.Key)
.Callable(0, "Condense1")
// The preprocess lambda is applied to the incoming stream before condensing.
.Apply(0, preprocessLambda)
.With(0, "stream")
.Add(1, GeneratePostAggregateInitPhase())
.Add(2, condenseSwitch)
.Add(3, GeneratePostAggregateMergePhase())
.Add(1, GeneratePostAggregateSavePhase())
// No key columns, no session and a finalizing (or suffix-less) aggregate:
// hand the result to MakeSingleGroupRow.
if (KeyColumns->ChildrenSize() == 0 && !HaveSessionSetting && (Suffix == "" || Suffix.EndsWith("Finalize"))) {
return MakeSingleGroupRow(*Node, postAgg, Ctx);
return postAgg;
// Returns the lambda used to preprocess the input before post-aggregation.
// With a session window update lambda present, the result comes from
// AddSessionParamsMemberLambda (given SessionStartMemberName, the key extractor and the
// session key/init/update lambdas); otherwise it is an identity lambda.
TExprNode::TPtr TAggregateExpander::GeneratePreprocessLambda(const TExprNode::TPtr& keyExtractor)
TExprNode::TPtr preprocessLambda;
if (SessionWindowParams.Update) {
preprocessLambda = AddSessionParamsMemberLambda(Node->Pos(), SessionStartMemberName, "", keyExtractor,
SessionWindowParams.Key, SessionWindowParams.Init, SessionWindowParams.Update, Ctx);
} else {
preprocessLambda = MakeIdentityLambda(Node->Pos(), Ctx);
return preprocessLambda;
// Builds the switch expression passed to Condense1 (see GeneratePostAggregate).
// It works on the current row ("item") and the accumulated row ("state").
TExprNode::TPtr TAggregateExpander::GenerateCondenseSwitch(const TExprNode::TPtr& keyExtractor)
TExprNode::TPtr condenseSwitch;
if (SessionWindowParams.Update) {
// Session case: two AggrNotEquals comparisons are emitted.
condenseSwitch = Ctx.Builder(Node->Pos())
// 1) the extracted key of the item versus the extracted key of the state;
.Callable(0, "AggrNotEquals")
.Apply(0, keyExtractor)
.With(0, "item")
.Apply(1, keyExtractor)
.With(0, "state")
// 2) the session start member of the item versus that of the state.
.Callable(1, "AggrNotEquals")
.Callable(0, "Member")
.Arg(0, "item")
.Atom(1, SessionStartMemberName)
.Callable(1, "Member")
.Arg(0, "state")
.Atom(1, SessionStartMemberName)
} else {
// No session: item, state and the key extractor (passed twice, as children 2 and 3)
// are handed to the enclosing node.
condenseSwitch = Ctx.Builder(Node->Pos())
.Arg(0, "item")
.Arg(1, "state")
.Add(2, keyExtractor)
.Add(3, keyExtractor)
return condenseSwitch;
// Builds the init phase for Condense1: the expression that produces the initial
// state from the first row ("item") of a group.
// The state carries the key columns (plus the session start member when a session
// update lambda is set) followed by one entry per aggregated column.
TExprNode::TPtr TAggregateExpander::GeneratePostAggregateInitPhase()
// With a non-empty suffix the state columns are named by FinalColumnNames,
// otherwise by InitialColumnNames.
bool aggregateOnly = (Suffix != "");
const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
ui32 index = 0U;
return Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Key columns are copied from the item as-is.
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Member")
.Arg(0, "item")
.Add(1, KeyColumns->ChildPtr(i))
// The session start member is carried into the state as well.
if (SessionWindowParams.Update) {
.Atom(0, SessionStartMemberName)
.Callable(1, "Member")
.Arg(0, "item")
.Atom(1, SessionStartMemberName)
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 0; i < columnNames.size(); ++i) {
auto child = AggregatedColumns->Child(i);
auto trait = Traits[i];
if (!EffectiveCompact) {
// Non-compact mode: the item already holds a state; it is passed through
// the trait's child 4 (the load lambda).
auto loadLambda = trait->Child(4);
auto extractorLambda = GetFinalAggStateExtractor(i);
if (!DistinctFields.empty() || Suffix == "MergeManyFinalize") {
// The state is first obtained through the extractor lambda and the
// load lambda is applied via Map.
.Add(0, columnNames[i])
.Callable(1, "Map")
.Apply(0, *extractorLambda)
.With(0, "item")
.Add(1, loadLambda)
} else {
.Add(0, columnNames[i])
.Apply(1, *loadLambda)
.With(0, "item")
} else {
// Compact mode: the state is created from the raw row with the trait's
// child 1 (the init lambda).
auto initLambda = trait->Child(1);
// A third child on the aggregated column is the distinct field.
auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
auto initApply = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
parent.Apply(1, *initLambda)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (distinctField) {
// Distinct aggregation takes the distinct field of the item as input.
.Arg(0, "item")
.Add(1, distinctField)
} else {
// Otherwise the item is paired with the expanded type of the
// init lambda's first argument.
.Arg(0, "item")
.Add(1, ExpandType(Node->Pos(), *initLambda->Head().Head().GetTypeAnn(), Ctx))
return parent;
.Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
// A two-argument init lambda additionally receives the column index as an atom.
if (initLambda->Head().ChildrenSize() == 2) {
.Atom(0, ToString(i), TNodeFlags::Default)
return parent;
return parent;
if (distinctField) {
// Only the first column registered for a distinct field creates the set
// (via UdfSetCreate) for that field.
const bool isFirst = *Distinct2Columns[distinctField->Content()].begin() == i;
if (isFirst) {
.Add(0, columnNames[i])
.Callable(0, "NamedApply")
.Add(0, UdfSetCreate[distinctField->Content()])
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// The value is the distinct field itself, or its StablePickle
// when the field is flagged in DistinctFieldNeedsPickle.
if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
parent.Callable(0, "Member")
.Arg(0, "item")
.Add(1, distinctField)
} else {
parent.Callable(0, "StablePickle")
.Callable(0, "Member")
.Arg(0, "item")
.Add(1, distinctField)
return parent;
.Callable(1, "Uint32")
.Atom(0, "0", TNodeFlags::Default)
.Callable(2, "AsStruct").Seal()
.Callable(3, "DependsOn")
.Callable(0, "String")
.Add(0, distinctField)
} else {
.Add(0, columnNames[i])
} else {
.Add(0, columnNames[i])
return parent;
// Builds the save phase: the expression that turns the accumulated "state" of a
// group into the output row (key columns, optional session column, and one or
// more output members per aggregated column).
TExprNode::TPtr TAggregateExpander::GeneratePostAggregateSavePhase()
// With a non-empty suffix the state columns are named by FinalColumnNames,
// otherwise by InitialColumnNames.
bool aggregateOnly = (Suffix != "");
const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
ui32 index = 0U;
return Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
// A key column named like the session start member is checked for explicitly.
if (KeyColumns->Child(i)->Content() == SessionStartMemberName) {
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Member")
.Arg(0, "state")
.Add(1, KeyColumns->ChildPtr(i))
// The session output column is filled from the state's session start member.
if (SessionOutputColumn) {
.Atom(0, *SessionOutputColumn)
.Callable(1, "Member")
.Arg(0, "state")
.Atom(1, SessionStartMemberName)
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 0; i < columnNames.size(); ++i) {
auto child = AggregatedColumns->Child(i);
auto trait = Traits[i];
// "MergeState" keeps the state serialized (trait child 3, the save lambda);
// every other suffix finishes it (trait child 6, the finish lambda).
auto finishLambda = (Suffix == "MergeState") ? trait->Child(3) : trait->Child(6);
if (!EffectiveCompact && (!DistinctFields.empty() || Suffix == "MergeManyFinalize")) {
// The state member is mapped through the finish lambda (Map) and then unwrapped.
if (child->Head().IsAtom()) {
// Single output column.
.Add(0, FinalColumnNames[i])
.Callable(1, "Unwrap")
.Callable(0, "Map")
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, columnNames[i])
.Add(1, finishLambda)
} else {
// Several output columns: each one takes the Nth element of the finished value.
const auto& multiFields = child->Child(0);
for (ui32 field = 0; field < multiFields->ChildrenSize(); ++field) {
.Atom(0, multiFields->Child(field)->Content())
.Callable(1, "Nth")
.Callable(0, "Unwrap")
.Callable(0, "Map")
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, columnNames[i])
.Add(1, finishLambda)
.Atom(1, ToString(field), TNodeFlags::Default)
} else {
// A third child on the aggregated column is the distinct field.
auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
auto stateExtractor = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// The first column of a distinct field addresses element "1" of its state
// member; other columns use the state member as-is.
const bool isFirst = distinctField ? (*Distinct2Columns[distinctField->Content()].begin() == i) : false;
if (distinctField && isFirst) {
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, columnNames[i])
.Atom(1, "1", TNodeFlags::Default)
} else {
.Arg(0, "state")
.Add(1, columnNames[i])
return parent;
if (child->Head().IsAtom()) {
// Single output column: finish lambda applied directly.
.Add(0, FinalColumnNames[i])
.Apply(1, *finishLambda)
} else {
// Several output columns: each one takes the Nth element of the finished value.
const auto& multiFields = child->Head();
for (ui32 field = 0; field < multiFields.ChildrenSize(); ++field) {
.Atom(0, multiFields.Child(field)->Content())
.Callable(1, "Nth")
.Apply(0, *finishLambda)
.Atom(1, ToString(field), TNodeFlags::Default)
return parent;
// Builds the merge phase for Condense1: the expression that folds the next row
// ("item") of a group into the accumulated "state".
// Key columns (and the session start member, if any) are taken from the state;
// every aggregated column is either merged (non-compact mode) or updated (compact mode).
TExprNode::TPtr TAggregateExpander::GeneratePostAggregateMergePhase()
// With a non-empty suffix the state columns are named by FinalColumnNames,
// otherwise by InitialColumnNames.
bool aggregateOnly = (Suffix != "");
const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
ui32 index = 0U;
return Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Keys do not change within a group, so they are copied from the state.
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Member")
.Arg(0, "state")
.Add(1, KeyColumns->ChildPtr(i))
if (SessionWindowParams.Update) {
.Atom(0, SessionStartMemberName)
.Callable(1, "Member")
.Arg(0, "state")
.Atom(1, SessionStartMemberName)
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
for (ui32 i = 0; i < columnNames.size(); ++i) {
auto child = AggregatedColumns->Child(i);
auto trait = Traits[i];
if (!EffectiveCompact) {
// Non-compact mode: the item holds a partial state that is loaded
// (trait child 4) and merged (trait child 5) with the accumulated one.
auto loadLambda = trait->Child(4);
auto mergeLambda = trait->Child(5);
auto extractorLambda = GetFinalAggStateExtractor(i);
if (!DistinctFields.empty() || Suffix == "MergeManyFinalize") {
// The loaded item state (Map over the extractor result) and the state
// member are combined with OptionalReduce using the merge lambda.
.Add(0, columnNames[i])
.Callable(1, "OptionalReduce")
.Callable(0, "Map")
.Apply(0, extractorLambda)
.With(0, "item")
.Add(1, loadLambda)
.Callable(1, "Member")
.Arg(0, "state")
.Add(1, columnNames[i])
.Add(2, mergeLambda)
} else {
.Add(0, columnNames[i])
.Apply(1, *mergeLambda)
.With(0, "item")
.Arg(0, "state")
.Add(1, columnNames[i])
} else {
// Compact mode: the raw row is folded in with the trait's child 2 (update lambda).
auto updateLambda = trait->Child(2);
// A third child on the aggregated column is the distinct field.
auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
const bool isFirst = distinctField ? (*Distinct2Columns[distinctField->Content()].begin() == i) : false;
auto updateApply = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
parent.Apply(1, *updateLambda)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (distinctField) {
// Distinct aggregation takes the distinct field of the item as input.
.Arg(0, "item")
.Add(1, distinctField)
} else {
// Otherwise the item is paired with the expanded type of the
// update lambda's first argument.
.Arg(0, "item")
.Add(1, ExpandType(Node->Pos(), *updateLambda->Head().Head().GetTypeAnn(), Ctx))
return parent;
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// The first column of a distinct field addresses element "1" of its
// state member; other columns use the state member as-is.
if (distinctField && isFirst) {
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, columnNames[i])
.Atom(1, "1", TNodeFlags::Default)
} else {
.Arg(0, "state")
.Add(1, columnNames[i])
return parent;
.Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
// A three-argument update lambda additionally receives the column index as an atom.
if (updateLambda->Head().ChildrenSize() == 3) {
.Atom(0, ToString(i), TNodeFlags::Default)
return parent;
return parent;
if (distinctField) {
// The set for a distinct field lives in element "0" of the state member
// that belongs to the first column registered for this field.
auto distinctIndex = *Distinct2Columns[distinctField->Content()].begin();
ui32 newValueIndex = 0;
// Produces the set with the item's value added (UdfAddValue).
auto newValue = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
parent.Callable(newValueIndex, "NamedApply")
.Add(0, UdfAddValue[distinctField->Content()])
.Callable(0, "Nth")
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, columnNames[distinctIndex])
.Atom(1, "0", TNodeFlags::Default)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// The value is the distinct field itself, or its StablePickle
// when the field is flagged in DistinctFieldNeedsPickle.
if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
parent.Callable(1, "Member")
.Arg(0, "item")
.Add(1, distinctField)
} else {
parent.Callable(1, "StablePickle")
.Callable(0, "Member")
.Arg(0, "item")
.Add(1, distinctField)
return parent;
.Callable(2, "AsStruct").Seal()
return parent;
// The column is updated conditionally: the If predicate asks UdfWasChanged
// whether adding the item's value changed the set.
.Add(0, columnNames[i])
.Callable(1, "If")
.Callable(0, "NamedApply")
.Add(0, UdfWasChanged[distinctField->Content()])
.Callable(0, "NamedApply")
.Add(0, UdfAddValue[distinctField->Content()])
.Callable(0, "Nth")
.Callable(0, "Member")
.Arg(0, "state")
.Add(1, columnNames[distinctIndex])
.Atom(1, "0", TNodeFlags::Default)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
parent.Callable(1, "Member")
.Arg(0, "item")
.Add(1, distinctField)
} else {
parent.Callable(1, "StablePickle")
.Callable(0, "Member")
.Arg(0, "item")
.Add(1, distinctField)
return parent;
.Callable(2, "AsStruct").Seal()
.Callable(2, "AsStruct").Seal()
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// The column that owns the set is treated differently from the other
// columns sharing the same distinct field.
if (distinctIndex == i) {
} else {
return parent;
// Third child of If: the previous state member.
.Callable(2, "Member")
.Arg(0, "state")
.Add(1, columnNames[i])
} else {
.Add(0, columnNames[i])
return parent;
// Builds an expression over `input` whose rows keep the key columns unchanged and
// carry, for every column index in `indicies`, the corresponding initial state
// column wrapped in Just.
TExprNode::TPtr TAggregateExpander::GenerateJustOverStates(const TExprNode::TPtr& input, const TIdxSet& indicies) {
return Ctx.Builder(Node->Pos())
.Add(0, input)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 pos = 0;
// Key columns: plain Member(row, key).
for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
.Add(0, KeyColumns->ChildPtr(i))
.Callable(1, "Member")
.Arg(0, "row")
.Add(1, KeyColumns->ChildPtr(i))
// State columns: Just(Member(row, state column)).
for (ui32 i : indicies) {
.Add(0, InitialColumnNames[i])
.Callable(1, "Just")
.Callable(0, "Member")
.Arg(0, "row")
.Add(1, InitialColumnNames[i])
return parent;
// Serializes an index set into expression atoms: every index from `indicies`
// becomes one atom (its decimal string form) at consecutive child positions.
TExprNode::TPtr TAggregateExpander::SerializeIdxSet(const TIdxSet& indicies) {
return Ctx.Builder(Node->Pos())
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
ui32 pos = 0;
for (ui32 i : indicies) {
parent.Atom(pos++, ToString(i));
return parent;
// Splits the aggregate into phases.
// Without distinct columns: a combine stage followed by a merge-finalize stage.
// With distinct columns: one combine per column group (non-distinct columns plus one
// per distinct field), all unioned with UnionAll and finished by a merge-many-finalize stage.
// The "output_columns" setting is removed from the settings passed to every stage.
TExprNode::TPtr TAggregateExpander::GeneratePhases() {
const TExprNode::TPtr cleanOutputSettings = RemoveSetting(*Node->Child(3), "output_columns", Ctx);
const bool many = HaveDistinct;
YQL_CLOG(DEBUG, Core) << "Aggregate: generate " << (many ? "phases with distinct" : "simple phases");
// Step 1: for every aggregated column build the trait used by the final merge stage.
TExprNode::TListType mergeTraits;
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
auto originalTrait = AggregatedColumns->Child(index)->ChildPtr(1);
// The extractor reads the intermediate state column from a row.
auto extractor = Ctx.Builder(Node->Pos())
.Arg(0, "row")
.Add(1, InitialColumnNames[index])
if (many) {
// In the distinct case the extractor is wrapped once more.
extractor = Ctx.Builder(Node->Pos())
.Apply(0, extractor)
.With(0, "row")
bool isAggApply = originalTrait->IsCallable("AggApply");
// Type of the serialized state: computed for AggApply, otherwise taken from
// the trait's child 3 (the save lambda).
auto serializedStateType = isAggApply ? AggApplySerializedStateType(originalTrait, Ctx) : originalTrait->Child(3)->GetTypeAnn();
if (many) {
// In the distinct case the state type is made optional.
serializedStateType = Ctx.MakeType<TOptionalExprType>(serializedStateType);
auto extractorTypeNode = Ctx.Builder(Node->Pos())
.Add(0, InitialColumnNames[index])
.Add(1, ExpandType(Node->Pos(), *serializedStateType, Ctx))
if (isAggApply) {
auto initialType = originalTrait->GetTypeAnn();
if (many) {
initialType = Ctx.MakeType<TOptionalExprType>(initialType);
auto originalExtractorTypeNode = Ctx.Builder(Node->Pos())
.Add(0, InitialColumnNames[index])
.Add(1, ExpandType(Node->Pos(), *initialType, Ctx))
auto name = TString(originalTrait->ChildPtr(0)->Content());
// Aggregates named "pg_*" are looked up by name and argument types, and the
// name is rewritten to "pg_<name>#<aggregate id>".
if (name.StartsWith("pg_")) {
auto func = name.substr(3);
TVector<ui32> argTypes;
bool needRetype = false;
auto status = ExtractPgTypesFromMultiLambda(originalTrait->ChildRef(2), argTypes, needRetype, Ctx);
YQL_ENSURE(status == IGraphTransformer::TStatus::Ok);
const NPg::TAggregateDesc& aggDesc = NPg::LookupAggregation(TString(func), argTypes);
name = "pg_" + aggDesc.Name + "#" + ToString(aggDesc.AggId);
.Callable(many ? "AggApplyManyState" : "AggApplyState")
.Atom(0, name)
.Add(1, extractorTypeNode)
.Add(2, extractor)
.Add(3, originalExtractorTypeNode)
} else {
// Non-AggApply trait: children 3..7 of the original trait are reused unchanged.
.Add(0, extractorTypeNode)
.Add(1, extractor)
.Add(3, originalTrait->ChildPtr(3))
.Add(4, originalTrait->ChildPtr(4))
.Add(5, originalTrait->ChildPtr(5))
.Add(6, originalTrait->ChildPtr(6))
.Add(7, originalTrait->ChildPtr(7))
// Step 2: columns of the final stage pair the original output column (child 0)
// with the merge trait built above.
TExprNode::TListType finalizeColumns;
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
.Add(0, AggregatedColumns->Child(index)->ChildPtr(0))
.Add(1, mergeTraits[index])
if (!many) {
// simple Combine + MergeFinalize
TExprNode::TListType combineColumns;
for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
.Add(0, InitialColumnNames[index])
.Add(1, AggregatedColumns->Child(index)->ChildPtr(1))
auto combine = Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, KeyColumns)
.Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
.Add(3, cleanOutputSettings)
auto mergeFinalize = Ctx.Builder(Node->Pos())
.Add(0, combine)
.Add(1, KeyColumns)
.Add(2, Ctx.NewList(Node->Pos(), std::move(finalizeColumns)))
.Add(3, cleanOutputSettings)
return mergeFinalize;
// process with distincts
// Combine + Map with Just over states
// for each distinct field:
// Aggregate by keys + field w/o aggs
// Combine by keys + field with aggs
// Map with Just over states
// UnionAll
// MergeManyFinalize
TExprNode::TListType unionAllInputs;
TExprNode::TListType streams;
// Non-distinct columns form their own combine input.
if (!NonDistinctColumns.empty()) {
TExprNode::TListType combineColumns;
for (ui32 i : NonDistinctColumns) {
.Add(0, InitialColumnNames[i])
.Add(1, AggregatedColumns->Child(i)->ChildPtr(1))
auto combine = Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, KeyColumns)
.Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
.Add(3, cleanOutputSettings)
unionAllInputs.push_back(GenerateJustOverStates(combine, NonDistinctColumns));
// One input per distinct field.
for (ui32 index = 0; index < DistinctFields.size(); ++index) {
auto distinctField = DistinctFields[index];
auto& indicies = Distinct2Columns[distinctField->Content()];
TExprNode::TListType allKeyColumns = KeyColumns->ChildrenList();
auto distinct = Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, Ctx.NewList(Node->Pos(), std::move(allKeyColumns)))
.Add(3, cleanOutputSettings)
TExprNode::TListType combineColumns;
for (ui32 i : indicies) {
// The trait is rebuilt so that its input is the distinct field read from a row.
auto trait = AggregatedColumns->Child(i)->ChildPtr(1);
bool isAggApply = trait->IsCallable("AggApply");
if (isAggApply) {
trait = Ctx.Builder(Node->Pos())
.Add(0, trait->ChildPtr(0))
.Callable(1, "StructType")
.Add(0, distinctField)
.Add(1, trait->ChildPtr(1))
.Arg(0, "row")
.Add(1, distinctField)
} else {
// The init lambda takes either one argument or two (the second one is named "parent").
TExprNode::TPtr newInit;
if (trait->ChildPtr(1)->Head().ChildrenSize() == 1) {
newInit = Ctx.Builder(Node->Pos())
.Arg(0, "row")
.Add(1, distinctField)
} else {
newInit = Ctx.Builder(Node->Pos())
.Arg(0, "row")
.Add(1, distinctField)
.With(1, "parent")
// The update lambda takes either two arguments or three (the third one is named "parent").
TExprNode::TPtr newUpdate;
if (trait->ChildPtr(2)->Head().ChildrenSize() == 2) {
newUpdate = Ctx.Builder(Node->Pos())
.Arg(0, "row")
.Add(1, distinctField)
.With(1, "state")
} else {
newUpdate = Ctx.Builder(Node->Pos())
.Arg(0, "row")
.Add(1, distinctField)
.With(1, "state")
.With(2, "parent")
// Children 3..7 of the original trait are reused unchanged.
trait = Ctx.Builder(Node->Pos())
.Callable(0, "StructType")
.Add(0, distinctField)
.Add(1, trait->ChildPtr(0))
.Add(1, newInit)
.Add(2, newUpdate)
.Add(3, trait->ChildPtr(3))
.Add(4, trait->ChildPtr(4))
.Add(5, trait->ChildPtr(5))
.Add(6, trait->ChildPtr(6))
.Add(7, trait->ChildPtr(7))
.Add(0, InitialColumnNames[i])
.Add(1, trait)
auto combine = Ctx.Builder(Node->Pos())
.Add(0, distinct)
.Add(1, KeyColumns)
.Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
.Add(3, cleanOutputSettings)
unionAllInputs.push_back(GenerateJustOverStates(combine, indicies));
// With blocks enabled every union input is tagged with its own index in the
// "_yql_group_stream_index" column (a Uint32).
if (UseBlocks) {
for (ui32 i = 0; i < unionAllInputs.size(); ++i) {
unionAllInputs[i] = Ctx.Builder(Node->Pos())
.Add(0, unionAllInputs[i])
.Arg(0, "row")
.Atom(1, "_yql_group_stream_index")
.Callable(2, "Uint32")
.Atom(0, ToString(i))
auto settings = cleanOutputSettings;
// With blocks enabled the final stage also gets the "many_streams" setting built from `streams`.
if (UseBlocks) {
settings = AddSetting(*settings, Node->Pos(), "many_streams", Ctx.NewList(Node->Pos(), std::move(streams)), Ctx);
auto unionAll = Ctx.NewCallable(Node->Pos(), "UnionAll", std::move(unionAllInputs));
auto mergeManyFinalize = Ctx.Builder(Node->Pos())
.Add(0, unionAll)
.Add(1, KeyColumns)
.Add(2, Ctx.NewList(Node->Pos(), std::move(finalizeColumns)))
.Add(3, settings)
return mergeManyFinalize;
// Tries to build a block-based combine stage.
// Returns nullptr when a session setting or a distinct column is present, or when
// any aggregated column uses a trait other than AggApply; otherwise delegates to
// TryGenerateBlockCombineAllOrHashed.
TExprNode::TPtr TAggregateExpander::TryGenerateBlockCombine() {
if (HaveSessionSetting || HaveDistinct) {
return nullptr;
for (const auto& x : AggregatedColumns->Children()) {
// Child 1 of an aggregated column is its trait.
auto trait = x->ChildPtr(1);
if (!trait->IsCallable("AggApply")) {
return nullptr;
return TryGenerateBlockCombineAllOrHashed();
// Tries to build a block-based merge-finalize stage.
// Returns nullptr when partitions-by-keys is used, blocks are disabled, a session
// setting or a distinct column is present, or any aggregated column uses a trait
// other than AggApplyState / AggApplyManyState; otherwise delegates to
// TryGenerateBlockMergeFinalizeHashed.
TExprNode::TPtr TAggregateExpander::TryGenerateBlockMergeFinalize() {
if (UsePartitionsByKeys || !UseBlocks) {
return nullptr;
if (HaveSessionSetting || HaveDistinct) {
return nullptr;
for (const auto& x : AggregatedColumns->Children()) {
// Child 1 of an aggregated column is its trait.
auto trait = x->ChildPtr(1);
if (!trait->IsCallable({ "AggApplyState", "AggApplyManyState" })) {
return nullptr;
return TryGenerateBlockMergeFinalizeHashed();
// Builds the hashed block merge-finalize stage.
// Returns nullptr when no Arrow resolver is configured, when there are no key
// columns, or when MakeInputBlocks cannot produce block input.
// For the "MergeManyFinalize" suffix the BlockMergeManyFinalizeHashed callable is
// used (it additionally receives the stream index column and the "many_streams"
// setting); otherwise BlockMergeFinalizeHashed is used.
TExprNode::TPtr TAggregateExpander::TryGenerateBlockMergeFinalizeHashed() {
if (!TypesCtx.ArrowResolver) {
return nullptr;
if (KeyColumns->ChildrenSize() == 0) {
return nullptr;
bool isMany = Suffix == "MergeManyFinalize";
auto streamArg = Ctx.NewArgument(Node->Pos(), "stream");
// Filled by MakeInputBlocks below.
TExprNode::TListType keyIdxs;
TVector<TString> outputColumns;
TExprNode::TListType aggs;
ui32 streamIdxColumn;
auto blocks = MakeInputBlocks(streamArg, keyIdxs, outputColumns, aggs, true, isMany, &streamIdxColumn);
if (!blocks) {
return nullptr;
TExprNode::TPtr aggBlocks;
if (!isMany) {
aggBlocks = Ctx.Builder(Node->Pos())
.Callable(0, "BlockMergeFinalizeHashed")
.Callable(0, "FromFlow")
.Add(0, blocks)
.Add(1, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
.Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
} else {
// The many-streams variant requires the "many_streams" setting on the node.
auto manyStreamsSetting = GetSetting(*Node->Child(3), "many_streams");
YQL_ENSURE(manyStreamsSetting, "Missing many_streams setting");
aggBlocks = Ctx.Builder(Node->Pos())
.Callable(0, "BlockMergeManyFinalizeHashed")
.Callable(0, "FromFlow")
.Add(0, blocks)
.Add(1, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
.Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
.Atom(3, ToString(streamIdxColumn))
.Add(4, manyStreamsSetting->TailPtr())
// Convert the aggregated blocks back (WideFromBlocks) and narrow them to the output columns.
auto aggWideFlow = Ctx.Builder(Node->Pos())
.Callable(0, "WideFromBlocks")
.Callable(0, "FromFlow")
.Add(0, aggBlocks)
auto finalFlow = MakeNarrowMap(Node->Pos(), outputColumns, aggWideFlow, Ctx);
auto root = Ctx.NewCallable(Node->Pos(), "FromFlow", { finalFlow });
// Lambda over the "stream" argument that was used to build the blocks.
auto lambdaStream = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), { streamArg }), std::move(root));
auto keySelector = BuildKeySelector(Node->Pos(), *OriginalRowType, KeyColumns, Ctx);
return Ctx.Builder(Node->Pos())
.Add(0, AggList)
.Add(1, keySelector)
.With(0, "stream")
// Peephole entry point for aggregate expansion.
// For a node matching TCoAggregate, CountAggregateRewrite is tried first and its
// result is returned when it differs from the input node. The fallback is
// ExpandAggregatePeepholeImpl, called with the block-engine flag taken from `typesCtx`.
TExprNode::TPtr ExpandAggregatePeephole(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) {
if (NNodes::TCoAggregate::Match(node.Get())) {
NNodes::TCoAggregate self(node);
auto ret = TAggregateExpander::CountAggregateRewrite(self, ctx, typesCtx.IsBlockEngineEnabled());
// A result different from the input node means the rewrite was applied.
if (ret != node) {
YQL_CLOG(DEBUG, Core) << "CountAggregateRewrite on peephole";
return ret;
return ExpandAggregatePeepholeImpl(node, ctx, typesCtx, false, typesCtx.IsBlockEngineEnabled(), false);
} // namespace NYql