aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/yql_join.h
blob: e4fe1e985b027e5625f867c0b37ce782277bfbfb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#pragma once
#include <yql/essentials/ast/yql_expr.h>
#include <yql/essentials/core/expr_nodes/yql_expr_nodes.h>
#include <yql/essentials/core/yql_graph_transformer.h>
#include <yql/essentials/core/yql_cost_function.h>

#include <util/generic/set.h>
#include <util/generic/vector.h>
#include <util/generic/hash.h>
#include <util/generic/strbuf.h>

namespace NYql {

// Builds a qualified column name by joining `table` and `column` with a single "."
// between them, i.e. "<table>.<column>". The result is an owning TString.
inline TString FullColumnName(const TStringBuf& table, const TStringBuf& column) {
    TString qualified = TString::Join(table, ".", column);
    return qualified;
}

inline void SplitTableName(const TStringBuf& fullName, TStringBuf& table, TStringBuf& column) {
    auto pos = fullName.find('.');
    Y_ENSURE(pos != TString::npos, "Expected full column name: " << fullName);
    table = fullName.substr(0, pos);
    column = fullName.substr(pos + 1);
}

// Label data for a single join input. Only declarations are visible in this header; the
// member functions are defined elsewhere, so notes that rely on naming are marked as such.
struct TJoinLabel {
    // NOTE(review): presumably initializes this label from `node`, `structType` and the
    // optional constraint pointers, returning an issue on failure and an empty TMaybe on
    // success — confirm against the definition.
    TMaybe<TIssue> Parse(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique, const TDistinctConstraintNode* distinct);
    // NOTE(review): presumably checks a single label atom and returns an issue when it is
    // not acceptable — confirm against the definition.
    TMaybe<TIssue> ValidateLabel(TExprContext& ctx, const NNodes::TCoAtom& label);

    // Name helpers (all const). The ones returning TString / TVector<TString> hand back
    // owning strings; the ones returning TStringBuf hand back a view.
    // NOTE(review): which storage the returned views refer to is not visible here — verify
    // before keeping them beyond the lifetime of the argument or of this label.
    TString FullName(const TStringBuf& column) const;
    TVector<TString> AllNames(const TStringBuf& column) const;
    TStringBuf ColumnName(const TStringBuf& column) const;
    TStringBuf TableName(const TStringBuf& column) const;

    // Lookup helpers (all const). FindColumn returns a TMaybe holding a type pointer.
    // NOTE(review): presumably an empty TMaybe means "no such column" — confirm.
    bool HasTable(const TStringBuf& table) const;
    TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& table, const TStringBuf& column) const;
    TString MemberName(const TStringBuf& table, const TStringBuf& column) const;

    // Enumeration helpers returning freshly built vectors of owning strings.
    // NOTE(review): the difference between "columns" and "members" is defined in the .cpp.
    TVector<TString> EnumerateAllColumns() const;
    TVector<TString> EnumerateAllMembers() const;

    bool AddLabel = false;
    // Initialized to nullptr so that a default-constructed label never carries an
    // indeterminate pointer (the other pointer members below are initialized the same way).
    const TStructExprType* InputType = nullptr;
    // Stored as TStringBuf views: the character data they refer to is owned elsewhere and
    // must outlive this label.
    TVector<TStringBuf> Tables;
    const TUniqueConstraintNode* Unique = nullptr;
    const TDistinctConstraintNode* Distinct = nullptr;
};

// Collection of TJoinLabel entries (`Inputs`) together with a table-name -> index map
// (`InputByTable`). Member functions are only declared here and defined elsewhere.
struct TJoinLabels {
    // Takes the same arguments as TJoinLabel::Parse, with `unique` / `distinct` defaulting to nullptr.
    // NOTE(review): presumably appends a new entry to `Inputs` and registers its tables in
    // `InputByTable`, returning an issue on failure — confirm against the definition.
    TMaybe<TIssue> Add(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique = nullptr, const TDistinctConstraintNode* distinct = nullptr);
    // Const lookups by table name; each returns a TMaybe.
    // NOTE(review): presumably an empty TMaybe means the table is unknown — confirm.
    TMaybe<const TJoinLabel*> FindInput(const TStringBuf& table) const;
    TMaybe<ui32> FindInputIndex(const TStringBuf& table) const;
    // Column type lookup, either by separate table/column parts or by a single full name.
    // NOTE(review): `fullName` presumably uses the "<table>.<column>" form — confirm.
    TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& table, const TStringBuf& column) const;
    TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& fullName) const;
    // Returns a freshly built vector of owning strings for the given table.
    TVector<TString> EnumerateColumns(const TStringBuf& table) const;

    TVector<TJoinLabel> Inputs;
    // Keys are TStringBuf views, so the character data they refer to must outlive this map.
    // NOTE(review): values are presumably indices into `Inputs` — confirm.
    THashMap<TStringBuf, ui32> InputByTable;
};

// Plain options record for an equi-join. All boolean flags default to false and both
// containers start empty. The containers hold TStringBuf views, so the character data
// they refer to is owned elsewhere and must outlive this object.
struct TJoinOptions {
    // Maps one name to a list of names.
    // NOTE(review): presumably source column -> target column names — confirm.
    THashMap<TStringBuf, TVector<TStringBuf>> RenameMap;
    // Set of column-name lists.
    // NOTE(review): presumably each entry is one preferred sort order — confirm.
    TSet<TVector<TStringBuf>> PreferredSortSets;

    // NOTE(review): the exact effect of each flag is defined by the code that consumes
    // TJoinOptions and is not visible in this header.
    bool Flatten = false;
    bool StrictKeys = false;
    bool Compact = false;
};

// Declaration only; defined elsewhere.
// Takes the join options node and a TJoinOptions passed by non-const reference, and
// returns a graph-transformer status.
// NOTE(review): presumably validates `optionsNode`, fills `options` from it and reports
// problems through `ctx` at `positionHandle` — confirm against the definition.
IGraphTransformer::TStatus ValidateEquiJoinOptions(
    TPositionHandle positionHandle,
    TExprNode& optionsNode,
    TJoinOptions& options,
    TExprContext& ctx
);

// Declaration only; defined elsewhere.
// `resultType` is a reference to a pointer, i.e. an output slot the callee can set.
// `labels` and `options` are read-only inputs; `joins` is passed by non-const reference.
// NOTE(review): presumably computes the struct type produced by the join tree `joins`
// and stores it in `resultType` — confirm against the definition.
IGraphTransformer::TStatus EquiJoinAnnotation(
    TPositionHandle position,
    const TStructExprType*& resultType,
    const TJoinLabels& labels,
    TExprNode& joins,
    const TJoinOptions& options,
    TExprContext& ctx
);

// Declaration only; defined elsewhere.
// `unique` and `distinct` are references to pointers, i.e. output slots the callee can set.
// `labels` is a read-only input; `joins` is passed by non-const reference.
// NOTE(review): presumably derives the unique/distinct constraints of the join result
// from the per-input constraints stored in `labels` — confirm against the definition.
IGraphTransformer::TStatus EquiJoinConstraints(
    TPositionHandle positionHandle,
    const TUniqueConstraintNode*& unique,
    const TDistinctConstraintNode*& distinct,
    const TJoinLabels& labels,
    TExprNode& joins,
    TExprContext& ctx
);

// Declaration only. Returns a map from a TStringBuf to a set of TStringBuf built from `joinTree`.
// NOTE(review): judging by the name, keys are labels and values are the join key columns
// used with that label — confirm. The result holds TStringBuf views, so whatever owns the
// character data (presumably `joinTree`) must outlive it — verify.
THashMap<TStringBuf, THashSet<TStringBuf>> CollectEquiJoinKeyColumnsByLabel(const TExprNode& joinTree);

// Declarations only. Predicates over a join type given as a string.
// NOTE(review): presumably report whether the left / right side of a join of kind
// `joinType` can produce optional (missing) values; the accepted join type names are
// not visible in this header.
bool IsLeftJoinSideOptional(const TStringBuf& joinType);
bool IsRightJoinSideOptional(const TStringBuf& joinType);

// Declaration only. Returns an expression node; `input`, `label` and `optionalKeyColumns`
// are read-only inputs.
// NOTE(review): presumably wraps `input` in a filter that drops rows where any of
// `optionalKeyColumns` is null, resolving names through `label` — confirm.
TExprNode::TPtr FilterOutNullJoinColumns(TPositionHandle pos, const TExprNode::TPtr& input,
    const TJoinLabel& label, const TSet<TString>& optionalKeyColumns, TExprContext& ctx);

// Declarations only. Helpers that read join settings into containers and build a lambda
// from a rename map.
// The returned containers hold TStringBuf views, so the character data they refer to is
// owned elsewhere (presumably by `settings` — verify) and must outlive the result.
// NOTE(review): LoadJoinRenameMap / LoadJoinSortSets presumably extract the rename and
// sort-set entries from `settings`; BuildJoinRenameLambda presumably produces a lambda
// that applies `renameMap` to a row of `joinResultType` — confirm against the definitions.
TMap<TStringBuf, TVector<TStringBuf>> LoadJoinRenameMap(const TExprNode& settings);
NNodes::TCoLambda BuildJoinRenameLambda(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap,
    const TStructExprType& joinResultType, TExprContext& ctx);
TSet<TVector<TStringBuf>> LoadJoinSortSets(const TExprNode& settings);

// Declarations only. Both overloads return a map from an owning column name to a type
// pointer, computed from the join tree `joins` and the input `labels`.
// NOTE(review): keys are presumably full "<table>.<column>" names — confirm.
THashMap<TString, const TTypeAnnotationNode*> GetJoinColumnTypes(const TExprNode& joins,
    const TJoinLabels& labels, TExprContext& ctx);

// Overload that additionally takes an explicit `joinType`.
// NOTE(review): how `joinType` relates to the join kind stored in `joins` is not visible here.
THashMap<TString, const TTypeAnnotationNode*> GetJoinColumnTypes(const TExprNode& joins,
    const TJoinLabels& labels, const TStringBuf& joinType, TExprContext& ctx);

// Declaration only. Takes a join tree and two (table, column) pairs.
// NOTE(review): presumably returns true when the two columns are equated as join keys
// somewhere in `joins` — confirm against the definition.
bool AreSameJoinKeys(const TExprNode& joins, const TStringBuf& table1, const TStringBuf& column1, const TStringBuf& table2, const TStringBuf& column2);
// Returns a pair for the input with index `inputIndex`:
//   first  - whether that input is a required side,
//   second - whether skipping nulls is allowed.
std::pair<bool, bool> IsRequiredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex);

// Declaration only. Takes the same arguments as IsRequiredSide and returns a TMaybe<bool>.
// NOTE(review): what an empty TMaybe means (as opposed to false) is not visible here —
// check the definition before treating "empty" and "false" alike.
TMaybe<bool> IsFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex);

// Declaration only. `joinSettingNodes` is passed by non-const reference; `newRenameMap` is read-only.
// NOTE(review): presumably appends setting nodes that encode `newRenameMap` to
// `joinSettingNodes` — confirm against the definition.
void AppendEquiJoinRenameMap(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& newRenameMap,
    TExprNode::TListType& joinSettingNodes, TExprContext& ctx);

// Declaration only. `joinSettingNodes` is passed by non-const reference; `newSortSets` is read-only.
// NOTE(review): presumably appends setting nodes that encode `newSortSets` to
// `joinSettingNodes` — confirm against the definition.
void AppendEquiJoinSortSets(TPositionHandle pos, const TSet<TVector<TStringBuf>>& newSortSets,
    TExprNode::TListType& joinSettingNodes, TExprContext& ctx);

// Declaration only. Returns a new rename map by value; all three arguments are read-only.
// NOTE(review): presumably restricts `renameMap` to the entries relevant for `usedFields`,
// consulting `structType` for the available members — confirm against the definition.
// The result holds TStringBuf views, so the referenced character data must outlive it.
TMap<TStringBuf, TVector<TStringBuf>> UpdateUsedFieldsInRenameMap(
    const TMap<TStringBuf, TVector<TStringBuf>>& renameMap,
    const TSet<TStringBuf>& usedFields,
    const TStructExprType* structType
);


// Small record tying together a node pointer, an index and an "extracted members" node
// pointer. The constructor stores its three arguments unchanged; the struct declares no
// destructor, so it does not release the pointed-to nodes.
struct TEquiJoinParent {
    const TExprNode* Node;
    ui32 Index;
    const TExprNode* ExtractedMembers;

    // Stores the given values in the fields of the same name.
    TEquiJoinParent(const TExprNode* node, ui32 index, const TExprNode* extractedMembers)
        : Node{node}
        , Index{index}
        , ExtractedMembers{extractedMembers}
    {}
};

// Declaration only. Returns a vector of TEquiJoinParent records for `flatMap`, using `parents`.
// NOTE(review): judging by the name, the result is presumably meaningful only when every
// parent of `flatMap` is an EquiJoin; what is returned otherwise is not visible here — confirm.
TVector<TEquiJoinParent> CollectEquiJoinOnlyParents(const NNodes::TCoFlatMapBase& flatMap, const TParentsMap& parents);

// Settings attached to a single join link. Defaults: empty hint sets, JoinAlgo set to
// EJoinAlgoType::Undefined, and both boolean flags false.
struct TEquiJoinLinkSettings {
    TPositionHandle Pos;
    // Hint sets for the left and right side of the link, stored as owning strings.
    // NOTE(review): the set of recognised hint names is not visible in this header.
    TSet<TString> LeftHints;
    TSet<TString> RightHints;
    EJoinAlgoType JoinAlgo = EJoinAlgoType::Undefined;
    // A JOIN implementation may ignore this flag if the SortedMerge strategy is not supported.
    bool ForceSortedMerge = false;
    bool Compact = false;
};

// Declarations only. Conversions between an expression node and TEquiJoinLinkSettings.
// NOTE(review): presumably the two functions are inverses of each other (parse a link
// settings node / build one back) — confirm against the definitions.
TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings);
TExprNode::TPtr BuildEquiJoinLinkSettings(const TEquiJoinLinkSettings& linkSettings, TExprContext& ctx);

// Declaration only. Returns an expression node built from `memberValue`, given the
// member's own type and a unified type.
// NOTE(review): presumably produces an expression that brings `memberValue` from
// `memberType` to `unifiedType` for cases a plain conversion does not cover — confirm.
TExprNode::TPtr RemapNonConvertibleMemberForJoin(TPositionHandle pos, const TExprNode::TPtr& memberValue,
    const TTypeAnnotationNode& memberType, const TTypeAnnotationNode& unifiedType, TExprContext& ctx);

// Declarations only. Two overloads; the second additionally takes `payloads` by rvalue
// reference (so the caller's list may be moved from). `keys` is passed by non-const
// reference in both, so the callee may modify it.
// NOTE(review): the meaning of the `payload`, `optional` and `filter` flags is defined in
// the .cpp and is not visible in this header.
TExprNode::TPtr PrepareListForJoin(TExprNode::TPtr list, const TTypeAnnotationNode::TListType& keyTypes, TExprNode::TListType& keys, bool payload, bool optional, bool filter, TExprContext& ctx);
TExprNode::TPtr PrepareListForJoin(TExprNode::TPtr list, const TTypeAnnotationNode::TListType& keyTypes, TExprNode::TListType& keys, TExprNode::TListType&& payloads, bool payload, bool optional, bool filter, TExprContext& ctx);

// Function template declaration with a bool parameter `Squeeze` (default false). The
// definition is not in this header, so callers depend on instantiations provided by the
// defining translation unit.
// NOTE(review): confirm which values of Squeeze are explicitly instantiated there.
// `list` is taken by rvalue reference, so the caller's pointer may be moved from.
template<bool Squeeze = false>
TExprNode::TPtr MakeDictForJoin(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx);

// Declaration only. Returns an expression node built from `left` and `right`.
// NOTE(review): presumably an expression computing the cross join of the two inputs — confirm.
TExprNode::TPtr MakeCrossJoin(TPositionHandle pos, TExprNode::TPtr left, TExprNode::TPtr right, TExprContext& ctx);

// Declarations only.
// GatherAndTerms writes through two non-const references: `andTerms` and `isPg`.
// NOTE(review): presumably splits `predicate` into its AND-ed terms and sets `isPg` when
// the predicate uses PG-flavoured operators — confirm against the definition.
// FuseAndTerms takes such a term list plus an `exclude` node and returns one expression.
// NOTE(review): presumably re-combines all terms except `exclude` — confirm.
void GatherAndTerms(const TExprNode::TPtr& predicate, TExprNode::TListType& andTerms, bool& isPg, TExprContext& ctx);
TExprNode::TPtr FuseAndTerms(TPositionHandle position, const TExprNode::TListType& andTerms, const TExprNode::TPtr& exclude, bool isPg, TExprContext& ctx);

// Declaration only. `left` and `right` are output references.
// NOTE(review): presumably returns true when `predicate` is an equality comparison and
// then stores its two operands in `left` / `right`; whether the outputs are touched on a
// false result is not visible here — confirm.
bool IsEquality(TExprNode::TPtr predicate, TExprNode::TPtr& left, TExprNode::TPtr& right);

// Declaration only. Writes through two non-const references: `inputs` (set of ui32) and
// `usedFields` (set of TStringBuf views — the referenced character data must outlive the set).
// NOTE(review): presumably walks `expr`, finds accesses to `row`, maps them back through
// `backRenameMap` and `labels`, and records the touched input indices and field names —
// confirm against the definition.
void GatherJoinInputs(const TExprNode::TPtr& expr, const TExprNode& row,
    const TParentsMap& parentsMap, const THashMap<TString, TString>& backRenameMap,
    const TJoinLabels& labels, TSet<ui32>& inputs, TSet<TStringBuf>& usedFields);


}