aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/PreparedSets.h
blob: b953b8470e1b4185e9db3b92daf1eea918ebec04 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#pragma once

#include <Parsers/IAST.h>
#include <DataTypes/IDataType.h>
#include <memory>
#include <unordered_map>
#include <vector>
#include <future>
#include <Storages/IStorage_fwd.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/SetKeys.h>

namespace DB
{

class QueryPlan;

class Set;
using SetPtr = std::shared_ptr<Set>;
struct SetKeyColumns;

class IQueryTreeNode;
using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;

struct Settings;

/// This is a structure for prepared sets cache.
/// SetPtr can be taken from cache, so we should pass holder for it.
struct SetAndKey
{
    String key;
    SetPtr set;
};

using SetAndKeyPtr = std::shared_ptr<SetAndKey>;

/// Represents a set in a query that might be referenced at analysis time and built later during execution.
/// Also it can represent a constant set that is ready to use.
/// At analysis stage the FutureSets are created but not necessarily filled. Then for non-constant sets there
/// must be an explicit step to build them before they can be used.
/// Set may be useful for indexes, in this case special ordered set with stored elements is build inplace.
class FutureSet
{
public:
    virtual ~FutureSet() = default;

    /// Returns set if set is ready (created and filled) or nullptr if not.
    virtual SetPtr get() const = 0;
    /// Returns set->getElementsTypes(), even if set is not created yet.
    virtual const DataTypes & getTypes() const = 0;
    /// If possible, return set with stored elements useful for PK analysis.
    virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0;
};

using FutureSetPtr = std::shared_ptr<FutureSet>;

/// Future set from already filled set.
/// Usually it is from StorageSet.
class FutureSetFromStorage final : public FutureSet
{
public:
    FutureSetFromStorage(SetPtr set_);

    SetPtr get() const override;
    const DataTypes & getTypes() const override;
    SetPtr buildOrderedSetInplace(const ContextPtr &) override;

private:
    SetPtr set;
};

/// Set from tuple is filled as well as set from storage.
/// Additionally, it can be converted to set useful for PK.
class FutureSetFromTuple final : public FutureSet
{
public:
    FutureSetFromTuple(Block block, const Settings & settings);

    SetPtr get() const override { return set; }
    SetPtr buildOrderedSetInplace(const ContextPtr & context) override;

    const DataTypes & getTypes() const override;

private:
    SetPtr set;
    SetKeyColumns set_key_columns;
};

/// Set from subquery can be built inplace for PK or in CreatingSet step.
/// If use_index_for_in_with_subqueries_max_values is reached, set for PK won't be created,
/// but ordinary set would be created instead.
class FutureSetFromSubquery final : public FutureSet
{
public:
    FutureSetFromSubquery(
        String key,
        std::unique_ptr<QueryPlan> source_,
        StoragePtr external_table_,
        FutureSetPtr external_table_set_,
        const Settings & settings);

    FutureSetFromSubquery(
        String key,
        QueryTreeNodePtr query_tree_,
        const Settings & settings);

    SetPtr get() const override;
    const DataTypes & getTypes() const override;
    SetPtr buildOrderedSetInplace(const ContextPtr & context) override;

    std::unique_ptr<QueryPlan> build(const ContextPtr & context);

    QueryTreeNodePtr detachQueryTree() { return std::move(query_tree); }
    void setQueryPlan(std::unique_ptr<QueryPlan> source_);

private:
    SetAndKeyPtr set_and_key;
    StoragePtr external_table;
    FutureSetPtr external_table_set;

    std::unique_ptr<QueryPlan> source;
    QueryTreeNodePtr query_tree;
};

/// Container for all the sets used in query.
class PreparedSets
{
public:

    using Hash = CityHash_v1_0_2::uint128;
    struct Hashing
    {
        UInt64 operator()(const Hash & key) const { return key.low64 ^ key.high64; }
    };

    using SetsFromTuple = std::unordered_map<Hash, std::vector<std::shared_ptr<FutureSetFromTuple>>, Hashing>;
    using SetsFromStorage = std::unordered_map<Hash, std::shared_ptr<FutureSetFromStorage>, Hashing>;
    using SetsFromSubqueries = std::unordered_map<Hash, std::shared_ptr<FutureSetFromSubquery>, Hashing>;

    FutureSetPtr addFromStorage(const Hash & key, SetPtr set_);
    FutureSetPtr addFromTuple(const Hash & key, Block block, const Settings & settings);

    FutureSetPtr addFromSubquery(
        const Hash & key,
        std::unique_ptr<QueryPlan> source,
        StoragePtr external_table,
        FutureSetPtr external_table_set,
        const Settings & settings);

    FutureSetPtr addFromSubquery(
        const Hash & key,
        QueryTreeNodePtr query_tree,
        const Settings & settings);

    FutureSetPtr findTuple(const Hash & key, const DataTypes & types) const;
    std::shared_ptr<FutureSetFromStorage> findStorage(const Hash & key) const;
    std::shared_ptr<FutureSetFromSubquery> findSubquery(const Hash & key) const;

    using Subqueries = std::vector<std::shared_ptr<FutureSetFromSubquery>>;
    Subqueries getSubqueries();

    const SetsFromTuple & getSetsFromTuple() const { return sets_from_tuple; }
    // const SetsFromStorage & getSetsFromStorage() const { return sets_from_storage; }
    // const SetsFromSubqueries & getSetsFromSubquery() const { return sets_from_subqueries; }

    static String toString(const Hash & key, const DataTypes & types);

private:
    SetsFromTuple sets_from_tuple;
    SetsFromStorage sets_from_storage;
    SetsFromSubqueries sets_from_subqueries;
};

using PreparedSetsPtr = std::shared_ptr<PreparedSets>;

/// A reference to a set that is being built by another task.
/// The difference from FutureSet is that this object can be used to wait for the set to be built in another thread.
using SharedSet = std::shared_future<SetPtr>;

/// This set cache is used to avoid building the same set multiple times. It is different from PreparedSets in way that
/// it can be used across multiple queries. One use case is when we execute the same mutation on multiple parts. In this
/// case each part is processed by a separate mutation task but they can share the same set.
class PreparedSetsCache
{
public:
    /// Lookup for set in the cache.
    /// If it is found, get the future to be able to wait for the set to be built.
    /// Otherwise create a promise, build the set and set the promise value.
    std::variant<std::promise<SetPtr>, SharedSet> findOrPromiseToBuild(const String & key);

private:
    struct Entry
    {
        SharedSet future; /// Other tasks can wait for the set to be built.
    };

    std::mutex cache_mutex;
    std::unordered_map<String, Entry> cache;
};

using PreparedSetsCachePtr = std::shared_ptr<PreparedSetsCache>;

}