aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Databases/TablesDependencyGraph.h
blob: e5be59d1ee9a8ece827aadca0cb5df5e91c5c40a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#pragma once

#include <Interpreters/StorageID.h>

#include <unordered_map>
#include <unordered_set>


namespace DB
{
using TableNamesSet = std::unordered_set<QualifiedTableName>;

/// Represents dependencies of some tables on other tables or dictionaries.
///
/// NOTES: A "dependent" depends on its "dependency". For example, if table "A" depends on table "B", then
/// "B" is a dependency for "A", and "A" is a dependent for "B".
///
/// Dependencies can be added to the graph in any order. For example, if table "A" depends on "B", and "B" depends on "C", then
/// it's allowed to add first "A->B" and then "B->C", or first "B->C" and then "A->B", the resulting graph will be the same.
///
/// This class is used to represent various types of table-table dependencies:
/// 1. View dependencies: "source_table -> materialized_view".
///    Data inserted to a source table is also inserted to corresponding materialized views.
/// 2. Loading dependencies: specify in which order tables must be loaded during startup.
///    For example a dictionary should be loaded after it's source table and it's written in the graph as "dictionary -> source_table".
/// 3. Referential dependencies: "table -> all tables mentioned in its definition".
///    Referential dependencies are checked to decide if it's safe to drop a table (it can be unsafe if the table is used by another table).
///
/// WARNING: This class doesn't have an embedded mutex, so it must be synchronized outside.
class TablesDependencyGraph
{
public:
    explicit TablesDependencyGraph(const String & name_for_logging_);

    TablesDependencyGraph(const TablesDependencyGraph & src);
    TablesDependencyGraph(TablesDependencyGraph && src) noexcept;
    TablesDependencyGraph & operator=(const TablesDependencyGraph & src);
    TablesDependencyGraph & operator=(TablesDependencyGraph && src) noexcept;

    /// The dependency graph is empty if doesn't contain any tables.
    bool empty() const;

    /// Clears this dependency graph.
    void clear();

    /// Adds a single dependency "table_id" on "dependency".
    void addDependency(const StorageID & table_id, const StorageID & dependency);

    /// Adds a table with specified dependencies if there are no dependencies of the table in the graph yet;
    /// otherwise it replaces the dependencies of the table in the graph and shows a warning.
    void addDependencies(const StorageID & table_id, const std::vector<StorageID> & dependencies);
    void addDependencies(const StorageID & table_id, const TableNamesSet & dependencies);
    void addDependencies(const QualifiedTableName & table_name, const TableNamesSet & dependencies);

    /// Removes a single dependency of "table_id" on "dependency".
    /// If "remove_isolated_tables" is set the function will also remove tables with no dependencies and no dependents
    /// from the graph.
    bool removeDependency(const StorageID & table_id, const StorageID & dependency, bool remove_isolated_tables = false);

    /// Removes all dependencies of "table_id", returns those dependencies.
    std::vector<StorageID> removeDependencies(const StorageID & table_id, bool remove_isolated_tables = false);

    /// Removes a table from the graph and removes all references to in from the graph (both from its dependencies and dependents).
    bool removeTable(const StorageID & table_id);

    /// Removes tables from the graph by a specified filter.
    size_t removeTablesIf(const std::function<bool(const StorageID &)> & function);

    /// Removes tables with no dependencies and no dependents from the graph.
    size_t removeIsolatedTables();

    /// Returns the number of tables in the graph.
    size_t getNumberOfTables() const;

    /// Returns a list of all tables in the graph.
    std::vector<StorageID> getTables() const;

    /// Adds tables and dependencies with another graph.
    void mergeWith(const TablesDependencyGraph & other);

    /// Returns a list of dependencies of a specified table.
    std::vector<StorageID> getDependencies(const StorageID & table_id) const;
    size_t getNumberOfDependencies(const StorageID & table_id) const;
    bool hasDependencies(const StorageID & table_id) const { return getNumberOfDependencies(table_id) != 0; }

    /// Returns a list of dependents of a specified table.
    std::vector<StorageID> getDependents(const StorageID & table_id) const;
    size_t getNumberOfDependents(const StorageID & table_id) const;
    bool hasDependents(const StorageID & table_id) const { return getNumberOfDependents(table_id) != 0; }

    /// Returns the number of dependencies and the number of dependents of a specified table.
    void getNumberOfAdjacents(const StorageID & table_id, size_t & num_dependencies, size_t & num_dependents) const;

    /// Returns true if a specified table has no dependencies and no dependents.
    bool isIsolatedTable(const StorageID & table_id) const;

    /// Checks that there are no cyclic dependencies in the graph.
    /// Cyclic dependencies are dependencies like "A->A" or "A->B->C->D->A".
    void checkNoCyclicDependencies() const;
    bool hasCyclicDependencies() const;
    String describeCyclicDependencies() const;
    std::vector<StorageID> getTablesWithCyclicDependencies() const;

    /// Returns a list of tables sorted by their dependencies:
    /// tables without dependencies first, then
    /// tables which depend on the tables without dependencies, then
    /// tables which depend on the tables which depend on the tables without dependencies, and so on.
    std::vector<StorageID> getTablesSortedByDependency() const;

    /// The same as getTablesSortedByDependency() but make a list for parallel processing.
    std::vector<std::vector<StorageID>> getTablesSortedByDependencyForParallel() const;

    /// Outputs information about this graph as a bunch of logging messages.
    void log() const;

    /// Calculates levels - this is required for checking cyclic dependencies, to sort tables by dependency, and to log the graph.
    /// This function is called automatically by the functions which need it, but can be invoked directly.
    void calculateLevels() const;

private:
    struct Node
    {
        StorageID storage_id;

        /// If A depends on B then "A.dependencies" contains "B".
        std::unordered_set<Node *> dependencies;

        /// If A depends on B then "B.dependents" contains "A".
        std::unordered_set<Node *> dependents;

        /// Tables without dependencies have level == 0, tables which depend on the tables without dependencies have level == 1, and so on.
        /// Calculated lazily.
        mutable size_t level = 0;

        /// Number of dependencies left, used only while we're calculating levels.
        mutable size_t num_dependencies_to_count = 0;

        explicit Node(const StorageID & storage_id_) : storage_id(storage_id_) {}
    };

    using NodeSharedPtr = std::shared_ptr<Node>;

    struct Hash
    {
        using is_transparent = void;
        size_t operator()(const Node * node) const { return std::hash<const Node *>{}(node); }
        size_t operator()(const NodeSharedPtr & node_ptr) const { return operator()(node_ptr.get()); }
    };

    struct Equal
    {
        using is_transparent = void;
        size_t operator()(const NodeSharedPtr & left, const Node * right) const { return left.get() == right; }
        size_t operator()(const NodeSharedPtr & left, const NodeSharedPtr & right) const { return left == right; }
    };

    std::unordered_set<NodeSharedPtr, Hash, Equal> nodes;

    /// Nodes can be found either by UUID or by database name & table name. That's why we need two maps here.
    std::unordered_map<StorageID, Node *, StorageID::DatabaseAndTableNameHash, StorageID::DatabaseAndTableNameEqual> nodes_by_database_and_table_names;
    std::unordered_map<UUID, Node *> nodes_by_uuid;

    /// Nodes sorted by their level. Calculated lazily.
    using NodesSortedByLevel = std::vector<const Node *>;
    mutable NodesSortedByLevel nodes_sorted_by_level_lazy;
    mutable bool levels_calculated = false;

    const String name_for_logging;
    mutable Poco::Logger * logger = nullptr;

    Node * findNode(const StorageID & table_id) const;
    Node * addOrUpdateNode(const StorageID & table_id);
    void removeNode(Node * node);

    static std::vector<StorageID> getDependencies(const Node & node);
    static std::vector<StorageID> getDependents(const Node & node);

    void setNeedRecalculateLevels() const;
    const NodesSortedByLevel & getNodesSortedByLevel() const;

    Poco::Logger * getLogger() const;
};

}