// Source: contrib/clickhouse/src/Backups/BackupEntriesCollector.h (blob 54d820881291cdc94993edc5f818f7a49ba5dc0a)
#pragma once

#include <Backups/BackupSettings.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTBackupQuery.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Storages/MergeTree/ZooKeeperRetries.h>
#include <filesystem>
#include <queue>


namespace DB
{

/// Forward declarations and aliases for types that this header only uses
/// through pointers, references or as template arguments.
class IBackupEntry;
/// Shared pointer to an immutable backup entry.
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
/// List of (String, entry) pairs. BackupEntriesCollector::addBackupEntry() names the String part `file_name`.
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
class IBackupCoordination;
class IDatabase;
using DatabasePtr = std::shared_ptr<IDatabase>;
struct StorageID;
/// Opaque enum declaration; the enumerators are defined elsewhere.
enum class AccessEntityType;

/// Collects backup entries for all databases and tables which should be put to a backup.
/// Instances cannot be copied (private inheritance from boost::noncopyable).
/// Only declarations live in this header; all member functions except the inline getters are defined elsewhere.
class BackupEntriesCollector : private boost::noncopyable
{
public:
    /// The parameters mirror the identically named members declared in the private section
    /// (same names without the trailing underscore).
    /// NOTE(review): how each argument is stored is decided by the constructor definition, which is not in this header.
    BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_,
                           const BackupSettings & backup_settings_,
                           std::shared_ptr<IBackupCoordination> backup_coordination_,
                           const ReadSettings & read_settings_,
                           const ContextPtr & context_);
    ~BackupEntriesCollector();

    /// Collects backup entries and returns the result.
    /// This function first generates a list of databases and then calls IDatabase::getTablesForBackup() for each database from this list.
    /// Then it calls IStorage::backupData() to build a list of backup entries.
    BackupEntries run();

    /// Read-only accessors for the values held by the collector.
    /// The settings are returned by const reference; the coordination object and the context are returned as copies of the held pointers.
    const BackupSettings & getBackupSettings() const { return backup_settings; }
    std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
    const ReadSettings & getReadSettings() const { return read_settings; }
    ContextPtr getContext() const { return context; }

    /// Adds a backup entry which will be later returned by run().
    /// These functions can be called by implementations of IStorage::backupData() in inherited storage classes.
    /// Overloads accept a single entry (as a name plus entry, or as a ready-made pair)
    /// or a whole list (by const reference or by rvalue reference).
    void addBackupEntry(const String & file_name, BackupEntryPtr backup_entry);
    void addBackupEntry(const std::pair<String, BackupEntryPtr> & backup_entry);
    void addBackupEntries(const BackupEntries & backup_entries_);
    void addBackupEntries(BackupEntries && backup_entries_);

    /// Adds a function which must be called after all IStorage::backupData() have finished their work on all hosts.
    /// This function is designed to help make a backup consistent in some complex cases, for example:
    /// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts.
    void addPostTask(std::function<void()> task);

    /// Returns an incremental counter used to backup access control.
    /// NOTE(review): presumably one counter is kept per AccessEntityType (see `access_counters`) — confirm in the definition.
    size_t getAccessCounter(AccessEntityType type);

private:
    /// Internal steps used while collecting entries; their definitions are outside this header.
    /// NOTE(review): the grouping comments below are inferred from the declarations' names only — confirm against the definitions.
    void calculateRootPathInBackup();

    /// Metadata gathering. The `inconsistency_error` out-parameter is an optional Exception
    /// that the callee may fill in; the bool result presumably reports whether the metadata was consistent.
    void gatherMetadataAndCheckConsistency();

    bool tryGatherMetadataAndCompareWithPrevious(std::optional<Exception> & inconsistency_error);

    void gatherDatabasesMetadata();

    /// Gathers metadata for one database. `table_name` and `partitions` are optional;
    /// the `throw_if_*` flags presumably select between throwing and silently skipping a missing object.
    void gatherDatabaseMetadata(
        const String & database_name,
        bool throw_if_database_not_found,
        bool backup_create_database_query,
        const std::optional<String> & table_name,
        bool throw_if_table_not_found,
        const std::optional<ASTs> & partitions,
        bool all_tables,
        const std::set<DatabaseAndTableName> & except_table_names);

    void gatherTablesMetadata();
    /// Returns (AST, storage) pairs for the given database.
    std::vector<std::pair<ASTPtr, StoragePtr>> findTablesInDatabase(const String & database_name) const;
    void lockTablesForReading();
    bool compareWithPrevious(std::optional<Exception> & inconsistency_error);

    /// Production of backup entries for database definitions, table definitions and table data.
    void makeBackupEntriesForDatabasesDefs();
    void makeBackupEntriesForTablesDefs();
    void makeBackupEntriesForTablesData();
    void makeBackupEntriesForTableData(const QualifiedTableName & table_name);

    void runPostTasks();

    /// Switches to `new_stage` with an optional `message` (empty by default) and returns a list of strings.
    /// NOTE(review): the meaning of the returned strings is not visible here — confirm in the definition.
    Strings setStage(const String & new_stage, const String & message = "");

    /// Values supplied at construction; the const ones are never reassigned afterwards.
    const ASTBackupQuery::Elements backup_query_elements;
    const BackupSettings backup_settings;
    std::shared_ptr<IBackupCoordination> backup_coordination;
    const ReadSettings read_settings;
    ContextPtr context;
    /// Timeouts, expressed in milliseconds.
    std::chrono::milliseconds on_cluster_first_sync_timeout;
    std::chrono::milliseconds consistent_metadata_snapshot_timeout;
    /// Raw logger pointer with no in-class initializer; it has to be set by the constructor.
    Poco::Logger * log;
    /// ZooKeeper may be used while collecting information for a backup,
    /// so those operations may need to be retried.
    ZooKeeperRetriesInfo global_zookeeper_retries_info;

    Strings all_hosts;
    DDLRenamingMap renaming_map;
    std::filesystem::path root_path_in_backup;

    /// Information kept per database (see `database_infos`).
    struct DatabaseInfo
    {
        DatabasePtr database;
        ASTPtr create_database_query;
        String metadata_path_in_backup;

        /// Per-table parameters stored in `tables`.
        struct TableParams
        {
            bool throw_if_table_not_found = false;
            std::optional<ASTs> partitions;
        };

        /// String-keyed table parameters — presumably keyed by table name.
        std::unordered_map<String, TableParams> tables;

        bool all_tables = false;
        std::unordered_set<String> except_table_names;
    };

    /// Information kept per table (see `table_infos`).
    struct TableInfo
    {
        DatabasePtr database;
        StoragePtr storage;
        TableLockHolder table_lock;
        ASTPtr create_table_query;
        String metadata_path_in_backup;
        std::filesystem::path data_path_in_backup;
        std::optional<String> replicated_table_shared_id;
        std::optional<ASTs> partitions;
    };

    /// Mutable state of the collection process.
    String current_stage;
    std::chrono::steady_clock::time_point consistent_metadata_snapshot_end_time;
    /// String-keyed database information — presumably keyed by database name.
    std::unordered_map<String, DatabaseInfo> database_infos;
    std::unordered_map<QualifiedTableName, TableInfo> table_infos;
    /// Metadata kept from an earlier gathering pass — presumably what compareWithPrevious() compares against.
    std::vector<std::pair<String, String>> previous_databases_metadata;
    std::vector<std::pair<QualifiedTableName, String>> previous_tables_metadata;

    /// Entries accumulated for the result, the queue of post tasks, and the access counters.
    BackupEntries backup_entries;
    std::queue<std::function<void()>> post_tasks;
    std::vector<size_t> access_counters;
};

}