aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Storages/getStructureOfRemoteTable.cpp
blob: ebd02f424fad70dac6b2e3bd93d930ff8487da5a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#include "getStructureOfRemoteTable.h"
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Storages/IStorage.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ASTFunction.h>
#include <Common/quoteString.h>
#include <Common/NetException.h>
#include <TableFunctions/TableFunctionFactory.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int NO_REMOTE_SHARD_AVAILABLE;
}


ColumnsDescription getStructureOfRemoteTableInShard(
    const Cluster & cluster,
    const Cluster::ShardInfo & shard_info,
    const StorageID & table_id,
    ContextPtr context,
    const ASTPtr & table_func_ptr)
{
    String query;

    if (table_func_ptr)
    {
        if (shard_info.isLocal())
        {
            TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_func_ptr, context);
            return table_function_ptr->getActualTableStructure(context, /*is_insert_query*/ true);
        }

        auto table_func_name = queryToString(table_func_ptr);
        query = "DESC TABLE " + table_func_name;
    }
    else
    {
        if (shard_info.isLocal())
        {
            auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, context);
            return storage_ptr->getInMemoryMetadataPtr()->getColumns();
        }

        /// Request for a table description
        query = "DESC TABLE " + table_id.getFullTableName();
    }

    ColumnsDescription res;
    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id);

    /// Ignore limit for result number of rows (that could be set during handling CSE/CTE),
    /// since this is a service query and should not lead to query failure.
    {
        Settings new_settings = new_context->getSettings();
        new_settings.max_result_rows = 0;
        new_settings.max_result_bytes = 0;
        new_context->setSettings(new_settings);
    }

    /// Expect only needed columns from the result of DESC TABLE. NOTE 'comment' column is ignored for compatibility reasons.
    Block sample_block
    {
        { ColumnString::create(), std::make_shared<DataTypeString>(), "name" },
        { ColumnString::create(), std::make_shared<DataTypeString>(), "type" },
        { ColumnString::create(), std::make_shared<DataTypeString>(), "default_type" },
        { ColumnString::create(), std::make_shared<DataTypeString>(), "default_expression" },
    };

    /// Execute remote query without restrictions (because it's not real user query, but part of implementation)
    RemoteQueryExecutor executor(shard_info.pool, query, sample_block, new_context);
    executor.setPoolMode(PoolMode::GET_ONE);
    if (!table_func_ptr)
        executor.setMainTable(table_id);

    const DataTypeFactory & data_type_factory = DataTypeFactory::instance();

    ParserExpression expr_parser;

    while (Block current = executor.readBlock())
    {
        ColumnPtr name = current.getByName("name").column;
        ColumnPtr type = current.getByName("type").column;
        ColumnPtr default_kind = current.getByName("default_type").column;
        ColumnPtr default_expr = current.getByName("default_expression").column;
        size_t size = name->size();

        for (size_t i = 0; i < size; ++i)
        {
            ColumnDescription column;

            column.name = (*name)[i].get<const String &>();

            String data_type_name = (*type)[i].get<const String &>();
            column.type = data_type_factory.get(data_type_name);

            String kind_name = (*default_kind)[i].get<const String &>();
            if (!kind_name.empty())
            {
                column.default_desc.kind = columnDefaultKindFromString(kind_name);
                String expr_str = (*default_expr)[i].get<const String &>();
                column.default_desc.expression = parseQuery(
                    expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", 0, context->getSettingsRef().max_parser_depth);
            }

            res.add(column);
        }
    }

    executor.finish();
    return res;
}

ColumnsDescription getStructureOfRemoteTable(
    const Cluster & cluster,
    const StorageID & table_id,
    ContextPtr context,
    const ASTPtr & table_func_ptr)
{
    const auto & shards_info = cluster.getShardsInfo();

    std::string fail_messages;

    /// Use local shard as first priority, as it needs no network communication
    for (const auto & shard_info : shards_info)
    {
        if (shard_info.isLocal())
        {
            const auto & res = getStructureOfRemoteTableInShard(cluster, shard_info, table_id, context, table_func_ptr);
            chassert(!res.empty());
            return res;
        }
    }

    for (const auto & shard_info : shards_info)
    {
        try
        {
            const auto & res = getStructureOfRemoteTableInShard(cluster, shard_info, table_id, context, table_func_ptr);

            /// Expect at least some columns.
            /// This is a hack to handle the empty block case returned by Connection when skip_unavailable_shards is set.
            if (res.empty())
                continue;

            return res;
        }
        catch (const NetException &)
        {
            std::string fail_message = getCurrentExceptionMessage(false);
            fail_messages += fail_message + '\n';
            continue;
        }
    }

    throw NetException(ErrorCodes::NO_REMOTE_SHARD_AVAILABLE,
        "All attempts to get table structure failed. Log: \n\n{}\n", fail_messages);
}

ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
    const Cluster & cluster,
    const StorageID & remote_table_id,
    const ColumnsDescription & storage_columns,
    ContextPtr context)
{
    const auto & shards_info = cluster.getShardsInfo();
    auto query = "DESC TABLE " + remote_table_id.getFullTableName();

    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id);
    new_context->setSetting("describe_extend_object_types", true);

    /// Expect only needed columns from the result of DESC TABLE.
    Block sample_block
    {
        { ColumnString::create(), std::make_shared<DataTypeString>(), "name" },
        { ColumnString::create(), std::make_shared<DataTypeString>(), "type" },
    };

    auto execute_query_on_shard = [&](const auto & shard_info)
    {
        /// Execute remote query without restrictions (because it's not real user query, but part of implementation)
        RemoteQueryExecutor executor(shard_info.pool, query, sample_block, new_context);

        executor.setPoolMode(PoolMode::GET_ONE);
        executor.setMainTable(remote_table_id);

        ColumnsDescription res;
        while (auto block = executor.readBlock())
        {
            const auto & name_col = *block.getByName("name").column;
            const auto & type_col = *block.getByName("type").column;

            size_t size = name_col.size();
            for (size_t i = 0; i < size; ++i)
            {
                auto name = name_col[i].get<const String &>();
                auto type_name = type_col[i].get<const String &>();

                auto storage_column = storage_columns.tryGetPhysical(name);
                if (storage_column && storage_column->type->hasDynamicSubcolumns())
                    res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name)));
            }
        }

        return res;
    };

    ColumnsDescriptionByShardNum columns;
    for (const auto & shard_info : shards_info)
    {
        auto res = execute_query_on_shard(shard_info);

        /// Expect at least some columns.
        /// This is a hack to handle the empty block case returned by Connection when skip_unavailable_shards is set.
        if (!res.empty())
            columns.emplace(shard_info.shard_num, std::move(res));
    }

    if (columns.empty())
        throw NetException(ErrorCodes::NO_REMOTE_SHARD_AVAILABLE, "All attempts to get table structure failed");

    return columns;
}

}