1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
#pragma once
#include "clickhouse_config.h"
#if USE_ARROW || USE_ORC || USE_PARQUET
#include <DataTypes/IDataType.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/Block.h>
#error #include <arrow/table.h>
namespace DB
{
class Block;
class Chunk;
class ArrowColumnToCHColumn
{
public:
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::ChunkedArray>>;
ArrowColumnToCHColumn(
const Block & header_,
const std::string & format_name_,
bool allow_missing_columns_,
bool null_as_default_,
bool case_insensitive_matching_ = false);
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr);
void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr);
/// Transform arrow schema to ClickHouse header. If hint_header is provided,
/// we will skip columns in schema that are not in hint_header.
static Block arrowSchemaToCHHeader(
const arrow::Schema & schema,
const std::string & format_name,
bool skip_columns_with_unsupported_types = false,
const Block * hint_header = nullptr,
bool ignore_case = false);
struct DictionaryInfo
{
std::shared_ptr<ColumnWithTypeAndName> values;
Int64 default_value_index = -1;
UInt64 dictionary_size;
};
private:
const Block & header;
const std::string format_name;
/// If false, throw exception if some columns in header not exists in arrow table.
bool allow_missing_columns;
bool null_as_default;
bool case_insensitive_matching;
/// Map {column name : dictionary column}.
/// To avoid converting dictionary from Arrow Dictionary
/// to LowCardinality every chunk we save it and reuse.
std::unordered_map<std::string, DictionaryInfo> dictionary_infos;
};
}
#endif
|