aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h
blob: d885cd497affd5aa3debf9964e06400683bf4733 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#pragma once

#include <Core/Block.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteBuffer.h>
#include <Processors/Formats/IRowOutputFormat.h>
#include <Formats/BSONTypes.h>

namespace DB
{

/*
 * Class for formatting data in BSON format.
 * Each row is formatted as a separate BSON document.
 * Each column is formatted as a single field with column name as a key.
 * It uses the following correspondence between ClickHouse types and BSON types:
 *
 * ClickHouse type         | BSON Type
 * Bool                    | \x08 boolean
 * Int8/UInt8/Enum8        | \x10 int32
 * Int16/UInt16/Enum16     | \x10 int32
 * Int32                   | \x10 int32
 * UInt32                  | \x12 int64
 * Int64                   | \x12 int64
 * UInt64                  | \x11 uint64
 * Float32/Float64         | \x01 double
 * Date/Date32             | \x10 int32
 * DateTime                | \x12 int64
 * DateTime64              | \x09 datetime
 * Decimal32               | \x10 int32
 * Decimal64               | \x12 int64
 * Decimal128              | \x05 binary, \x00 binary subtype, size = 16
 * Decimal256              | \x05 binary, \x00 binary subtype, size = 32
 * Int128/UInt128          | \x05 binary, \x00 binary subtype, size = 16
 * Int256/UInt256          | \x05 binary, \x00 binary subtype, size = 32
 * String/FixedString      | \x05 binary, \x00 binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled
 * UUID                    | \x05 binary, \x04 uuid subtype, size = 16
 * Array                   | \x04 array
 * Tuple                   | \x04 array
 * Named Tuple             | \x03 document
 * Map                     | \x03 document
 *
 * Note: on Big-Endian platforms this format will not work properly.
 */

class BSONEachRowRowOutputFormat final : public IRowOutputFormat
{
public:
    /// Takes the destination buffer, the header block and the format settings.
    /// The definition lives outside this header.
    BSONEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_);

    /// Name under which this output format identifies itself.
    String getName() const override { return "BSONEachRowRowOutputFormat"; }

private:
    /// Row-level hook overridden from IRowOutputFormat; receives all columns and the index of the row.
    void write(const Columns & columns, size_t row_num) override;

    /// Per-field hook overridden from IRowOutputFormat. The body is deliberately empty here.
    /// NOTE(review): presumably fields are emitted through write()/serializeField() instead - confirm in the .cpp.
    void writeField(const IColumn &, const ISerialization &, size_t) override {}

    /// Computes how many bytes a field will occupy once serialized to BSON.
    /// BSON requires the size of a document to be written before the document's content,
    /// so the size has to be known ahead of the actual serialization.
    /// NOTE(review): nested_document_sizes looks like a cache of nested document sizes keyed by path - confirm.
    size_t countBSONFieldSize(
        const IColumn & column,
        const DataTypePtr & data_type,
        size_t row_num,
        const String & name,
        const String & path,
        std::unordered_map<String, size_t> & nested_document_sizes);

    /// Serializes the value of `column` at `row_num` as a field called `name`.
    /// Takes the same arguments as countBSONFieldSize().
    void serializeField(
        const IColumn & column,
        const DataTypePtr & data_type,
        size_t row_num,
        const String & name,
        const String & path,
        std::unordered_map<String, size_t> & nested_document_sizes);

    /// Names and types of fields (populated in the .cpp, not visible from this header).
    NamesAndTypes fields;
    /// Format settings stored by value.
    FormatSettings settings;
};

}