// path: root/contrib/clickhouse/src/Processors/Formats/Impl/AvroRowOutputFormat.h
// blob: 06ee29f6d1ad94fdda984bc984d83d91d6381070
#pragma once
#include "clickhouse_config.h"
#if USE_AVRO
#include <unordered_map>

#include <Core/Block.h>
#include <Formats/FormatSchemaInfo.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteBuffer.h>
#include <Processors/Formats/IRowOutputFormat.h>

#include <DataFile.hh>
#include <Schema.hh>
#include <ValidSchema.hh>


namespace DB
{
/// Forward declaration. NOTE(review): <IO/WriteBuffer.h> is also included above,
/// so this is presumably redundant - confirm before removing.
class WriteBuffer;

/// Forward declaration only: this header does not define the type and refers to it
/// solely through std::unique_ptr<AvroSerializerTraits>.
class AvroSerializerTraits;

/// Keeps a validated Avro schema next to a list of serialization callbacks.
/// Only getSchema() is implemented in this header; every other member function
/// is merely declared here.
class AvroSerializer
{
public:
    /// Callback signature: receives a column, a row index and the Avro encoder to write into.
    using SerializeFn = std::function<void(const IColumn & column, size_t row_num, avro::Encoder & encoder)>;

    /// An Avro schema bundled with a SerializeFn callback.
    struct SchemaWithSerializeFn
    {
        avro::Schema schema;
        SerializeFn serialize;
    };

    /// NOTE(review): `settings_` is presumably what the `settings` reference member is bound to;
    /// if so, the referenced FormatSettings must outlive this object - confirm in the definition.
    AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr<AvroSerializerTraits> traits_, const FormatSettings & settings_);

    /// Read-only access to the stored schema.
    const avro::ValidSchema & getSchema() const
    {
        return valid_schema;
    }

    /// Presumably writes row `row_num` of `columns` into `encoder` via the stored callbacks
    /// (the definition is not in this header - verify there).
    void serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder);

private:
    /// Complex types (e.g. enums, fixed strings) need unique type names;
    /// `type_name_increment` is a simple running number used to keep those names distinct.
    SchemaWithSerializeFn createSchemaWithSerializeFn(const DataTypePtr & data_type, size_t & type_name_increment, const String & column_name);

    std::vector<SerializeFn> serialize_fns;
    avro::ValidSchema valid_schema;
    std::unique_ptr<AvroSerializerTraits> traits;
    /// Held by reference, not by value: the referenced object is not owned by this class.
    const FormatSettings & settings;
};

class AvroRowOutputFormat final : public IRowOutputFormat
{
public:
    AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_);
    virtual ~AvroRowOutputFormat() override;

    String getName() const override { return "AvroRowOutputFormat"; }

private:
    void write(const Columns & columns, size_t row_num) override;
    void writeField(const IColumn &, const ISerialization &, size_t) override {}
    virtual void writePrefix() override;
    virtual void finalizeImpl() override;
    virtual void resetFormatterImpl() override;

    void createFileWriter();

    FormatSettings settings;
    AvroSerializer serializer;
    std::unique_ptr<avro::DataFileWriterBase> file_writer_ptr;
};

}
#endif