aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Processors/Formats/Impl/TemplateRowInputFormat.h
blob: 2752cb13e500439df983b9336a05b3080c5385ff (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#pragma once

#include <Core/Block.h>
#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
#include <Processors/Formats/ISchemaReader.h>
#include <Formats/FormatSettings.h>
#include <Formats/ParsedTemplateFormatString.h>
#include <Formats/SchemaInferenceUtils.h>
#include <IO/ReadHelpers.h>
#include <IO/PeekableReadBuffer.h>
#include <Interpreters/Context.h>


namespace DB
{

/// Forward declaration: TemplateRowInputFormat below holds a std::unique_ptr to this type
/// before the full class definition appears later in this header.
class TemplateFormatReader;

/// Row input format that reports itself as "TemplateRowInputFormat".
/// It is configured with two parsed template format strings (`format` and `row_format`)
/// and a string delimiter placed between rows, and it delegates part of its work to an
/// owned TemplateFormatReader.
/// NOTE(review): presumably `format` describes the whole result set and `row_format`
/// a single row - confirm against the implementation file.
class TemplateRowInputFormat final : public RowInputFormatWithDiagnosticInfo
{
    using EscapingRule = FormatSettings::EscapingRule;
public:
    /// Public constructor: reads from an arbitrary ReadBuffer.
    /// NOTE(review): presumably wraps `in_` into a PeekableReadBuffer and forwards to the
    /// private constructor below - confirm in the .cpp.
    TemplateRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
                           FormatSettings settings_, bool ignore_spaces_,
                           ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
                           std::string row_between_delimiter);

    /// Name of this format implementation.
    String getName() const override { return "TemplateRowInputFormat"; }

    void resetParser() override;

private:
    /// Private constructor: same arguments as the public one, but takes ownership of an
    /// already constructed PeekableReadBuffer instead of a plain ReadBuffer reference.
    TemplateRowInputFormat(const Block & header_, std::unique_ptr<PeekableReadBuffer> buf_, const Params & params_,
                           FormatSettings settings_, bool ignore_spaces_,
                           ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
                           std::string row_between_delimiter);

    bool readRow(MutableColumns & columns, RowReadExtension & extra) override;

    void readPrefix() override;

    bool allowSyncAfterError() const override;
    void syncAfterError() override;

    /// Non-virtual helper that deserializes one field into `column`.
    /// NOTE(review): meaning of the bool result is not visible here - confirm in the .cpp.
    bool deserializeField(const DataTypePtr & type,
        const SerializationPtr & serialization, IColumn & column, size_t file_column);

    /// Overrides of the diagnostic hooks declared by the base class.
    bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
    void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;

    bool isGarbageAfterField(size_t after_col_idx, ReadBuffer::Position pos) override;

    void setReadBuffer(ReadBuffer & in_) override;

    /// Owned peekable buffer this format reads from.
    std::unique_ptr<PeekableReadBuffer> buf;
    const DataTypes data_types;

    /// Own copy of the format settings (stored by value, not by reference).
    FormatSettings settings;
    const bool ignore_spaces;
    /// Parsed template strings, stored by value and immutable after construction.
    const ParsedTemplateFormatString format;
    const ParsedTemplateFormatString row_format;

    bool end_of_stream = false;
    /// NOTE(review): presumably indices of columns that always receive default values - confirm usage.
    std::vector<size_t> always_default_columns;
    const char default_csv_delimiter;

    const std::string row_between_delimiter;

    /// Owned helper reader; its class is defined later in this header.
    std::unique_ptr<TemplateFormatReader> format_reader;
};

/// Helper that reads template-formatted input from a PeekableReadBuffer: prefix/suffix,
/// delimiters and fields to be skipped.
/// The buffer is held as a non-owning pointer; the two format strings and the format
/// settings are held as references, so the referenced objects must outlive this reader.
class TemplateFormatReader
{
    using EscapingRule = FormatSettings::EscapingRule;

public:
    /// Binds the reader to `buf_`, `format_`, `row_format_` and `format_settings_`
    /// (all kept by pointer/reference, see the members below); the delimiter string is taken by value.
    TemplateFormatReader(
        PeekableReadBuffer & buf_,
        bool ignore_spaces_,
        const ParsedTemplateFormatString & format_,
        const ParsedTemplateFormatString & row_format_,
        std::string row_between_delimiter,
        const FormatSettings & format_settings_);

    void readPrefix();
    void skipField(EscapingRule escaping_rule);
    /// Skips whitespace in the current buffer, but only when `ignore_spaces` is set.
    inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); }

    /// `input_part_beg` is taken by non-const reference, so the call may update it.
    /// NOTE(review): presumably `ReturnType` selects between a throwing (void) and a
    /// bool-returning variant - confirm in the definition.
    template <typename ReturnType = void>
    ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end);
    bool checkForSuffix();

    /// Re-points the reader at another buffer; ownership is not taken.
    void setReadBuffer(PeekableReadBuffer & buf_) { buf = &buf_; }

    void skipDelimiter(size_t index);
    void skipRowEndDelimiter();
    void skipRowBetweenDelimiter();

    /// Returns the stored `format_data_idx`.
    size_t getFormatDataIdx() const { return format_data_idx; }

private:
    /// Non-owning pointer to the buffer currently being read; replaced by setReadBuffer().
    PeekableReadBuffer * buf;
    bool ignore_spaces;
    /// References, not copies: lifetime is managed by whoever constructs this reader.
    const ParsedTemplateFormatString & format;
    const ParsedTemplateFormatString & row_format;
    const std::string row_between_delimiter;
    const FormatSettings & format_settings;
    /// NOTE(review): these two counters have no default member initializer in this header;
    /// confirm the constructor assigns them on every path before they are read.
    size_t format_data_idx;
    size_t row_num;
};

/// Schema reader built on IRowSchemaReader: readRowAndGetDataTypes() yields an optional
/// list of data types per call.
/// It owns its own PeekableReadBuffer, its own copies of both parsed format strings and an
/// embedded TemplateFormatReader.
class TemplateSchemaReader : public IRowSchemaReader
{
public:
    /// The format strings are received by const reference and stored by value in the members below.
    TemplateSchemaReader(ReadBuffer & in_,
                         bool ignore_spaces_,
                         const ParsedTemplateFormatString & format_,
                         const ParsedTemplateFormatString & row_format_,
                         std::string row_between_delimiter,
                         const FormatSettings & format_settings_);

    /// NOTE(review): presumably an empty optional signals that no more rows are available - confirm.
    std::optional<DataTypes> readRowAndGetDataTypes() override;

private:
    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;

    /// Declaration order matters: members are constructed in this order, so `buf`, `format`
    /// and `row_format` exist before `format_reader` is constructed.
    /// NOTE(review): `format_reader` keeps references to the objects passed to its constructor;
    /// confirm the constructor binds it to these members and keep them declared above it.
    PeekableReadBuffer buf;
    const ParsedTemplateFormatString format;
    const ParsedTemplateFormatString row_format;
    TemplateFormatReader format_reader;
    bool first_row = true;
    JSONInferenceInfo json_inference_info;
    const char default_csv_delimiter;
};

/// Free function declared here and defined elsewhere.
/// Takes a WriteBuffer `out`, the ReadBuffer `buf` to read from, the `delimiter` string,
/// a textual `description` and a `skip_spaces` flag.
/// NOTE(review): presumably returns true when the delimiter was matched and writes a
/// diagnostic message to `out` otherwise - confirm against the definition.
bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces);

}