1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
#include <memory>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Formats/FormatFactory.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Formats/IRowOutputFormat.h>
#include <base/map.h>
namespace DB
{
namespace ErrorCodes
{
/// Thrown by the overload resolver when the first argument is not a constant string column.
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
/// Thrown by the overload resolver when fewer than two arguments are passed.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
/// Thrown by the function constructor when the format name is not known to FormatFactory.
extern const int UNKNOWN_FORMAT;
/// Thrown at execution time when the requested format is not a row output format.
extern const int BAD_ARGUMENTS;
}
namespace
{
/** formatRow(<format>, x, y, ...) is a function that allows you to use RowOutputFormat over
  * several columns to generate a string per row, such as CSV, TSV, JSONEachRow, etc.
  * formatRowNoNewline(...) trims the newline character of each row.
  *
  * The first argument is the (constant) format name; the remaining arguments are the
  * columns that are serialized, row by row, into the resulting string column.
  */
template <bool no_newline>
class FunctionFormatRow : public IFunction
{
public:
    static constexpr auto name = no_newline ? "formatRowNoNewline" : "formatRow";

    /// @param format_name_ name of the output format; must be known to FormatFactory.
    /// @param arguments_column_names_ names of all function arguments, including the format name at index 0.
    /// @param context_ context used to obtain format settings and to create the output format.
    /// @throws Exception(UNKNOWN_FORMAT) if the format name is not known to FormatFactory.
    FunctionFormatRow(String format_name_, Names arguments_column_names_, ContextPtr context_)
        : format_name(std::move(format_name_))
        , arguments_column_names(std::move(arguments_column_names_))
        , context(std::move(context_))
    {
        if (!FormatFactory::instance().getAllFormats().contains(format_name))
            throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", format_name);
    }

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 0; }
    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForConstants() const override { return true; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    /// The format name (argument 0) must be a constant.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }

    /// Serializes every input row with the configured row output format and returns
    /// a string column holding one formatted string per row.
    /// @throws Exception(BAD_ARGUMENTS) if the format is not a row output format.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        auto col_str = ColumnString::create();
        ColumnString::Chars & vec = col_str->getChars();
        WriteBufferFromVector buffer(vec);
        ColumnString::Offsets & offsets = col_str->getOffsets();
        offsets.resize(input_rows_count);

        /// Argument 0 is the format name, so only arguments [1, N) become columns of the block.
        Block arg_columns;
        size_t arguments_size = arguments.size();
        for (size_t i = 1; i < arguments_size; ++i)
        {
            auto argument_column = arguments[i];
            argument_column.name = arguments_column_names[i];
            arg_columns.insert(std::move(argument_column));
        }
        materializeBlockInplace(arg_columns);

        auto format_settings = getFormatSettings(context);
        auto out = FormatFactory::instance().getOutputFormat(format_name, buffer, arg_columns, context, format_settings);

        /// This function makes sense only for row output formats.
        auto * row_output_format = dynamic_cast<IRowOutputFormat *>(out.get());
        if (!row_output_format)
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Cannot turn rows into a {} format strings. {} function supports only row output formats",
                format_name, getName());

        auto columns = arg_columns.getColumns();
        for (size_t i = 0; i != input_rows_count; ++i)
        {
            row_output_format->writePrefixIfNeeded();
            row_output_format->writeRow(columns, i);
            row_output_format->finalize();

            if constexpr (no_newline)
            {
                /// Reuse the trailing '\n' as the string terminator when there is one.
                /// Otherwise a terminator still has to be appended, exactly as in the
                /// formatRow branch below: without it offsets[i] would point right after
                /// the last payload byte and that byte would take the place of the terminator.
                if (buffer.position() != buffer.buffer().begin() && buffer.position()[-1] == '\n')
                    buffer.position()[-1] = '\0';
                else
                    writeChar('\0', buffer);
            }
            else
                writeChar('\0', buffer);

            /// The offset of row i is the number of bytes written so far, terminator included.
            offsets[i] = buffer.count();

            /// Reset the format state so that the next row is written as an independent output.
            row_output_format->resetFormatter();
        }

        return col_str;
    }

private:
    String format_name;
    Names arguments_column_names;
    ContextPtr context;
};
/// Overload resolver for formatRow / formatRowNoNewline.
/// It validates the argument list and builds a FunctionFormatRow bound to the
/// constant format name given as the first argument.
template <bool no_newline>
class FormatRowOverloadResolver : public IFunctionOverloadResolver
{
public:
    static constexpr auto name = no_newline ? "formatRowNoNewline" : "formatRow";

    explicit FormatRowOverloadResolver(ContextPtr context_) : context(context_) { }

    static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<FormatRowOverloadResolver>(context); }

    String getName() const override { return name; }

    /// Accepts the format name followed by any number of expressions.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }

    /// The format name (argument 0) must be a constant.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
    bool useDefaultImplementationForNulls() const override { return false; }

    /// The result is always a String, whatever the argument types are.
    DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared<DataTypeString>(); }

    /// Builds the executable function.
    /// @throws Exception(NUMBER_OF_ARGUMENTS_DOESNT_MATCH) if fewer than two arguments are given.
    /// @throws Exception(ILLEGAL_TYPE_OF_ARGUMENT) if the first argument is not a constant string.
    FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
    {
        if (arguments.size() < 2)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                "Function {} requires at least two arguments: the format name and its output expression(s)", getName());

        const auto * format_name_column = checkAndGetColumnConst<ColumnString>(arguments.at(0).column.get());
        if (!format_name_column)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument to {} must be a format name", getName());

        /// Names of all arguments (the format name included) are passed on to the function.
        Names column_names;
        column_names.reserve(arguments.size());
        for (const auto & argument : arguments)
            column_names.push_back(argument.name);

        auto function = std::make_shared<FunctionFormatRow<no_newline>>(
            format_name_column->getValue<String>(), std::move(column_names), context);
        auto argument_types = collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; });

        return std::make_unique<FunctionToFunctionBaseAdaptor>(std::move(function), std::move(argument_types), return_type);
    }

private:
    ContextPtr context;
};
}
/// Registers both variants of the function: formatRowNoNewline and formatRow.
REGISTER_FUNCTION(FormatRow)
{
    using FormatRowNoNewlineResolver = FormatRowOverloadResolver<true>;
    using FormatRowResolver = FormatRowOverloadResolver<false>;

    factory.registerFunction<FormatRowNoNewlineResolver>();
    factory.registerFunction<FormatRowResolver>();
}
}
|