1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <base/find_symbols.h>
namespace DB
{
/// Error codes thrown by this file. They are only declared here (extern);
/// the definitions live elsewhere in the project.
namespace ErrorCodes
{
/// Thrown by executeImpl when the argument column is not a ColumnString.
extern const int ILLEGAL_COLUMN;
/// Thrown by getReturnTypeImpl when the argument type is not String.
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
/// Function `regexpQuoteMeta`: takes one String argument and returns a String in which
/// every occurrence of one of the bytes
///     \0  \  |  (  )  ^  $  .  [  ]  ?  *  +  {  :  -
/// is prefixed with a backslash. All other bytes are copied unchanged.
class FunctionRegexpQuoteMeta : public IFunction
{
public:
    static constexpr auto name = "regexpQuoteMeta";

    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRegexpQuoteMeta>(); }

    String getName() const override { return name; }

    /// The function takes exactly one argument.
    size_t getNumberOfArguments() const override { return 1; }

    bool useDefaultImplementationForConstants() const override { return true; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    /// Accepts only a String argument and always yields String; any other argument type
    /// raises ILLEGAL_TYPE_OF_ARGUMENT.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        const auto & argument_type = arguments[0].type;
        if (WhichDataType(argument_type).isString())
            return std::make_shared<DataTypeString>();

        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 1 argument of function {}. Must be String.",
            argument_type->getName(), getName());
    }

    /// Builds the escaped result column row by row.
    /// Raises ILLEGAL_COLUMN if the argument is not backed by a ColumnString.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const ColumnPtr & argument_column = arguments[0].column;
        const ColumnString * source = checkAndGetColumn<ColumnString>(argument_column.get());
        if (source == nullptr)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
                arguments[0].column->getName(), getName());

        auto result = ColumnString::create();
        auto & out_chars = result->getChars();
        auto & out_offsets = result->getOffsets();
        out_offsets.resize(input_rows_count);

        const ColumnString::Offsets & in_offsets = source->getOffsets();
        const char * const in_chars = reinterpret_cast<const char *>(source->getChars().data());

        /// Read position inside the source buffer; it is carried over from one row to the next.
        const char * cursor = in_chars;

        for (size_t row = 0; row < input_rows_count; ++row)
        {
            /// NOTE The result is not byte-for-byte what re2::RE2::QuoteMeta produces:
            /// a zero byte becomes backslash + zero byte (not \x00),
            /// and only the characters that need it are escaped.
            /// That is acceptable; see the comments in re2.cc

            /// Last byte that belongs to this row. It is never copied: when the scan reaches it,
            /// a zero byte is written to the output instead and the row is finished.
            const char * const row_last = in_chars + in_offsets[row] - 1;

            for (;;)
            {
                /// The search yields row_last itself when nothing to escape is left in the row.
                const char * const hit = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(cursor, row_last);

                /// Append the untouched bytes that precede the hit.
                const size_t run_length = hit - cursor;
                const size_t write_at = out_chars.size();
                out_chars.resize(write_at + run_length);
                memcpySmallAllowReadWriteOverflow15(out_chars.data() + write_at, cursor, run_length);

                /// Step over the byte at `hit` (either an escaped character or the end of the row).
                cursor = hit + 1;

                if (hit != row_last)
                {
                    /// A character from the set: emit it with a leading backslash and keep scanning.
                    out_chars.emplace_back('\\');
                    out_chars.emplace_back(*hit);
                    continue;
                }

                /// End of the row: close the output string with a zero byte.
                out_chars.emplace_back('\0');
                break;
            }

            out_offsets[row] = out_chars.size();
        }

        return result;
    }
};
}
/// Registration hook: adds FunctionRegexpQuoteMeta to the function factory.
/// NOTE(review): the SQL-visible name presumably comes from the class's static `name`
/// ("regexpQuoteMeta") — confirm against FunctionFactory::registerFunction.
REGISTER_FUNCTION(RegexpQuoteMeta)
{
factory.registerFunction<FunctionRegexpQuoteMeta>();
}
}
|