1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <pcg_random.hpp>
#include <Common/randomSeed.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_LARGE_STRING_SIZE;
}
namespace
{
/** Generate random string of specified length with printable ASCII characters, almost uniformly distributed.
* First argument is length, other optional arguments are ignored and used to prevent common subexpression elimination to get different values.
*/
class FunctionRandomPrintableASCII : public IFunction
{
public:
static constexpr auto name = "randomPrintableASCII";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRandomPrintableASCII>(); }
String getName() const override
{
return name;
}
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires at least one argument: the size of resulting string", getName());
if (arguments.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires at most two arguments: the size of resulting string and optional disambiguation tag", getName());
const IDataType & length_type = *arguments[0];
if (!isNumber(length_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must have numeric type", getName());
return std::make_shared<DataTypeString>();
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
auto col_to = ColumnString::create();
ColumnString::Chars & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
offsets_to.resize(input_rows_count);
pcg64_fast rng(randomSeed());
const IColumn & length_column = *arguments[0].column;
IColumn::Offset offset = 0;
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
size_t length = length_column.getUInt(row_num);
if (length > (1 << 30))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size in function {}", getName());
IColumn::Offset next_offset = offset + length + 1;
data_to.resize(next_offset);
offsets_to[row_num] = next_offset;
auto * data_to_ptr = data_to.data(); /// avoid assert on array indexing after end
for (size_t pos = offset, end = offset + length; pos < end; pos += 4) /// We have padding in column buffers that we can overwrite.
{
UInt64 rand = rng();
UInt16 rand1 = rand;
UInt16 rand2 = rand >> 16;
UInt16 rand3 = rand >> 32;
UInt16 rand4 = rand >> 48;
/// Printable characters are from range [32; 126].
/// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
data_to_ptr[pos + 0] = 32 + ((rand1 * 95) >> 16);
data_to_ptr[pos + 1] = 32 + ((rand2 * 95) >> 16);
data_to_ptr[pos + 2] = 32 + ((rand3 * 95) >> 16);
data_to_ptr[pos + 3] = 32 + ((rand4 * 95) >> 16);
/// NOTE gcc failed to vectorize this code (aliasing of char?)
/// TODO Implement SIMD optimizations from Danila Kutenin.
}
data_to[offset + length] = 0;
offset = next_offset;
}
return col_to;
}
};
}
REGISTER_FUNCTION(RandomPrintableASCII)
{
factory.registerFunction<FunctionRandomPrintableASCII>();
}
}
|