1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
#include <library/cpp/testing/unittest/registar.h>
#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
#include <yql/essentials/minikql/mkql_function_registry.h>
#include <yql/essentials/minikql/mkql_program_builder.h>
#include <yql/essentials/public/udf/arrow/block_builder.h>
#include <yql/essentials/public/udf/arrow/block_reader.h>
#include <yql/essentials/public/udf/arrow/memory_pool.h>
namespace NYql::NUdf {
namespace {
using namespace NKikimr;
class TBlockReaderFixture : public NUnitTest::TBaseFixture {
class TArrayHelpers : public TThrRefBase {
public:
using TPtr = TIntrusivePtr<TArrayHelpers>;
explicit TArrayHelpers(const NMiniKQL::TType* type, arrow::MemoryPool* const arrowPool)
: Builder(MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), type, *arrowPool, NMiniKQL::CalcBlockLen(CalcMaxBlockItemSize(type)), nullptr))
, Reader(MakeBlockReader(NMiniKQL::TTypeInfoHelper(), type))
{}
public:
const std::unique_ptr<IArrayBuilder> Builder;
const std::unique_ptr<IBlockReader> Reader;
};
public:
TBlockReaderFixture()
: FunctionRegistry(CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry()))
, Alloc(__LOCATION__)
, Env(Alloc)
, PgmBuilder(Env, *FunctionRegistry)
, ArrowPool(GetYqlMemoryPool())
{}
NMiniKQL::TType* OptionaType(NMiniKQL::TType* type) const {
return PgmBuilder.NewOptionalType(type);
}
template <typename T>
NMiniKQL::TType* DataType() const {
return PgmBuilder.NewDataType(NUdf::TDataType<T>::Id);
}
NMiniKQL::TType* DataType(NUdf::EDataSlot dataSlot) const {
return PgmBuilder.NewDataType(dataSlot);
}
template <typename... TArgs>
NMiniKQL::TType* TupleType(TArgs&&... args) const {
return PgmBuilder.NewTupleType({std::forward<TArgs>(args)...});
}
TArrayHelpers::TPtr GetArrayHelpers(const NMiniKQL::TType* type) const {
return MakeIntrusive<TArrayHelpers>(type, ArrowPool);
}
public:
TIntrusivePtr<NMiniKQL::IFunctionRegistry> FunctionRegistry;
NMiniKQL::TScopedAlloc Alloc;
NMiniKQL::TTypeEnvironment Env;
NMiniKQL::TProgramBuilder PgmBuilder;
arrow::MemoryPool* const ArrowPool;
};
} // anonymous namespace
Y_UNIT_TEST_SUITE(BlockReaderTest) {
    // Fills several arrays of different types with `size` rows, then checks that
    // the reader reports the expected data weight for the row window
    // [offset, offset + len), both via GetSliceDataWeight on the full array and
    // via GetDataWeight on a DeepSlice of that window.
    Y_UNIT_TEST_F(TestLogicalDataSize, TBlockReaderFixture) {
        // Index in this list == index in `arrays` and in `expectedLogicalSize` below.
        const std::vector arrayHelpers = {
            GetArrayHelpers(DataType<ui32>()),
            GetArrayHelpers(OptionaType(DataType<char*>())),
            GetArrayHelpers(OptionaType(TupleType(OptionaType(DataType<ui32>()), DataType<char*>()))),
            GetArrayHelpers(DataType(NUdf::EDataSlot::TzDate)),
            GetArrayHelpers(PgmBuilder.NewNullType())
        };

        constexpr ui32 size = 1000;
        constexpr ui32 stringSize = 37;

        for (ui32 i = 0; i < size; ++i) {
            // [0] plain ui32: every row has a value.
            arrayHelpers[0]->Builder->Add(TBlockItem(i));

            // [1] optional string: only odd rows carry a string of stringSize bytes.
            const auto str = NUnitTest::RandomString(stringSize, i);
            arrayHelpers[1]->Builder->Add((i % 2) ? TBlockItem(str) : TBlockItem());

            // [2] optional tuple<optional ui32, string>: the tuple itself is present
            // on odd rows only; its first element is empty when (i / 2) is even.
            TBlockItem tuple[] = { ((i / 2) % 2) ? TBlockItem(i) : TBlockItem(), TBlockItem(str) };
            arrayHelpers[2]->Builder->Add((i % 2) ? TBlockItem(tuple) : TBlockItem());

            // [3] TzDate: value plus timezone id.
            TBlockItem tzDate(i);
            tzDate.SetTimezoneId(i % 100);
            arrayHelpers[3]->Builder->Add(tzDate);

            // [4] Null type.
            arrayHelpers[4]->Builder->Add(TBlockItem::Zero());
        }

        std::vector<std::shared_ptr<arrow::ArrayData>> arrays;
        arrays.reserve(arrayHelpers.size());
        for (const auto& helper : arrayHelpers) {
            arrays.emplace_back(helper->Builder->Build(true).array());
        }

        constexpr ui32 offset = 133;
        constexpr ui32 len = 533;
        static_assert(offset + len < size);

        // One offset entry per row in the window.
        constexpr ui64 offsetSize = sizeof(arrow::BinaryType::offset_type) * len;
        // ceil(len / 8): one bit per row, rounded up to whole bytes.
        constexpr ui64 bitmaskSize = (len - 1) / 8 + 1;
        // Number of odd row indices in [offset, offset + len); only those rows
        // received a non-empty string in the fill loop above.
        constexpr ui64 nonEmptyStrings = (len - offset % 2) / 2 + offset % 2;

        const std::vector<ui64> expectedLogicalSize = {
            // [0] ui32 values only.
            sizeof(ui32) * len,
            // [1] bitmask + string offsets + string bytes of the non-empty rows.
            bitmaskSize + offsetSize + stringSize * nonEmptyStrings,
            // [2] two bitmasks (presumably outer optional and inner optional ui32)
            //     + string offsets + ui32 values + string bytes.
            2 * bitmaskSize + offsetSize + sizeof(ui32) * len + stringSize * nonEmptyStrings,
            // [3] two ui16 values per row.
            (sizeof(ui16) + sizeof(ui16)) * len,
            // [4] Null type carries no data.
            0
        };

        // The loops below index both containers in parallel; fail with a clear
        // message instead of reading out of bounds if the two lists ever drift apart.
        UNIT_ASSERT_VALUES_EQUAL(arrayHelpers.size(), expectedLogicalSize.size());

        // Test GetSliceDataWeight with offset and length on the full array
        for (size_t i = 0; i < arrayHelpers.size(); ++i) {
            UNIT_ASSERT_VALUES_EQUAL_C(arrayHelpers[i]->Reader->GetSliceDataWeight(*arrays[i], offset, len), expectedLogicalSize[i], "array: " << i);
        }

        // Test GetDataWeight after slice
        for (size_t i = 0; i < arrayHelpers.size(); ++i) {
            const auto slice = DeepSlice(arrays[i], offset, len);
            UNIT_ASSERT_VALUES_EQUAL_C(arrayHelpers[i]->Reader->GetDataWeight(*slice), expectedLogicalSize[i], "sliced array: " << i);
        }
    }
}
} // namespace NYql::NUdf
|