summaryrefslogtreecommitdiffstats
path: root/yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp
diff options
context:
space:
mode:
authorvvvv <[email protected]>2024-11-07 12:29:36 +0300
committervvvv <[email protected]>2024-11-07 13:49:47 +0300
commitd4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
treeb5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp
parent13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
Moved other yql/essentials libs YQL-19206
init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp')
-rw-r--r--yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp286
1 files changed, 286 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp b/yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp
new file mode 100644
index 00000000000..50320e27ddc
--- /dev/null
+++ b/yql/essentials/parser/pg_wrapper/ut/codegen_ut.cpp
@@ -0,0 +1,286 @@
+#include "../pg_compat.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/resource/resource.h>
+#include <yql/essentials/minikql/codegen/codegen.h>
+#include <yql/essentials/minikql/arrow/arrow_defs.h>
+
+#include <arrow/compute/kernel.h>
+#include <arrow/array/builder_primitive.h>
+#include <arrow/array/builder_binary.h>
+
+#include <llvm/IR/Module.h>
+
+#include <yql/essentials/parser/pg_wrapper/arrow.h>
+
+extern "C" {
+#include <yql/essentials/parser/pg_wrapper/postgresql/src/backend/catalog/pg_collation_d.h>
+#include <yql/essentials/parser/pg_wrapper/postgresql/src/backend/utils/fmgrprotos.h>
+}
+
+#include <yql/essentials/parser/pg_catalog/catalog.h>
+#include <yql/essentials/minikql/arrow/arrow_util.h>
+
+#include <util/datetime/cputimer.h>
+
+using namespace NYql;
+using namespace NYql::NCodegen;
+
+extern "C" {
+Y_PRAGMA_DIAGNOSTIC_PUSH
+Y_PRAGMA("GCC diagnostic ignored \"-Wreturn-type-c-linkage\"")
+#include <yql/essentials/parser/pg_wrapper/pg_kernels_fwd.inc>
+Y_PRAGMA_DIAGNOSTIC_POP
+}
+
+enum class EKernelFlavor {
+ Indirect,
+ DefArg,
+ Cpp,
+ BitCode,
+ Ideal
+};
+
+Y_UNIT_TEST_SUITE(TPgCodegen) {
+ void PgFuncImpl(EKernelFlavor flavor, bool constArg, bool fixed) {
+ const TString& name = fixed ? "date_eq" : "textout";
+ ICodegen::TPtr codegen;
+ TExecFunc execFunc;
+ switch (flavor) {
+ case EKernelFlavor::Indirect: {
+ if (fixed) {
+ execFunc = MakeIndirectExec<true, true>(&date_eq);
+ } else {
+ execFunc = MakeIndirectExec<true, false>(&textout);
+ }
+ break;
+ }
+ case EKernelFlavor::DefArg: {
+ if (fixed) {
+ execFunc = TGenericExec<TPgDirectFunc<&date_eq>, true, true, TDefaultArgsPolicy>({});
+ } else {
+ execFunc = TGenericExec<TPgDirectFunc<&textout>, true, false, TDefaultArgsPolicy>({});
+ }
+ break;
+ }
+ case EKernelFlavor::Cpp: {
+ execFunc = fixed ? arrow_date_eq() : arrow_textout();
+ break;
+ }
+ case EKernelFlavor::BitCode: {
+ codegen = ICodegen::Make(ETarget::Native);
+ auto bitcode = NResource::Find(fixed ? "/llvm_bc/PgFuncs1" : "/llvm_bc/PgFuncs17");
+ codegen->LoadBitCode(bitcode, "Funcs");
+ auto func = codegen->GetModule().getFunction(std::string("arrow_" + name));
+ Y_ENSURE(func);
+ codegen->AddGlobalMapping("GetPGKernelState", (const void*)&GetPGKernelState);
+ codegen->Verify();
+ codegen->ExportSymbol(func);
+ codegen->Compile();
+ //codegen->ShowGeneratedFunctions(&Cerr);
+ typedef TExecFunc (*TFunc)();
+ auto funcPtr = (TFunc)codegen->GetPointerToFunction(func);
+ execFunc = funcPtr();
+ break;
+ }
+ case EKernelFlavor::Ideal: {
+ if (fixed) {
+ execFunc = [](arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) {
+ size_t length = batch.values[0].length();
+ //NUdf::TFixedSizeArrayBuilder<ui64, true> builder(NKikimr::NMiniKQL::TTypeInfoHelper(), arrow::uint64(), *arrow::default_memory_pool(), length);
+ NUdf::TTypedBufferBuilder<ui64> dataBuilder(arrow::default_memory_pool());
+ NUdf::TTypedBufferBuilder<ui8> nullBuilder(arrow::default_memory_pool());
+ dataBuilder.Reserve(length);
+ nullBuilder.Reserve(length);
+ auto out = dataBuilder.MutableData();
+ auto outNulls = nullBuilder.MutableData();
+ NUdf::TFixedSizeBlockReader<ui64, false> reader1;
+ NUdf::TFixedSizeBlockReader<ui64, false> reader2;
+ const auto& array1 = *batch.values[0].array();
+ const auto ptr1 = array1.GetValues<ui64>(1);
+ if (batch.values[1].is_array()) {
+ const auto& array2 = *batch.values[1].array();
+ const auto ptr2 = array2.GetValues<ui64>(1);
+ for (size_t i = 0; i < length; ++i) {
+ //auto x = reader1.GetItem(array1, i).As<ui64>();
+ //auto y = reader2.GetItem(array2, i).As<ui64>();
+ auto x = ptr1[i];
+ auto y = ptr2[i];
+ out[i] = x == y ? 1 : 0;
+ outNulls[i] = false;
+ }
+ } else {
+ ui64 yConst = reader2.GetScalarItem(*batch.values[1].scalar()).As<ui64>();
+ for (size_t i = 0; i < length; ++i) {
+ auto x = ptr1[i];
+ out[i] = x == yConst ? 1 : 0;
+ outNulls[i] = false;
+ }
+ }
+
+ std::shared_ptr<arrow::Buffer> nulls;
+ nulls = nullBuilder.Finish();
+ nulls = NUdf::MakeDenseBitmap(nulls->data(), length, arrow::default_memory_pool());
+ std::shared_ptr<arrow::Buffer> data = dataBuilder.Finish();
+
+ *res = arrow::ArrayData::Make(arrow::uint64(), length ,{ data, nulls});
+ return arrow::Status::OK();
+ };
+ } else {
+ execFunc = [](arrow::compute::KernelContext* ctx, const arrow::compute::ExecBatch& batch, arrow::Datum* res) {
+ size_t length = batch.values[0].length();
+ NUdf::TStringArrayBuilder<arrow::BinaryType, true, NUdf::EPgStringType::None> builder(NKikimr::NMiniKQL::TTypeInfoHelper(), arrow::binary(), *ctx->memory_pool(), length);
+ NUdf::TStringBlockReader<arrow::BinaryType, true> reader;
+ const auto& array = *batch.values[0].array();
+ for (size_t i = 0; i < length; ++i) {
+ auto item = reader.GetItem(array, i);
+ if (!item) {
+ builder.Add(NUdf::TBlockItem{});
+ } else {
+ auto s = item.AsStringRef();
+ size_t len = s.Size() - VARHDRSZ - sizeof(void*);
+ const char* ptr = s.Data() + VARHDRSZ + sizeof(void*);
+ builder.Add(NUdf::TBlockItem{NUdf::TStringRef(ptr, len)});
+ }
+ }
+
+ *res = builder.Build(true);
+ return arrow::Status::OK();
+ };
+ };
+
+ break;
+ }
+ }
+
+ Y_ENSURE(execFunc);
+ arrow::compute::ExecContext execContent;
+ arrow::compute::KernelContext kernelCtx(&execContent);
+ TPgKernelState state;
+ kernelCtx.SetState(&state);
+ FmgrInfo finfo;
+ Zero(state.flinfo);
+ state.ProcDesc = fixed ? &NPg::LookupProc("date_eq", { 0, 0 }) : &NPg::LookupProc("textout", { 0 });
+ fmgr_info(state.ProcDesc->ProcId, &state.flinfo);
+ state.context = nullptr;
+ state.resultinfo = nullptr;
+ state.fncollation = DEFAULT_COLLATION_OID;
+ state.Name = name;
+ if (fixed) {
+ state.TypeLen = 1;
+ state.IsFixedResult = true;
+ state.IsFixedArg.push_back(true);
+ state.IsFixedArg.push_back(true);
+ } else {
+ state.TypeLen = -2;
+ state.IsFixedResult = false;
+ state.IsFixedArg.push_back(false);
+ }
+
+#ifdef NDEBUG
+ const size_t N = 10000;
+#else
+ const size_t N = 1000;
+#endif
+ std::vector<arrow::Datum> batchArgs;
+ if (fixed) {
+ arrow::UInt64Builder builder;
+ ARROW_OK(builder.Reserve(N));
+ for (size_t i = 0; i < N; ++i) {
+ builder.UnsafeAppend(i);
+ }
+
+ std::shared_ptr<arrow::ArrayData> out;
+ ARROW_OK(builder.FinishInternal(&out));
+ arrow::Datum arg1(out), arg2;
+ if (constArg) {
+ Cout << "with const arg\n";
+ arg2 = NKikimr::NMiniKQL::MakeScalarDatum<ui64>(0);
+ } else {
+ arg2 = out;
+ }
+
+ batchArgs.push_back(arg1);
+ batchArgs.push_back(arg2);
+ } else {
+ arrow::BinaryBuilder builder;
+ ARROW_OK(builder.Reserve(N));
+ for (size_t i = 0; i < N; ++i) {
+ std::string s(sizeof(void*) + VARHDRSZ + 500, 'A' + i % 26);
+ NUdf::ZeroMemoryContext(s.data() + sizeof(void*));
+ auto t = (text*)(s.data() + sizeof(void*));
+ SET_VARSIZE(t, VARHDRSZ + 500);
+ ARROW_OK(builder.Append(s));
+ }
+
+ std::shared_ptr<arrow::ArrayData> out;
+ ARROW_OK(builder.FinishInternal(&out));
+ arrow::Datum arg1(out);
+ batchArgs.push_back(arg1);
+ }
+
+ arrow::compute::ExecBatch batch(std::move(batchArgs), N);
+
+ {
+ Cout << "begin...\n";
+ TSimpleTimer timer;
+ for (size_t count = 0; count < (fixed ? 10000 : 1000); ++count) {
+ arrow::Datum res;
+ ARROW_OK(execFunc(&kernelCtx, batch, &res));
+ Y_ENSURE(res.length() == N);
+ }
+
+ Cout << "done, elapsed: " << timer.Get() << "\n";
+ }
+ }
+
+ Y_UNIT_TEST(PgFixedFuncIdeal) {
+ PgFuncImpl(EKernelFlavor::Ideal, false, true);
+ PgFuncImpl(EKernelFlavor::Ideal, true, true);
+ }
+
+ Y_UNIT_TEST(PgFixedFuncDefArg) {
+ PgFuncImpl(EKernelFlavor::DefArg, false, true);
+ PgFuncImpl(EKernelFlavor::DefArg, true, true);
+ }
+
+ Y_UNIT_TEST(PgFixedFuncIndirect) {
+ PgFuncImpl(EKernelFlavor::Indirect, false, true);
+ PgFuncImpl(EKernelFlavor::Indirect, true, true);
+ }
+
+#if !defined(USE_SLOW_PG_KERNELS)
+ Y_UNIT_TEST(PgFixedFuncCpp) {
+ PgFuncImpl(EKernelFlavor::Cpp, false, true);
+ PgFuncImpl(EKernelFlavor::Cpp, true, true);
+ }
+
+ Y_UNIT_TEST(PgFixedFuncBC) {
+ PgFuncImpl(EKernelFlavor::BitCode, false, true);
+ PgFuncImpl(EKernelFlavor::BitCode, true, true);
+ }
+#endif
+
+ Y_UNIT_TEST(PgStrFuncIdeal) {
+ PgFuncImpl(EKernelFlavor::Ideal, false, false);
+ }
+
+ Y_UNIT_TEST(PgStrFuncDefArg) {
+ PgFuncImpl(EKernelFlavor::DefArg, false, false);
+ }
+
+ Y_UNIT_TEST(PgStrFuncIndirect) {
+ PgFuncImpl(EKernelFlavor::Indirect, false, false);
+ }
+
+#if !defined(USE_SLOW_PG_KERNELS)
+ Y_UNIT_TEST(PgStrFuncCpp) {
+ PgFuncImpl(EKernelFlavor::Cpp, false, false);
+ }
+
+ Y_UNIT_TEST(PgStrFuncBC) {
+ PgFuncImpl(EKernelFlavor::BitCode, false, false);
+ }
+#endif
+
+}