diff options
author | atarasov5 <atarasov5@yandex-team.com> | 2025-02-10 10:32:55 +0300 |
---|---|---|
committer | atarasov5 <atarasov5@yandex-team.com> | 2025-02-10 11:19:12 +0300 |
commit | bbe71b332b223c20740308be5a9c2ea1431f1ff0 (patch) | |
tree | 26f167da71bfa8ab54db753d8236e0ba82b1f461 | |
parent | f374c32c8923c81c28d12b0d349770637efe563f (diff) | |
download | ydb-bbe71b332b223c20740308be5a9c2ea1431f1ff0.tar.gz |
YQL-19535: Fix wrong scalar conversion
commit_hash:997d731b3d106421ee68aa2e3e18f18b03ea7a66
-rw-r--r-- | yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp | 18 | ||||
-rw-r--r-- | yql/essentials/minikql/computation/mkql_block_impl.cpp | 10 |
2 files changed, 26 insertions, 2 deletions
```diff
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
index f5c6f86884..1ad2c3c5c5 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
@@ -264,6 +264,24 @@ Y_UNIT_TEST(TestScalar) {
     UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(value).GetDatum().scalar_as<arrow::UInt64Scalar>().value, testValue);
 }
 
+template<auto Type, typename ArrowType>
+void TestContainerForStringType() {
+    TSetup<false> setup;
+    auto dataLiteral = setup.PgmBuilder->NewDataLiteral<Type>("\"Just a string\"");
+    const auto dataAfterBlocks = setup.PgmBuilder->AsScalar(dataLiteral);
+    const auto graph = setup.BuildGraph(dataAfterBlocks);
+    const auto value = graph->GetValue();
+
+    UNIT_ASSERT(typeid(*TArrowBlock::From(value).GetDatum().scalar()) == typeid(ArrowType));
+}
+
+Y_UNIT_TEST(TestStringTypesHasAppropriateContainer) {
+    TestContainerForStringType<NUdf::EDataSlot::Utf8, arrow::StringScalar>();
+    TestContainerForStringType<NUdf::EDataSlot::Json, arrow::StringScalar>();
+    TestContainerForStringType<NUdf::EDataSlot::Yson, arrow::BinaryScalar>();
+    TestContainerForStringType<NUdf::EDataSlot::String, arrow::BinaryScalar>();
+}
+
 Y_UNIT_TEST_LLVM(TestReplicateScalar) {
     const ui64 count = 1000;
     const ui32 value = 42;
diff --git a/yql/essentials/minikql/computation/mkql_block_impl.cpp b/yql/essentials/minikql/computation/mkql_block_impl.cpp
index 2920a1ac3c..49d52eb0b8 100644
--- a/yql/essentials/minikql/computation/mkql_block_impl.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_impl.cpp
@@ -114,8 +114,14 @@ arrow::Datum DoConvertScalar(TType* type, const T& value, arrow::MemoryPool& poo
             const auto& str = value.AsStringRef();
             std::shared_ptr<arrow::Buffer> buffer(ARROW_RESULT(arrow::AllocateBuffer(str.Size(), &pool)));
             std::memcpy(buffer->mutable_data(), str.Data(), str.Size());
-            auto type = (slot == NUdf::EDataSlot::String || slot == NUdf::EDataSlot::Yson || slot == NUdf::EDataSlot::JsonDocument) ? arrow::binary() : arrow::utf8();
-            std::shared_ptr<arrow::Scalar> scalar = std::make_shared<arrow::BinaryScalar>(buffer, type);
+            std::shared_ptr<arrow::Scalar> scalar;
+            if (slot == NUdf::EDataSlot::String || slot == NUdf::EDataSlot::Yson || slot == NUdf::EDataSlot::JsonDocument) {
+                scalar = std::make_shared<arrow::BinaryScalar>(buffer, arrow::binary());
+            } else {
+                // NOTE: Do not use |arrow::BinaryScalar| for utf8 and json types directly.
+                // This is necessary so that the type of the scalar is clearly preserved at runtime.
+                scalar = std::make_shared<arrow::StringScalar>(buffer);
+            }
             return arrow::Datum(scalar);
         }
         case NUdf::EDataSlot::TzDate: {
```