aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author atarasov5 <atarasov5@yandex-team.com> 2025-02-10 10:32:55 +0300
committer atarasov5 <atarasov5@yandex-team.com> 2025-02-10 11:19:12 +0300
commit bbe71b332b223c20740308be5a9c2ea1431f1ff0 (patch)
tree 26f167da71bfa8ab54db753d8236e0ba82b1f461
parent f374c32c8923c81c28d12b0d349770637efe563f (diff)
download ydb-bbe71b332b223c20740308be5a9c2ea1431f1ff0.tar.gz
YQL-19535: Fix wrong scalar conversion
commit_hash:997d731b3d106421ee68aa2e3e18f18b03ea7a66
-rw-r--r-- yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp 18
-rw-r--r-- yql/essentials/minikql/computation/mkql_block_impl.cpp 10
2 files changed, 26 insertions, 2 deletions
diff --git a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
index f5c6f86884..1ad2c3c5c5 100644
--- a/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
+++ b/yql/essentials/minikql/comp_nodes/ut/mkql_blocks_ut.cpp
@@ -264,6 +264,24 @@ Y_UNIT_TEST(TestScalar) {
UNIT_ASSERT_VALUES_EQUAL(TArrowBlock::From(value).GetDatum().scalar_as<arrow::UInt64Scalar>().value, testValue);
}
+// Checks which Arrow scalar class backs a scalar built from a string-like data slot.
+//   Type      - data slot handed to NewDataLiteral<Type>.
+//   ArrowType - scalar class the resulting datum is expected to hold (compared via typeid).
+template<auto Type, typename ArrowType>
+void TestContainerForStringType() {
+    TSetup<false> setup;
+
+    // The payload keeps its embedded double quotes
+    // (presumably so the same text is acceptable for every tested slot - confirm).
+    const auto scalarNode = setup.PgmBuilder->AsScalar(
+        setup.PgmBuilder->NewDataLiteral<Type>("\"Just a string\""));
+    const auto graph = setup.BuildGraph(scalarNode);
+    const auto value = graph->GetValue();
+
+    // Hold the scalar pointer locally, then compare the dynamic type of the pointee.
+    const auto scalarHolder = TArrowBlock::From(value).GetDatum().scalar();
+    const auto& scalarRef = *scalarHolder;
+    UNIT_ASSERT(typeid(scalarRef) == typeid(ArrowType));
+}
+
+// Verifies the Arrow scalar class produced for each string-like data slot:
+// Utf8 and Json are expected as arrow::StringScalar, Yson and String as arrow::BinaryScalar.
+Y_UNIT_TEST(TestStringTypesHasAppropriateContainer) {
+    using ESlot = NUdf::EDataSlot;
+
+    // Slots expected to be held in arrow::StringScalar.
+    TestContainerForStringType<ESlot::Utf8, arrow::StringScalar>();
+    TestContainerForStringType<ESlot::Json, arrow::StringScalar>();
+
+    // Slots expected to be held in arrow::BinaryScalar.
+    TestContainerForStringType<ESlot::Yson, arrow::BinaryScalar>();
+    TestContainerForStringType<ESlot::String, arrow::BinaryScalar>();
+}
+
Y_UNIT_TEST_LLVM(TestReplicateScalar) {
const ui64 count = 1000;
const ui32 value = 42;
diff --git a/yql/essentials/minikql/computation/mkql_block_impl.cpp b/yql/essentials/minikql/computation/mkql_block_impl.cpp
index 2920a1ac3c..49d52eb0b8 100644
--- a/yql/essentials/minikql/computation/mkql_block_impl.cpp
+++ b/yql/essentials/minikql/computation/mkql_block_impl.cpp
@@ -114,8 +114,14 @@ arrow::Datum DoConvertScalar(TType* type, const T& value, arrow::MemoryPool& poo
const auto& str = value.AsStringRef();
std::shared_ptr<arrow::Buffer> buffer(ARROW_RESULT(arrow::AllocateBuffer(str.Size(), &pool)));
std::memcpy(buffer->mutable_data(), str.Data(), str.Size());
- auto type = (slot == NUdf::EDataSlot::String || slot == NUdf::EDataSlot::Yson || slot == NUdf::EDataSlot::JsonDocument) ? arrow::binary() : arrow::utf8();
- std::shared_ptr<arrow::Scalar> scalar = std::make_shared<arrow::BinaryScalar>(buffer, type);
+ std::shared_ptr<arrow::Scalar> scalar;
+ if (slot == NUdf::EDataSlot::String || slot == NUdf::EDataSlot::Yson || slot == NUdf::EDataSlot::JsonDocument) {
+ scalar = std::make_shared<arrow::BinaryScalar>(buffer, arrow::binary());
+ } else {
+ // NOTE: Do not use |arrow::BinaryScalar| for utf8 and json types directly.
+ // This is necessary so that the type of the scalar is clearly preserved at runtime.
+ scalar = std::make_shared<arrow::StringScalar>(buffer);
+ }
return arrow::Datum(scalar);
}
case NUdf::EDataSlot::TzDate: {