diff options
author | imunkin <imunkin@yandex-team.com> | 2024-12-09 12:17:13 +0300 |
---|---|---|
committer | imunkin <imunkin@yandex-team.com> | 2024-12-09 12:39:10 +0300 |
commit | 16918dad02d32c3b02a3b71bc7be468d69c7c45b (patch) | |
tree | 60579e76fe33a84dab54588d282e49a68f92a209 | |
parent | 23c050605f08c239b580fc04a248e2e8d4476692 (diff) | |
download | ydb-16918dad02d32c3b02a3b71bc7be468d69c7c45b.tar.gz |
Fix TArrowBlock::From and its usage
commit_hash:0d0fe61d962e4c365f99bd84ebdb2229696c4dc0
8 files changed, 53 insertions, 28 deletions
diff --git a/yql/essentials/minikql/comp_nodes/mkql_apply.cpp b/yql/essentials/minikql/comp_nodes/mkql_apply.cpp index 0dd8a5a121..97545dfebf 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_apply.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_apply.cpp @@ -54,7 +54,7 @@ public: state.Args[i] = state.HolderFactory.CreateArrowBlock(arrow::Datum(batch.values[i])); } - const auto ret = Callable_.Run(&state.ValueBuilder, state.Args.data()); + const auto& ret = Callable_.Run(&state.ValueBuilder, state.Args.data()); *res = TArrowBlock::From(ret).GetDatum(); return arrow::Status::OK(); }) diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp index c7b68b54c4..12a9858ba1 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp @@ -84,6 +84,9 @@ public: const auto bitmapValue = getres.second[BitmapIndex_](ctx, block); const auto bitmap = CallInst::Create(getBitmap, { WrapArgumentForWindows(bitmapValue, ctx, block) }, "bitmap", block); + + ValueCleanup(EValueRepresentation::Any, bitmapValue, ctx, block); + const auto one = ConstantInt::get(bitmapType, 1); const auto band = BinaryOperator::CreateAnd(bitmap, one, "band", block); const auto good = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, band, one, "good", block); @@ -189,6 +192,9 @@ public: const auto bitmapValue = getres.second[BitmapIndex_](ctx, block); const auto pops = CallInst::Create(getPopCount, { WrapArgumentForWindows(bitmapValue, ctx, block) }, "pops", block); + + ValueCleanup(EValueRepresentation::Any, bitmapValue, ctx, block); + const auto good = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT, pops, ConstantInt::get(sizeType, 0), "good", block); BranchInst::Create(fill, loop, good, block); @@ -270,7 +276,7 @@ public: s.IsFinished_ = true; break; case EFetchResult::One: - switch (s.Check(bitmap.Release())) { + switch (s.Check(bitmap)) { case TState::EStep::Copy: for (ui32 i = 0; i < s.Values.size(); ++i) { if (const auto out = output[i]) { @@ -415,6 +421,8 @@ public: const auto checkPtr = CastInst::Create(Instruction::IntToPtr, checkFunc, PointerType::getUnqual(checkType), "check_func", block); const auto check = CallInst::Create(checkType, checkPtr, {stateArg, bitmapArg}, "check", block); + ValueCleanup(EValueRepresentation::Any, bitmap, ctx, block); + result->addIncoming(ConstantInt::get(statusType, static_cast<i32>(EFetchResult::One)), block); const auto step = SwitchInst::Create(check, save, 2U, block); @@ -552,9 +560,8 @@ private: EStep Check(const NUdf::TUnboxedValuePod bitmapValue) { Y_ABORT_UNLESS(!IsFinished_); Y_ABORT_UNLESS(!InputSize_); - const NUdf::TUnboxedValue b(std::move(bitmapValue)); auto& bitmap = Arrays_.back(); - bitmap = TArrowBlock::From(b).GetDatum().array(); + bitmap = TArrowBlock::From(bitmapValue).GetDatum().array(); if (!bitmap->length) return EStep::Skip; diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp index 505c9d635a..2c29ecd4c9 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp @@ -117,7 +117,12 @@ public: block = test; - const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(getres.second.back()(ctx, block), ctx, block) }, "height", block); + + const auto countValue = getres.second.back()(ctx, block); + const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(countValue, ctx, block) }, "height", block); + + ValueCleanup(EValueRepresentation::Any, countValue, ctx, block); + const auto part = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, count, height, "part", block); const auto decr = BinaryOperator::CreateSub(count, height, "decr", block); count->addIncoming(decr, block); @@ -201,6 +206,8 @@ public: const auto slice = CallInst::Create(sliceType, slicePtr, {ctx.GetFactory(), value, offset}, "slice", block); + ValueCleanup(EValueRepresentation::Any, value, ctx, block); + output->addIncoming(slice, block); BranchInst::Create(exit, block); @@ -214,9 +221,8 @@ public: #endif private: static NUdf::TUnboxedValuePod SliceBlock(const THolderFactory& holderFactory, NUdf::TUnboxedValuePod block, const uint64_t offset) { - NUdf::TUnboxedValue b(block); - auto& datum = TArrowBlock::From(b).GetDatum(); - return datum.is_scalar() ? b.Release() : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), offset, datum.array()->length - offset)); + const auto& datum = TArrowBlock::From(block).GetDatum(); + return datum.is_scalar() ? block : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), offset, datum.array()->length - offset)); } void RegisterDependencies() const final { @@ -325,7 +331,11 @@ public: block = good; - const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(getres.second.back()(ctx, block), ctx, block) }, "height", block); + const auto countValue = getres.second.back()(ctx, block); + const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(countValue, ctx, block) }, "height", block); + + ValueCleanup(EValueRepresentation::Any, countValue, ctx, block); + const auto part = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, count, height, "part", block); const auto decr = BinaryOperator::CreateSub(count, height, "decr", block); @@ -395,6 +405,8 @@ public: const auto slice = CallInst::Create(sliceType, slicePtr, {ctx.GetFactory(), value, size}, "slice", block); + ValueCleanup(EValueRepresentation::Any, value, ctx, block); + output->addIncoming(slice, block); BranchInst::Create(exit, block); @@ -408,9 +420,8 @@ public: #endif private: static NUdf::TUnboxedValuePod SliceBlock(const THolderFactory& holderFactory, NUdf::TUnboxedValuePod block, const uint64_t offset) { - NUdf::TUnboxedValue b(block); - auto& datum = TArrowBlock::From(b).GetDatum(); - return datum.is_scalar() ? b.Release() : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), 0ULL, offset)); + const auto& datum = TArrowBlock::From(block).GetDatum(); + return datum.is_scalar() ? block : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), 0ULL, offset)); } void RegisterDependencies() const final { diff --git a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp index 2f81664239..2013b7efd5 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp @@ -383,7 +383,7 @@ public: if (auto item = s.GetValue(ctx.HolderFactory); !item.IsInvalid()) return item; - if (const auto input = Flow_->GetValue(ctx).Release(); input.IsSpecial()) + if (const auto input = Flow_->GetValue(ctx); input.IsSpecial()) return input; else s.Reset(input); @@ -443,6 +443,8 @@ public: const auto setPtr = CastInst::Create(Instruction::IntToPtr, setFunc, PointerType::getUnqual(setType), "set", block); CallInst::Create(setType, setPtr, {stateArg, input }, "", block); + ValueCleanup(EValueRepresentation::Any, input, ctx, block); + BranchInst::Create(work, block); block = done; @@ -475,8 +477,7 @@ private: } void Reset(const NUdf::TUnboxedValuePod block) { - const NUdf::TUnboxedValue v(block); - const auto& datum = TArrowBlock::From(v).GetDatum(); + const auto& datum = TArrowBlock::From(block).GetDatum(); MKQL_ENSURE(datum.is_arraylike(), "Expecting array as FromBlocks argument"); MKQL_ENSURE(Arrays_.empty(), "Not all input is processed"); if (datum.is_array()) { @@ -634,6 +635,8 @@ public: const auto countValue = getres.second.back()(ctx, block); const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(countValue, ctx, block) }, "height", block); + ValueCleanup(EValueRepresentation::Any, countValue, ctx, block); + new StoreInst(height, countPtr, block); new StoreInst(ConstantInt::get(indexType, 0), indexPtr, block); @@ -953,9 +956,8 @@ private: } arrow::Datum DoReplicate(const NUdf::TUnboxedValuePod val, const NUdf::TUnboxedValuePod cnt, TComputationContext& ctx) const { - const NUdf::TUnboxedValue v(val), c(cnt); - const auto value = TArrowBlock::From(v).GetDatum().scalar(); - const ui64 count = TArrowBlock::From(c).GetDatum().scalar_as<arrow::UInt64Scalar>().value; + const auto value = TArrowBlock::From(val).GetDatum().scalar(); + const ui64 count = TArrowBlock::From(cnt).GetDatum().scalar_as<arrow::UInt64Scalar>().value; const auto reader = MakeBlockReader(TTypeInfoHelper(), Type_); const auto builder = MakeArrayBuilder(TTypeInfoHelper(), Type_, ctx.ArrowMemoryPool, count, &ctx.Builder->GetPgBuilder()); diff --git a/yql/essentials/minikql/computation/mkql_block_impl.cpp b/yql/essentials/minikql/computation/mkql_block_impl.cpp index ba7758b47d..2920a1ac3c 100644 --- a/yql/essentials/minikql/computation/mkql_block_impl.cpp +++ b/yql/essentials/minikql/computation/mkql_block_impl.cpp @@ -18,8 +18,7 @@ extern "C" uint64_t GetBlockCount(const NYql::NUdf::TUnboxedValuePod data) { } extern "C" uint64_t GetBitmapPopCountCount(const NYql::NUdf::TUnboxedValuePod data) { - const NYql::NUdf::TUnboxedValue v(data); - const auto& arr = NKikimr::NMiniKQL::TArrowBlock::From(v).GetDatum().array(); + const auto& arr = NKikimr::NMiniKQL::TArrowBlock::From(data).GetDatum().array(); const size_t len = (size_t)arr->length; MKQL_ENSURE(arr->GetNullCount() == 0, "Bitmap block should not have nulls"); const ui8* src = arr->GetValues<ui8>(1); @@ -259,7 +258,8 @@ NUdf::TUnboxedValuePod TBlockFuncNode::DoCalculate(TComputationContext& ctx) con std::vector<arrow::Datum> argDatums; for (ui32 i = 0; i < ArgsNodes.size(); ++i) { - argDatums.emplace_back(TArrowBlock::From(ArgsNodes[i]->GetValue(ctx)).GetDatum()); + const auto& value = ArgsNodes[i]->GetValue(ctx); + argDatums.emplace_back(TArrowBlock::From(value).GetDatum()); ARROW_DEBUG_CHECK_DATUM_TYPES(ArgsValuesDescr[i], argDatums.back().descr()); } @@ -352,7 +352,7 @@ void TBlockState::FillArrays() { for (size_t i = 0U; i < Deques.size(); ++i) { Deques[i].clear(); - if (const auto value = Values[i]) { + if (const auto& value = Values[i]) { const auto& datum = TArrowBlock::From(value).GetDatum(); if (datum.is_scalar()) { return; diff --git a/yql/essentials/minikql/computation/mkql_computation_node.cpp b/yql/essentials/minikql/computation/mkql_computation_node.cpp index 2303e954a4..ec47df7634 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node.cpp @@ -39,7 +39,8 @@ TDatumProvider MakeDatumProvider(const arrow::Datum& datum) { TDatumProvider MakeDatumProvider(const IComputationNode* node, TComputationContext& ctx) { return [node, &ctx]() { - return TArrowBlock::From(node->GetValue(ctx)).GetDatum(); + const auto& value = node->GetValue(ctx); + return TArrowBlock::From(value).GetDatum(); }; } diff --git a/yql/essentials/minikql/computation/mkql_computation_node_holders.h b/yql/essentials/minikql/computation/mkql_computation_node_holders.h index 4f6344a743..f2bb09006d 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_holders.h +++ b/yql/essentials/minikql/computation/mkql_computation_node_holders.h @@ -582,11 +582,13 @@ public: { } - inline static TArrowBlock& From(const NUdf::TUnboxedValue& value) { - return *static_cast<TArrowBlock*>(value.AsBoxed().Get()); + inline static const TArrowBlock& From(const NUdf::TUnboxedValuePod& value) { + return *static_cast<TArrowBlock*>(value.AsRawBoxed()); } - inline arrow::Datum& GetDatum() { + inline static const TArrowBlock& From(NUdf::TUnboxedValuePod&& value) = delete; + + inline const arrow::Datum& GetDatum() const { return Datum_; } diff --git a/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp b/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp index e7b755cb19..84562ee82c 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp @@ -170,7 +170,8 @@ public: OutputItemType batch = Batch_.Get(); size_t nvalues = DatumToMemberIDMap_.size(); - const auto& sizeDatum = TArrowBlock::From(value.GetElement(BatchLengthID_)).GetDatum(); + const auto& sizeValue = value.GetElement(BatchLengthID_); + const auto& sizeDatum = TArrowBlock::From(sizeValue).GetDatum(); Y_ENSURE(sizeDatum.is_scalar()); const auto& sizeScalar = sizeDatum.scalar(); const auto& sizeData = arrow::internal::checked_cast<const arrow::UInt64Scalar&>(*sizeScalar); @@ -179,7 +180,8 @@ public: TVector<arrow::Datum> datums(nvalues); for (size_t i = 0; i < nvalues; i++) { const ui32 id = DatumToMemberIDMap_[i]; - const auto& datum = TArrowBlock::From(value.GetElement(id)).GetDatum(); + const auto& datumValue = value.GetElement(id); + const auto& datum = TArrowBlock::From(datumValue).GetDatum(); datums[i] = datum; if (datum.is_scalar()) { continue; |