diff options
| author | imunkin <[email protected]> | 2024-12-09 12:17:13 +0300 | 
|---|---|---|
| committer | imunkin <[email protected]> | 2024-12-09 12:39:10 +0300 | 
| commit | 16918dad02d32c3b02a3b71bc7be468d69c7c45b (patch) | |
| tree | 60579e76fe33a84dab54588d282e49a68f92a209 | |
| parent | 23c050605f08c239b580fc04a248e2e8d4476692 (diff) | |
Fix TArrowBlock::From and its usage
commit_hash:0d0fe61d962e4c365f99bd84ebdb2229696c4dc0
8 files changed, 53 insertions, 28 deletions
diff --git a/yql/essentials/minikql/comp_nodes/mkql_apply.cpp b/yql/essentials/minikql/comp_nodes/mkql_apply.cpp index 0dd8a5a121a..97545dfebf2 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_apply.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_apply.cpp @@ -54,7 +54,7 @@ public:                      state.Args[i] = state.HolderFactory.CreateArrowBlock(arrow::Datum(batch.values[i]));                  } -                const auto ret = Callable_.Run(&state.ValueBuilder, state.Args.data()); +                const auto& ret = Callable_.Run(&state.ValueBuilder, state.Args.data());                  *res = TArrowBlock::From(ret).GetDatum();                  return arrow::Status::OK();              }) diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp index c7b68b54c45..12a9858ba10 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_block_compress.cpp @@ -84,6 +84,9 @@ public:          const auto bitmapValue = getres.second[BitmapIndex_](ctx, block);          const auto bitmap = CallInst::Create(getBitmap, { WrapArgumentForWindows(bitmapValue, ctx, block) }, "bitmap", block); + +        ValueCleanup(EValueRepresentation::Any, bitmapValue, ctx, block); +          const auto one = ConstantInt::get(bitmapType, 1);          const auto band = BinaryOperator::CreateAnd(bitmap, one, "band", block);          const auto good = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, band, one, "good", block); @@ -189,6 +192,9 @@ public:          const auto bitmapValue = getres.second[BitmapIndex_](ctx, block);          const auto pops = CallInst::Create(getPopCount, { WrapArgumentForWindows(bitmapValue, ctx, block) }, "pops", block); + +        ValueCleanup(EValueRepresentation::Any, bitmapValue, ctx, block); +          const auto good = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT, pops, ConstantInt::get(sizeType, 0), "good", block);          BranchInst::Create(fill, loop, good, block); @@ -270,7 +276,7 @@ public:                          s.IsFinished_ = true;                          break;                      case EFetchResult::One: -                        switch (s.Check(bitmap.Release())) { +                        switch (s.Check(bitmap)) {                              case TState::EStep::Copy:                                  for (ui32 i = 0; i < s.Values.size(); ++i) {                                      if (const auto out = output[i]) { @@ -415,6 +421,8 @@ public:          const auto checkPtr = CastInst::Create(Instruction::IntToPtr, checkFunc, PointerType::getUnqual(checkType), "check_func", block);          const auto check = CallInst::Create(checkType, checkPtr, {stateArg, bitmapArg}, "check", block); +        ValueCleanup(EValueRepresentation::Any, bitmap, ctx, block); +          result->addIncoming(ConstantInt::get(statusType, static_cast<i32>(EFetchResult::One)), block);          const auto step = SwitchInst::Create(check, save, 2U, block); @@ -552,9 +560,8 @@ private:          EStep Check(const NUdf::TUnboxedValuePod bitmapValue) {              Y_ABORT_UNLESS(!IsFinished_);              Y_ABORT_UNLESS(!InputSize_); -            const NUdf::TUnboxedValue b(std::move(bitmapValue));              auto& bitmap = Arrays_.back(); -            bitmap = TArrowBlock::From(b).GetDatum().array(); +            bitmap = TArrowBlock::From(bitmapValue).GetDatum().array();              if (!bitmap->length)                  return EStep::Skip; diff --git a/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp b/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp index 505c9d635a5..2c29ecd4c96 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_block_skiptake.cpp @@ -117,7 +117,12 @@ public:          block = test; -        const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(getres.second.back()(ctx, block), ctx, block) }, "height", block); + +        const auto countValue = getres.second.back()(ctx, block); +        const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(countValue, ctx, block) }, "height", block); + +        ValueCleanup(EValueRepresentation::Any, countValue, ctx, block); +          const auto part = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, count, height, "part", block);          const auto decr = BinaryOperator::CreateSub(count, height, "decr", block);          count->addIncoming(decr, block); @@ -201,6 +206,8 @@ public:                  const auto slice = CallInst::Create(sliceType, slicePtr, {ctx.GetFactory(), value, offset}, "slice", block); +                ValueCleanup(EValueRepresentation::Any, value, ctx, block); +                  output->addIncoming(slice, block);                  BranchInst::Create(exit, block); @@ -214,9 +221,8 @@ public:  #endif  private:      static NUdf::TUnboxedValuePod SliceBlock(const THolderFactory& holderFactory, NUdf::TUnboxedValuePod block, const uint64_t offset) { -        NUdf::TUnboxedValue b(block); -        auto& datum = TArrowBlock::From(b).GetDatum(); -        return datum.is_scalar() ? b.Release() : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), offset, datum.array()->length - offset)); +        const auto& datum = TArrowBlock::From(block).GetDatum(); +        return datum.is_scalar() ? block : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), offset, datum.array()->length - offset));      }      void RegisterDependencies() const final { @@ -325,7 +331,11 @@ public:          block = good; -        const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(getres.second.back()(ctx, block), ctx, block) }, "height", block); +        const auto countValue = getres.second.back()(ctx, block); +        const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(countValue, ctx, block) }, "height", block); + +        ValueCleanup(EValueRepresentation::Any, countValue, ctx, block); +          const auto part = CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, count, height, "part", block);          const auto decr = BinaryOperator::CreateSub(count, height, "decr", block); @@ -395,6 +405,8 @@ public:                  const auto slice = CallInst::Create(sliceType, slicePtr, {ctx.GetFactory(), value, size}, "slice", block); +                ValueCleanup(EValueRepresentation::Any, value, ctx, block); +                  output->addIncoming(slice, block);                  BranchInst::Create(exit, block); @@ -408,9 +420,8 @@ public:  #endif  private:      static NUdf::TUnboxedValuePod SliceBlock(const THolderFactory& holderFactory, NUdf::TUnboxedValuePod block, const uint64_t offset) { -        NUdf::TUnboxedValue b(block); -        auto& datum = TArrowBlock::From(b).GetDatum(); -        return datum.is_scalar() ? b.Release() : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), 0ULL, offset)); +        const auto& datum = TArrowBlock::From(block).GetDatum(); +        return datum.is_scalar() ? block : holderFactory.CreateArrowBlock(DeepSlice(datum.array(), 0ULL, offset));      }      void RegisterDependencies() const final { diff --git a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp index 2f81664239f..2013b7efd52 100644 --- a/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp +++ b/yql/essentials/minikql/comp_nodes/mkql_blocks.cpp @@ -383,7 +383,7 @@ public:              if (auto item = s.GetValue(ctx.HolderFactory); !item.IsInvalid())                  return item; -            if (const auto input = Flow_->GetValue(ctx).Release(); input.IsSpecial()) +            if (const auto input = Flow_->GetValue(ctx); input.IsSpecial())                  return input;              else                  s.Reset(input); @@ -443,6 +443,8 @@ public:          const auto setPtr = CastInst::Create(Instruction::IntToPtr, setFunc, PointerType::getUnqual(setType), "set", block);          CallInst::Create(setType, setPtr, {stateArg, input }, "", block); +        ValueCleanup(EValueRepresentation::Any, input, ctx, block); +          BranchInst::Create(work, block);          block = done; @@ -475,8 +477,7 @@ private:          }          void Reset(const NUdf::TUnboxedValuePod block) { -            const NUdf::TUnboxedValue v(block); -            const auto& datum = TArrowBlock::From(v).GetDatum(); +            const auto& datum = TArrowBlock::From(block).GetDatum();              MKQL_ENSURE(datum.is_arraylike(), "Expecting array as FromBlocks argument");              MKQL_ENSURE(Arrays_.empty(), "Not all input is processed");              if (datum.is_array()) { @@ -634,6 +635,8 @@ public:          const auto countValue = getres.second.back()(ctx, block);          const auto height = CallInst::Create(getCount, { WrapArgumentForWindows(countValue, ctx, block) }, "height", block); +        ValueCleanup(EValueRepresentation::Any, countValue, ctx, block); +          new StoreInst(height, countPtr, block);          new StoreInst(ConstantInt::get(indexType, 0), indexPtr, block); @@ -953,9 +956,8 @@ private:      }      arrow::Datum DoReplicate(const NUdf::TUnboxedValuePod val, const NUdf::TUnboxedValuePod cnt, TComputationContext& ctx) const { -        const NUdf::TUnboxedValue v(val), c(cnt); -        const auto value = TArrowBlock::From(v).GetDatum().scalar(); -        const ui64 count = TArrowBlock::From(c).GetDatum().scalar_as<arrow::UInt64Scalar>().value; +        const auto value = TArrowBlock::From(val).GetDatum().scalar(); +        const ui64 count = TArrowBlock::From(cnt).GetDatum().scalar_as<arrow::UInt64Scalar>().value;          const auto reader = MakeBlockReader(TTypeInfoHelper(), Type_);          const auto builder = MakeArrayBuilder(TTypeInfoHelper(), Type_, ctx.ArrowMemoryPool, count, &ctx.Builder->GetPgBuilder()); diff --git a/yql/essentials/minikql/computation/mkql_block_impl.cpp b/yql/essentials/minikql/computation/mkql_block_impl.cpp index ba7758b47de..2920a1ac3c3 100644 --- a/yql/essentials/minikql/computation/mkql_block_impl.cpp +++ b/yql/essentials/minikql/computation/mkql_block_impl.cpp @@ -18,8 +18,7 @@ extern "C" uint64_t GetBlockCount(const NYql::NUdf::TUnboxedValuePod data) {  }  extern "C" uint64_t GetBitmapPopCountCount(const NYql::NUdf::TUnboxedValuePod data) { -    const NYql::NUdf::TUnboxedValue v(data); -    const auto& arr = NKikimr::NMiniKQL::TArrowBlock::From(v).GetDatum().array(); +    const auto& arr = NKikimr::NMiniKQL::TArrowBlock::From(data).GetDatum().array();      const size_t len = (size_t)arr->length;      MKQL_ENSURE(arr->GetNullCount() == 0, "Bitmap block should not have nulls");      const ui8* src = arr->GetValues<ui8>(1); @@ -259,7 +258,8 @@ NUdf::TUnboxedValuePod TBlockFuncNode::DoCalculate(TComputationContext& ctx) con      std::vector<arrow::Datum> argDatums;      for (ui32 i = 0; i < ArgsNodes.size(); ++i) { -        argDatums.emplace_back(TArrowBlock::From(ArgsNodes[i]->GetValue(ctx)).GetDatum()); +        const auto& value = ArgsNodes[i]->GetValue(ctx); +        argDatums.emplace_back(TArrowBlock::From(value).GetDatum());          ARROW_DEBUG_CHECK_DATUM_TYPES(ArgsValuesDescr[i], argDatums.back().descr());      } @@ -352,7 +352,7 @@ void TBlockState::FillArrays() {      for (size_t i = 0U; i < Deques.size(); ++i) {          Deques[i].clear(); -        if (const auto value = Values[i]) { +        if (const auto& value = Values[i]) {              const auto& datum = TArrowBlock::From(value).GetDatum();              if (datum.is_scalar()) {                  return; diff --git a/yql/essentials/minikql/computation/mkql_computation_node.cpp b/yql/essentials/minikql/computation/mkql_computation_node.cpp index 2303e954a49..ec47df76345 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node.cpp @@ -39,7 +39,8 @@ TDatumProvider MakeDatumProvider(const arrow::Datum& datum) {  TDatumProvider MakeDatumProvider(const IComputationNode* node, TComputationContext& ctx) {      return [node, &ctx]() { -        return TArrowBlock::From(node->GetValue(ctx)).GetDatum(); +        const auto& value = node->GetValue(ctx); +        return TArrowBlock::From(value).GetDatum();      };  } diff --git a/yql/essentials/minikql/computation/mkql_computation_node_holders.h b/yql/essentials/minikql/computation/mkql_computation_node_holders.h index 4f6344a7435..f2bb09006de 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node_holders.h +++ b/yql/essentials/minikql/computation/mkql_computation_node_holders.h @@ -582,11 +582,13 @@ public:      {      } -    inline static TArrowBlock& From(const NUdf::TUnboxedValue& value) { -        return *static_cast<TArrowBlock*>(value.AsBoxed().Get()); +    inline static const TArrowBlock& From(const NUdf::TUnboxedValuePod& value) { +        return *static_cast<TArrowBlock*>(value.AsRawBoxed());      } -    inline arrow::Datum& GetDatum() { +    inline static const TArrowBlock& From(NUdf::TUnboxedValuePod&& value) = delete; + +    inline const arrow::Datum& GetDatum() const {          return Datum_;      } diff --git a/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp b/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp index e7b755cb195..84562ee82ce 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp @@ -170,7 +170,8 @@ public:          OutputItemType batch = Batch_.Get();          size_t nvalues = DatumToMemberIDMap_.size(); -        const auto& sizeDatum = TArrowBlock::From(value.GetElement(BatchLengthID_)).GetDatum(); +        const auto& sizeValue = value.GetElement(BatchLengthID_); +        const auto& sizeDatum = TArrowBlock::From(sizeValue).GetDatum();          Y_ENSURE(sizeDatum.is_scalar());          const auto& sizeScalar = sizeDatum.scalar();          const auto& sizeData = arrow::internal::checked_cast<const arrow::UInt64Scalar&>(*sizeScalar); @@ -179,7 +180,8 @@ public:          TVector<arrow::Datum> datums(nvalues);          for (size_t i = 0; i < nvalues; i++) {              const ui32 id = DatumToMemberIDMap_[i]; -            const auto& datum = TArrowBlock::From(value.GetElement(id)).GetDatum(); +            const auto& datumValue = value.GetElement(id); +            const auto& datum = TArrowBlock::From(datumValue).GetDatum();              datums[i] = datum;              if (datum.is_scalar()) {                  continue;  | 
