diff options
author | aneporada <aneporada@ydb.tech> | 2023-03-01 16:05:58 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2023-03-01 16:05:58 +0300 |
commit | 7b8096097dc79f46e096d02b56cddc7b50704bc7 (patch) | |
tree | d521724bb3c8d90e001a09a12de9193f72e82671 | |
parent | 313655d9256cea661cf09efb7c94d7c9d0a937d6 (diff) | |
download | ydb-7b8096097dc79f46e096d02b56cddc7b50704bc7.tar.gz |
Support strings and tuples in Clickhouse UDF (block mode)
-rw-r--r-- | ydb/library/yql/public/udf/arrow/block_builder.h | 15 | ||||
-rw-r--r-- | ydb/library/yql/public/udf/arrow/block_reader.h | 12 | ||||
-rw-r--r-- | ydb/library/yql/public/udf/arrow/util.cpp | 6 | ||||
-rw-r--r-- | ydb/library/yql/public/udf/arrow/util.h | 1 |
4 files changed, 23 insertions, 11 deletions
diff --git a/ydb/library/yql/public/udf/arrow/block_builder.h b/ydb/library/yql/public/udf/arrow/block_builder.h index 95dcd61f16..b4c71f235e 100644 --- a/ydb/library/yql/public/udf/arrow/block_builder.h +++ b/ydb/library/yql/public/udf/arrow/block_builder.h @@ -294,8 +294,8 @@ private: size_t CurrLen = 0; }; -template <typename T, bool Nullable> -class TFixedSizeArrayBuilder : public TArrayBuilderBase { +template<typename T, bool Nullable> +class TFixedSizeArrayBuilder final : public TArrayBuilderBase { public: TFixedSizeArrayBuilder(const ITypeInfoHelper& typeInfoHelper, std::shared_ptr<arrow::DataType> arrowType, arrow::MemoryPool& pool, size_t maxLen) : TArrayBuilderBase(typeInfoHelper, std::move(arrowType), pool, maxLen) @@ -432,9 +432,14 @@ private: }; template<typename TStringType, bool Nullable> -class TStringArrayBuilder : public TArrayBuilderBase { +class TStringArrayBuilder final : public TArrayBuilderBase { public: using TOffset = typename TStringType::offset_type; + TStringArrayBuilder(const ITypeInfoHelper& typeInfoHelper, std::shared_ptr<arrow::DataType> arrowType, arrow::MemoryPool& pool, size_t maxLen) + : TArrayBuilderBase(typeInfoHelper, std::move(arrowType), pool, maxLen) + { + Reserve(); + } TStringArrayBuilder(const ITypeInfoHelper& typeInfoHelper, const TType* type, arrow::MemoryPool& pool, size_t maxLen) : TArrayBuilderBase(typeInfoHelper, type, pool, maxLen) @@ -717,7 +722,7 @@ private: }; template<bool Nullable> -class TTupleArrayBuilder : public TArrayBuilderBase { +class TTupleArrayBuilder final : public TArrayBuilderBase { public: TTupleArrayBuilder(const ITypeInfoHelper& typeInfoHelper, const TType* type, arrow::MemoryPool& pool, size_t maxLen, TVector<TArrayBuilderBase::Ptr>&& children) @@ -878,7 +883,7 @@ private: std::unique_ptr<TTypedBufferBuilder<ui8>> NullBuilder; }; -class TExternalOptionalArrayBuilder : public TArrayBuilderBase { +class TExternalOptionalArrayBuilder final : public TArrayBuilderBase { public: TExternalOptionalArrayBuilder(const ITypeInfoHelper& typeInfoHelper, const TType* type, arrow::MemoryPool& pool, size_t maxLen, std::unique_ptr<TArrayBuilderBase>&& inner) : TArrayBuilderBase(typeInfoHelper, type, pool, maxLen) diff --git a/ydb/library/yql/public/udf/arrow/block_reader.h b/ydb/library/yql/public/udf/arrow/block_reader.h index 7c353ee45a..5302e8ffc4 100644 --- a/ydb/library/yql/public/udf/arrow/block_reader.h +++ b/ydb/library/yql/public/udf/arrow/block_reader.h @@ -27,8 +27,8 @@ struct TBlockItemSerializeProps { bool IsFixed = true; // true if each block item takes fixed size }; -template <typename T, bool Nullable> -class TFixedSizeBlockReader : public IBlockReader { +template<typename T, bool Nullable> +class TFixedSizeBlockReader final : public IBlockReader { public: TBlockItem GetItem(const arrow::ArrayData& data, size_t index) final { if constexpr (Nullable) { @@ -74,7 +74,7 @@ public: }; template<typename TStringType, bool Nullable> -class TStringBlockReader : public IBlockReader { +class TStringBlockReader final : public IBlockReader { public: using TOffset = typename TStringType::offset_type; @@ -135,8 +135,8 @@ public: } }; -template <bool Nullable> -class TTupleBlockReader : public IBlockReader { +template<bool Nullable> +class TTupleBlockReader final : public IBlockReader { public: TTupleBlockReader(TVector<std::unique_ptr<IBlockReader>>&& children) : Children(std::move(children)) @@ -206,7 +206,7 @@ private: TVector<TBlockItem> Items; }; -class TExternalOptionalBlockReader : public IBlockReader { +class TExternalOptionalBlockReader final : public IBlockReader { public: TExternalOptionalBlockReader(std::unique_ptr<IBlockReader>&& inner) : Inner(std::move(inner)) diff --git a/ydb/library/yql/public/udf/arrow/util.cpp b/ydb/library/yql/public/udf/arrow/util.cpp index aff82a8024..be385c43ed 100644 --- a/ydb/library/yql/public/udf/arrow/util.cpp +++ b/ydb/library/yql/public/udf/arrow/util.cpp @@ -103,6 +103,12 @@ std::shared_ptr<arrow::Buffer> MakeDenseBitmap(const ui8* srcSparse, size_t len, return bitmap; } +std::shared_ptr<arrow::Buffer> MakeDenseBitmapNegate(const ui8* srcSparse, size_t len, arrow::MemoryPool* pool) { + auto bitmap = AllocateBitmapWithReserve(len, pool); + CompressSparseBitmapNegate(bitmap->mutable_data(), srcSparse, len); + return bitmap; +} + std::shared_ptr<arrow::ArrayData> DeepSlice(const std::shared_ptr<arrow::ArrayData>& data, size_t offset, size_t len) { Y_ENSURE(data->length >= 0); Y_ENSURE(offset + len <= (size_t)data->length); diff --git a/ydb/library/yql/public/udf/arrow/util.h b/ydb/library/yql/public/udf/arrow/util.h index 1ed4cf8172..06651f259b 100644 --- a/ydb/library/yql/public/udf/arrow/util.h +++ b/ydb/library/yql/public/udf/arrow/util.h @@ -15,6 +15,7 @@ namespace NUdf { std::shared_ptr<arrow::Buffer> AllocateBitmapWithReserve(size_t bitCount, arrow::MemoryPool* pool); std::shared_ptr<arrow::Buffer> MakeDenseBitmap(const ui8* srcSparse, size_t len, arrow::MemoryPool* pool); +std::shared_ptr<arrow::Buffer> MakeDenseBitmapNegate(const ui8* srcSparse, size_t len, arrow::MemoryPool* pool); /// \brief Recursive version of ArrayData::Slice() method std::shared_ptr<arrow::ArrayData> DeepSlice(const std::shared_ptr<arrow::ArrayData>& data, size_t offset, size_t len); |