about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author aneporada <aneporada@ydb.tech> 2023-03-01 16:05:58 +0300
committer aneporada <aneporada@ydb.tech> 2023-03-01 16:05:58 +0300
commit 7b8096097dc79f46e096d02b56cddc7b50704bc7 (patch)
tree d521724bb3c8d90e001a09a12de9193f72e82671
parent 313655d9256cea661cf09efb7c94d7c9d0a937d6 (diff)
download ydb-7b8096097dc79f46e096d02b56cddc7b50704bc7.tar.gz
Support strings and tuples in Clickhouse UDF (block mode)
-rw-r--r-- ydb/library/yql/public/udf/arrow/block_builder.h 15
-rw-r--r-- ydb/library/yql/public/udf/arrow/block_reader.h 12
-rw-r--r-- ydb/library/yql/public/udf/arrow/util.cpp 6
-rw-r--r-- ydb/library/yql/public/udf/arrow/util.h 1
4 files changed, 23 insertions, 11 deletions
diff --git a/ydb/library/yql/public/udf/arrow/block_builder.h b/ydb/library/yql/public/udf/arrow/block_builder.h
index 95dcd61f16..b4c71f235e 100644
--- a/ydb/library/yql/public/udf/arrow/block_builder.h
+++ b/ydb/library/yql/public/udf/arrow/block_builder.h
@@ -294,8 +294,8 @@ private:
size_t CurrLen = 0;
};
-template <typename T, bool Nullable>
-class TFixedSizeArrayBuilder : public TArrayBuilderBase {
+template<typename T, bool Nullable>
+class TFixedSizeArrayBuilder final : public TArrayBuilderBase {
public:
TFixedSizeArrayBuilder(const ITypeInfoHelper& typeInfoHelper, std::shared_ptr<arrow::DataType> arrowType, arrow::MemoryPool& pool, size_t maxLen)
: TArrayBuilderBase(typeInfoHelper, std::move(arrowType), pool, maxLen)
@@ -432,9 +432,14 @@ private:
};
template<typename TStringType, bool Nullable>
-class TStringArrayBuilder : public TArrayBuilderBase {
+class TStringArrayBuilder final : public TArrayBuilderBase {
public:
using TOffset = typename TStringType::offset_type;
+ TStringArrayBuilder(const ITypeInfoHelper& typeInfoHelper, std::shared_ptr<arrow::DataType> arrowType, arrow::MemoryPool& pool, size_t maxLen)
+ : TArrayBuilderBase(typeInfoHelper, std::move(arrowType), pool, maxLen)
+ {
+ Reserve();
+ }
TStringArrayBuilder(const ITypeInfoHelper& typeInfoHelper, const TType* type, arrow::MemoryPool& pool, size_t maxLen)
: TArrayBuilderBase(typeInfoHelper, type, pool, maxLen)
@@ -717,7 +722,7 @@ private:
};
template<bool Nullable>
-class TTupleArrayBuilder : public TArrayBuilderBase {
+class TTupleArrayBuilder final : public TArrayBuilderBase {
public:
TTupleArrayBuilder(const ITypeInfoHelper& typeInfoHelper, const TType* type, arrow::MemoryPool& pool, size_t maxLen,
TVector<TArrayBuilderBase::Ptr>&& children)
@@ -878,7 +883,7 @@ private:
std::unique_ptr<TTypedBufferBuilder<ui8>> NullBuilder;
};
-class TExternalOptionalArrayBuilder : public TArrayBuilderBase {
+class TExternalOptionalArrayBuilder final : public TArrayBuilderBase {
public:
TExternalOptionalArrayBuilder(const ITypeInfoHelper& typeInfoHelper, const TType* type, arrow::MemoryPool& pool, size_t maxLen, std::unique_ptr<TArrayBuilderBase>&& inner)
: TArrayBuilderBase(typeInfoHelper, type, pool, maxLen)
diff --git a/ydb/library/yql/public/udf/arrow/block_reader.h b/ydb/library/yql/public/udf/arrow/block_reader.h
index 7c353ee45a..5302e8ffc4 100644
--- a/ydb/library/yql/public/udf/arrow/block_reader.h
+++ b/ydb/library/yql/public/udf/arrow/block_reader.h
@@ -27,8 +27,8 @@ struct TBlockItemSerializeProps {
bool IsFixed = true; // true if each block item takes fixed size
};
-template <typename T, bool Nullable>
-class TFixedSizeBlockReader : public IBlockReader {
+template<typename T, bool Nullable>
+class TFixedSizeBlockReader final : public IBlockReader {
public:
TBlockItem GetItem(const arrow::ArrayData& data, size_t index) final {
if constexpr (Nullable) {
@@ -74,7 +74,7 @@ public:
};
template<typename TStringType, bool Nullable>
-class TStringBlockReader : public IBlockReader {
+class TStringBlockReader final : public IBlockReader {
public:
using TOffset = typename TStringType::offset_type;
@@ -135,8 +135,8 @@ public:
}
};
-template <bool Nullable>
-class TTupleBlockReader : public IBlockReader {
+template<bool Nullable>
+class TTupleBlockReader final : public IBlockReader {
public:
TTupleBlockReader(TVector<std::unique_ptr<IBlockReader>>&& children)
: Children(std::move(children))
@@ -206,7 +206,7 @@ private:
TVector<TBlockItem> Items;
};
-class TExternalOptionalBlockReader : public IBlockReader {
+class TExternalOptionalBlockReader final : public IBlockReader {
public:
TExternalOptionalBlockReader(std::unique_ptr<IBlockReader>&& inner)
: Inner(std::move(inner))
diff --git a/ydb/library/yql/public/udf/arrow/util.cpp b/ydb/library/yql/public/udf/arrow/util.cpp
index aff82a8024..be385c43ed 100644
--- a/ydb/library/yql/public/udf/arrow/util.cpp
+++ b/ydb/library/yql/public/udf/arrow/util.cpp
@@ -103,6 +103,12 @@ std::shared_ptr<arrow::Buffer> MakeDenseBitmap(const ui8* srcSparse, size_t len,
return bitmap;
}
+std::shared_ptr<arrow::Buffer> MakeDenseBitmapNegate(const ui8* srcSparse, size_t len, arrow::MemoryPool* pool) {
+ auto bitmap = AllocateBitmapWithReserve(len, pool);
+ CompressSparseBitmapNegate(bitmap->mutable_data(), srcSparse, len);
+ return bitmap;
+}
+
std::shared_ptr<arrow::ArrayData> DeepSlice(const std::shared_ptr<arrow::ArrayData>& data, size_t offset, size_t len) {
Y_ENSURE(data->length >= 0);
Y_ENSURE(offset + len <= (size_t)data->length);
diff --git a/ydb/library/yql/public/udf/arrow/util.h b/ydb/library/yql/public/udf/arrow/util.h
index 1ed4cf8172..06651f259b 100644
--- a/ydb/library/yql/public/udf/arrow/util.h
+++ b/ydb/library/yql/public/udf/arrow/util.h
@@ -15,6 +15,7 @@ namespace NUdf {
std::shared_ptr<arrow::Buffer> AllocateBitmapWithReserve(size_t bitCount, arrow::MemoryPool* pool);
std::shared_ptr<arrow::Buffer> MakeDenseBitmap(const ui8* srcSparse, size_t len, arrow::MemoryPool* pool);
+std::shared_ptr<arrow::Buffer> MakeDenseBitmapNegate(const ui8* srcSparse, size_t len, arrow::MemoryPool* pool);
/// \brief Recursive version of ArrayData::Slice() method
std::shared_ptr<arrow::ArrayData> DeepSlice(const std::shared_ptr<arrow::ArrayData>& data, size_t offset, size_t len);