// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "contrib/libs/apache/arrow_next/cpp/src/arrow/tensor/converter.h" #include #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/buffer.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/status.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/type.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/checked_cast.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/visit_type_inline.h" namespace arrow20 { class MemoryPool; namespace internal { namespace { // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCSRIndex class SparseCSXMatrixConverter : private SparseTensorConverterMixin { using SparseTensorConverterMixin::AssignIndex; using SparseTensorConverterMixin::IsNonZero; public: SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool) : axis_(axis), tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {} Status Convert() { RETURN_NOT_OK(::arrow20::internal::CheckSparseIndexMaximumValue(index_value_type_, tensor_.shape())); const int index_elsize = index_value_type_->byte_width(); const int value_elsize = tensor_.type()->byte_width(); const int64_t ndim = tensor_.ndim(); if (ndim > 2) { return Status::Invalid("Invalid tensor dimension"); } const int major_axis = static_cast(axis_); const int64_t n_major = tensor_.shape()[major_axis]; const int64_t n_minor = tensor_.shape()[1 - major_axis]; ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero()); std::shared_ptr indptr_buffer; std::shared_ptr indices_buffer; ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateBuffer(value_elsize * nonzero_count, pool_)); auto* values = values_buffer->mutable_data(); const auto* tensor_data = tensor_.raw_data(); if (ndim <= 1) { return Status::NotImplemented("TODO for ndim <= 1"); } else { ARROW_ASSIGN_OR_RAISE(indptr_buffer, AllocateBuffer(index_elsize * (n_major + 1), pool_)); auto* indptr = indptr_buffer->mutable_data(); ARROW_ASSIGN_OR_RAISE(indices_buffer, AllocateBuffer(index_elsize * nonzero_count, pool_)); auto* indices = indices_buffer->mutable_data(); std::vector coords(2); int64_t k = 0; std::fill_n(indptr, index_elsize, 0); indptr += index_elsize; for (int64_t i = 0; i < n_major; ++i) { for (int64_t j = 0; j < n_minor; ++j) { if (axis_ == SparseMatrixCompressedAxis::ROW) { coords = {i, j}; } else { coords = {j, i}; } const int64_t offset = tensor_.CalculateValueOffset(coords); if (std::any_of(tensor_data + offset, tensor_data + offset + value_elsize, IsNonZero)) { std::copy_n(tensor_data + offset, value_elsize, values); values += value_elsize; AssignIndex(indices, j, index_elsize); indices += index_elsize; k++; } } AssignIndex(indptr, k, index_elsize); indptr += index_elsize; } } std::vector indptr_shape({n_major + 1}); std::shared_ptr indptr_tensor = std::make_shared(index_value_type_, indptr_buffer, indptr_shape); std::vector indices_shape({nonzero_count}); std::shared_ptr indices_tensor = std::make_shared(index_value_type_, indices_buffer, indices_shape); if (axis_ == SparseMatrixCompressedAxis::ROW) { sparse_index = std::make_shared(indptr_tensor, indices_tensor); } else { sparse_index = std::make_shared(indptr_tensor, indices_tensor); } data = std::move(values_buffer); return Status::OK(); } std::shared_ptr sparse_index; std::shared_ptr data; private: SparseMatrixCompressedAxis axis_; const Tensor& tensor_; const std::shared_ptr& index_value_type_; MemoryPool* pool_; }; } // namespace Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis, const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool, std::shared_ptr* out_sparse_index, std::shared_ptr* out_data) { SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool); RETURN_NOT_OK(converter.Convert()); *out_sparse_index = converter.sparse_index; *out_data = converter.data; return Status::OK(); } Result> MakeTensorFromSparseCSXMatrix( SparseMatrixCompressedAxis axis, MemoryPool* pool, const std::shared_ptr& indptr, const std::shared_ptr& indices, const int64_t non_zero_length, const std::shared_ptr& value_type, const std::vector& shape, const int64_t tensor_size, const uint8_t* raw_data, const std::vector& dim_names) { const auto* indptr_data = indptr->raw_data(); const auto* indices_data = indices->raw_data(); const int indptr_elsize = indptr->type()->byte_width(); const int indices_elsize = indices->type()->byte_width(); const auto& fw_value_type = checked_cast(*value_type); const int value_elsize = fw_value_type.byte_width(); ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateBuffer(value_elsize * tensor_size, pool)); auto values = values_buffer->mutable_data(); std::fill_n(values, value_elsize * tensor_size, 0); std::vector strides; RETURN_NOT_OK(ComputeRowMajorStrides(fw_value_type, shape, &strides)); const auto nc = shape[1]; int64_t offset = 0; for (int64_t i = 0; i < indptr->size() - 1; ++i) { const auto start = SparseTensorConverterMixin::GetIndexValue(indptr_data, indptr_elsize); const auto stop = SparseTensorConverterMixin::GetIndexValue( indptr_data + indptr_elsize, indptr_elsize); for (int64_t j = start; j < stop; ++j) { const auto index = SparseTensorConverterMixin::GetIndexValue( indices_data + j * indices_elsize, indices_elsize); switch (axis) { case SparseMatrixCompressedAxis::ROW: offset = (index + i * nc) * value_elsize; break; case SparseMatrixCompressedAxis::COLUMN: offset = (i + index * nc) * value_elsize; break; } std::copy_n(raw_data, value_elsize, values + offset); raw_data += value_elsize; } indptr_data += indptr_elsize; } return std::make_shared(value_type, std::move(values_buffer), shape, strides, dim_names); } Result> MakeTensorFromSparseCSRMatrix( MemoryPool* pool, const SparseCSRMatrix* sparse_tensor) { const auto& sparse_index = internal::checked_cast(*sparse_tensor->sparse_index()); const auto& indptr = sparse_index.indptr(); const auto& indices = sparse_index.indices(); const auto non_zero_length = sparse_tensor->non_zero_length(); return MakeTensorFromSparseCSXMatrix( SparseMatrixCompressedAxis::ROW, pool, indptr, indices, non_zero_length, sparse_tensor->type(), sparse_tensor->shape(), sparse_tensor->size(), sparse_tensor->raw_data(), sparse_tensor->dim_names()); } Result> MakeTensorFromSparseCSCMatrix( MemoryPool* pool, const SparseCSCMatrix* sparse_tensor) { const auto& sparse_index = internal::checked_cast(*sparse_tensor->sparse_index()); const auto& indptr = sparse_index.indptr(); const auto& indices = sparse_index.indices(); const auto non_zero_length = sparse_tensor->non_zero_length(); return MakeTensorFromSparseCSXMatrix( SparseMatrixCompressedAxis::COLUMN, pool, indptr, indices, non_zero_length, sparse_tensor->type(), sparse_tensor->shape(), sparse_tensor->size(), sparse_tensor->raw_data(), sparse_tensor->dim_names()); } } // namespace internal } // namespace arrow20