diff options
| author | maxim-yurchuk <[email protected]> | 2024-10-09 12:29:46 +0300 |
|---|---|---|
| committer | maxim-yurchuk <[email protected]> | 2024-10-09 13:14:22 +0300 |
| commit | 9731d8a4bb7ee2cc8554eaf133bb85498a4c7d80 (patch) | |
| tree | a8fb3181d5947c0d78cf402aa56e686130179049 /contrib/python/pyarrow | |
| parent | a44b779cd359f06c3ebbef4ec98c6b38609d9d85 (diff) | |
publishFullContrib: true for ydb
<HIDDEN_URL>
commit_hash:c82a80ac4594723cebf2c7387dec9c60217f603e
Diffstat (limited to 'contrib/python/pyarrow')
| -rw-r--r-- | contrib/python/pyarrow/.yandex_meta/yamaker.yaml | 18 | ||||
| -rw-r--r-- | contrib/python/pyarrow/patches/01-disable-hdfsio.patch | 39 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/__init__.pxd | 42 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/_cuda.pxd | 67 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/includes/__init__.pxd | 0 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/includes/libarrow_cuda.pxd | 107 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/includes/libarrow_dataset.pxd | 465 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/includes/libarrow_flight.pxd | 560 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/includes/libgandiva.pxd | 286 | ||||
| -rw-r--r-- | contrib/python/pyarrow/pyarrow/includes/libplasma.pxd | 25 |
10 files changed, 1609 insertions, 0 deletions
diff --git a/contrib/python/pyarrow/.yandex_meta/yamaker.yaml b/contrib/python/pyarrow/.yandex_meta/yamaker.yaml new file mode 100644 index 00000000000..44ecc9b4abb --- /dev/null +++ b/contrib/python/pyarrow/.yandex_meta/yamaker.yaml @@ -0,0 +1,18 @@ +requirements: + - contrib/libs/apache/arrow/cpp/src/arrow/python +exclude: + - pyarrow/tensorflow/plasma_op.cc + - pyarrow/cuda.py + - pyarrow/dataset.py + - pyarrow/flight.py + - pyarrow/hdfs.py + - pyarrow/gandiva.pyx + - pyarrow/plasma.py + - pyarrow/_cuda.pyx + - pyarrow/_dataset.pyx + - pyarrow/_flight.pyx + - pyarrow/_hdfs.pyx + - pyarrow/_hdfsio.pyx + - pyarrow/_plasma.pyx + - pyarrow/_s3fs.pyx + - pyarrow/cffi.py diff --git a/contrib/python/pyarrow/patches/01-disable-hdfsio.patch b/contrib/python/pyarrow/patches/01-disable-hdfsio.patch new file mode 100644 index 00000000000..7f234470af2 --- /dev/null +++ b/contrib/python/pyarrow/patches/01-disable-hdfsio.patch @@ -0,0 +1,39 @@ +--- contrib/python/pyarrow/pyarrow/__init__.py (index) ++++ contrib/python/pyarrow/pyarrow/__init__.py (working tree) +@@ -175,7 +175,8 @@ from pyarrow.lib import (NativeFile, PythonFile, + create_memory_map, MockOutputStream, + input_stream, output_stream) + +-from pyarrow._hdfsio import HdfsFile, have_libhdfs ++# Disable for Arcadia, arrow imported without support hdfs ++# from pyarrow._hdfsio import HdfsFile, have_libhdfs + + from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table, + concat_arrays, concat_tables) +@@ -200,7 +201,7 @@ from pyarrow.lib import (deserialize_from, deserialize, + SerializationCallbackError, + DeserializationCallbackError) + +-import pyarrow.hdfs as hdfs ++# import pyarrow.hdfs as hdfs + + from pyarrow.ipc import serialize_pandas, deserialize_pandas + import pyarrow.ipc as ipc +@@ -217,7 +218,7 @@ import pyarrow.types as types + + from pyarrow.filesystem import FileSystem as _FileSystem + from pyarrow.filesystem import LocalFileSystem as _LocalFileSystem +-from pyarrow.hdfs import HadoopFileSystem 
as _HadoopFileSystem ++# from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem + + from pyarrow.lib import SerializationContext as _SerializationContext + from pyarrow.lib import SerializedPyObject as _SerializedPyObject +@@ -239,7 +240,7 @@ _deprecated = { + "localfs": (_localfs, "LocalFileSystem"), + "FileSystem": (_FileSystem, "FileSystem"), + "LocalFileSystem": (_LocalFileSystem, "LocalFileSystem"), +- "HadoopFileSystem": (_HadoopFileSystem, "HadoopFileSystem"), ++ # "HadoopFileSystem": (_HadoopFileSystem, "HadoopFileSystem"), + } + + _serialization_deprecatd = { diff --git a/contrib/python/pyarrow/pyarrow/__init__.pxd b/contrib/python/pyarrow/pyarrow/__init__.pxd new file mode 100644 index 00000000000..8cc54b4c6bf --- /dev/null +++ b/contrib/python/pyarrow/pyarrow/__init__.pxd @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from libcpp.memory cimport shared_ptr +from pyarrow.includes.libarrow cimport (CArray, CBuffer, CDataType, + CField, CRecordBatch, CSchema, + CTable, CTensor, CSparseCOOTensor, + CSparseCSRMatrix, CSparseCSCMatrix, + CSparseCSFTensor) + +cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py": + cdef int import_pyarrow() except -1 + cdef object wrap_buffer(const shared_ptr[CBuffer]& buffer) + cdef object wrap_data_type(const shared_ptr[CDataType]& type) + cdef object wrap_field(const shared_ptr[CField]& field) + cdef object wrap_schema(const shared_ptr[CSchema]& schema) + cdef object wrap_array(const shared_ptr[CArray]& sp_array) + cdef object wrap_tensor(const shared_ptr[CTensor]& sp_tensor) + cdef object wrap_sparse_tensor_coo( + const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor) + cdef object wrap_sparse_tensor_csr( + const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor) + cdef object wrap_sparse_tensor_csc( + const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor) + cdef object wrap_sparse_tensor_csf( + const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor) + cdef object wrap_table(const shared_ptr[CTable]& ctable) + cdef object wrap_batch(const shared_ptr[CRecordBatch]& cbatch) diff --git a/contrib/python/pyarrow/pyarrow/_cuda.pxd b/contrib/python/pyarrow/pyarrow/_cuda.pxd new file mode 100644 index 00000000000..6acb8826d17 --- /dev/null +++ b/contrib/python/pyarrow/pyarrow/_cuda.pxd @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: language_level = 3 + +from pyarrow.lib cimport * +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * +from pyarrow.includes.libarrow_cuda cimport * + + +cdef class Context(_Weakrefable): + cdef: + shared_ptr[CCudaContext] context + int device_number + + cdef void init(self, const shared_ptr[CCudaContext]& ctx) + + +cdef class IpcMemHandle(_Weakrefable): + cdef: + shared_ptr[CCudaIpcMemHandle] handle + + cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h) + + +cdef class CudaBuffer(Buffer): + cdef: + shared_ptr[CCudaBuffer] cuda_buffer + object base + + cdef void init_cuda(self, + const shared_ptr[CCudaBuffer]& buffer, + object base) + + +cdef class HostBuffer(Buffer): + cdef: + shared_ptr[CCudaHostBuffer] host_buffer + + cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer) + + +cdef class BufferReader(NativeFile): + cdef: + CCudaBufferReader* reader + CudaBuffer buffer + + +cdef class BufferWriter(NativeFile): + cdef: + CCudaBufferWriter* writer + CudaBuffer buffer diff --git a/contrib/python/pyarrow/pyarrow/includes/__init__.pxd b/contrib/python/pyarrow/pyarrow/includes/__init__.pxd new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/contrib/python/pyarrow/pyarrow/includes/__init__.pxd diff --git a/contrib/python/pyarrow/pyarrow/includes/libarrow_cuda.pxd b/contrib/python/pyarrow/pyarrow/includes/libarrow_cuda.pxd new file mode 100644 index 00000000000..3ac943cf941 --- /dev/null +++ b/contrib/python/pyarrow/pyarrow/includes/libarrow_cuda.pxd @@ -0,0 +1,107 @@ +# Licensed to 
the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.libarrow cimport * + +cdef extern from "arrow/gpu/cuda_api.h" namespace "arrow::cuda" nogil: + + cdef cppclass CCudaDeviceManager" arrow::cuda::CudaDeviceManager": + @staticmethod + CResult[CCudaDeviceManager*] Instance() + CResult[shared_ptr[CCudaContext]] GetContext(int gpu_number) + CResult[shared_ptr[CCudaContext]] GetSharedContext(int gpu_number, + void* handle) + CStatus AllocateHost(int device_number, int64_t nbytes, + shared_ptr[CCudaHostBuffer]* buffer) + int num_devices() const + + cdef cppclass CCudaContext" arrow::cuda::CudaContext": + CResult[shared_ptr[CCudaBuffer]] Allocate(int64_t nbytes) + CResult[shared_ptr[CCudaBuffer]] View(uint8_t* data, int64_t nbytes) + CResult[shared_ptr[CCudaBuffer]] OpenIpcBuffer( + const CCudaIpcMemHandle& ipc_handle) + CStatus Synchronize() + int64_t bytes_allocated() const + const void* handle() const + int device_number() const + CResult[uintptr_t] GetDeviceAddress(uintptr_t addr) + + cdef cppclass CCudaIpcMemHandle" arrow::cuda::CudaIpcMemHandle": + @staticmethod + CResult[shared_ptr[CCudaIpcMemHandle]] FromBuffer( + const void* opaque_handle) + CResult[shared_ptr[CBuffer]] Serialize(CMemoryPool* 
pool) const + + cdef cppclass CCudaBuffer" arrow::cuda::CudaBuffer"(CBuffer): + CCudaBuffer(uint8_t* data, int64_t size, + const shared_ptr[CCudaContext]& context, + c_bool own_data=false, c_bool is_ipc=false) + CCudaBuffer(const shared_ptr[CCudaBuffer]& parent, + const int64_t offset, const int64_t size) + + @staticmethod + CResult[shared_ptr[CCudaBuffer]] FromBuffer(shared_ptr[CBuffer] buf) + + CStatus CopyToHost(const int64_t position, const int64_t nbytes, + void* out) const + CStatus CopyFromHost(const int64_t position, const void* data, + int64_t nbytes) + CStatus CopyFromDevice(const int64_t position, const void* data, + int64_t nbytes) + CStatus CopyFromAnotherDevice(const shared_ptr[CCudaContext]& src_ctx, + const int64_t position, const void* data, + int64_t nbytes) + CResult[shared_ptr[CCudaIpcMemHandle]] ExportForIpc() + shared_ptr[CCudaContext] context() const + + cdef cppclass \ + CCudaHostBuffer" arrow::cuda::CudaHostBuffer"(CMutableBuffer): + pass + + cdef cppclass \ + CCudaBufferReader" arrow::cuda::CudaBufferReader"(CBufferReader): + CCudaBufferReader(const shared_ptr[CBuffer]& buffer) + CResult[int64_t] Read(int64_t nbytes, void* buffer) + CResult[shared_ptr[CBuffer]] Read(int64_t nbytes) + + cdef cppclass \ + CCudaBufferWriter" arrow::cuda::CudaBufferWriter"(WritableFile): + CCudaBufferWriter(const shared_ptr[CCudaBuffer]& buffer) + CStatus Close() + CStatus Write(const void* data, int64_t nbytes) + CStatus WriteAt(int64_t position, const void* data, int64_t nbytes) + CStatus SetBufferSize(const int64_t buffer_size) + int64_t buffer_size() + int64_t num_bytes_buffered() const + + CResult[shared_ptr[CCudaHostBuffer]] AllocateCudaHostBuffer( + int device_number, const int64_t size) + + # Cuda prefix is added to avoid picking up arrow::cuda functions + # from arrow namespace. 
+ CResult[shared_ptr[CCudaBuffer]] \ + CudaSerializeRecordBatch" arrow::cuda::SerializeRecordBatch"\ + (const CRecordBatch& batch, + CCudaContext* ctx) + CResult[shared_ptr[CRecordBatch]] \ + CudaReadRecordBatch" arrow::cuda::ReadRecordBatch"\ + (const shared_ptr[CSchema]& schema, + CDictionaryMemo* dictionary_memo, + const shared_ptr[CCudaBuffer]& buffer, + CMemoryPool* pool) diff --git a/contrib/python/pyarrow/pyarrow/includes/libarrow_dataset.pxd b/contrib/python/pyarrow/pyarrow/includes/libarrow_dataset.pxd new file mode 100644 index 00000000000..51ae9881f92 --- /dev/null +++ b/contrib/python/pyarrow/pyarrow/includes/libarrow_dataset.pxd @@ -0,0 +1,465 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# distutils: language = c++ + +from libcpp.unordered_map cimport unordered_map + +from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport * +from pyarrow.includes.libarrow_fs cimport * +from pyarrow._parquet cimport * + + +cdef extern from "arrow/api.h" namespace "arrow" nogil: + + cdef cppclass CRecordBatchIterator "arrow::RecordBatchIterator"( + CIterator[shared_ptr[CRecordBatch]]): + pass + + +cdef extern from * namespace "arrow::compute": + # inlined from expression_internal.h to avoid + # proliferation of #include <unordered_map> + """ + #include <unordered_map> + + #include "arrow/type.h" + #include "arrow/datum.h" + + namespace arrow { + namespace compute { + struct KnownFieldValues { + std::unordered_map<FieldRef, Datum, FieldRef::Hash> map; + }; + } // namespace compute + } // namespace arrow + """ + cdef struct CKnownFieldValues "arrow::compute::KnownFieldValues": + unordered_map[CFieldRef, CDatum, CFieldRefHash] map + +cdef extern from "arrow/compute/exec/expression.h" \ + namespace "arrow::compute" nogil: + + cdef cppclass CExpression "arrow::compute::Expression": + c_bool Equals(const CExpression& other) const + c_string ToString() const + CResult[CExpression] Bind(const CSchema&) + + cdef CExpression CMakeScalarExpression \ + "arrow::compute::literal"(shared_ptr[CScalar] value) + + cdef CExpression CMakeFieldExpression \ + "arrow::compute::field_ref"(c_string name) + + cdef CExpression CMakeCallExpression \ + "arrow::compute::call"(c_string function, + vector[CExpression] arguments, + shared_ptr[CFunctionOptions] options) + + cdef CResult[shared_ptr[CBuffer]] CSerializeExpression \ + "arrow::compute::Serialize"(const CExpression&) + + cdef CResult[CExpression] CDeserializeExpression \ + "arrow::compute::Deserialize"(shared_ptr[CBuffer]) + + cdef CResult[CKnownFieldValues] \ + CExtractKnownFieldValues "arrow::compute::ExtractKnownFieldValues"( + const CExpression& partition_expression) + +ctypedef CStatus 
cb_writer_finish_internal(CFileWriter*) +ctypedef void cb_writer_finish(dict, CFileWriter*) + +cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: + + cdef cppclass CScanOptions "arrow::dataset::ScanOptions": + @staticmethod + shared_ptr[CScanOptions] Make(shared_ptr[CSchema] schema) + + shared_ptr[CSchema] dataset_schema + shared_ptr[CSchema] projected_schema + + cdef cppclass CFragmentScanOptions "arrow::dataset::FragmentScanOptions": + c_string type_name() const + + ctypedef CIterator[shared_ptr[CScanTask]] CScanTaskIterator \ + "arrow::dataset::ScanTaskIterator" + + cdef cppclass CScanTask" arrow::dataset::ScanTask": + CResult[CRecordBatchIterator] Execute() + + cdef cppclass CFragment "arrow::dataset::Fragment": + CResult[shared_ptr[CSchema]] ReadPhysicalSchema() + CResult[CScanTaskIterator] Scan(shared_ptr[CScanOptions] options) + c_bool splittable() const + c_string type_name() const + const CExpression& partition_expression() const + + ctypedef vector[shared_ptr[CFragment]] CFragmentVector \ + "arrow::dataset::FragmentVector" + + ctypedef CIterator[shared_ptr[CFragment]] CFragmentIterator \ + "arrow::dataset::FragmentIterator" + + cdef cppclass CInMemoryFragment "arrow::dataset::InMemoryFragment"( + CFragment): + CInMemoryFragment(vector[shared_ptr[CRecordBatch]] record_batches, + CExpression partition_expression) + + cdef cppclass CTaggedRecordBatch "arrow::dataset::TaggedRecordBatch": + shared_ptr[CRecordBatch] record_batch + shared_ptr[CFragment] fragment + + ctypedef CIterator[CTaggedRecordBatch] CTaggedRecordBatchIterator \ + "arrow::dataset::TaggedRecordBatchIterator" + + cdef cppclass CScanner "arrow::dataset::Scanner": + CScanner(shared_ptr[CDataset], shared_ptr[CScanOptions]) + CScanner(shared_ptr[CFragment], shared_ptr[CScanOptions]) + CResult[CScanTaskIterator] Scan() + CResult[CTaggedRecordBatchIterator] ScanBatches() + CResult[shared_ptr[CTable]] ToTable() + CResult[shared_ptr[CTable]] TakeRows(const CArray& indices) + 
CResult[shared_ptr[CTable]] Head(int64_t num_rows) + CResult[int64_t] CountRows() + CResult[CFragmentIterator] GetFragments() + CResult[shared_ptr[CRecordBatchReader]] ToRecordBatchReader() + const shared_ptr[CScanOptions]& options() + + cdef cppclass CScannerBuilder "arrow::dataset::ScannerBuilder": + CScannerBuilder(shared_ptr[CDataset], + shared_ptr[CScanOptions] scan_options) + CScannerBuilder(shared_ptr[CSchema], shared_ptr[CFragment], + shared_ptr[CScanOptions] scan_options) + + @staticmethod + shared_ptr[CScannerBuilder] FromRecordBatchReader( + shared_ptr[CRecordBatchReader] reader) + CStatus ProjectColumns "Project"(const vector[c_string]& columns) + CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns) + CStatus Filter(CExpression filter) + CStatus UseThreads(c_bool use_threads) + CStatus UseAsync(c_bool use_async) + CStatus Pool(CMemoryPool* pool) + CStatus BatchSize(int64_t batch_size) + CStatus FragmentScanOptions( + shared_ptr[CFragmentScanOptions] fragment_scan_options) + CResult[shared_ptr[CScanner]] Finish() + shared_ptr[CSchema] schema() const + + ctypedef vector[shared_ptr[CDataset]] CDatasetVector \ + "arrow::dataset::DatasetVector" + + cdef cppclass CDataset "arrow::dataset::Dataset": + const shared_ptr[CSchema] & schema() + CResult[CFragmentIterator] GetFragments() + CResult[CFragmentIterator] GetFragments(CExpression predicate) + const CExpression & partition_expression() + c_string type_name() + + CResult[shared_ptr[CDataset]] ReplaceSchema(shared_ptr[CSchema]) + + CResult[shared_ptr[CScannerBuilder]] NewScan() + + cdef cppclass CInMemoryDataset "arrow::dataset::InMemoryDataset"( + CDataset): + CInMemoryDataset(shared_ptr[CRecordBatchReader]) + CInMemoryDataset(shared_ptr[CTable]) + + cdef cppclass CUnionDataset "arrow::dataset::UnionDataset"( + CDataset): + @staticmethod + CResult[shared_ptr[CUnionDataset]] Make(shared_ptr[CSchema] schema, + CDatasetVector children) + + const CDatasetVector& children() const + + cdef 
cppclass CInspectOptions "arrow::dataset::InspectOptions": + int fragments + + cdef cppclass CFinishOptions "arrow::dataset::FinishOptions": + shared_ptr[CSchema] schema + CInspectOptions inspect_options + c_bool validate_fragments + + cdef cppclass CDatasetFactory "arrow::dataset::DatasetFactory": + CResult[vector[shared_ptr[CSchema]]] InspectSchemas(CInspectOptions) + CResult[shared_ptr[CSchema]] Inspect(CInspectOptions) + CResult[shared_ptr[CDataset]] FinishWithSchema "Finish"( + const shared_ptr[CSchema]& schema) + CResult[shared_ptr[CDataset]] Finish() + const CExpression& root_partition() + CStatus SetRootPartition(CExpression partition) + + cdef cppclass CUnionDatasetFactory "arrow::dataset::UnionDatasetFactory": + @staticmethod + CResult[shared_ptr[CDatasetFactory]] Make( + vector[shared_ptr[CDatasetFactory]] factories) + + cdef cppclass CFileSource "arrow::dataset::FileSource": + const c_string& path() const + const shared_ptr[CFileSystem]& filesystem() const + const shared_ptr[CBuffer]& buffer() const + # HACK: Cython can't handle all the overloads so don't declare them. + # This means invalid construction of CFileSource won't be caught in + # the C++ generation phase (though it will still be caught when + # the generated C++ is compiled). + CFileSource(...) 
+ + cdef cppclass CFileWriteOptions \ + "arrow::dataset::FileWriteOptions": + const shared_ptr[CFileFormat]& format() const + c_string type_name() const + + cdef cppclass CFileWriter \ + "arrow::dataset::FileWriter": + const shared_ptr[CFileFormat]& format() const + const shared_ptr[CSchema]& schema() const + const shared_ptr[CFileWriteOptions]& options() const + const CFileLocator& destination() const + + cdef cppclass CParquetFileWriter \ + "arrow::dataset::ParquetFileWriter"(CFileWriter): + const shared_ptr[FileWriter]& parquet_writer() const + + cdef cppclass CFileFormat "arrow::dataset::FileFormat": + shared_ptr[CFragmentScanOptions] default_fragment_scan_options + c_string type_name() const + CResult[shared_ptr[CSchema]] Inspect(const CFileSource&) const + CResult[shared_ptr[CFileFragment]] MakeFragment( + CFileSource source, + CExpression partition_expression, + shared_ptr[CSchema] physical_schema) + shared_ptr[CFileWriteOptions] DefaultWriteOptions() + + cdef cppclass CFileFragment "arrow::dataset::FileFragment"( + CFragment): + const CFileSource& source() const + const shared_ptr[CFileFormat]& format() const + + cdef cppclass CParquetFileWriteOptions \ + "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions): + shared_ptr[WriterProperties] writer_properties + shared_ptr[ArrowWriterProperties] arrow_writer_properties + + cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"( + CFileFragment): + const vector[int]& row_groups() const + shared_ptr[CFileMetaData] metadata() const + CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup( + CExpression predicate) + CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"( + CExpression predicate) + CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"( + vector[int] row_group_ids) + CStatus EnsureCompleteMetadata() + + cdef cppclass CFileSystemDatasetWriteOptions \ + "arrow::dataset::FileSystemDatasetWriteOptions": + shared_ptr[CFileWriteOptions] file_write_options + 
shared_ptr[CFileSystem] filesystem + c_string base_dir + shared_ptr[CPartitioning] partitioning + int max_partitions + c_string basename_template + function[cb_writer_finish_internal] writer_pre_finish + function[cb_writer_finish_internal] writer_post_finish + + cdef cppclass CFileSystemDataset \ + "arrow::dataset::FileSystemDataset"(CDataset): + @staticmethod + CResult[shared_ptr[CDataset]] Make( + shared_ptr[CSchema] schema, + CExpression source_partition, + shared_ptr[CFileFormat] format, + shared_ptr[CFileSystem] filesystem, + vector[shared_ptr[CFileFragment]] fragments) + + @staticmethod + CStatus Write( + const CFileSystemDatasetWriteOptions& write_options, + shared_ptr[CScanner] scanner) + + c_string type() + vector[c_string] files() + const shared_ptr[CFileFormat]& format() const + const shared_ptr[CFileSystem]& filesystem() const + const shared_ptr[CPartitioning]& partitioning() const + + cdef cppclass CParquetFileFormatReaderOptions \ + "arrow::dataset::ParquetFileFormat::ReaderOptions": + unordered_set[c_string] dict_columns + TimeUnit coerce_int96_timestamp_unit + + cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"( + CFileFormat): + CParquetFileFormatReaderOptions reader_options + CResult[shared_ptr[CFileFragment]] MakeFragment( + CFileSource source, + CExpression partition_expression, + shared_ptr[CSchema] physical_schema, + vector[int] row_groups) + + cdef cppclass CParquetFragmentScanOptions \ + "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions): + shared_ptr[CReaderProperties] reader_properties + shared_ptr[ArrowReaderProperties] arrow_reader_properties + c_bool enable_parallel_column_conversion + + cdef cppclass CIpcFileWriteOptions \ + "arrow::dataset::IpcFileWriteOptions"(CFileWriteOptions): + pass + + cdef cppclass CIpcFileFormat "arrow::dataset::IpcFileFormat"( + CFileFormat): + pass + + cdef cppclass CCsvFileWriteOptions \ + "arrow::dataset::CsvFileWriteOptions"(CFileWriteOptions): + 
shared_ptr[CCSVWriteOptions] write_options + CMemoryPool* pool + + cdef cppclass CCsvFileFormat "arrow::dataset::CsvFileFormat"( + CFileFormat): + CCSVParseOptions parse_options + + cdef cppclass CCsvFragmentScanOptions \ + "arrow::dataset::CsvFragmentScanOptions"(CFragmentScanOptions): + CCSVConvertOptions convert_options + CCSVReadOptions read_options + + cdef cppclass CPartitioning "arrow::dataset::Partitioning": + c_string type_name() const + CResult[CExpression] Parse(const c_string & path) const + const shared_ptr[CSchema] & schema() + + cdef cppclass CSegmentEncoding" arrow::dataset::SegmentEncoding": + pass + + CSegmentEncoding CSegmentEncodingNone\ + " arrow::dataset::SegmentEncoding::None" + CSegmentEncoding CSegmentEncodingUri\ + " arrow::dataset::SegmentEncoding::Uri" + + cdef cppclass CKeyValuePartitioningOptions \ + "arrow::dataset::KeyValuePartitioningOptions": + CSegmentEncoding segment_encoding + + cdef cppclass CHivePartitioningOptions \ + "arrow::dataset::HivePartitioningOptions": + CSegmentEncoding segment_encoding + c_string null_fallback + + cdef cppclass CPartitioningFactoryOptions \ + "arrow::dataset::PartitioningFactoryOptions": + c_bool infer_dictionary + shared_ptr[CSchema] schema + CSegmentEncoding segment_encoding + + cdef cppclass CHivePartitioningFactoryOptions \ + "arrow::dataset::HivePartitioningFactoryOptions": + c_bool infer_dictionary + c_string null_fallback + shared_ptr[CSchema] schema + CSegmentEncoding segment_encoding + + cdef cppclass CPartitioningFactory "arrow::dataset::PartitioningFactory": + pass + + cdef cppclass CDirectoryPartitioning \ + "arrow::dataset::DirectoryPartitioning"(CPartitioning): + CDirectoryPartitioning(shared_ptr[CSchema] schema, + vector[shared_ptr[CArray]] dictionaries) + + @staticmethod + shared_ptr[CPartitioningFactory] MakeFactory( + vector[c_string] field_names, CPartitioningFactoryOptions) + + vector[shared_ptr[CArray]] dictionaries() const + + cdef cppclass CHivePartitioning \ + 
"arrow::dataset::HivePartitioning"(CPartitioning): + CHivePartitioning(shared_ptr[CSchema] schema, + vector[shared_ptr[CArray]] dictionaries, + CHivePartitioningOptions options) + + @staticmethod + shared_ptr[CPartitioningFactory] MakeFactory( + CHivePartitioningFactoryOptions) + + vector[shared_ptr[CArray]] dictionaries() const + + cdef cppclass CPartitioningOrFactory \ + "arrow::dataset::PartitioningOrFactory": + CPartitioningOrFactory(shared_ptr[CPartitioning]) + CPartitioningOrFactory(shared_ptr[CPartitioningFactory]) + CPartitioningOrFactory & operator = (shared_ptr[CPartitioning]) + CPartitioningOrFactory & operator = ( + shared_ptr[CPartitioningFactory]) + shared_ptr[CPartitioning] partitioning() const + shared_ptr[CPartitioningFactory] factory() const + + cdef cppclass CFileSystemFactoryOptions \ + "arrow::dataset::FileSystemFactoryOptions": + CPartitioningOrFactory partitioning + c_string partition_base_dir + c_bool exclude_invalid_files + vector[c_string] selector_ignore_prefixes + + cdef cppclass CFileSystemDatasetFactory \ + "arrow::dataset::FileSystemDatasetFactory"( + CDatasetFactory): + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromPaths "Make"( + shared_ptr[CFileSystem] filesystem, + vector[c_string] paths, + shared_ptr[CFileFormat] format, + CFileSystemFactoryOptions options + ) + + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromSelector "Make"( + shared_ptr[CFileSystem] filesystem, + CFileSelector, + shared_ptr[CFileFormat] format, + CFileSystemFactoryOptions options + ) + + cdef cppclass CParquetFactoryOptions \ + "arrow::dataset::ParquetFactoryOptions": + CPartitioningOrFactory partitioning + c_string partition_base_dir + c_bool validate_column_chunk_paths + + cdef cppclass CParquetDatasetFactory \ + "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory): + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"( + const c_string& metadata_path, + shared_ptr[CFileSystem] filesystem, + 
shared_ptr[CParquetFileFormat] format, + CParquetFactoryOptions options + ) + + @staticmethod + CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"( + const CFileSource& metadata_path, + const c_string& base_path, + shared_ptr[CFileSystem] filesystem, + shared_ptr[CParquetFileFormat] format, + CParquetFactoryOptions options + ) diff --git a/contrib/python/pyarrow/pyarrow/includes/libarrow_flight.pxd b/contrib/python/pyarrow/pyarrow/includes/libarrow_flight.pxd new file mode 100644 index 00000000000..2ac737abaa0 --- /dev/null +++ b/contrib/python/pyarrow/pyarrow/includes/libarrow_flight.pxd @@ -0,0 +1,560 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+
+
+cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:  # Flight C++ API; each entity below carries an explicit fully-qualified C++ name
+    cdef char* CPyServerMiddlewareName\
+        " arrow::py::flight::kPyServerMiddlewareName"
+
+    cdef cppclass CActionType" arrow::flight::ActionType":
+        c_string type
+        c_string description
+
+    cdef cppclass CAction" arrow::flight::Action":
+        c_string type
+        shared_ptr[CBuffer] body
+
+    cdef cppclass CFlightResult" arrow::flight::Result":  # Cython-side name differs from the C++ name (Result)
+        CFlightResult()
+        CFlightResult(CFlightResult)
+        shared_ptr[CBuffer] body
+
+    cdef cppclass CBasicAuth" arrow::flight::BasicAuth":
+        CBasicAuth()
+        CBasicAuth(CBuffer)
+        CBasicAuth(CBasicAuth)
+        c_string username
+        c_string password
+
+    cdef cppclass CResultStream" arrow::flight::ResultStream":
+        CStatus Next(unique_ptr[CFlightResult]* result)
+
+    cdef cppclass CDescriptorType \
+            " arrow::flight::FlightDescriptor::DescriptorType":  # bound as an opaque class exposing only ==; its values follow as extern constants
+        bint operator==(CDescriptorType)
+
+    CDescriptorType CDescriptorTypeUnknown\
+        " arrow::flight::FlightDescriptor::UNKNOWN"
+    CDescriptorType CDescriptorTypePath\
+        " arrow::flight::FlightDescriptor::PATH"
+    CDescriptorType CDescriptorTypeCmd\
+        " arrow::flight::FlightDescriptor::CMD"
+
+    cdef cppclass CFlightDescriptor" arrow::flight::FlightDescriptor":
+        CDescriptorType type
+        c_string cmd
+        vector[c_string] path
+        CStatus SerializeToString(c_string* out)
+
+        @staticmethod
+        CStatus Deserialize(const c_string& serialized,
+                            CFlightDescriptor* out)
+        bint operator==(CFlightDescriptor)
+
+    cdef cppclass CTicket" arrow::flight::Ticket":
+        CTicket()
+        c_string ticket
+        bint operator==(CTicket)
+        CStatus SerializeToString(c_string* out)
+
+        @staticmethod
+        CStatus Deserialize(const c_string& serialized, CTicket* out)
+
+    cdef cppclass CCriteria" arrow::flight::Criteria":
+        CCriteria()
+        c_string expression
+
+    cdef cppclass CLocation" arrow::flight::Location":
+        CLocation()
+        c_string ToString()
+        c_bool Equals(const CLocation& other)
+
+        @staticmethod
+        CStatus Parse(c_string& uri_string, CLocation* location)  # the static factories below all write their result through the trailing CLocation* argument
+
+        @staticmethod
+        CStatus ForGrpcTcp(c_string& host, int port, CLocation* location)
+
+        @staticmethod
+        CStatus ForGrpcTls(c_string& host, int port, CLocation* location)
+
+        @staticmethod
+        CStatus ForGrpcUnix(c_string& path, CLocation* location)
+
+    cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint":
+        CFlightEndpoint()
+
+        CTicket ticket
+        vector[CLocation] locations
+
+        bint operator==(CFlightEndpoint)
+
+    cdef cppclass CFlightInfo" arrow::flight::FlightInfo":
+        CFlightInfo(CFlightInfo info)
+        int64_t total_records()
+        int64_t total_bytes()
+        CStatus GetSchema(CDictionaryMemo* memo, shared_ptr[CSchema]* out)
+        CFlightDescriptor& descriptor()
+        const vector[CFlightEndpoint]& endpoints()
+        CStatus SerializeToString(c_string* out)
+
+        @staticmethod
+        CStatus Deserialize(const c_string& serialized,
+                            unique_ptr[CFlightInfo]* out)
+
+    cdef cppclass CSchemaResult" arrow::flight::SchemaResult":
+        CSchemaResult(CSchemaResult result)
+        CStatus GetSchema(CDictionaryMemo* memo, shared_ptr[CSchema]* out)
+
+    cdef cppclass CFlightListing" arrow::flight::FlightListing":
+        CStatus Next(unique_ptr[CFlightInfo]* info)
+
+    cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing":
+        CSimpleFlightListing(vector[CFlightInfo]&& info)
+
+    cdef cppclass CFlightPayload" arrow::flight::FlightPayload":
+        shared_ptr[CBuffer] descriptor
+        shared_ptr[CBuffer] app_metadata
+        CIpcPayload ipc_message
+
+    cdef cppclass CFlightDataStream" arrow::flight::FlightDataStream":
+        shared_ptr[CSchema] schema()
+        CStatus Next(CFlightPayload*)
+
+    cdef cppclass CFlightStreamChunk" arrow::flight::FlightStreamChunk":
+        CFlightStreamChunk()
+        shared_ptr[CRecordBatch] data
+        shared_ptr[CBuffer] app_metadata
+
+    cdef cppclass CMetadataRecordBatchReader \
+            " arrow::flight::MetadataRecordBatchReader":  # base class of the stream/message readers declared below
+        CResult[shared_ptr[CSchema]] GetSchema()
+        CStatus Next(CFlightStreamChunk* out)
+        CStatus ReadAll(shared_ptr[CTable]* table)
+
+    CResult[shared_ptr[CRecordBatchReader]] MakeRecordBatchReader\
+        " arrow::flight::MakeRecordBatchReader"(
+            shared_ptr[CMetadataRecordBatchReader])
+
+    cdef cppclass CMetadataRecordBatchWriter \
+            " arrow::flight::MetadataRecordBatchWriter"(CRecordBatchWriter):  # base class of the message/stream writers declared below
+        CStatus Begin(shared_ptr[CSchema] schema,
+                      const CIpcWriteOptions& options)
+        CStatus WriteMetadata(shared_ptr[CBuffer] app_metadata)
+        CStatus WriteWithMetadata(const CRecordBatch& batch,
+                                  shared_ptr[CBuffer] app_metadata)
+
+    cdef cppclass CFlightStreamReader \
+            " arrow::flight::FlightStreamReader"(CMetadataRecordBatchReader):
+        void Cancel()
+        CStatus ReadAllWithStopToken" ReadAll"\
+            (shared_ptr[CTable]* table, const CStopToken& stop_token)  # separate Cython name for the C++ ReadAll overload that takes a stop token
+
+    cdef cppclass CFlightMessageReader \
+            " arrow::flight::FlightMessageReader"(CMetadataRecordBatchReader):
+        CFlightDescriptor& descriptor()
+
+    cdef cppclass CFlightMessageWriter \
+            " arrow::flight::FlightMessageWriter"(CMetadataRecordBatchWriter):
+        pass  # no members bound beyond those of the declared base class
+
+    cdef cppclass CFlightStreamWriter \
+            " arrow::flight::FlightStreamWriter"(CMetadataRecordBatchWriter):
+        CStatus DoneWriting()
+
+    cdef cppclass CRecordBatchStream \
+            " arrow::flight::RecordBatchStream"(CFlightDataStream):
+        CRecordBatchStream(shared_ptr[CRecordBatchReader]& reader,
+                           const CIpcWriteOptions& options)
+
+    cdef cppclass CFlightMetadataReader" arrow::flight::FlightMetadataReader":
+        CStatus ReadMetadata(shared_ptr[CBuffer]* out)
+
+    cdef cppclass CFlightMetadataWriter" arrow::flight::FlightMetadataWriter":
+        CStatus WriteMetadata(const CBuffer& message)
+
+    cdef cppclass CServerAuthReader" arrow::flight::ServerAuthReader":
+        CStatus Read(c_string* token)
+
+    cdef cppclass CServerAuthSender" arrow::flight::ServerAuthSender":
+        CStatus Write(c_string& token)
+
+    cdef cppclass CClientAuthReader" arrow::flight::ClientAuthReader":
+        CStatus Read(c_string* token)
+
+    cdef cppclass CClientAuthSender" arrow::flight::ClientAuthSender":
+        CStatus Write(c_string& token)
+
+    cdef cppclass CServerAuthHandler" arrow::flight::ServerAuthHandler":
+        pass  # opaque here; PyServerAuthHandler in the flight.h block derives from it
+
+    cdef cppclass CClientAuthHandler" arrow::flight::ClientAuthHandler":
+        pass  # opaque here; PyClientAuthHandler in the flight.h block derives from it
+
+    cdef cppclass CServerCallContext" arrow::flight::ServerCallContext":
+        c_string& peer_identity()
+        c_string& peer()
+        c_bool is_cancelled()
+        CServerMiddleware* GetMiddleware(const c_string& key)
+
+    cdef cppclass CTimeoutDuration" arrow::flight::TimeoutDuration":
+        CTimeoutDuration(double)
+
+    cdef cppclass CFlightCallOptions" arrow::flight::FlightCallOptions":
+        CFlightCallOptions()
+        CTimeoutDuration timeout
+        CIpcWriteOptions write_options
+        vector[pair[c_string, c_string]] headers
+        CStopToken stop_token
+
+    cdef cppclass CCertKeyPair" arrow::flight::CertKeyPair":
+        CCertKeyPair()
+        c_string pem_cert
+        c_string pem_key
+
+    cdef cppclass CFlightMethod" arrow::flight::FlightMethod":  # bound as an opaque class exposing only ==; its values follow as extern constants
+        bint operator==(CFlightMethod)
+
+    CFlightMethod CFlightMethodInvalid\
+        " arrow::flight::FlightMethod::Invalid"
+    CFlightMethod CFlightMethodHandshake\
+        " arrow::flight::FlightMethod::Handshake"
+    CFlightMethod CFlightMethodListFlights\
+        " arrow::flight::FlightMethod::ListFlights"
+    CFlightMethod CFlightMethodGetFlightInfo\
+        " arrow::flight::FlightMethod::GetFlightInfo"
+    CFlightMethod CFlightMethodGetSchema\
+        " arrow::flight::FlightMethod::GetSchema"
+    CFlightMethod CFlightMethodDoGet\
+        " arrow::flight::FlightMethod::DoGet"
+    CFlightMethod CFlightMethodDoPut\
+        " arrow::flight::FlightMethod::DoPut"
+    CFlightMethod CFlightMethodDoAction\
+        " arrow::flight::FlightMethod::DoAction"
+    CFlightMethod CFlightMethodListActions\
+        " arrow::flight::FlightMethod::ListActions"
+    CFlightMethod CFlightMethodDoExchange\
+        " arrow::flight::FlightMethod::DoExchange"
+
+    cdef cppclass CCallInfo" arrow::flight::CallInfo":
+        CFlightMethod method
+
+    # This is really std::unordered_multimap, but Cython has no
+    # bindings for it, so treat it as an opaque class and bind the
+    # methods we need
+    cdef cppclass CCallHeaders" arrow::flight::CallHeaders":
+        cppclass const_iterator:
+            pair[c_string, c_string] operator*()
+            const_iterator operator++()
+            bint operator==(const_iterator)
+            bint operator!=(const_iterator)
+        const_iterator cbegin()
+        const_iterator cend()
+
+    cdef cppclass CAddCallHeaders" arrow::flight::AddCallHeaders":
+        void AddHeader(const c_string& key, const c_string& value)
+
+    cdef cppclass CServerMiddleware" arrow::flight::ServerMiddleware":
+        c_string name()
+
+    cdef cppclass CServerMiddlewareFactory\
+            " arrow::flight::ServerMiddlewareFactory":
+        pass  # opaque here; CPyServerMiddlewareFactory in the flight.h block derives from it
+
+    cdef cppclass CClientMiddleware" arrow::flight::ClientMiddleware":
+        pass  # opaque here; CPyClientMiddleware in the flight.h block derives from it
+
+    cdef cppclass CClientMiddlewareFactory\
+            " arrow::flight::ClientMiddlewareFactory":
+        pass  # opaque here; CPyClientMiddlewareFactory in the flight.h block derives from it
+
+    cdef cppclass CFlightServerOptions" arrow::flight::FlightServerOptions":
+        CFlightServerOptions(const CLocation& location)
+        CLocation location
+        unique_ptr[CServerAuthHandler] auth_handler
+        vector[CCertKeyPair] tls_certificates
+        c_bool verify_client
+        c_string root_certificates
+        vector[pair[c_string, shared_ptr[CServerMiddlewareFactory]]] middleware
+
+    cdef cppclass CFlightClientOptions" arrow::flight::FlightClientOptions":
+        c_string tls_root_certs
+        c_string cert_chain
+        c_string private_key
+        c_string override_hostname
+        vector[shared_ptr[CClientMiddlewareFactory]] middleware
+        int64_t write_size_limit_bytes
+        vector[pair[c_string, CIntStringVariant]] generic_options  # CIntStringVariant is declared in the arrow/util/variant.h block at the end of this file
+        c_bool disable_server_verification
+
+        @staticmethod
+        CFlightClientOptions Defaults()
+
+    cdef cppclass CFlightClient" arrow::flight::FlightClient":  # methods return CStatus and write results through trailing pointer arguments, except AuthenticateBasicToken (CResult)
+        @staticmethod
+        CStatus Connect(const CLocation& location,
+                        const CFlightClientOptions& options,
+                        unique_ptr[CFlightClient]* client)
+
+        CStatus Authenticate(CFlightCallOptions& options,
+                             unique_ptr[CClientAuthHandler] auth_handler)
+
+        CResult[pair[c_string, c_string]] AuthenticateBasicToken(
+            CFlightCallOptions& options,
+            const c_string& username,
+            const c_string& password)
+
+        CStatus DoAction(CFlightCallOptions& options, CAction& action,
+                         unique_ptr[CResultStream]* results)
+        CStatus ListActions(CFlightCallOptions& options,
+                            vector[CActionType]* actions)
+
+        CStatus ListFlights(CFlightCallOptions& options, CCriteria criteria,
+                            unique_ptr[CFlightListing]* listing)
+        CStatus GetFlightInfo(CFlightCallOptions& options,
+                              CFlightDescriptor& descriptor,
+                              unique_ptr[CFlightInfo]* info)
+        CStatus GetSchema(CFlightCallOptions& options,
+                          CFlightDescriptor& descriptor,
+                          unique_ptr[CSchemaResult]* result)
+        CStatus DoGet(CFlightCallOptions& options, CTicket& ticket,
+                      unique_ptr[CFlightStreamReader]* stream)
+        CStatus DoPut(CFlightCallOptions& options,
+                      CFlightDescriptor& descriptor,
+                      shared_ptr[CSchema]& schema,
+                      unique_ptr[CFlightStreamWriter]* stream,
+                      unique_ptr[CFlightMetadataReader]* reader)
+        CStatus DoExchange(CFlightCallOptions& options,
+                           CFlightDescriptor& descriptor,
+                           unique_ptr[CFlightStreamWriter]* writer,
+                           unique_ptr[CFlightStreamReader]* reader)
+
+    cdef cppclass CFlightStatusCode" arrow::flight::FlightStatusCode":  # bound as an opaque class exposing only ==; its values follow as extern constants
+        bint operator==(CFlightStatusCode)
+
+    CFlightStatusCode CFlightStatusInternal \
+        " arrow::flight::FlightStatusCode::Internal"
+    CFlightStatusCode CFlightStatusTimedOut \
+        " arrow::flight::FlightStatusCode::TimedOut"
+    CFlightStatusCode CFlightStatusCancelled \
+        " arrow::flight::FlightStatusCode::Cancelled"
+    CFlightStatusCode CFlightStatusUnauthenticated \
+        " arrow::flight::FlightStatusCode::Unauthenticated"
+    CFlightStatusCode CFlightStatusUnauthorized \
+        " arrow::flight::FlightStatusCode::Unauthorized"
+    CFlightStatusCode CFlightStatusUnavailable \
+        " arrow::flight::FlightStatusCode::Unavailable"
+    CFlightStatusCode CFlightStatusFailed \
+        " arrow::flight::FlightStatusCode::Failed"
+
+    cdef cppclass FlightStatusDetail" arrow::flight::FlightStatusDetail":
+        CFlightStatusCode code()
+        c_string extra_info()
+
+        @staticmethod
+        shared_ptr[FlightStatusDetail] UnwrapStatus(const CStatus& status)
+
+    cdef cppclass FlightWriteSizeStatusDetail\
+            " arrow::flight::FlightWriteSizeStatusDetail":
+        int64_t limit()
+        int64_t actual()
+
+        @staticmethod
+        shared_ptr[FlightWriteSizeStatusDetail] UnwrapStatus(
+            const CStatus& status)
+
+    cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
+        (CFlightStatusCode code, const c_string& message)  # two-argument overload
+
+    cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
+        (CFlightStatusCode code,
+         const c_string& message,
+         const c_string& extra_info)  # overload that also takes extra_info
+
+# Callbacks for implementing Flight servers
+# Use typedef to emulate syntax for std::function<void(..)>
+ctypedef CStatus cb_list_flights(object, const CServerCallContext&,
+                                 const CCriteria*,
+                                 unique_ptr[CFlightListing]*)  # every callback type here takes a Python object first and returns CStatus
+ctypedef CStatus cb_get_flight_info(object, const CServerCallContext&,
+                                    const CFlightDescriptor&,
+                                    unique_ptr[CFlightInfo]*)
+ctypedef CStatus cb_get_schema(object, const CServerCallContext&,
+                               const CFlightDescriptor&,
+                               unique_ptr[CSchemaResult]*)
+ctypedef CStatus cb_do_put(object, const CServerCallContext&,
+                           unique_ptr[CFlightMessageReader],
+                           unique_ptr[CFlightMetadataWriter])
+ctypedef CStatus cb_do_get(object, const CServerCallContext&,
+                           const CTicket&,
+                           unique_ptr[CFlightDataStream]*)
+ctypedef CStatus cb_do_exchange(object, const CServerCallContext&,
+                                unique_ptr[CFlightMessageReader],
+                                unique_ptr[CFlightMessageWriter])
+ctypedef CStatus cb_do_action(object, const CServerCallContext&,
+                              const CAction&,
+                              unique_ptr[CResultStream]*)
+ctypedef CStatus cb_list_actions(object, const CServerCallContext&,
+                                 vector[CActionType]*)
+ctypedef CStatus cb_result_next(object, unique_ptr[CFlightResult]*)
+ctypedef CStatus cb_data_stream_next(object, CFlightPayload*)
+ctypedef CStatus cb_server_authenticate(object, CServerAuthSender*,
+                                        CServerAuthReader*)
+ctypedef CStatus cb_is_valid(object, const c_string&, c_string*)
+ctypedef CStatus cb_client_authenticate(object, CClientAuthSender*,
+                                        CClientAuthReader*)
+ctypedef CStatus cb_get_token(object, c_string*)
+
+ctypedef CStatus cb_middleware_sending_headers(object, CAddCallHeaders*)
+ctypedef CStatus cb_middleware_call_completed(object, const CStatus&)
+ctypedef CStatus cb_client_middleware_received_headers(
+    object, const CCallHeaders&)
+ctypedef CStatus cb_server_middleware_start_call(
+    object,
+    const CCallInfo&,
+    const CCallHeaders&,
+    shared_ptr[CServerMiddleware]*)
+ctypedef CStatus cb_client_middleware_start_call(
+    object,
+    const CCallInfo&,
+    unique_ptr[CClientMiddleware]*)
+
+cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil:  # Python-facing adapter classes; the vtable structs hold function[...] slots typed by the cb_* ctypedefs above
+    cdef cppclass PyFlightServerVtable:
+        PyFlightServerVtable()
+        function[cb_list_flights] list_flights
+        function[cb_get_flight_info] get_flight_info
+        function[cb_get_schema] get_schema
+        function[cb_do_put] do_put
+        function[cb_do_get] do_get
+        function[cb_do_exchange] do_exchange
+        function[cb_do_action] do_action
+        function[cb_list_actions] list_actions
+
+    cdef cppclass PyServerAuthHandlerVtable:
+        PyServerAuthHandlerVtable()
+        function[cb_server_authenticate] authenticate
+        function[cb_is_valid] is_valid
+
+    cdef cppclass PyClientAuthHandlerVtable:
+        PyClientAuthHandlerVtable()
+        function[cb_client_authenticate] authenticate
+        function[cb_get_token] get_token
+
+    cdef cppclass PyFlightServer:
+        PyFlightServer(object server, PyFlightServerVtable vtable)
+
+        CStatus Init(CFlightServerOptions& options)
+        int port()
+        CStatus ServeWithSignals() except *  # only method here declared `except *`, i.e. Cython checks for a pending Python exception after the call
+        CStatus Shutdown()
+        CStatus Wait()
+
+    cdef cppclass PyServerAuthHandler\
+            " arrow::py::flight::PyServerAuthHandler"(CServerAuthHandler):
+        PyServerAuthHandler(object handler, PyServerAuthHandlerVtable vtable)
+
+    cdef cppclass PyClientAuthHandler\
+            " arrow::py::flight::PyClientAuthHandler"(CClientAuthHandler):
+        PyClientAuthHandler(object handler, PyClientAuthHandlerVtable vtable)
+
+    cdef cppclass CPyFlightResultStream\
+            " arrow::py::flight::PyFlightResultStream"(CResultStream):
+        CPyFlightResultStream(object generator,
+                              function[cb_result_next] callback)
+
+    cdef cppclass CPyFlightDataStream\
+            " arrow::py::flight::PyFlightDataStream"(CFlightDataStream):
+        CPyFlightDataStream(object data_source,
+                            unique_ptr[CFlightDataStream] stream)
+
+    cdef cppclass CPyGeneratorFlightDataStream\
+            " arrow::py::flight::PyGeneratorFlightDataStream"\
+            (CFlightDataStream):
+        CPyGeneratorFlightDataStream(object generator,
+                                     shared_ptr[CSchema] schema,
+                                     function[cb_data_stream_next] callback,
+                                     const CIpcWriteOptions& options)
+
+    cdef cppclass PyServerMiddlewareVtable\
+            " arrow::py::flight::PyServerMiddleware::Vtable":  # nested C++ type bound under a flat Cython name
+        PyServerMiddlewareVtable()
+        function[cb_middleware_sending_headers] sending_headers
+        function[cb_middleware_call_completed] call_completed
+
+    cdef cppclass PyClientMiddlewareVtable\
+            " arrow::py::flight::PyClientMiddleware::Vtable":  # nested C++ type bound under a flat Cython name
+        PyClientMiddlewareVtable()
+        function[cb_middleware_sending_headers] sending_headers
+        function[cb_client_middleware_received_headers] received_headers
+        function[cb_middleware_call_completed] call_completed
+
+    cdef cppclass CPyServerMiddleware\
+            " arrow::py::flight::PyServerMiddleware"(CServerMiddleware):
+        CPyServerMiddleware(object middleware, PyServerMiddlewareVtable vtable)
+        void* py_object()
+
+    cdef cppclass CPyServerMiddlewareFactory\
+            " arrow::py::flight::PyServerMiddlewareFactory"\
+            (CServerMiddlewareFactory):
+        CPyServerMiddlewareFactory(
+            object factory,
+            function[cb_server_middleware_start_call] start_call)
+
+    cdef cppclass CPyClientMiddleware\
+            " arrow::py::flight::PyClientMiddleware"(CClientMiddleware):
+        CPyClientMiddleware(object middleware, PyClientMiddlewareVtable vtable)
+
+    cdef cppclass CPyClientMiddlewareFactory\
+            " arrow::py::flight::PyClientMiddlewareFactory"\
+            (CClientMiddlewareFactory):
+        CPyClientMiddlewareFactory(
+            object factory,
+            function[cb_client_middleware_start_call] start_call)
+
+    cdef CStatus CreateFlightInfo" arrow::py::flight::CreateFlightInfo"(
+        shared_ptr[CSchema] schema,
+        CFlightDescriptor& descriptor,
+        vector[CFlightEndpoint] endpoints,
+        int64_t total_records,
+        int64_t total_bytes,
+        unique_ptr[CFlightInfo]* out)  # result is written through `out`
+
+    cdef CStatus CreateSchemaResult" arrow::py::flight::CreateSchemaResult"(
+        shared_ptr[CSchema] schema,
+        unique_ptr[CSchemaResult]* out)  # result is written through `out`
+
+    cdef CStatus DeserializeBasicAuth\
+        " arrow::py::flight::DeserializeBasicAuth"(
+            c_string buf,
+            unique_ptr[CBasicAuth]* out)
+
+    cdef CStatus SerializeBasicAuth" arrow::py::flight::SerializeBasicAuth"(
+        CBasicAuth basic_auth,
+        c_string* out)
+
+
+cdef extern from "arrow/util/variant.h" namespace "arrow" nogil:
+    cdef cppclass CIntStringVariant" arrow::util::Variant<int, std::string>":  # one concrete template instantiation, constructible from int or c_string
+        CIntStringVariant()
+        CIntStringVariant(int)
+        CIntStringVariant(c_string)
diff --git a/contrib/python/pyarrow/pyarrow/includes/libgandiva.pxd b/contrib/python/pyarrow/pyarrow/includes/libgandiva.pxd
new file mode 100644
index 00000000000..c75977d37e8
--- /dev/null
+++ b/contrib/python/pyarrow/pyarrow/includes/libgandiva.pxd
@@ -0,0 +1,286 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from libcpp.string cimport string as c_string
+from libcpp.unordered_set cimport unordered_set as c_unordered_set
+from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+
+cdef extern from "gandiva/node.h" namespace "gandiva" nogil:  # expression-tree node and expression types
+
+    cdef cppclass CNode" gandiva::Node":
+        c_string ToString()
+        shared_ptr[CDataType] return_type()
+
+    cdef cppclass CExpression" gandiva::Expression":
+        c_string ToString()
+        shared_ptr[CNode] root()
+        shared_ptr[CField] result()
+
+    ctypedef vector[shared_ptr[CNode]] CNodeVector" gandiva::NodeVector"
+
+    ctypedef vector[shared_ptr[CExpression]] \
+        CExpressionVector" gandiva::ExpressionVector"
+
+cdef extern from "gandiva/selection_vector.h" namespace "gandiva" nogil:
+
+    cdef cppclass CSelectionVector" gandiva::SelectionVector":
+
+        shared_ptr[CArray] ToArray()
+
+    enum CSelectionVector_Mode" gandiva::SelectionVector::Mode":  # the four values below are the ones the inline helpers further down translate to and from strings
+        CSelectionVector_Mode_NONE" gandiva::SelectionVector::Mode::MODE_NONE"
+        CSelectionVector_Mode_UINT16" \
+ gandiva::SelectionVector::Mode::MODE_UINT16"
+        CSelectionVector_Mode_UINT32" \
+ gandiva::SelectionVector::Mode::MODE_UINT32"
+        CSelectionVector_Mode_UINT64" \
+ gandiva::SelectionVector::Mode::MODE_UINT64"
+
+    cdef CStatus SelectionVector_MakeInt16\
+        "gandiva::SelectionVector::MakeInt16"(
+            int64_t max_slots, CMemoryPool* pool,
+            shared_ptr[CSelectionVector]* selection_vector)  # class-qualified C++ function bound at module level; result is written through selection_vector
+
+    cdef CStatus SelectionVector_MakeInt32\
+        "gandiva::SelectionVector::MakeInt32"(
+            int64_t max_slots, CMemoryPool* pool,
+            shared_ptr[CSelectionVector]* selection_vector)
+
+    cdef CStatus SelectionVector_MakeInt64\
+        "gandiva::SelectionVector::MakeInt64"(
+            int64_t max_slots, CMemoryPool* pool,
+            shared_ptr[CSelectionVector]* selection_vector)
+
+cdef inline CSelectionVector_Mode _ensure_selection_mode(str name) except *:  # mode name -> CSelectionVector_Mode value
+    uppercase = name.upper()  # comparison below is therefore case-insensitive
+    if uppercase == 'NONE':
+        return CSelectionVector_Mode_NONE
+    elif uppercase == 'UINT16':
+        return CSelectionVector_Mode_UINT16
+    elif uppercase == 'UINT32':
+        return CSelectionVector_Mode_UINT32
+    elif uppercase == 'UINT64':
+        return CSelectionVector_Mode_UINT64
+    else:
+        raise ValueError('Invalid value for Selection Mode: {!r}'.format(name))  # any other name is rejected; the message shows the name as passed in
+
+cdef inline str _selection_mode_name(CSelectionVector_Mode ctype):  # inverse of _ensure_selection_mode: enum value -> upper-case name
+    if ctype == CSelectionVector_Mode_NONE:
+        return 'NONE'
+    elif ctype == CSelectionVector_Mode_UINT16:
+        return 'UINT16'
+    elif ctype == CSelectionVector_Mode_UINT32:
+        return 'UINT32'
+    elif ctype == CSelectionVector_Mode_UINT64:
+        return 'UINT64'
+    else:
+        raise RuntimeError('Unexpected CSelectionVector_Mode value')  # reached only for a value outside the four declared above
+
+cdef extern from "gandiva/condition.h" namespace "gandiva" nogil:
+
+    cdef cppclass CCondition" gandiva::Condition":  # exposes the same three accessors as CExpression
+        c_string ToString()
+        shared_ptr[CNode] root()
+        shared_ptr[CField] result()
+
+cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil:
+
+    ctypedef vector[shared_ptr[CArray]] CArrayVector" gandiva::ArrayVector"
+
+
+cdef extern from "gandiva/tree_expr_builder.h" namespace "gandiva" nogil:  # TreeExprBuilder's class-qualified functions, each bound as a module-level function
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeBoolLiteral \
+        "gandiva::TreeExprBuilder::MakeLiteral"(c_bool value)  # the eleven *Literal bindings through Double all map to the overloaded C++ MakeLiteral; one Cython name per argument type
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt8Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(uint8_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt16Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(uint16_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt32Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(uint32_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt64Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(uint64_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt8Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(int8_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt16Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(int16_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt32Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(int32_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt64Literal \
+        "gandiva::TreeExprBuilder::MakeLiteral"(int64_t value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeFloatLiteral \
+        "gandiva::TreeExprBuilder::MakeLiteral"(float value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeDoubleLiteral \
+        "gandiva::TreeExprBuilder::MakeLiteral"(double value)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeStringLiteral \
+        "gandiva::TreeExprBuilder::MakeStringLiteral"(const c_string& value)  # string and binary literals map to their own C++ functions, not MakeLiteral
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeBinaryLiteral \
+        "gandiva::TreeExprBuilder::MakeBinaryLiteral"(const c_string& value)
+
+    cdef shared_ptr[CExpression] TreeExprBuilder_MakeExpression\
+        "gandiva::TreeExprBuilder::MakeExpression"(
+            shared_ptr[CNode] root_node, shared_ptr[CField] result_field)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeFunction \
+        "gandiva::TreeExprBuilder::MakeFunction"(
+            const c_string& name, const CNodeVector& children,
+            shared_ptr[CDataType] return_type)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeField \
+        "gandiva::TreeExprBuilder::MakeField"(shared_ptr[CField] field)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeIf \
+        "gandiva::TreeExprBuilder::MakeIf"(
+            shared_ptr[CNode] condition, shared_ptr[CNode] this_node,
+            shared_ptr[CNode] else_node, shared_ptr[CDataType] return_type)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeAnd \
+        "gandiva::TreeExprBuilder::MakeAnd"(const CNodeVector& children)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeOr \
+        "gandiva::TreeExprBuilder::MakeOr"(const CNodeVector& children)
+
+    cdef shared_ptr[CCondition] TreeExprBuilder_MakeCondition \
+        "gandiva::TreeExprBuilder::MakeCondition"(
+            shared_ptr[CNode] condition)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt32 \
+        "gandiva::TreeExprBuilder::MakeInExpressionInt32"(
+            shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)  # the MakeInExpression* family takes a node plus an unordered_set of int32_t, int64_t or c_string values
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt64 \
+        "gandiva::TreeExprBuilder::MakeInExpressionInt64"(
+            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime32 \
+        "gandiva::TreeExprBuilder::MakeInExpressionTime32"(
+            shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime64 \
+        "gandiva::TreeExprBuilder::MakeInExpressionTime64"(
+            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate32 \
+        "gandiva::TreeExprBuilder::MakeInExpressionDate32"(
+            shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate64 \
+        "gandiva::TreeExprBuilder::MakeInExpressionDate64"(
+            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTimeStamp \
+        "gandiva::TreeExprBuilder::MakeInExpressionTimeStamp"(
+            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionString \
+        "gandiva::TreeExprBuilder::MakeInExpressionString"(
+            shared_ptr[CNode] node, const c_unordered_set[c_string]& values)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionBinary \
+        "gandiva::TreeExprBuilder::MakeInExpressionBinary"(
+            shared_ptr[CNode] node, const c_unordered_set[c_string]& values)
+
+cdef extern from "gandiva/projector.h" namespace "gandiva" nogil:
+
+    cdef cppclass CProjector" gandiva::Projector":
+
+        CStatus Evaluate(
+            const CRecordBatch& batch, CMemoryPool* pool,
+            const CArrayVector* output)  # overload without a selection vector
+
+        CStatus Evaluate(
+            const CRecordBatch& batch,
+            const CSelectionVector* selection,
+            CMemoryPool* pool,
+            const CArrayVector* output)  # overload taking a selection vector
+
+        c_string DumpIR()
+
+    cdef CStatus Projector_Make \
+        "gandiva::Projector::Make"(
+            shared_ptr[CSchema] schema, const CExpressionVector& children,
+            shared_ptr[CProjector]* projector)  # three-argument overload; result is written through `projector`
+
+    cdef CStatus Projector_Make \
+        "gandiva::Projector::Make"(
+            shared_ptr[CSchema] schema, const CExpressionVector& children,
+            CSelectionVector_Mode mode,
+            shared_ptr[CConfiguration] configuration,
+            shared_ptr[CProjector]* projector)  # overload that also takes a selection mode and a configuration
+
+cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:
+
+    cdef cppclass CFilter" gandiva::Filter":
+
+        CStatus Evaluate(
+            const CRecordBatch& batch,
+            shared_ptr[CSelectionVector] out_selection)
+
+        c_string DumpIR()
+
+    cdef CStatus Filter_Make \
+        "gandiva::Filter::Make"(
+            shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
+            shared_ptr[CFilter]* filter)  # result is written through `filter`
+
+cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:
+
+    cdef cppclass CFunctionSignature" gandiva::FunctionSignature":
+
+        CFunctionSignature(const c_string& base_name,
+                           vector[shared_ptr[CDataType]] param_types,
+                           shared_ptr[CDataType] ret_type)
+
+        shared_ptr[CDataType] ret_type() const
+
+        const c_string& base_name() const
+
+        vector[shared_ptr[CDataType]] param_types() const
+
+        c_string ToString() const
+
+cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil:
+
+    cdef vector[shared_ptr[CFunctionSignature]] \
+        GetRegisteredFunctionSignatures()  # no explicit C++ name given, so it resolves inside the block's "gandiva" namespace
+
+cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil:
+
+    cdef cppclass CConfiguration" gandiva::Configuration":
+        pass  # opaque here; only passed around via shared_ptr
+
+    cdef cppclass CConfigurationBuilder \
+            " gandiva::ConfigurationBuilder":
+        @staticmethod
+        shared_ptr[CConfiguration] DefaultConfiguration()
diff --git a/contrib/python/pyarrow/pyarrow/includes/libplasma.pxd b/contrib/python/pyarrow/pyarrow/includes/libplasma.pxd
new file mode 100644
index 00000000000..d54e9f484ba
--- /dev/null
+++ b/contrib/python/pyarrow/pyarrow/includes/libplasma.pxd
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "plasma/common.h" namespace "plasma" nogil:  # three predicates that each take a CStatus and return c_bool
+    cdef c_bool IsPlasmaObjectExists(const CStatus& status)
+    cdef c_bool IsPlasmaObjectNotFound(const CStatus& status)
+    cdef c_bool IsPlasmaStoreFull(const CStatus& status) |
