diff options
author | hor911 <hor911@ydb.tech> | 2023-03-06 13:08:19 +0300 |
---|---|---|
committer | hor911 <hor911@ydb.tech> | 2023-03-06 13:08:19 +0300 |
commit | ff7e0c38bdda0b99c4493bb809aaf5f9e6ac3511 (patch) | |
tree | 1657d528c97ff4e8ebcfa1475e6783a223225b7f /contrib/libs/apache/arrow | |
parent | 5e173e098fd5ea097c89978a38210a2d446a8ab4 (diff) | |
download | ydb-ff7e0c38bdda0b99c4493bb809aaf5f9e6ac3511.tar.gz |
[C++][Parquet] Split arrow::FileReader::ReadRowGroups() for flexible async IO #34461
https://github.com/apache/arrow/pull/34461
Diffstat (limited to 'contrib/libs/apache/arrow')
-rw-r--r-- | contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc | 30 | ||||
-rw-r--r-- | contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h | 7 |
2 files changed, 34 insertions, 3 deletions
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc index 4f5f79c964..f4cca2bb77 100644 --- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc +++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc @@ -289,6 +289,13 @@ class FileReaderImpl : public FileReader { return ReadTable(Iota(reader_->metadata()->num_columns()), table); } + Status WillNeedRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices) override; + + Status DecodeRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices, + std::shared_ptr<::arrow::Table>* out) override; + Status ReadRowGroups(const std::vector<int>& row_groups, const std::vector<int>& indices, std::shared_ptr<Table>* table) override; @@ -1085,9 +1092,8 @@ Status FileReaderImpl::GetColumn(int i, FileColumnIteratorFactory iterator_facto return Status::OK(); } -Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups, - const std::vector<int>& column_indices, - std::shared_ptr<Table>* out) { +Status FileReaderImpl::WillNeedRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices) { RETURN_NOT_OK(BoundsCheck(row_groups, column_indices)); // PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled @@ -1098,6 +1104,24 @@ Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups, reader_properties_.cache_options()); END_PARQUET_CATCH_EXCEPTIONS } + return Status::OK(); +} + +Status FileReaderImpl::DecodeRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices, + std::shared_ptr<::arrow::Table>* out) { + RETURN_NOT_OK(BoundsCheck(row_groups, column_indices)); + + auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices, + /*cpu_executor=*/nullptr); + ARROW_ASSIGN_OR_RAISE(*out, fut.MoveResult()); + return Status::OK(); +} + +Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices, + std::shared_ptr<Table>* out) { + RETURN_NOT_OK(WillNeedRowGroups(row_groups, column_indices)); auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices, /*cpu_executor=*/nullptr); diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h index 2d6a5ef2c3..97a1e9c30f 100644 --- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h +++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h @@ -212,6 +212,13 @@ class PARQUET_EXPORT FileReader { virtual ::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out) = 0; + virtual ::arrow::Status WillNeedRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices) = 0; + + virtual ::arrow::Status DecodeRowGroups(const std::vector<int>& row_groups, + const std::vector<int>& column_indices, + std::shared_ptr<::arrow::Table>* out) = 0; + virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups, const std::vector<int>& column_indices, std::shared_ptr<::arrow::Table>* out) = 0; |