aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/apache/arrow
diff options
context:
space:
mode:
authorhor911 <hor911@ydb.tech>2023-03-06 13:08:19 +0300
committerhor911 <hor911@ydb.tech>2023-03-06 13:08:19 +0300
commitff7e0c38bdda0b99c4493bb809aaf5f9e6ac3511 (patch)
tree1657d528c97ff4e8ebcfa1475e6783a223225b7f /contrib/libs/apache/arrow
parent5e173e098fd5ea097c89978a38210a2d446a8ab4 (diff)
downloadydb-ff7e0c38bdda0b99c4493bb809aaf5f9e6ac3511.tar.gz
[C++][Parquet] Split arrow::FileReader::ReadRowGroups() for flexible async IO #34461
https://github.com/apache/arrow/pull/34461
Diffstat (limited to 'contrib/libs/apache/arrow')
-rw-r--r--contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc30
-rw-r--r--contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h7
2 files changed, 34 insertions, 3 deletions
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc
index 4f5f79c964..f4cca2bb77 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.cc
@@ -289,6 +289,13 @@ class FileReaderImpl : public FileReader {
return ReadTable(Iota(reader_->metadata()->num_columns()), table);
}
+ Status WillNeedRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices) override;
+
+ Status DecodeRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) override;
+
Status ReadRowGroups(const std::vector<int>& row_groups,
const std::vector<int>& indices,
std::shared_ptr<Table>* table) override;
@@ -1085,9 +1092,8 @@ Status FileReaderImpl::GetColumn(int i, FileColumnIteratorFactory iterator_facto
return Status::OK();
}
-Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
- const std::vector<int>& column_indices,
- std::shared_ptr<Table>* out) {
+Status FileReaderImpl::WillNeedRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices) {
RETURN_NOT_OK(BoundsCheck(row_groups, column_indices));
// PARQUET-1698/PARQUET-1820: pre-buffer row groups/column chunks if enabled
@@ -1098,6 +1104,24 @@ Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
reader_properties_.cache_options());
END_PARQUET_CATCH_EXCEPTIONS
}
+ return Status::OK();
+}
+
+Status FileReaderImpl::DecodeRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) {
+ RETURN_NOT_OK(BoundsCheck(row_groups, column_indices));
+
+ auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices,
+ /*cpu_executor=*/nullptr);
+ ARROW_ASSIGN_OR_RAISE(*out, fut.MoveResult());
+ return Status::OK();
+}
+
+Status FileReaderImpl::ReadRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<Table>* out) {
+ RETURN_NOT_OK(WillNeedRowGroups(row_groups, column_indices));
auto fut = DecodeRowGroups(/*self=*/nullptr, row_groups, column_indices,
/*cpu_executor=*/nullptr);
diff --git a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h
index 2d6a5ef2c3..97a1e9c30f 100644
--- a/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h
+++ b/contrib/libs/apache/arrow/cpp/src/parquet/arrow/reader.h
@@ -212,6 +212,13 @@ class PARQUET_EXPORT FileReader {
virtual ::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out) = 0;
+ virtual ::arrow::Status WillNeedRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices) = 0;
+
+ virtual ::arrow::Status DecodeRowGroups(const std::vector<int>& row_groups,
+ const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out) = 0;
+
virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups,
const std::vector<int>& column_indices,
std::shared_ptr<::arrow::Table>* out) = 0;